Example 010 - Excel to Python

How to count number of cells that are not equal to many classes?

Formula --> SUMPRODUCT(--(ISNA(MATCH(E5:E21,G5:G6,0))))

In [1]:
import pandas as pd

#-----------------------------------------------
# load data
#-----------------------------------------------
df = pd.read_csv("example-010.csv")
In [7]:
#-----------------------------------------------
# print the dataset
#-----------------------------------------------
print(); print(df.columns)
print(); print(df.shape)
print(); print(df.head(5))
print(); print(df.tail(5))

print("\n\n\n Print the entire dataset") 
print(); print(df)
Index(['Order', 'Item', 'Colour'], dtype='object')

(17, 3)

   Order     Item Colour
0  11011  T-Shirt   BLUE
1  11012  T-Shirt    RED
2  11013  T-Shirt  GREEN
3  11014  T-Shirt    RED
4  11015  T-Shirt  GREEN

    Order     Item  Colour
12  11023  T-Shirt  YELLOW
13  11024  T-Shirt    BLUE
14  11025  T-Shirt     RED
15  11026  T-Shirt    BLUE
16  11027  T-Shirt     RED



 Print the entire dataset

    Order     Item  Colour
0   11011  T-Shirt    BLUE
1   11012  T-Shirt     RED
2   11013  T-Shirt   GREEN
3   11014  T-Shirt     RED
4   11015  T-Shirt   GREEN
5   11016  T-Shirt   GREEN
6   11017  T-Shirt   GREEN
7   11018  T-Shirt   GREEN
8   11019  T-Shirt   GREEN
9   11020  T-Shirt  YELLOW
10  11021  T-Shirt  YELLOW
11  11022  T-Shirt  YELLOW
12  11023  T-Shirt  YELLOW
13  11024  T-Shirt    BLUE
14  11025  T-Shirt     RED
15  11026  T-Shirt    BLUE
16  11027  T-Shirt     RED
In [8]:
#-----------------------------------------------
# count by Colour
#-----------------------------------------------
df1 = df.groupby([df['Colour']]).agg({'count'})
print(df1['Order'])

seriesObj = []
        count
Colour       
BLUE        3
GREEN       6
RED         4
YELLOW      4
In [9]:
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE' and 'RED')
# --------------------------------------------------------------

Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'RED'])) else False, 
                axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)

seriesObj.append([numOfRows1])
print(seriesObj)
[[10]]
In [10]:
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE' and 'GREEN')
# --------------------------------------------------------------

Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'GREEN'])) else False,
                axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)

seriesObj.append([numOfRows1])
print(seriesObj)
[[10], [8]]
In [11]:
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE', 'RED' and 'GREEN')
# --------------------------------------------------------------

Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'RED', 'GREEN'])) else False,
                axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)

seriesObj.append([numOfRows1])
print(seriesObj)
[[10], [8], [4]]
In [12]:
# --------------------------------------------------------------
# Count number of rows where exclude ('YELLOW' and 'GREEN')
# --------------------------------------------------------------

Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['YELLOW', 'GREEN'])) else False,
                axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)

seriesObj.append([numOfRows1])
print(seriesObj)
[[10], [8], [4], [7]]
In [13]:
df2 = pd.DataFrame(seriesObj, 
                   columns = ['Item Count'],
                   index=['Excluding BLUE and RED', 
                          'Excluding BLUE and GREEN', 
                          'Excluding BLUE, RED and GREEN', 
                          'Excluding GREEN and YELLOW'
                          ])
print(df2)
                               Item Count
Excluding BLUE and RED                 10
Excluding BLUE and GREEN                8
Excluding BLUE, RED and GREEN           4
Excluding GREEN and YELLOW              7

Equivalent codes in R Programming language

In [ ]:
# ---------------------------------------------------------
# Example - 010 in R
# ---------------------------------------------------------

# load library
library(dplyr)

# set working directory where CSV is located
getwd()
setwd("EE-010")
getwd()

# ------------------------------------------------
# Load data in a dataframe
# ------------------------------------------------

df <- read.csv("example-010.csv") 

# print column names
colnames(df)
# Print top 10 rows in the dataSet
head(df, 5)
# Print last 10 rows in the dataSet
tail(df, 5)
# Dimention of Dataset
dim(df)
# Check Data types of each column
table(unlist(lapply(df, class)))

# -------------------------------------------
# count using group by
# -------------------------------------------
df1 <- df %>%
  group_by(df$Colour) %>%
  summarise(count=n())

print(df1)


# -------------------------------------------------------------
# count how many rows for Colour excluding 'BLUE' and 'RED'
# -------------------------------------------------------------

seq <- c(1: length(df1$`df$Colour`))
i <- 1
count <- 0

for(i in seq){
  if (df1$`df$Colour`[i] == 'BLUE' || df1$`df$Colour`[i] == 'RED'){
    count = count + 0
  } else {
          count = count + df1$count[i]
      }
}

Y1 <- c('not match to BLUE or RED', count)

# -------------------------------------------------------------
# count how many rows for Colour excluding 'BLUE' and 'GREEN'
# -------------------------------------------------------------

seq <- c(1: length(df1$`df$Colour`))
i <- 1
count <- 0

for(i in seq){
  if (df1$`df$Colour`[i] == 'BLUE' || df1$`df$Colour`[i] == 'GREEN'){
    count = count + 0
  } else {
    count = count + df1$count[i]
  }
}

Y2 <- c('not match to BLUE or GREEN', count)

# -------------------------------------------------------------
# count how many rows for Colour excluding 'BLUE', 'YELLOW' and 'RED'
# -------------------------------------------------------------

seq <- c(1: length(df1$`df$Colour`))
i <- 1
count <- 0

for(i in seq){
  if (df1$`df$Colour`[i] == 'BLUE' || df1$`df$Colour`[i] == 'RED' || 
      df1$`df$Colour`[i] == 'YELLOW'){
    count = count + 0
  } else {
    count = count + df1$count[i]
  }
}

Y3 <- c('not match to BLUE, YELLOW or RED', count)

# -------------------------------------------------------------
# count how many rows for Colour excluding 'YELLOW' and 'GREEN'
# -------------------------------------------------------------

seq <- c(1: length(df1$`df$Colour`))
i <- 1
count <- 0

for(i in seq){
  if (df1$`df$Colour`[i] == 'YELLOW' || df1$`df$Colour`[i] == 'GREEN'){
    count = count + 0
  } else {
    count = count + df1$count[i]
  }
}

Y4 <- c('not match to YELLOW or GREEN', count)

# -----------------------------
# print Results
# -----------------------------

Y0 <- c('Not match to T-Shirt Colour', 'Count')

Y <- rbind(Y0, Y1, Y2, Y3, Y4)
print(Y)