# Example 010 - Excel to Python¶

### How to count number of cells that are not equal to many classes?¶

#### Formula --> SUMPRODUCT(--(ISNA(MATCH(E5:E21,G5:G6,0))))¶

In [1]:
import pandas as pd

#-----------------------------------------------
#-----------------------------------------------
In [7]:
#-----------------------------------------------
# print the dataset
#-----------------------------------------------
print(); print(df.columns)
print(); print(df.shape)
print(); print(df.tail(5))

print("\n\n\n Print the entire dataset")
print(); print(df)
Index(['Order', 'Item', 'Colour'], dtype='object')

(17, 3)

Order     Item Colour
0  11011  T-Shirt   BLUE
1  11012  T-Shirt    RED
2  11013  T-Shirt  GREEN
3  11014  T-Shirt    RED
4  11015  T-Shirt  GREEN

Order     Item  Colour
12  11023  T-Shirt  YELLOW
13  11024  T-Shirt    BLUE
14  11025  T-Shirt     RED
15  11026  T-Shirt    BLUE
16  11027  T-Shirt     RED

Print the entire dataset

Order     Item  Colour
0   11011  T-Shirt    BLUE
1   11012  T-Shirt     RED
2   11013  T-Shirt   GREEN
3   11014  T-Shirt     RED
4   11015  T-Shirt   GREEN
5   11016  T-Shirt   GREEN
6   11017  T-Shirt   GREEN
7   11018  T-Shirt   GREEN
8   11019  T-Shirt   GREEN
9   11020  T-Shirt  YELLOW
10  11021  T-Shirt  YELLOW
11  11022  T-Shirt  YELLOW
12  11023  T-Shirt  YELLOW
13  11024  T-Shirt    BLUE
14  11025  T-Shirt     RED
15  11026  T-Shirt    BLUE
16  11027  T-Shirt     RED
In [8]:
#-----------------------------------------------
# count by Colour
#-----------------------------------------------
df1 = df.groupby([df['Colour']]).agg({'count'})
print(df1['Order'])

seriesObj = []
count
Colour
BLUE        3
GREEN       6
RED         4
YELLOW      4
In [9]:
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE' and 'RED')
# --------------------------------------------------------------

Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'RED'])) else False,
axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)

seriesObj.append([numOfRows1])
print(seriesObj)
[[10]]
In [10]:
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE' and 'GREEN')
# --------------------------------------------------------------

Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'GREEN'])) else False,
axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)

seriesObj.append([numOfRows1])
print(seriesObj)
[[10], [8]]
In [11]:
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE', 'RED' and 'GREEN')
# --------------------------------------------------------------

Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'RED', 'GREEN'])) else False,
axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)

seriesObj.append([numOfRows1])
print(seriesObj)
[[10], [8], [4]]
In [12]:
# --------------------------------------------------------------
# Count number of rows where exclude ('YELLOW' and 'GREEN')
# --------------------------------------------------------------

Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['YELLOW', 'GREEN'])) else False,
axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)

seriesObj.append([numOfRows1])
print(seriesObj)
[[10], [8], [4], [7]]
In [13]:
df2 = pd.DataFrame(seriesObj,
columns = ['Item Count'],
index=['Excluding BLUE and RED',
'Excluding BLUE and GREEN',
'Excluding BLUE, RED and GREEN',
'Excluding GREEN and YELLOW'
])
print(df2)
Item Count
Excluding BLUE and RED                 10
Excluding BLUE and GREEN                8
Excluding BLUE, RED and GREEN           4
Excluding GREEN and YELLOW              7

# Equivalent codes in R Programming language¶

In [ ]:
# ---------------------------------------------------------
# Example - 010 in R
# ---------------------------------------------------------

library(dplyr)

# set working directory where CSV is located
getwd()
setwd("EE-010")
getwd()

# ------------------------------------------------
# Load data in a dataframe
# ------------------------------------------------

# print column names
colnames(df)
# Print top 10 rows in the dataSet
# Print last 10 rows in the dataSet
tail(df, 5)
# Dimention of Dataset
dim(df)
# Check Data types of each column
table(unlist(lapply(df, class)))

# -------------------------------------------
# count using group by
# -------------------------------------------
df1 <- df %>%
group_by(df$Colour) %>% summarise(count=n()) print(df1) # ------------------------------------------------------------- # count how many rows for Colour excluding 'BLUE' and 'RED' # ------------------------------------------------------------- seq <- c(1: length(df1$df$Colour)) i <- 1 count <- 0 for(i in seq){ if (df1$df$Colour[i] == 'BLUE' || df1$df$Colour[i] == 'RED'){ count = count + 0 } else { count = count + df1$count[i]
}
}

Y1 <- c('not match to BLUE or RED', count)

# -------------------------------------------------------------
# count how many rows for Colour excluding 'BLUE' and 'GREEN'
# -------------------------------------------------------------

seq <- c(1: length(df1$df$Colour))
i <- 1
count <- 0

for(i in seq){
if (df1$df$Colour[i] == 'BLUE' || df1$df$Colour[i] == 'GREEN'){
count = count + 0
} else {
count = count + df1$count[i] } } Y2 <- c('not match to BLUE or GREEN', count) # ------------------------------------------------------------- # count how many rows for Colour excluding 'BLUE', 'YELLOW' and 'RED' # ------------------------------------------------------------- seq <- c(1: length(df1$df$Colour)) i <- 1 count <- 0 for(i in seq){ if (df1$df$Colour[i] == 'BLUE' || df1$df$Colour[i] == 'RED' || df1$df$Colour[i] == 'YELLOW'){ count = count + 0 } else { count = count + df1$count[i]
}
}

Y3 <- c('not match to BLUE, YELLOW or RED', count)

# -------------------------------------------------------------
# count how many rows for Colour excluding 'YELLOW' and 'GREEN'
# -------------------------------------------------------------

seq <- c(1: length(df1$df$Colour))
i <- 1
count <- 0

for(i in seq){
if (df1$df$Colour[i] == 'YELLOW' || df1$df$Colour[i] == 'GREEN'){
count = count + 0
} else {
count = count + df1\$count[i]
}
}

Y4 <- c('not match to YELLOW or GREEN', count)

# -----------------------------
# print Results
# -----------------------------

Y0 <- c('Not match to T-Shirt Colour', 'Count')

Y <- rbind(Y0, Y1, Y2, Y3, Y4)
print(Y)