import pandas as pd
#-----------------------------------------------
# load data
#-----------------------------------------------
df = pd.read_csv("example-010.csv")
#-----------------------------------------------
# print the dataset
#-----------------------------------------------
print(); print(df.columns)
print(); print(df.shape)
print(); print(df.head(5))
print(); print(df.tail(5))
print("\n\n\n Print the entire dataset")
print(); print(df)
#-----------------------------------------------
# count by Colour
#-----------------------------------------------
df1 = df.groupby([df['Colour']]).agg({'count'})
print(df1['Order'])
seriesObj = []
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE' and 'RED')
# --------------------------------------------------------------
Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'RED'])) else False,
axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)
seriesObj.append([numOfRows1])
print(seriesObj)
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE' and 'GREEN')
# --------------------------------------------------------------
Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'GREEN'])) else False,
axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)
seriesObj.append([numOfRows1])
print(seriesObj)
# --------------------------------------------------------------
# Count number of rows where exclude ('BLUE', 'RED' and 'GREEN')
# --------------------------------------------------------------
Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['BLUE', 'RED', 'GREEN'])) else False,
axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)
seriesObj.append([numOfRows1])
print(seriesObj)
# --------------------------------------------------------------
# Count number of rows where exclude ('YELLOW' and 'GREEN')
# --------------------------------------------------------------
Obj1 = df.apply(lambda x: True if (x['Colour'] not in (['YELLOW', 'GREEN'])) else False,
axis=1)
# Count number of True in series
numOfRows1 = len(Obj1[Obj1 == True].index)
seriesObj.append([numOfRows1])
print(seriesObj)
df2 = pd.DataFrame(seriesObj,
columns = ['Item Count'],
index=['Excluding BLUE and RED',
'Excluding BLUE and GREEN',
'Excluding BLUE, RED and GREEN',
'Excluding GREEN and YELLOW'
])
print(df2)
# ---------------------------------------------------------
# Example - 010 in R
# ---------------------------------------------------------
# load library
library(dplyr)
# set working directory where CSV is located
getwd()
setwd("EE-010")
getwd()
# ------------------------------------------------
# Load data in a dataframe
# ------------------------------------------------
df <- read.csv("example-010.csv")
# print column names
colnames(df)
# Print top 10 rows in the dataSet
head(df, 5)
# Print last 10 rows in the dataSet
tail(df, 5)
# Dimention of Dataset
dim(df)
# Check Data types of each column
table(unlist(lapply(df, class)))
# -------------------------------------------
# count using group by
# -------------------------------------------
df1 <- df %>%
group_by(df$Colour) %>%
summarise(count=n())
print(df1)
# -------------------------------------------------------------
# count how many rows for Colour excluding 'BLUE' and 'RED'
# -------------------------------------------------------------
seq <- c(1: length(df1$`df$Colour`))
i <- 1
count <- 0
for(i in seq){
if (df1$`df$Colour`[i] == 'BLUE' || df1$`df$Colour`[i] == 'RED'){
count = count + 0
} else {
count = count + df1$count[i]
}
}
Y1 <- c('not match to BLUE or RED', count)
# -------------------------------------------------------------
# count how many rows for Colour excluding 'BLUE' and 'GREEN'
# -------------------------------------------------------------
seq <- c(1: length(df1$`df$Colour`))
i <- 1
count <- 0
for(i in seq){
if (df1$`df$Colour`[i] == 'BLUE' || df1$`df$Colour`[i] == 'GREEN'){
count = count + 0
} else {
count = count + df1$count[i]
}
}
Y2 <- c('not match to BLUE or GREEN', count)
# -------------------------------------------------------------
# count how many rows for Colour excluding 'BLUE', 'YELLOW' and 'RED'
# -------------------------------------------------------------
seq <- c(1: length(df1$`df$Colour`))
i <- 1
count <- 0
for(i in seq){
if (df1$`df$Colour`[i] == 'BLUE' || df1$`df$Colour`[i] == 'RED' ||
df1$`df$Colour`[i] == 'YELLOW'){
count = count + 0
} else {
count = count + df1$count[i]
}
}
Y3 <- c('not match to BLUE, YELLOW or RED', count)
# -------------------------------------------------------------
# count how many rows for Colour excluding 'YELLOW' and 'GREEN'
# -------------------------------------------------------------
seq <- c(1: length(df1$`df$Colour`))
i <- 1
count <- 0
for(i in seq){
if (df1$`df$Colour`[i] == 'YELLOW' || df1$`df$Colour`[i] == 'GREEN'){
count = count + 0
} else {
count = count + df1$count[i]
}
}
Y4 <- c('not match to YELLOW or GREEN', count)
# -----------------------------
# print Results
# -----------------------------
Y0 <- c('Not match to T-Shirt Colour', 'Count')
Y <- rbind(Y0, Y1, Y2, Y3, Y4)
print(Y)