# -----------------------------------------------------------
# How to compute CLARA (Clustering Large Applications) in R
# -----------------------------------------------------------
# load library
library(cluster)
library(factoextra)
# Generate simulated data
# Fix the RNG seed so the simulated data are the same on every run.
set.seed(1234)
# Generate 900 objects in two dimensions, divided into 3 clusters of
# sizes 200, 300 and 400, centred at (0, 0), (50, 50) and (100, 100),
# each coordinate drawn with standard deviation 8.
df <- rbind(
  cbind(rnorm(200, 0, 8), rnorm(200, 0, 8)),
  cbind(rnorm(300, 50, 8), rnorm(300, 50, 8)),
  cbind(rnorm(400, 100, 8), rnorm(400, 100, 8))
)
# Specify column and row names
colnames(df) <- c("x", "y")
rownames(df) <- paste0("S", seq_len(nrow(df)))
# Preview the first 6 rows (the argument of head() is `n`, not `nrow`)
head(df, n = 6)
# Estimate the optimal number of clusters: plot the average silhouette
# width obtained by clara() over the candidate numbers of clusters.
fviz_nbclust(x = df, FUNcluster = clara, method = "silhouette") +
  theme_classic()
# Compute CLARA: partition the data into 3 clusters, drawing 50 sub-samples.
# pamLike = TRUE makes the swap phase use the same algorithm as pam().
clara.res <- clara(df, k = 3, samples = 50, pamLike = TRUE)
# Print components of clara.res
print(clara.res)
# Add the clustering result to the data. The component holding the cluster
# assignment is named `clustering`; spell it out in full instead of relying
# on partial matching of `$` (`clara.res$cluster`).
dd <- cbind(df, cluster = clara.res$clustering)
head(dd, n = 4)
# Plot the CLARA partition: observations are drawn as small points, one
# colour per cluster, with a concentration ellipse around each cluster.
fviz_cluster(
  clara.res,
  geom = "point",
  pointsize = 1,
  ellipse.type = "t", # concentration ellipse
  palette = c("#00AFBB", "#FC4E07", "#E7B800"), # one colour per cluster
  ggtheme = theme_classic()
)