SlideShare a Scribd company logo
1 of 6
Prepared by Volkan OBAN
CLUSTERGRAM
SOURCE:
 https://gist.github.com/hadley/439761 (hadley/clustergram-had.r)
 http://www.r-statistics.com/tag/large-data/
CODES:
#' k-means clustering backend for the clustergram
#'
#' This is the kind of function that `clustergram()` takes as its
#' `clustering.function` argument. Writing other functions with the same
#' structure allows different clustering algorithms to be plugged in.
#'
#' @param Data Numeric data passed both to `kmeans()` and to `princomp()`.
#' @param k Passed to `kmeans()` as its second argument (the number of
#'   clusters).
#' @param ... Further arguments forwarded to `kmeans()`.
#' @return A list with two elements:
#'   * `cluster`: the `cluster` component of the `kmeans()` fit, i.e. one
#'     entry per subject/item indicating the cluster it belongs to.
#'   * `centers`: one number per cluster centre, obtained by weighting the
#'     centre's coordinates with the loadings of the first principal
#'     component of `Data`.
clustergram.kmeans <- function(Data, k, ...) {
  cl <- kmeans(Data, k, ...)

  # Loadings of the first principal component, used as weights to collapse
  # each multi-dimensional centre to a single number.
  first_loading <- princomp(Data)$loadings[, 1]

  list(
    cluster = cl$cluster,
    centers = cl$centers %*% first_loading # 1 number per centre
  )
}
>
#' Draw a clustergram with `matlines()`
#'
#' Sets up an empty plot, draws one vertical grey guide per value of
#' `k.range`, draws the lines given by the columns of `t(X)` / `t(Y)`, and
#' optionally overlays the cluster centres as red points.
#'
#' @param X,Y Matrices whose transposes are handed to `matlines()`.
#' @param k.range Positions of the x-axis ticks and of the grey guide lines.
#' @param x.range,y.range Used as `xlim` and `ylim` of the plot.
#' @param COL Line colours passed to `matlines()`.
#' @param add.center.points If `TRUE`, the centres in `centers.points` are
#'   drawn as red points.
#' @param centers.points List of data frames with columns `x` and `y`; `NULL`
#'   entries are skipped.
#' @return `NULL`, invisibly. Called for its plotting side effect.
clustergram.plot.matlines <- function(X, Y, k.range,
                                      x.range, y.range, COL,
                                      add.center.points, centers.points) {
  # Empty canvas: the single dummy point is drawn in white and axes are added
  # by hand so the x ticks fall exactly on the requested k values.
  plot(
    0, 0,
    col = "white", xlim = x.range, ylim = y.range,
    axes = FALSE,
    xlab = "Number of clusters (k)",
    ylab = "PCA weighted Mean of the clusters",
    main = c(
      "Clustergram of the PCA-weighted Mean of",
      "the clusters k-mean clusters vs number of clusters (k)"
    )
  )
  axis(side = 1, at = k.range)
  axis(side = 2)
  abline(v = k.range, col = "grey")

  matlines(t(X), t(Y), pch = 19, col = COL, lty = 1, lwd = 1.5)

  if (add.center.points) {
    # Stack the per-k data frames with base R instead of plyr::ldply(), so no
    # extra package is needed. NULL entries (list slots with no data frame)
    # are dropped first.
    present <- Filter(Negate(is.null), centers.points)
    if (length(present) > 0) {
      xx <- do.call(rbind, present)
      points(xx$x, xx$y, pch = 19, col = "red", cex = 1.3)
    }
  }

  invisible(NULL)
}
>
>
>
#' Plot a clustergram
#'
#' For every value in `k.range` the data are clustered, each observation is
#' placed at the (one-number) centre of its cluster plus a small cumulative
#' offset, and the resulting trajectories across k are handed to the plotting
#' function.
#'
#' @param Data Should be a scaled matrix in which each column is a different
#'   dimension of the observations. It is coerced with `as.matrix()`.
#' @param k.range Vector with the numbers of clusters to plot the clustergram
#'   for. Must not be empty.
#' @param clustering.function Function called as `f(Data, k)` that returns a
#'   list with elements `cluster` and `centers` (see `clustergram.kmeans`).
#' @param clustergram.plot Function that draws the result; called with
#'   `X, Y, k.range, x.range, y.range, COL, add.center.points,
#'   centers.points`.
#' @param line.width Amount by which each successive line within a cluster is
#'   lifted so the lines do not superimpose each other.
#' @param add.center.points Whether the cluster centres should be plotted as
#'   points; forwarded to `clustergram.plot`.
#' @return Whatever `clustergram.plot` returns.
clustergram <- function(Data, k.range = 2:10,
                        clustering.function = clustergram.kmeans,
                        clustergram.plot = clustergram.plot.matlines,
                        line.width = .004, add.center.points = TRUE) {
  stopifnot(length(k.range) > 0)

  # `%*%` below needs a matrix; this also lets callers pass a numeric
  # data frame.
  Data <- as.matrix(Data)
  n <- nrow(Data)

  # Score of every observation on the first principal component of the data.
  PCA.1 <- Data %*% princomp(Data)$loadings[, 1]

  # colorspace is optional: use it without attaching it, fall back to base
  # colours otherwise.
  # NOTE(review): the palette is indexed with order(PCA.1), not rank(PCA.1) --
  # confirm this yields the intended colour gradient.
  if (requireNamespace("colorspace", quietly = TRUE)) {
    COL <- colorspace::heat_hcl(n)[order(PCA.1)] # line colors
  } else {
    COL <- rainbow(n)[order(PCA.1)] # line colors
    warning('Please consider installing the package "colorspace" for prittier colors')
  }

  line.width <- rep(line.width, length.out = n)

  # One column per value of k, filled in place instead of growing with cbind().
  n_k <- length(k.range)
  X <- matrix(NA_real_, nrow = n, ncol = n_k)
  Y <- matrix(NA_real_, nrow = n, ncol = n_k)

  # Indexed by k itself, so slots for unused k values stay NULL.
  centers.points <- list()

  for (j in seq_along(k.range)) {
    k <- k.range[j]
    k.clusters <- clustering.function(Data, k)

    clusters.vec <- k.clusters$cluster
    the.centers <- k.clusters$centers

    # Running total of line.width within each cluster, returned in the
    # original observation order: the i-th member of a cluster is lifted by
    # i line widths.
    noise <- ave(line.width, clusters.vec, FUN = cumsum)

    Y[, j] <- the.centers[clusters.vec] + noise
    X[, j] <- k

    centers.points[[k]] <- data.frame(
      y = the.centers,
      x = rep(k, length(the.centers))
    )
  }

  x.range <- range(k.range)
  y.range <- range(PCA.1)

  clustergram.plot(
    X, Y, k.range,
    x.range, y.range, COL,
    add.center.points, centers.points
  )
}
>
>
>
>
# Examples. The block is wrapped in `if (FALSE)` so nothing here runs when the
# file is sourced; execute the lines by hand to reproduce the plots.
if (FALSE) {
  # Every plot below goes to its own numbered PNG file.
  png("d:clustergram_plots_%03d.png", 650, 650, pointsize = 15)

  # Example 1: a single clustergram of the iris measurements.
  data(iris)
  set.seed(250)
  par(cex.lab = 1.5, cex.main = 1.2)
  Data <- scale(iris[, -5]) # notice I am scaling the vectors
  # Notice the use of line.width. Play with it on your problem, according to
  # the scale of Y.
  clustergram(Data, k.range = 2:8, line.width = 0.004)

  # Example 2: six runs on the same data in a 3 x 2 grid.
  set.seed(500)
  Data <- scale(iris[, -5]) # notice I am scaling the vectors
  par(cex.lab = 1.2, cex.main = .7)
  par(mfrow = c(3, 2))
  for (i in 1:6) {
    clustergram(Data, k.range = 2:8, line.width = .004,
                add.center.points = TRUE)
  }
  par(mfrow = c(1, 1))

  # Example 3: three simulated groups of 100 points in 3 dimensions, with
  # means 0, 1 and 2.
  set.seed(250)
  Data <- rbind(
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3),
          rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 1, sd = 0.3), rnorm(100, 1, sd = 0.3),
          rnorm(100, 1, sd = 0.3)),
    cbind(rnorm(100, 2, sd = 0.3), rnorm(100, 2, sd = 0.3),
          rnorm(100, 2, sd = 0.3))
  )
  clustergram(Data, k.range = 2:5, line.width = .004,
              add.center.points = TRUE)

  # Example 4: four simulated groups of 100 points in 4 dimensions, each with
  # a different pattern of 0/1 means.
  set.seed(250)
  Data <- rbind(
    cbind(rnorm(100, 1, sd = 0.3), rnorm(100, 0, sd = 0.3),
          rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3),
          rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3),
          rnorm(100, 1, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3),
          rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3))
  )
  clustergram(Data, k.range = 2:8, line.width = .004,
              add.center.points = TRUE)

  dev.off()
}
# Usage: fetch the clustergram functions from GitHub and draw one clustergram
# of the iris data.
# NOTE(review): both calls download and execute remote code, the first over
# plain HTTP -- review the downloaded scripts before running this.
source("http://www.r-statistics.com/wp-content/uploads/2012/01/source_https.r.txt") # Making sure we can source code from github
source_https("https://raw.github.com/talgalili/R-code-snippets/master/clustergram.r")

data(iris)
set.seed(250)
par(cex.lab = 1.5, cex.main = 1.2)
Data <- scale(iris[, -5]) # notice I am scaling the vectors
clustergram(Data, k.range = 2:8, line.width = 0.004)
# Usage: fetch the clustergram functions from GitHub and draw six clustergrams
# of the iris data in a 3 x 2 grid.
# NOTE(review): both calls download and execute remote code, the first over
# plain HTTP -- review the downloaded scripts before running this.
source("http://www.r-statistics.com/wp-content/uploads/2012/01/source_https.r.txt") # Making sure we can source code from github
source_https("https://raw.github.com/talgalili/R-code-snippets/master/clustergram.r")

set.seed(500)
Data <- scale(iris[, -5]) # notice I am scaling the vectors
par(cex.lab = 1.2, cex.main = .7)
par(mfrow = c(3, 2))
for (i in 1:6) {
  clustergram(Data, k.range = 2:8, line.width = .004,
              add.center.points = TRUE)
}
# Back to a single-panel layout so later plots are not squeezed into the grid.
par(mfrow = c(1, 1))
CLUSTERGRAM

More Related Content

What's hot

R-ggplot2 package Examples
R-ggplot2 package ExamplesR-ggplot2 package Examples
R-ggplot2 package ExamplesDr. Volkan OBAN
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting SpatialFAO
 
Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Dr. Volkan OBAN
 
ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.Dr. Volkan OBAN
 
Programs in array using SWIFT
Programs in array using SWIFTPrograms in array using SWIFT
Programs in array using SWIFTvikram mahendra
 
Pandas pythonfordatascience
Pandas pythonfordatasciencePandas pythonfordatascience
Pandas pythonfordatascienceNishant Upadhyay
 
Statistics - ArgMax Equation
Statistics - ArgMax EquationStatistics - ArgMax Equation
Statistics - ArgMax EquationAndrew Ferlitsch
 
K10692 control theory
K10692 control theoryK10692 control theory
K10692 control theorysaagar264
 
Pandas Cheat Sheet
Pandas Cheat SheetPandas Cheat Sheet
Pandas Cheat SheetACASH1011
 
Day 4b iteration and functions for-loops.pptx
Day 4b   iteration and functions  for-loops.pptxDay 4b   iteration and functions  for-loops.pptx
Day 4b iteration and functions for-loops.pptxAdrien Melquiond
 
Day 4a iteration and functions.pptx
Day 4a   iteration and functions.pptxDay 4a   iteration and functions.pptx
Day 4a iteration and functions.pptxAdrien Melquiond
 
The Ring programming language version 1.10 book - Part 33 of 212
The Ring programming language version 1.10 book - Part 33 of 212The Ring programming language version 1.10 book - Part 33 of 212
The Ring programming language version 1.10 book - Part 33 of 212Mahmoud Samir Fayed
 
Data visualization using the grammar of graphics
Data visualization using the grammar of graphicsData visualization using the grammar of graphics
Data visualization using the grammar of graphicsRupak Roy
 
Python matplotlib cheat_sheet
Python matplotlib cheat_sheetPython matplotlib cheat_sheet
Python matplotlib cheat_sheetNishant Upadhyay
 

What's hot (20)

R-ggplot2 package Examples
R-ggplot2 package ExamplesR-ggplot2 package Examples
R-ggplot2 package Examples
 
NumPy Refresher
NumPy RefresherNumPy Refresher
NumPy Refresher
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting Spatial
 
Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.
 
ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.
 
Numpy python cheat_sheet
Numpy python cheat_sheetNumpy python cheat_sheet
Numpy python cheat_sheet
 
CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017
CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017
CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017
 
Programs in array using SWIFT
Programs in array using SWIFTPrograms in array using SWIFT
Programs in array using SWIFT
 
Pandas pythonfordatascience
Pandas pythonfordatasciencePandas pythonfordatascience
Pandas pythonfordatascience
 
Statistics - ArgMax Equation
Statistics - ArgMax EquationStatistics - ArgMax Equation
Statistics - ArgMax Equation
 
Scala.io
Scala.ioScala.io
Scala.io
 
K10692 control theory
K10692 control theoryK10692 control theory
K10692 control theory
 
Pandas Cheat Sheet
Pandas Cheat SheetPandas Cheat Sheet
Pandas Cheat Sheet
 
R
RR
R
 
Day 4b iteration and functions for-loops.pptx
Day 4b   iteration and functions  for-loops.pptxDay 4b   iteration and functions  for-loops.pptx
Day 4b iteration and functions for-loops.pptx
 
Day 4a iteration and functions.pptx
Day 4a   iteration and functions.pptxDay 4a   iteration and functions.pptx
Day 4a iteration and functions.pptx
 
The Ring programming language version 1.10 book - Part 33 of 212
The Ring programming language version 1.10 book - Part 33 of 212The Ring programming language version 1.10 book - Part 33 of 212
The Ring programming language version 1.10 book - Part 33 of 212
 
Seminar PSU 10.10.2014 mme
Seminar PSU 10.10.2014 mmeSeminar PSU 10.10.2014 mme
Seminar PSU 10.10.2014 mme
 
Data visualization using the grammar of graphics
Data visualization using the grammar of graphicsData visualization using the grammar of graphics
Data visualization using the grammar of graphics
 
Python matplotlib cheat_sheet
Python matplotlib cheat_sheetPython matplotlib cheat_sheet
Python matplotlib cheat_sheet
 

Similar to CLUSTERGRAM

Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov VyacheslavSeminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov VyacheslavVyacheslav Arbuzov
 
Артём Акуляков - F# for Data Analysis
Артём Акуляков - F# for Data AnalysisАртём Акуляков - F# for Data Analysis
Артём Акуляков - F# for Data AnalysisSpbDotNet Community
 
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Dr. Volkan OBAN
 
Basic R Data Manipulation
Basic R Data ManipulationBasic R Data Manipulation
Basic R Data ManipulationChu An
 
A quick introduction to R
A quick introduction to RA quick introduction to R
A quick introduction to RAngshuman Saha
 
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...Revolution Analytics
 
An overview of Python 2.7
An overview of Python 2.7An overview of Python 2.7
An overview of Python 2.7decoupled
 
BUilt in Functions and Simple programs in R.pdf
BUilt in Functions and Simple programs in R.pdfBUilt in Functions and Simple programs in R.pdf
BUilt in Functions and Simple programs in R.pdfkarthikaparthasarath
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryDatabricks
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryDatabricks
 
An example of R code for Data visualization
An example of R code for Data visualizationAn example of R code for Data visualization
An example of R code for Data visualizationLiang (Leon) Zhou
 

Similar to CLUSTERGRAM (20)

Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov VyacheslavSeminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
 
Introduction to R
Introduction to RIntroduction to R
Introduction to R
 
Артём Акуляков - F# for Data Analysis
Артём Акуляков - F# for Data AnalysisАртём Акуляков - F# for Data Analysis
Артём Акуляков - F# for Data Analysis
 
Joclad 2010 d
Joclad 2010 dJoclad 2010 d
Joclad 2010 d
 
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
 
Oh Composable World!
Oh Composable World!Oh Composable World!
Oh Composable World!
 
R programming
R programmingR programming
R programming
 
Scala by Luc Duponcheel
Scala by Luc DuponcheelScala by Luc Duponcheel
Scala by Luc Duponcheel
 
Basic R Data Manipulation
Basic R Data ManipulationBasic R Data Manipulation
Basic R Data Manipulation
 
R Language Introduction
R Language IntroductionR Language Introduction
R Language Introduction
 
A quick introduction to R
A quick introduction to RA quick introduction to R
A quick introduction to R
 
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
 
An overview of Python 2.7
An overview of Python 2.7An overview of Python 2.7
An overview of Python 2.7
 
A tour of Python
A tour of PythonA tour of Python
A tour of Python
 
BUilt in Functions and Simple programs in R.pdf
BUilt in Functions and Simple programs in R.pdfBUilt in Functions and Simple programs in R.pdf
BUilt in Functions and Simple programs in R.pdf
 
RHadoop の紹介
RHadoop の紹介RHadoop の紹介
RHadoop の紹介
 
R programming language
R programming languageR programming language
R programming language
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love Story
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love Story
 
An example of R code for Data visualization
An example of R code for Data visualizationAn example of R code for Data visualization
An example of R code for Data visualization
 

More from Dr. Volkan OBAN

Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...Dr. Volkan OBAN
 
Covid19py Python Package - Example
Covid19py  Python Package - ExampleCovid19py  Python Package - Example
Covid19py Python Package - ExampleDr. Volkan OBAN
 
Object detection with Python
Object detection with Python Object detection with Python
Object detection with Python Dr. Volkan OBAN
 
Python - Rastgele Orman(Random Forest) Parametreleri
Python - Rastgele Orman(Random Forest) ParametreleriPython - Rastgele Orman(Random Forest) Parametreleri
Python - Rastgele Orman(Random Forest) ParametreleriDr. Volkan OBAN
 
Linear Programming wi̇th R - Examples
Linear Programming wi̇th R - ExamplesLinear Programming wi̇th R - Examples
Linear Programming wi̇th R - ExamplesDr. Volkan OBAN
 
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ..."optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ...Dr. Volkan OBAN
 
k-means Clustering in Python
k-means Clustering in Pythonk-means Clustering in Python
k-means Clustering in PythonDr. Volkan OBAN
 
Naive Bayes Example using R
Naive Bayes Example using  R Naive Bayes Example using  R
Naive Bayes Example using R Dr. Volkan OBAN
 
Data Science and its Relationship to Big Data and Data-Driven Decision Making
Data Science and its Relationship to Big Data and Data-Driven Decision MakingData Science and its Relationship to Big Data and Data-Driven Decision Making
Data Science and its Relationship to Big Data and Data-Driven Decision MakingDr. Volkan OBAN
 
Scikit-learn Cheatsheet-Python
Scikit-learn Cheatsheet-PythonScikit-learn Cheatsheet-Python
Scikit-learn Cheatsheet-PythonDr. Volkan OBAN
 
Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Dr. Volkan OBAN
 
Pandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetPandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetDr. Volkan OBAN
 
ReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleDr. Volkan OBAN
 
ReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleDr. Volkan OBAN
 
R Machine Learning packages( generally used)
R Machine Learning packages( generally used)R Machine Learning packages( generally used)
R Machine Learning packages( generally used)Dr. Volkan OBAN
 
treemap package in R and examples.
treemap package in R and examples.treemap package in R and examples.
treemap package in R and examples.Dr. Volkan OBAN
 
R-Data table Cheat Sheet
R-Data table Cheat SheetR-Data table Cheat Sheet
R-Data table Cheat SheetDr. Volkan OBAN
 
Rcommands-for those who interested in R.
Rcommands-for those who interested in R.Rcommands-for those who interested in R.
Rcommands-for those who interested in R.Dr. Volkan OBAN
 

More from Dr. Volkan OBAN (20)

Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
 
Covid19py Python Package - Example
Covid19py  Python Package - ExampleCovid19py  Python Package - Example
Covid19py Python Package - Example
 
Object detection with Python
Object detection with Python Object detection with Python
Object detection with Python
 
Python - Rastgele Orman(Random Forest) Parametreleri
Python - Rastgele Orman(Random Forest) ParametreleriPython - Rastgele Orman(Random Forest) Parametreleri
Python - Rastgele Orman(Random Forest) Parametreleri
 
Linear Programming wi̇th R - Examples
Linear Programming wi̇th R - ExamplesLinear Programming wi̇th R - Examples
Linear Programming wi̇th R - Examples
 
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ..."optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
 
k-means Clustering in Python
k-means Clustering in Pythonk-means Clustering in Python
k-means Clustering in Python
 
Naive Bayes Example using R
Naive Bayes Example using  R Naive Bayes Example using  R
Naive Bayes Example using R
 
R forecasting Example
R forecasting ExampleR forecasting Example
R forecasting Example
 
Data Science and its Relationship to Big Data and Data-Driven Decision Making
Data Science and its Relationship to Big Data and Data-Driven Decision MakingData Science and its Relationship to Big Data and Data-Driven Decision Making
Data Science and its Relationship to Big Data and Data-Driven Decision Making
 
Scikit-learn Cheatsheet-Python
Scikit-learn Cheatsheet-PythonScikit-learn Cheatsheet-Python
Scikit-learn Cheatsheet-Python
 
Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet
 
Pandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetPandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheet
 
ReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an example
 
ReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an example
 
R Machine Learning packages( generally used)
R Machine Learning packages( generally used)R Machine Learning packages( generally used)
R Machine Learning packages( generally used)
 
treemap package in R and examples.
treemap package in R and examples.treemap package in R and examples.
treemap package in R and examples.
 
Mosaic plot in R.
Mosaic plot in R.Mosaic plot in R.
Mosaic plot in R.
 
R-Data table Cheat Sheet
R-Data table Cheat SheetR-Data table Cheat Sheet
R-Data table Cheat Sheet
 
Rcommands-for those who interested in R.
Rcommands-for those who interested in R.Rcommands-for those who interested in R.
Rcommands-for those who interested in R.
 

Recently uploaded

English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdfEnglish-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdfblazblazml
 
Cyber awareness ppt on the recorded data
Cyber awareness ppt on the recorded dataCyber awareness ppt on the recorded data
Cyber awareness ppt on the recorded dataTecnoIncentive
 
SMOTE and K-Fold Cross Validation-Presentation.pptx
SMOTE and K-Fold Cross Validation-Presentation.pptxSMOTE and K-Fold Cross Validation-Presentation.pptx
SMOTE and K-Fold Cross Validation-Presentation.pptxHaritikaChhatwal1
 
The Power of Data-Driven Storytelling_ Unveiling the Layers of Insight.pptx
The Power of Data-Driven Storytelling_ Unveiling the Layers of Insight.pptxThe Power of Data-Driven Storytelling_ Unveiling the Layers of Insight.pptx
The Power of Data-Driven Storytelling_ Unveiling the Layers of Insight.pptxTasha Penwell
 
Data Factory in Microsoft Fabric (MsBIP #82)
Data Factory in Microsoft Fabric (MsBIP #82)Data Factory in Microsoft Fabric (MsBIP #82)
Data Factory in Microsoft Fabric (MsBIP #82)Cathrine Wilhelmsen
 
Bank Loan Approval Analysis: A Comprehensive Data Analysis Project
Bank Loan Approval Analysis: A Comprehensive Data Analysis ProjectBank Loan Approval Analysis: A Comprehensive Data Analysis Project
Bank Loan Approval Analysis: A Comprehensive Data Analysis ProjectBoston Institute of Analytics
 
Real-Time AI Streaming - AI Max Princeton
Real-Time AI  Streaming - AI Max PrincetonReal-Time AI  Streaming - AI Max Princeton
Real-Time AI Streaming - AI Max PrincetonTimothy Spann
 
Decoding Patterns: Customer Churn Prediction Data Analysis Project
Decoding Patterns: Customer Churn Prediction Data Analysis ProjectDecoding Patterns: Customer Churn Prediction Data Analysis Project
Decoding Patterns: Customer Churn Prediction Data Analysis ProjectBoston Institute of Analytics
 
Networking Case Study prepared by teacher.pptx
Networking Case Study prepared by teacher.pptxNetworking Case Study prepared by teacher.pptx
Networking Case Study prepared by teacher.pptxHimangsuNath
 
Semantic Shed - Squashing and Squeezing.pptx
Semantic Shed - Squashing and Squeezing.pptxSemantic Shed - Squashing and Squeezing.pptx
Semantic Shed - Squashing and Squeezing.pptxMike Bennett
 
Digital Marketing Plan, how digital marketing works
Digital Marketing Plan, how digital marketing worksDigital Marketing Plan, how digital marketing works
Digital Marketing Plan, how digital marketing worksdeepakthakur548787
 
Minimizing AI Hallucinations/Confabulations and the Path towards AGI with Exa...
Minimizing AI Hallucinations/Confabulations and the Path towards AGI with Exa...Minimizing AI Hallucinations/Confabulations and the Path towards AGI with Exa...
Minimizing AI Hallucinations/Confabulations and the Path towards AGI with Exa...Thomas Poetter
 
IBEF report on the Insurance market in India
IBEF report on the Insurance market in IndiaIBEF report on the Insurance market in India
IBEF report on the Insurance market in IndiaManalVerma4
 
Principles and Practices of Data Visualization
Principles and Practices of Data VisualizationPrinciples and Practices of Data Visualization
Principles and Practices of Data VisualizationKianJazayeri1
 
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...Dr Arash Najmaei ( Phd., MBA, BSc)
 
Student Profile Sample report on improving academic performance by uniting gr...
Student Profile Sample report on improving academic performance by uniting gr...Student Profile Sample report on improving academic performance by uniting gr...
Student Profile Sample report on improving academic performance by uniting gr...Seán Kennedy
 
FAIR, FAIRsharing, FAIR Cookbook and ELIXIR - Sansone SA - Boston 2024
FAIR, FAIRsharing, FAIR Cookbook and ELIXIR - Sansone SA - Boston 2024FAIR, FAIRsharing, FAIR Cookbook and ELIXIR - Sansone SA - Boston 2024
FAIR, FAIRsharing, FAIR Cookbook and ELIXIR - Sansone SA - Boston 2024Susanna-Assunta Sansone
 
Student profile product demonstration on grades, ability, well-being and mind...
Student profile product demonstration on grades, ability, well-being and mind...Student profile product demonstration on grades, ability, well-being and mind...
Student profile product demonstration on grades, ability, well-being and mind...Seán Kennedy
 

Recently uploaded (20)

Data Analysis Project: Stroke Prediction
Data Analysis Project: Stroke PredictionData Analysis Project: Stroke Prediction
Data Analysis Project: Stroke Prediction
 
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdfEnglish-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
 
Cyber awareness ppt on the recorded data
Cyber awareness ppt on the recorded dataCyber awareness ppt on the recorded data
Cyber awareness ppt on the recorded data
 
SMOTE and K-Fold Cross Validation-Presentation.pptx
SMOTE and K-Fold Cross Validation-Presentation.pptxSMOTE and K-Fold Cross Validation-Presentation.pptx
SMOTE and K-Fold Cross Validation-Presentation.pptx
 
The Power of Data-Driven Storytelling_ Unveiling the Layers of Insight.pptx
The Power of Data-Driven Storytelling_ Unveiling the Layers of Insight.pptxThe Power of Data-Driven Storytelling_ Unveiling the Layers of Insight.pptx
The Power of Data-Driven Storytelling_ Unveiling the Layers of Insight.pptx
 
Data Factory in Microsoft Fabric (MsBIP #82)
Data Factory in Microsoft Fabric (MsBIP #82)Data Factory in Microsoft Fabric (MsBIP #82)
Data Factory in Microsoft Fabric (MsBIP #82)
 
Bank Loan Approval Analysis: A Comprehensive Data Analysis Project
Bank Loan Approval Analysis: A Comprehensive Data Analysis ProjectBank Loan Approval Analysis: A Comprehensive Data Analysis Project
Bank Loan Approval Analysis: A Comprehensive Data Analysis Project
 
Real-Time AI Streaming - AI Max Princeton
Real-Time AI  Streaming - AI Max PrincetonReal-Time AI  Streaming - AI Max Princeton
Real-Time AI Streaming - AI Max Princeton
 
Decoding Patterns: Customer Churn Prediction Data Analysis Project
Decoding Patterns: Customer Churn Prediction Data Analysis ProjectDecoding Patterns: Customer Churn Prediction Data Analysis Project
Decoding Patterns: Customer Churn Prediction Data Analysis Project
 
Networking Case Study prepared by teacher.pptx
Networking Case Study prepared by teacher.pptxNetworking Case Study prepared by teacher.pptx
Networking Case Study prepared by teacher.pptx
 
Semantic Shed - Squashing and Squeezing.pptx
Semantic Shed - Squashing and Squeezing.pptxSemantic Shed - Squashing and Squeezing.pptx
Semantic Shed - Squashing and Squeezing.pptx
 
Digital Marketing Plan, how digital marketing works
Digital Marketing Plan, how digital marketing worksDigital Marketing Plan, how digital marketing works
Digital Marketing Plan, how digital marketing works
 
Minimizing AI Hallucinations/Confabulations and the Path towards AGI with Exa...
Minimizing AI Hallucinations/Confabulations and the Path towards AGI with Exa...Minimizing AI Hallucinations/Confabulations and the Path towards AGI with Exa...
Minimizing AI Hallucinations/Confabulations and the Path towards AGI with Exa...
 
Insurance Churn Prediction Data Analysis Project
Insurance Churn Prediction Data Analysis ProjectInsurance Churn Prediction Data Analysis Project
Insurance Churn Prediction Data Analysis Project
 
IBEF report on the Insurance market in India
IBEF report on the Insurance market in IndiaIBEF report on the Insurance market in India
IBEF report on the Insurance market in India
 
Principles and Practices of Data Visualization
Principles and Practices of Data VisualizationPrinciples and Practices of Data Visualization
Principles and Practices of Data Visualization
 
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
 
Student Profile Sample report on improving academic performance by uniting gr...
Student Profile Sample report on improving academic performance by uniting gr...Student Profile Sample report on improving academic performance by uniting gr...
Student Profile Sample report on improving academic performance by uniting gr...
 
FAIR, FAIRsharing, FAIR Cookbook and ELIXIR - Sansone SA - Boston 2024
FAIR, FAIRsharing, FAIR Cookbook and ELIXIR - Sansone SA - Boston 2024FAIR, FAIRsharing, FAIR Cookbook and ELIXIR - Sansone SA - Boston 2024
FAIR, FAIRsharing, FAIR Cookbook and ELIXIR - Sansone SA - Boston 2024
 
Student profile product demonstration on grades, ability, well-being and mind...
Student profile product demonstration on grades, ability, well-being and mind...Student profile product demonstration on grades, ability, well-being and mind...
Student profile product demonstration on grades, ability, well-being and mind...
 

CLUSTERGRAM

  # Slides 1-4: CLUSTERGRAM -- prepared by Volkan OBAN.
  # SOURCE:
  #   https://gist.github.com/hadley/439761 (hadley/clustergram-had.r)
  #   http://www.r-statistics.com/tag/large-data/
  # CODES: reconstructed from the slide transcript (console prompts and
  # wrap-induced spaces removed) so that the text parses as R again.

# Default clustering back-end for clustergram().
#
# Any replacement must have the same shape: take (Data, k, ...) and return a
# list with two elements:
#   cluster - vector of length n (the number of rows of Data) giving the
#             cluster each row belongs to.
#   centers - one number per cluster. Here it is the cluster centre projected
#             on the loadings of the first principal component of Data
#             (a k x 1 matrix).
# `...` is forwarded to kmeans().
clustergram.kmeans <- function(Data, k, ...) {
  cl <- kmeans(Data, k, ...)
  first.loadings <- princomp(Data)$loadings[, 1]

  list(
    cluster = cl$cluster,
    centers = cl$centers %*% first.loadings  # 1 number per center
  )
}

# Default drawing back-end for clustergram().
#
# X, Y              - n x length(k.range) matrices; row i holds the x/y path
#                     of observation i across the values of k.
# k.range           - values of k; used for axis ticks and grey guide lines.
# x.range, y.range  - plot limits.
# COL               - one line colour per observation.
# add.center.points - if TRUE, overlay the cluster centres as red points.
# centers.points    - list of data frames with columns `y` and `x`;
#                     NULL entries are ignored.
clustergram.plot.matlines <- function(X, Y, k.range,
                                      x.range, y.range, COL,
                                      add.center.points, centers.points) {
  # Empty canvas: the single white point only establishes the coordinates.
  plot(0, 0, col = "white", xlim = x.range, ylim = y.range,
       axes = FALSE,
       xlab = "Number of clusters (k)",
       ylab = "PCA weighted Mean of the clusters",
       main = c("Clustergram of the PCA-weighted Mean of",
                "the clusters k-mean clusters vs number of clusters (k)"))
  axis(side = 1, at = k.range)
  axis(side = 2)
  abline(v = k.range, col = "grey")

  # matlines() draws one line per column, hence the transposes.
  matlines(t(X), t(Y), pch = 19, col = COL, lty = 1, lwd = 1.5)

  if (add.center.points) {
    # Base rbind replaces plyr::ldply, so plyr is no longer needed.
    present <- Filter(Negate(is.null), centers.points)
    if (length(present) > 0) {
      xx <- do.call(rbind, present)
      points(xx$x, xx$y, pch = 19, col = "red", cex = 1.3)
    }
  }

  invisible(NULL)
}

# Draw a clustergram: for every k in k.range, cluster the rows of Data and
# plot each observation at the (PCA-projected) centre of its cluster, joined
# across the values of k.
#
# Data                - numeric matrix (or something as.matrix() can convert),
#                       one column per dimension; should be scaled.
# k.range             - vector with the numbers of clusters to plot.
# clustering.function - function(Data, k) returning list(cluster, centers);
#                       see clustergram.kmeans.
# clustergram.plot    - function that draws the result; see
#                       clustergram.plot.matlines.
# line.width          - amount each line is lifted above the previous one in
#                       the same cluster so that lines do not superimpose.
#                       Recycled to one value per row.
# add.center.points   - whether to plot points at the cluster means.
#
# Returns (invisibly) list(X, Y, centers.points): the matrices and the list
# of centre data frames that were handed to clustergram.plot.
clustergram <- function(Data, k.range = 2:10,
                        clustering.function = clustergram.kmeans,
                        clustergram.plot = clustergram.plot.matlines,
                        line.width = .004, add.center.points = TRUE) {
  Data <- as.matrix(Data)  # `%*%` below does not accept a data frame
  n <- nrow(Data)

  # first principal component of our data
  PCA.1 <- Data %*% princomp(Data)$loadings[, 1]

  # colorspace is optional: use it without attaching it when available.
  if (requireNamespace("colorspace", quietly = TRUE)) {
    COL <- colorspace::heat_hcl(n)[order(PCA.1)]  # line colors
  } else {
    COL <- rainbow(n)[order(PCA.1)]  # line colors
    warning('Please consider installing the package "colorspace" for prittier colors')
  }

  line.width <- rep(line.width, length.out = n)

  # Preallocate instead of growing the matrices with cbind() in the loop.
  n.k <- length(k.range)
  X <- matrix(NA_real_, nrow = n, ncol = n.k)
  Y <- matrix(NA_real_, nrow = n, ncol = n.k)
  centers.points <- vector("list", n.k)

  for (j in seq_len(n.k)) {
    k <- k.range[j]
    k.clusters <- clustering.function(Data, k)

    clusters.vec <- k.clusters$cluster
    the.centers <- as.vector(k.clusters$centers)

    # Running sum of line.width within each cluster, in row order: stacks
    # the lines of one cluster on top of each other.
    noise <- ave(line.width, clusters.vec, FUN = cumsum)

    Y[, j] <- the.centers[clusters.vec] + noise
    X[, j] <- k

    centers.points[[j]] <- data.frame(
      y = the.centers,
      x = rep(k, length(the.centers))
    )
  }

  x.range <- range(k.range)
  # Include Y so that lines lifted above max(PCA.1) are not clipped.
  y.range <- range(PCA.1, Y)

  clustergram.plot(X, Y, k.range,
                   x.range, y.range, COL,
                   add.center.points, centers.points)

  invisible(list(X = X, Y = Y, centers.points = centers.points))
}

# Examples (never executed; kept for reference).
if (FALSE) {
  png("d:clustergram_plots_%03d.png", 650, 650, pointsize = 15)

  data(iris)
  set.seed(250)
  par(cex.lab = 1.5, cex.main = 1.2)
  Data <- scale(iris[, -5])  # notice I am scaling the vectors
  # notice how I am using line.width. Play with it on your problem,
  # according to the scale of Y.
  clustergram(Data, k.range = 2:8, line.width = 0.004)

  set.seed(500)
  Data <- scale(iris[, -5])  # notice I am scaling the vectors
  par(cex.lab = 1.2, cex.main = .7)
  par(mfrow = c(3, 2))
  for (i in 1:6) {
    clustergram(Data, k.range = 2:8, line.width = .004,
                add.center.points = TRUE)
  }
  par(mfrow = c(1, 1))

  set.seed(250)
  Data <- rbind(
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3),
          rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 1, sd = 0.3), rnorm(100, 1, sd = 0.3),
          rnorm(100, 1, sd = 0.3)),
    cbind(rnorm(100, 2, sd = 0.3), rnorm(100, 2, sd = 0.3),
          rnorm(100, 2, sd = 0.3))
  )
  clustergram(Data, k.range = 2:5, line.width = .004,
              add.center.points = TRUE)

  set.seed(250)
  Data <- rbind(
    cbind(rnorm(100, 1, sd = 0.3), rnorm(100, 0, sd = 0.3),
          rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3),
          rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3),
          rnorm(100, 1, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3),
          rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3))
  )
  clustergram(Data, k.range = 2:8, line.width = .004,
              add.center.points = TRUE)

  dev.off()
}

# The slide also loads clustergram() from GitHub.
# NOTE(review): this downloads and executes remote code; it is only attempted
# when no clustergram() function is already defined, so the definitions above
# are not overwritten.
if (!exists("clustergram", mode = "function")) {
  # Making sure we can source code from github
  source("http://www.r-statistics.com/wp-content/uploads/2012/01/source_https.r.txt")
  source_https("https://raw.github.com/talgalili/R-code-snippets/master/clustergram.r")
}

data(iris)
set.seed(250)
par(cex.lab = 1.5, cex.main = 1.2)
Data <- scale(iris[, -5])  # notice I am scaling the vectors
clustergram(Data, k.range = 2:8, line.width = 0.004)
  # Slide 5: six clustergrams of the scaled iris data on a 3 x 2 grid.

# NOTE(review): the two calls below download and execute remote code; they
# are only attempted when no clustergram() function is already defined.
# The URLs are the slide's URLs with the wrap-induced spaces removed.
if (!exists("clustergram", mode = "function")) {
  # Making sure we can source code from github
  source("http://www.r-statistics.com/wp-content/uploads/2012/01/source_https.r.txt")
  source_https("https://raw.github.com/talgalili/R-code-snippets/master/clustergram.r")
}

set.seed(500)
Data <- scale(iris[, -5])  # notice I am scaling the vectors
par(cex.lab = 1.2, cex.main = .7)
par(mfrow = c(3, 2))
for (i in seq_len(6)) {
  clustergram(Data, k.range = 2:8, line.width = .004,
              add.center.points = TRUE)
}
# Back to a single-panel layout, as the equivalent example on slide 4 does.
par(mfrow = c(1, 1))