SlideShare uma empresa Scribd logo
1 de 9
Baixar para ler offline
plyr	
  for	
  Split-Apply-Combine	
  	
  
Automating	
  one	
  pattern	
  of	
  data	
  munging	
  and	
  analysis	
  
	
  
Eric	
  Klusman	
  
2013-11-14	
  
What	
  is	
  plyr?	
  
•  A	
  library	
  of	
  functions	
  for	
  R	
  for	
  doing	
  analysis	
  in	
  
a	
  split-apply-combine	
  pattern	
  
–  Split	
  the	
  data	
  into	
  subgroups	
  
–  Apply	
  some	
  function	
  to	
  summarize,	
  model,	
  or	
  plot	
  each	
  
subgroup	
  
–  Combine	
  the	
  results	
  of	
  the	
  subgroups	
  back	
  together	
  

•  Automate	
  the	
  loops	
  and	
  avoid	
  the	
  
bookkeeping	
  code	
  
•  Assumption:	
  data	
  can	
  be	
  processed	
  piecewise	
  	
  
2	
  
Example:	
  	
  Baby	
  Names	
  
• 
• 
• 
• 

From	
  Hadley	
  Wickham,	
  http://plyr.had.co.nz/09-user/	
  	
  
Top	
  1000	
  U.S.	
  boy	
  and	
  girl	
  baby	
  names	
  from	
  1880	
  to	
  2008	
  
Derived	
  from	
  Social	
  Security	
  Administration	
  dataset	
  
1000	
  *	
  2	
  *	
  129	
  =	
  258000	
  obs	
  on	
  4	
  vars	
  

>	
  head(bnames)	
  

>	
  tail(bnames)	
  

	
  year	
  	
  	
  	
  name	
  	
  percent	
  sex	
  
1	
  1880	
  	
  	
  	
  John	
  0.081541	
  boy	
  
2	
  1880	
  William	
  0.080511	
  boy	
  
3	
  1880	
  	
  	
  James	
  0.050057	
  boy	
  
4	
  1880	
  Charles	
  0.045167	
  boy	
  
5	
  1880	
  	
  George	
  0.043292	
  boy	
  
6	
  1880	
  	
  	
  Frank	
  0.027380	
  boy	
  
	
  

	
  year	
  	
  	
  	
  	
  name	
  	
  percent	
  	
  sex	
  
257995	
  2008	
  	
  	
  	
  	
  Diya	
  0.000128	
  girl	
  
257996	
  2008	
  Carleigh	
  0.000128	
  girl	
  
257997	
  2008	
  	
  	
  	
  Iyana	
  0.000128	
  girl	
  
257998	
  2008	
  	
  	
  Kenley	
  0.000127	
  girl	
  
257999	
  2008	
  	
  	
  Sloane	
  0.000127	
  girl	
  
258000	
  2008	
  	
  Elianna	
  0.000127	
  girl	
  

3	
  
Groupwise	
  summaries	
  
•  What	
  if	
  we	
  want	
  to	
  compute	
  the	
  rank	
  of	
  a	
  
name	
  within	
  a	
  sex	
  and	
  year?	
  
•  Easy	
  for	
  a	
  single	
  year	
  and	
  sex;	
  hard	
  in	
  general.	
  
#	
  Split	
  
pieces	
  <-	
  split(bnames,	
  list(bnames$sex,	
  bnames$year))	
  
	
  
#	
  Apply	
  
results	
  <-	
  vector("list",	
  length(pieces))	
  
for(i	
  in	
  seq_along(pieces))	
  {	
  
	
  	
  	
  	
  piece	
  <-	
  pieces[[i]]	
  
	
  	
  	
  	
  piece	
  <-	
  transform(piece,	
  rank	
  =	
  rank(-percent,	
  ties.method="first"))	
  
	
  	
  	
  	
  results[[i]]	
  <-	
  piece	
  
}	
  
	
  
#	
  Combine	
  
result	
  <-	
  do.call("rbind",	
  results)	
  
4	
  
Using	
  plyr	
  

bnames	
  <-	
  ddply(bnames,	
  c("sex",	
  "year"),	
  transform,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  rank	
  =	
  rank(-percent,	
  ties.method="first"))	
  

5	
  
One-line	
  summaries	
  	
  
ddply(bnames,	
  c("name"),	
  summarize,	
  tot	
  =	
  sum(percent))	
  
	
  
ddply(bnames,	
  c("length"),	
  summarize,	
  tot	
  =	
  sum(percent))	
  
	
  
ddply(bnames,	
  c("year",	
  "sex"),	
  summarize,	
  tot	
  =	
  sum(percent))	
  
	
  
fl	
  <-	
  ddply(bnames,	
  c("year",	
  "sex",	
  "first"),	
  summarize,	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  tot	
  =	
  sum(percent))	
  
	
  
library(ggplot2)	
  
	
  
qplot(year,	
  tot,	
  data	
  =	
  fl,	
  geom	
  =	
  "line",	
  color	
  =	
  "sex",	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  facets	
  =	
  ~	
  first)	
  

6	
  
7	
  
plyr	
  functions	
  are	
  named	
  by	
  their	
  
input	
  and	
  output	
  types	
  
ioply	
  where	
  i	
  is	
  the	
  input	
  type	
  and	
  o	
  is	
  the	
  output	
  type	
  
Function	
  

Input	
  data	
  type	
  

Output	
  data	
  type	
  

ddply	
  

Data	
  frame	
  

Data	
  frame	
  

aaply	
  

Array	
  

Array	
  

daply	
  

Dataframe	
  

Array	
  

d_ply	
  

Dataframe	
  

None;	
  used	
  for	
  plotting	
  or	
  
printing	
  

ldply	
  

List	
  

Dataframe	
  

alply	
  

Array	
  

List	
  

8	
  
Base	
  R	
  vs.	
  plyr	
  
Base	
  
function	
  
aggregate	
  

d	
  

d	
  

ddply	
  +	
  colwise	
  

apply	
  

a	
  

a/l	
  

aaply	
  /	
  alply	
  

by	
  

l	
  

l	
  

dlply	
  

lapply	
  

l	
  

l	
  

llply	
  

mapply	
  

a	
  

a/l	
  

maply	
  /	
  mlply	
  

replicate	
  

r	
  

a/l	
  

raply	
  /	
  rlply	
  

sapply	
  

l	
  

a	
  

laply	
  

sweep	
  

a	
  

a	
  

-­‐	
  

tapply	
  
• 

	
  
	
  
	
  
Input	
   Output	
   plyr	
  function	
  

a	
  

a	
  

-­‐	
  

Input	
  and	
  output	
  types	
  are	
  indicated	
  by	
  first	
  letter:	
  	
  array,	
  data	
  frame,	
  list,	
  replication	
  
9	
  

Mais conteúdo relacionado

Mais procurados

Data wrangling with dplyr
Data wrangling with dplyrData wrangling with dplyr
Data wrangling with dplyrC. Tobin Magle
 
Grouping & Summarizing Data in R
Grouping & Summarizing Data in RGrouping & Summarizing Data in R
Grouping & Summarizing Data in RJeffrey Breen
 
Introduction to pandas
Introduction to pandasIntroduction to pandas
Introduction to pandasPiyush rai
 
Tutorial for Circular and Rectangular Manhattan plots
Tutorial for Circular and Rectangular Manhattan plotsTutorial for Circular and Rectangular Manhattan plots
Tutorial for Circular and Rectangular Manhattan plotsAvjinder (Avi) Kaler
 
Tutorial for Estimating Broad and Narrow Sense Heritability using R
Tutorial for Estimating Broad and Narrow Sense Heritability using RTutorial for Estimating Broad and Narrow Sense Heritability using R
Tutorial for Estimating Broad and Narrow Sense Heritability using RAvjinder (Avi) Kaler
 
Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)Ram Narasimhan
 
RDataMining slides-r-programming
RDataMining slides-r-programmingRDataMining slides-r-programming
RDataMining slides-r-programmingYanchang Zhao
 
Frequent itemset mining methods
Frequent itemset mining methodsFrequent itemset mining methods
Frequent itemset mining methodsProf.Nilesh Magar
 
heap Sort Algorithm
heap  Sort Algorithmheap  Sort Algorithm
heap Sort AlgorithmLemia Algmri
 
R programming groundup-basic-section-i
R programming groundup-basic-section-iR programming groundup-basic-section-i
R programming groundup-basic-section-iDr. Awase Khirni Syed
 
D-REPR: A Language For Describing And Mapping Diversely-Structured Data Sourc...
D-REPR: A Language For Describing And Mapping Diversely-Structured Data Sourc...D-REPR: A Language For Describing And Mapping Diversely-Structured Data Sourc...
D-REPR: A Language For Describing And Mapping Diversely-Structured Data Sourc...Binh Vu
 
R-programming-training-in-mumbai
R-programming-training-in-mumbaiR-programming-training-in-mumbai
R-programming-training-in-mumbaiUnmesh Baile
 

Mais procurados (17)

Data wrangling with dplyr
Data wrangling with dplyrData wrangling with dplyr
Data wrangling with dplyr
 
Heap sort
Heap sortHeap sort
Heap sort
 
Grouping & Summarizing Data in R
Grouping & Summarizing Data in RGrouping & Summarizing Data in R
Grouping & Summarizing Data in R
 
Introduction to pandas
Introduction to pandasIntroduction to pandas
Introduction to pandas
 
Pandas
PandasPandas
Pandas
 
Tutorial for Circular and Rectangular Manhattan plots
Tutorial for Circular and Rectangular Manhattan plotsTutorial for Circular and Rectangular Manhattan plots
Tutorial for Circular and Rectangular Manhattan plots
 
Tutorial for Estimating Broad and Narrow Sense Heritability using R
Tutorial for Estimating Broad and Narrow Sense Heritability using RTutorial for Estimating Broad and Narrow Sense Heritability using R
Tutorial for Estimating Broad and Narrow Sense Heritability using R
 
Datastructureitstypes
DatastructureitstypesDatastructureitstypes
Datastructureitstypes
 
Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)
 
RDataMining slides-r-programming
RDataMining slides-r-programmingRDataMining slides-r-programming
RDataMining slides-r-programming
 
Frequent itemset mining methods
Frequent itemset mining methodsFrequent itemset mining methods
Frequent itemset mining methods
 
heap Sort Algorithm
heap  Sort Algorithmheap  Sort Algorithm
heap Sort Algorithm
 
Fp growth
Fp growthFp growth
Fp growth
 
R programming groundup-basic-section-i
R programming groundup-basic-section-iR programming groundup-basic-section-i
R programming groundup-basic-section-i
 
D-REPR: A Language For Describing And Mapping Diversely-Structured Data Sourc...
D-REPR: A Language For Describing And Mapping Diversely-Structured Data Sourc...D-REPR: A Language For Describing And Mapping Diversely-Structured Data Sourc...
D-REPR: A Language For Describing And Mapping Diversely-Structured Data Sourc...
 
R language
R languageR language
R language
 
R-programming-training-in-mumbai
R-programming-training-in-mumbaiR-programming-training-in-mumbai
R-programming-training-in-mumbai
 

Destaque

useR2011 - Whitcher
useR2011 - WhitcheruseR2011 - Whitcher
useR2011 - Whitcherrusersla
 
Los Angeles R users group - Dec 14 2010 - Part 3
Los Angeles R users group - Dec 14 2010 - Part 3Los Angeles R users group - Dec 14 2010 - Part 3
Los Angeles R users group - Dec 14 2010 - Part 3rusersla
 
useR2011 - Edlefsen
useR2011 - EdlefsenuseR2011 - Edlefsen
useR2011 - Edlefsenrusersla
 
Los Angeles R users group - July 12 2011 - Part 2
Los Angeles R users group - July 12 2011 - Part 2Los Angeles R users group - July 12 2011 - Part 2
Los Angeles R users group - July 12 2011 - Part 2rusersla
 
useR2011 - Rougier
useR2011 - RougieruseR2011 - Rougier
useR2011 - Rougierrusersla
 
useR2011 - Gromping
useR2011 - Gromping useR2011 - Gromping
useR2011 - Gromping rusersla
 
useR2011 - Huber
useR2011 - HuberuseR2011 - Huber
useR2011 - Huberrusersla
 
Los Angeles R users group - July 12 2011 - Part 1
Los Angeles R users group - July 12 2011 - Part 1Los Angeles R users group - July 12 2011 - Part 1
Los Angeles R users group - July 12 2011 - Part 1rusersla
 
Statistical Modeling: The Two Cultures
Statistical Modeling: The Two CulturesStatistical Modeling: The Two Cultures
Statistical Modeling: The Two CulturesChristoph Molnar
 

Destaque (9)

useR2011 - Whitcher
useR2011 - WhitcheruseR2011 - Whitcher
useR2011 - Whitcher
 
Los Angeles R users group - Dec 14 2010 - Part 3
Los Angeles R users group - Dec 14 2010 - Part 3Los Angeles R users group - Dec 14 2010 - Part 3
Los Angeles R users group - Dec 14 2010 - Part 3
 
useR2011 - Edlefsen
useR2011 - EdlefsenuseR2011 - Edlefsen
useR2011 - Edlefsen
 
Los Angeles R users group - July 12 2011 - Part 2
Los Angeles R users group - July 12 2011 - Part 2Los Angeles R users group - July 12 2011 - Part 2
Los Angeles R users group - July 12 2011 - Part 2
 
useR2011 - Rougier
useR2011 - RougieruseR2011 - Rougier
useR2011 - Rougier
 
useR2011 - Gromping
useR2011 - Gromping useR2011 - Gromping
useR2011 - Gromping
 
useR2011 - Huber
useR2011 - HuberuseR2011 - Huber
useR2011 - Huber
 
Los Angeles R users group - July 12 2011 - Part 1
Los Angeles R users group - July 12 2011 - Part 1Los Angeles R users group - July 12 2011 - Part 1
Los Angeles R users group - July 12 2011 - Part 1
 
Statistical Modeling: The Two Cultures
Statistical Modeling: The Two CulturesStatistical Modeling: The Two Cultures
Statistical Modeling: The Two Cultures
 

Semelhante a LA R meetup - Nov 2013 - Eric Klusman

Classification decision tree
Classification  decision treeClassification  decision tree
Classification decision treeyazad dumasia
 
decison tree and rules in data mining techniques
decison tree and rules in data mining techniquesdecison tree and rules in data mining techniques
decison tree and rules in data mining techniquesALIZAIB KHAN
 
Intro to plyr for Davis R Users' Group, by Steve Culman
Intro to plyr for Davis R Users' Group, by Steve CulmanIntro to plyr for Davis R Users' Group, by Steve Culman
Intro to plyr for Davis R Users' Group, by Steve CulmanNoam Ross
 
AiCore Brochure 27-Mar-2023-205529.pdf
AiCore Brochure 27-Mar-2023-205529.pdfAiCore Brochure 27-Mar-2023-205529.pdf
AiCore Brochure 27-Mar-2023-205529.pdfAjayRawat829497
 
More SQL in MySQL 8.0
More SQL in MySQL 8.0More SQL in MySQL 8.0
More SQL in MySQL 8.0Norvald Ryeng
 
An algorithm for building
An algorithm for buildingAn algorithm for building
An algorithm for buildingajmal_fuuast
 
Is your excel production code?
Is your excel production code?Is your excel production code?
Is your excel production code?ProCogia
 
Scylla Summit 2017: Planning Your Queries for Maximum Performance
Scylla Summit 2017: Planning Your Queries for Maximum PerformanceScylla Summit 2017: Planning Your Queries for Maximum Performance
Scylla Summit 2017: Planning Your Queries for Maximum PerformanceScyllaDB
 
Introduction to Machine Learning
Introduction to Machine LearningIntroduction to Machine Learning
Introduction to Machine LearningAndrew Ferlitsch
 
Data Manipulation with Numpy and Pandas in PythonStarting with N
Data Manipulation with Numpy and Pandas in PythonStarting with NData Manipulation with Numpy and Pandas in PythonStarting with N
Data Manipulation with Numpy and Pandas in PythonStarting with NOllieShoresna
 
India software developers conference 2013 Bangalore
India software developers conference 2013 BangaloreIndia software developers conference 2013 Bangalore
India software developers conference 2013 BangaloreSatnam Singh
 
Musings of kaggler
Musings of kagglerMusings of kaggler
Musings of kagglerKai Xin Thia
 
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptxfINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptxdataKarthik
 
Slide3.ppt
Slide3.pptSlide3.ppt
Slide3.pptbutest
 
K-Means Algorithm Implementation In python
K-Means Algorithm Implementation In pythonK-Means Algorithm Implementation In python
K-Means Algorithm Implementation In pythonAfzal Ahmad
 

Semelhante a LA R meetup - Nov 2013 - Eric Klusman (20)

Classification decision tree
Classification  decision treeClassification  decision tree
Classification decision tree
 
decison tree and rules in data mining techniques
decison tree and rules in data mining techniquesdecison tree and rules in data mining techniques
decison tree and rules in data mining techniques
 
Intro to plyr for Davis R Users' Group, by Steve Culman
Intro to plyr for Davis R Users' Group, by Steve CulmanIntro to plyr for Davis R Users' Group, by Steve Culman
Intro to plyr for Davis R Users' Group, by Steve Culman
 
AiCore Brochure 27-Mar-2023-205529.pdf
AiCore Brochure 27-Mar-2023-205529.pdfAiCore Brochure 27-Mar-2023-205529.pdf
AiCore Brochure 27-Mar-2023-205529.pdf
 
Decision Tree.pptx
Decision Tree.pptxDecision Tree.pptx
Decision Tree.pptx
 
More SQL in MySQL 8.0
More SQL in MySQL 8.0More SQL in MySQL 8.0
More SQL in MySQL 8.0
 
An algorithm for building
An algorithm for buildingAn algorithm for building
An algorithm for building
 
Is your excel production code?
Is your excel production code?Is your excel production code?
Is your excel production code?
 
Python revision tour II
Python revision tour IIPython revision tour II
Python revision tour II
 
Scylla Summit 2017: Planning Your Queries for Maximum Performance
Scylla Summit 2017: Planning Your Queries for Maximum PerformanceScylla Summit 2017: Planning Your Queries for Maximum Performance
Scylla Summit 2017: Planning Your Queries for Maximum Performance
 
Data structure
Data structureData structure
Data structure
 
Introduction to Machine Learning
Introduction to Machine LearningIntroduction to Machine Learning
Introduction to Machine Learning
 
Data Manipulation with Numpy and Pandas in PythonStarting with N
Data Manipulation with Numpy and Pandas in PythonStarting with NData Manipulation with Numpy and Pandas in PythonStarting with N
Data Manipulation with Numpy and Pandas in PythonStarting with N
 
India software developers conference 2013 Bangalore
India software developers conference 2013 BangaloreIndia software developers conference 2013 Bangalore
India software developers conference 2013 Bangalore
 
Musings of kaggler
Musings of kagglerMusings of kaggler
Musings of kaggler
 
Sas practice programs
Sas practice programsSas practice programs
Sas practice programs
 
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptxfINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
 
Slide3.ppt
Slide3.pptSlide3.ppt
Slide3.ppt
 
K-Means Algorithm Implementation In python
K-Means Algorithm Implementation In pythonK-Means Algorithm Implementation In python
K-Means Algorithm Implementation In python
 
lec1.ppt
lec1.pptlec1.ppt
lec1.ppt
 

Último

(南达科他州立大学毕业证学位证成绩单-永久存档)
(南达科他州立大学毕业证学位证成绩单-永久存档)(南达科他州立大学毕业证学位证成绩单-永久存档)
(南达科他州立大学毕业证学位证成绩单-永久存档)oannq
 
西伦敦大学毕业证学位证成绩单-怎么样做
西伦敦大学毕业证学位证成绩单-怎么样做西伦敦大学毕业证学位证成绩单-怎么样做
西伦敦大学毕业证学位证成绩单-怎么样做j5bzwet6
 
E J Waggoner against Kellogg's Pantheism 8.pptx
E J Waggoner against Kellogg's Pantheism 8.pptxE J Waggoner against Kellogg's Pantheism 8.pptx
E J Waggoner against Kellogg's Pantheism 8.pptxJackieSparrow3
 
Inspiring Through Words Power of Inspiration.pptx
Inspiring Through Words Power of Inspiration.pptxInspiring Through Words Power of Inspiration.pptx
Inspiring Through Words Power of Inspiration.pptxShubham Rawat
 
Call Girls In Karkardooma 83770 87607 Just-Dial Escorts Service 24X7 Avilable
Call Girls In Karkardooma 83770 87607 Just-Dial Escorts Service 24X7 AvilableCall Girls In Karkardooma 83770 87607 Just-Dial Escorts Service 24X7 Avilable
Call Girls In Karkardooma 83770 87607 Just-Dial Escorts Service 24X7 Avilabledollysharma2066
 
南新罕布什尔大学毕业证学位证成绩单-学历认证
南新罕布什尔大学毕业证学位证成绩单-学历认证南新罕布什尔大学毕业证学位证成绩单-学历认证
南新罕布什尔大学毕业证学位证成绩单-学历认证kbdhl05e
 
(No.1)↠Young Call Girls in Sikanderpur (Gurgaon) ꧁❤ 9711911712 ❤꧂ Escorts
(No.1)↠Young Call Girls in Sikanderpur (Gurgaon) ꧁❤ 9711911712 ❤꧂ Escorts(No.1)↠Young Call Girls in Sikanderpur (Gurgaon) ꧁❤ 9711911712 ❤꧂ Escorts
(No.1)↠Young Call Girls in Sikanderpur (Gurgaon) ꧁❤ 9711911712 ❤꧂ EscortsDelhi Escorts Service
 
Authentic No 1 Amil Baba In Pakistan Amil Baba In Faisalabad Amil Baba In Kar...
Authentic No 1 Amil Baba In Pakistan Amil Baba In Faisalabad Amil Baba In Kar...Authentic No 1 Amil Baba In Pakistan Amil Baba In Faisalabad Amil Baba In Kar...
Authentic No 1 Amil Baba In Pakistan Amil Baba In Faisalabad Amil Baba In Kar...Authentic No 1 Amil Baba In Pakistan
 

Último (9)

(南达科他州立大学毕业证学位证成绩单-永久存档)
(南达科他州立大学毕业证学位证成绩单-永久存档)(南达科他州立大学毕业证学位证成绩单-永久存档)
(南达科他州立大学毕业证学位证成绩单-永久存档)
 
西伦敦大学毕业证学位证成绩单-怎么样做
西伦敦大学毕业证学位证成绩单-怎么样做西伦敦大学毕业证学位证成绩单-怎么样做
西伦敦大学毕业证学位证成绩单-怎么样做
 
E J Waggoner against Kellogg's Pantheism 8.pptx
E J Waggoner against Kellogg's Pantheism 8.pptxE J Waggoner against Kellogg's Pantheism 8.pptx
E J Waggoner against Kellogg's Pantheism 8.pptx
 
Inspiring Through Words Power of Inspiration.pptx
Inspiring Through Words Power of Inspiration.pptxInspiring Through Words Power of Inspiration.pptx
Inspiring Through Words Power of Inspiration.pptx
 
Call Girls In Karkardooma 83770 87607 Just-Dial Escorts Service 24X7 Avilable
Call Girls In Karkardooma 83770 87607 Just-Dial Escorts Service 24X7 AvilableCall Girls In Karkardooma 83770 87607 Just-Dial Escorts Service 24X7 Avilable
Call Girls In Karkardooma 83770 87607 Just-Dial Escorts Service 24X7 Avilable
 
南新罕布什尔大学毕业证学位证成绩单-学历认证
南新罕布什尔大学毕业证学位证成绩单-学历认证南新罕布什尔大学毕业证学位证成绩单-学历认证
南新罕布什尔大学毕业证学位证成绩单-学历认证
 
(No.1)↠Young Call Girls in Sikanderpur (Gurgaon) ꧁❤ 9711911712 ❤꧂ Escorts
(No.1)↠Young Call Girls in Sikanderpur (Gurgaon) ꧁❤ 9711911712 ❤꧂ Escorts(No.1)↠Young Call Girls in Sikanderpur (Gurgaon) ꧁❤ 9711911712 ❤꧂ Escorts
(No.1)↠Young Call Girls in Sikanderpur (Gurgaon) ꧁❤ 9711911712 ❤꧂ Escorts
 
Model Call Girl in Lado Sarai Delhi reach out to us at 🔝9953056974🔝
Model Call Girl in Lado Sarai Delhi reach out to us at 🔝9953056974🔝Model Call Girl in Lado Sarai Delhi reach out to us at 🔝9953056974🔝
Model Call Girl in Lado Sarai Delhi reach out to us at 🔝9953056974🔝
 
Authentic No 1 Amil Baba In Pakistan Amil Baba In Faisalabad Amil Baba In Kar...
Authentic No 1 Amil Baba In Pakistan Amil Baba In Faisalabad Amil Baba In Kar...Authentic No 1 Amil Baba In Pakistan Amil Baba In Faisalabad Amil Baba In Kar...
Authentic No 1 Amil Baba In Pakistan Amil Baba In Faisalabad Amil Baba In Kar...
 

LA R meetup - Nov 2013 - Eric Klusman

  • 1. plyr  for  Split-Apply-Combine     Automating  one  pattern  of  data  munging  and  analysis     Eric  Klusman   2013-11-14  
  • 2. What  is  plyr?   •  A  library  of  functions  for  R  for  doing  analysis  in   a  split-apply-combine  pattern   –  Split  the  data  into  subgroups   –  Apply  some  function  to  summarize,  model,  or  plot  each   subgroup   –  Combine  the  results  of  the  subgroups  back  together   •  Automate  the  loops  and  avoid  the   bookkeeping  code   •  Assumption:  data  can  be  processed  piecewise     2  
  • 3. Example:    Baby  Names   •  •  •  •  From  Hadley  Wickham,  h6p://plyr.had.co.nz/09-­‐user/     Top  1000  U.S.  boy  and  girl  baby  names  from  1880  to  2008   Derived  from  Social  Security  Administra4on  dataset   1000  *  2  *  129  =  258000  obs  on  4  vars   >  head(bnames)   >  tail(bnames)    year        name    percent  sex   1  1880        John  0.081541  boy   2  1880  William  0.080511  boy   3  1880      James  0.050057  boy   4  1880  Charles  0.045167  boy   5  1880    George  0.043292  boy   6  1880      Frank  0.027380  boy      year          name    percent    sex   257995  2008          Diya  0.000128  girl   257996  2008  Carleigh  0.000128  girl   257997  2008        Iyana  0.000128  girl   257998  2008      Kenley  0.000127  girl   257999  2008      Sloane  0.000127  girl   258000  2008    Elianna  0.000127  girl   3  
  • 4. Groupwise  summaries   •  What  if  we  want  to  compute  the  rank  of  a   name  within  a  sex  and  year?   •  Easy  for  a  single  year  and  sex;  hard  in  general.   #  Split   pieces  <-  split(bnames,  list(bnames$sex,  bnames$year))     #  Apply   results  <-  vector("list",  length(pieces))   for(i  in  seq_along(pieces))  {          piece  <-  pieces[[i]]          piece  <-  transform(piece,  rank  =  rank(-percent,  ties.method="first"))          results[[i]]  <-  piece   }     #  Combine   result  <-  do.call("rbind",  results)   4  
  • 5. Using  plyr   bnames  <-­‐  ddply(bnames,  c(“sex”,  “year”),  transform,                                  rank  =  rank(-­‐percent,  ties.method=“first”))   5  
  • 6. One-­‐line  summaries     ddply(bnames,  c(“name”),  summarize,  tot  =  sum(percent))     ddply(bnames,  c(“length”),  summarize,  tot  =  sum(percent))     ddply(bnames,  c(“year”,  “sex”),  summarize,  tot  =  sum(percent))     fl  <-­‐  ddply(bnames,  c(“year”,  “sex”,  “first”),  summarize,                                  tot  =  sum(percent))     library(ggplot2)     qplot(year,  tot,  data  =  fl,  geom  =  “line”,  color  =  “sex”,                    facets  =  ~  first)   6  
  • 8. plyr  functions  are  named  by  their   input  and  output  types   ioply  where  i  is  the  input  type  and  o  is  the  output  type   Function   Input  data  type   Output  data  type   ddply   Data  frame   Data  frame   aaply   Array   Array   daply   Dataframe   Array   d_ply   Dataframe   None;  used  for  plotting  or   printing   ldply   List   Dataframe   alply   Array   List   8  
  • 9. Base  R  vs.  plyr   Base   func-on   aggregate   d   d   ddply  +  colwise   apply   a   a/l   aaply  /  alply   by   l   l   dlply   lapply   l   l   llply   mapply   a   a/l   maply  /  mlply   replicate   r   a/l   raply  /  rlply   sapply   l   a   laply   sweep   a   a   -­‐   tapply   •        Input   Output   plyr  func-on   a   a   -­‐   Input  and  output  types  are  indicated  by  first  le6er:    array,  data  frame,  list,  replica4on   9