SlideShare uma empresa Scribd logo
1 de 37
Baixar para ler offline
PLYR & DPLYR
Mathew Hall
PLYR: LOADING DATA
myDT <- data.frame(
number=1:3,
letter=c('a','b','c')
)
myDT
## number letter
## 1 1 a
## 2 2 b
## 3 3 c
PLYR: LOADING DATA
D <-
read.csv("TB_burden_countries_2014-11-16.csv")
head(names(D))
## [1] "country" "iso2" "iso3" "iso_numeric" "g_whoregion"
## [6] "year"
PLYR: FILTERING
mean(D[D$country=='Afghanistan','e_prev_100k'])
## [1] 376.4
PLYR: FILTERING
with(D, mean(e_prev_100k[country=='Afghanistan']))
## [1] 376.4
PLYR: SELECTING COLUMNS
head(D[,c('e_prev_100k','e_prev_100k_lo',
'e_prev_100k_hi')])
## e_prev_100k e_prev_100k_lo e_prev_100k_hi
## 1 306 156 506
## 2 343 178 562
## 3 371 189 614
## 4 392 194 657
## 5 410 198 697
## 6 424 199 733
PLYR: SUMMARISING
ddply(D,
.(country),
summarise,
mid=mean(e_prev_100k)
)
## country mid
## 1 Afghanistan 376.42
## 2 Albania 29.33
## 3 Algeria 124.38
## 4 American Samoa 14.57
## 5 Andorra 29.92
## 6 Angola 388.58
PLYR: SUMMARISING
ddply(D,
.(country, year > 2000),
summarise,
lo=mean(e_prev_100k_lo),
hi=mean(e_prev_100k_hi)
)
## country year > 2000 lo hi
## 1 Afghanistan FALSE 193.45 695.00
## 2 Afghanistan TRUE 181.92 576.23
## 3 Albania FALSE 15.18 60.27
## 4 Albania TRUE 11.11 45.38
## 5 Algeria FALSE 57.00 184.36
## 6 Algeria TRUE 69.23 223.15
PLYR: SORTING
plot(arrange(D, e_prev_100k)$e_prev_100k)
PLYR: ADDING COLUMNS
mutate(D,
country_t = paste0(country,year))
## country iso2 iso3 iso_numeric g_whoregion year e_pop_num e_prev_100k
## 1 Afghanistan AF AFG 4 EMR 1990 11731193 306
## 2 Afghanistan AF AFG 4 EMR 1991 12612043 343
## 3 Afghanistan AF AFG 4 EMR 1992 13811876 371
## 4 Afghanistan AF AFG 4 EMR 1993 15175325 392
## 5 Afghanistan AF AFG 4 EMR 1994 16485018 410
## 6 Afghanistan AF AFG 4 EMR 1995 17586073 424
…
## e_inc_tbhiv_num_hi source_tbhiv c_cdr c_cdr_lo c_cdr_hi country_t
## 1 16 NA 20 15 24 Afghanistan1990
## 2 20 NA 96 80 110 Afghanistan1991
## 3 24 NA NA NA NA Afghanistan1992
## 4 31 NA NA NA NA Afghanistan1993
## 5 39 NA NA NA NA Afghanistan1994
## 6 47 NA NA NA NA Afghanistan1995
PLYR: ADDING COLUMNS
ddply(D,.(),transform,
country_t = paste0(country,year))
## country iso2 iso3 iso_numeric g_whoregion year e_pop_num e_prev_100k
## 1 Afghanistan AF AFG 4 EMR 1990 11731193 306
## 2 Afghanistan AF AFG 4 EMR 1991 12612043 343
## 3 Afghanistan AF AFG 4 EMR 1992 13811876 371
## 4 Afghanistan AF AFG 4 EMR 1993 15175325 392
## 5 Afghanistan AF AFG 4 EMR 1994 16485018 410
## 6 Afghanistan AF AFG 4 EMR 1995 17586073 424
…
## e_inc_tbhiv_num_hi source_tbhiv c_cdr c_cdr_lo c_cdr_hi country_t
## 1 16 NA 20 15 24 Afghanistan1990
## 2 20 NA 96 80 110 Afghanistan1991
## 3 24 NA NA NA NA Afghanistan1992
## 4 31 NA NA NA NA Afghanistan1993
## 5 39 NA NA NA NA Afghanistan1994
## 6 47 NA NA NA NA Afghanistan1995
DPLYR
split
apply
combine
summarise
mutate
Take a data frame, compute summaries,
produce a new data frame
PIPES
data
group by country
create column: incidence/population
summarise: mean rate
summarise: mean rate
WITHOUT PIPES
data
group by country
create column: incidence/population
data
data$rate <- incidence/population
summarise(group_by(data,country),mean.rate=mean(rate))
summarise: mean rate
PIPES
data
group by country
create column: incidence/population
data
data$rate <- incidence/population
data %>% group_by(country) %>% summarise(mean.rate=mean(rate))
summarise: mean rate
PIPES
group by country
create column: incidence/population
data
data %>%
mutate(rate = incidence/population) %>%
group_by(country) %>%
summarise(mean.rate=mean(rate))
PIPES
@romain_francois
DPLYR: LOAD
D <-
read.csv("TB_burden_countries_2014-11-16.csv")
D <- tbl_df(D)
DPLYR: LOAD
D <-
read.csv("TB_burden_countries_2014-11-16.csv")
D <- tbl_df(D)
DPLYR: SELECTING
D %>%
select(e_prev_100k,e_prev_100k_lo,e_prev_100k_hi)
## Source: local data frame [5,120 x 3]
##
## e_prev_100k e_prev_100k_lo e_prev_100k_hi
## 1 306 156 506
## 2 343 178 562
## 3 371 189 614
## 4 392 194 657
## 5 410 198 697
## 6 424 199 733
## 7 438 202 764
## 8 448 203 788
## 9 454 204 800
## 10 446 203 782
## .. ... ... ...
DPLYR: FILTER & SUMMARISE
D %>%
filter(country == 'Afghanistan') %>%
summarise(
mid=mean(e_prev_100k),
lo=mean(e_prev_100k_lo),
hi=mean(e_prev_100k_hi)
)
## Source: local data frame [1 x 3]
##
## mid lo hi
## 1 376.4 187.2 630.7
DPLYR: FILTER & SUMMARISE
D %>%
group_by(country) %>%
summarise(mid=mean(e_prev_100k))
## Source: local data frame [219 x 2]
##
## country mid
## 1 Afghanistan 376.417
## 2 Albania 29.333
## 3 Algeria 124.375
## 4 American Samoa 14.567
## 5 Andorra 29.921
## 6 Angola 388.583
## 7 Anguilla 52.417
## 8 Antigua and Barbuda 8.725
## 9 Argentina 55.500
## 10 Armenia 76.875
## .. ... ...
DPLYR: PLOTTING
D %>%
arrange(e_prev_100k) %>%
mutate(id=row_number()) %>%
select(id, e_prev_100k) %>% plot
DPLYR: ADDING COLUMNS
D %>%
mutate(country_t = paste0(country,year)) %>%
select(country_t)
## Source: local data frame [5,120 x 1]
##
## country_t
## 1 Afghanistan1990
## 2 Afghanistan1991
## 3 Afghanistan1992
## 4 Afghanistan1993
## 5 Afghanistan1994
## 6 Afghanistan1995
## 7 Afghanistan1996
## 8 Afghanistan1997
## 9 Afghanistan1998
## 10 Afghanistan1999
## .. ...
DPLYR: DATABASES
data <- data.frame(x=1:200000000,
y=runif(4), z=runif(50))
format(object.size(data), units="GB")
## [1] "3.7 Gb"
DPLYR: DATABASES
src <- src_sqlite("data.sqlite")
data <- tbl(src, "data")
DPLYR: DATABASES
data %>%
summarise(mean(x), max(y), mean(z))
## Source: sqlite 3.7.17 [data.sqlite]
## From: <derived table> [?? x 3]
##
## mean(x) max(y) mean(z)
## 1 1e+08 0.9008 0.4501
## .. ... ... ...
DPLYR: DATABASES
data %>%
summarise(mean(x), max(y), mean(z))
## Source: sqlite 3.7.17 [data.sqlite]
## From: <derived table> [?? x 3]
##
## mean(x) max(y) mean(z)
## 1 1e+08 0.9008 0.4501
## .. ... ... ...
Use data as if it were local data.frame
Processing done in the database
DPLYR
• Similar to data.table & sqldf
• (Almost) transparently use a DB for speed
• Works with pipes
• Lazy
POSTSCRIPT: PLYR
• Most functions provided by dplyr, except:
• **ply, nicer than sapply, tapply, etc.
PLYR: SUMMARISING
library(plyr)
E <- D[with(D, country=='Afghanistan'),]
ddply(E, .(country), summarise,
mid=mean(e_prev_100k),
lo=mean(e_prev_100k_lo),
hi=mean(e_prev_100k_hi) )
## country mid lo hi
## 1 Afghanistan 376.4 187.2 630.7
return data frame
PLYR: SUMMARISING
library(plyr)
E <- D[with(D, country=='Afghanistan'),]
ddply(E, .(country), summarise,
mid=mean(e_prev_100k),
lo=mean(e_prev_100k_lo),
hi=mean(e_prev_100k_hi) )
take data frame
return data frame
PLYR: SUMMARISING
library(plyr)
E <- D[with(D, country=='Afghanistan'),]
ddply(E, .(country), summarise,
mid=mean(e_prev_100k),
lo=mean(e_prev_100k_lo),
hi=mean(e_prev_100k_hi) )
take data frame
grouping columns
input
return data frame
PLYR: SUMMARISING
library(plyr)
E <- D[with(D, country=='Afghanistan'),]
ddply(E, .(country), summarise,
mid=mean(e_prev_100k),
lo=mean(e_prev_100k_lo),
hi=mean(e_prev_100k_hi) )
take data frame
grouping columns
input
new columns
PLYR: READ ALL CSV FILES
ldply(
list.files(pattern="test.*.csv"),
read.csv
)
## X a b
## 1 1 0.005018 0.329372
## 2 2 0.145043 -1.065981
## 3 3 0.930621 0.005956
## 4 4 0.047814 0.271862
## 5 5 0.916632 -0.630735
## 6 6 0.746737 0.385569
## 7 7 0.223346 1.312936
## 8 8 0.904864 -0.433001
## 9 9 0.653918 -1.065692
## 10 10 0.770291 -0.209569
## 11 11 0.387646 1.820283
## 12 12 0.361888 -0.164768
## 13 13 0.799788 -1.141612
## 14 14 0.960051 -0.368712
PLYR: PLOT EACH GROUP
d_ply(
long,
.(Sensor),
function(df){
pdf(paste0(df$Sensor[1],".pdf"));
plot(df$value);
dev.off()
})
RESOURCES
• dplyr vignettes: http://cran.r-project.org/web/packages/dplyr/vignettes
• plyr: StackOverflow, google

Mais conteúdo relacionado

Mais procurados

Next Generation Programming in R
Next Generation Programming in RNext Generation Programming in R
Next Generation Programming in RFlorian Uhlitz
 
4 R Tutorial DPLYR Apply Function
4 R Tutorial DPLYR Apply Function4 R Tutorial DPLYR Apply Function
4 R Tutorial DPLYR Apply FunctionSakthi Dasans
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting SpatialFAO
 
Data preparation covariates
Data preparation covariatesData preparation covariates
Data preparation covariatesFAO
 
Python Seaborn Data Visualization
Python Seaborn Data Visualization Python Seaborn Data Visualization
Python Seaborn Data Visualization Sourabh Sahu
 
Data preparation, depth function
Data preparation, depth functionData preparation, depth function
Data preparation, depth functionFAO
 
Introduction to pandas
Introduction to pandasIntroduction to pandas
Introduction to pandasPiyush rai
 
R getting spatial
R getting spatialR getting spatial
R getting spatialFAO
 
Pandas Cheat Sheet
Pandas Cheat SheetPandas Cheat Sheet
Pandas Cheat SheetACASH1011
 
5. R basics
5. R basics5. R basics
5. R basicsFAO
 
Introduction to Pandas and Time Series Analysis [PyCon DE]
Introduction to Pandas and Time Series Analysis [PyCon DE]Introduction to Pandas and Time Series Analysis [PyCon DE]
Introduction to Pandas and Time Series Analysis [PyCon DE]Alexander Hendorf
 
3 R Tutorial Data Structure
3 R Tutorial Data Structure3 R Tutorial Data Structure
3 R Tutorial Data StructureSakthi Dasans
 
Pandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetPandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetDr. Volkan OBAN
 
Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Dr. Volkan OBAN
 
Get up to Speed (Quick Guide to data.table in R and Pentaho PDI)
Get up to Speed (Quick Guide to data.table in R and Pentaho PDI)Get up to Speed (Quick Guide to data.table in R and Pentaho PDI)
Get up to Speed (Quick Guide to data.table in R and Pentaho PDI)Serban Tanasa
 
R data-import, data-export
R data-import, data-exportR data-import, data-export
R data-import, data-exportFAO
 

Mais procurados (20)

Next Generation Programming in R
Next Generation Programming in RNext Generation Programming in R
Next Generation Programming in R
 
4 R Tutorial DPLYR Apply Function
4 R Tutorial DPLYR Apply Function4 R Tutorial DPLYR Apply Function
4 R Tutorial DPLYR Apply Function
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting Spatial
 
R code for data manipulation
R code for data manipulationR code for data manipulation
R code for data manipulation
 
Data preparation covariates
Data preparation covariatesData preparation covariates
Data preparation covariates
 
Python Seaborn Data Visualization
Python Seaborn Data Visualization Python Seaborn Data Visualization
Python Seaborn Data Visualization
 
Data preparation, depth function
Data preparation, depth functionData preparation, depth function
Data preparation, depth function
 
Introduction to pandas
Introduction to pandasIntroduction to pandas
Introduction to pandas
 
Introduction to R
Introduction to RIntroduction to R
Introduction to R
 
R getting spatial
R getting spatialR getting spatial
R getting spatial
 
Pandas Cheat Sheet
Pandas Cheat SheetPandas Cheat Sheet
Pandas Cheat Sheet
 
5. R basics
5. R basics5. R basics
5. R basics
 
Introduction to Pandas and Time Series Analysis [PyCon DE]
Introduction to Pandas and Time Series Analysis [PyCon DE]Introduction to Pandas and Time Series Analysis [PyCon DE]
Introduction to Pandas and Time Series Analysis [PyCon DE]
 
3 R Tutorial Data Structure
3 R Tutorial Data Structure3 R Tutorial Data Structure
3 R Tutorial Data Structure
 
R language introduction
R language introductionR language introduction
R language introduction
 
Pandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetPandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheet
 
Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet
 
Get up to Speed (Quick Guide to data.table in R and Pentaho PDI)
Get up to Speed (Quick Guide to data.table in R and Pentaho PDI)Get up to Speed (Quick Guide to data.table in R and Pentaho PDI)
Get up to Speed (Quick Guide to data.table in R and Pentaho PDI)
 
R data-import, data-export
R data-import, data-exportR data-import, data-export
R data-import, data-export
 
R programming language
R programming languageR programming language
R programming language
 

Destaque

Chunked, dplyr for large text files
Chunked, dplyr for large text filesChunked, dplyr for large text files
Chunked, dplyr for large text filesEdwin de Jonge
 
Sheffield R Jan 2015 - Using R to investigate parasite infections in Asian el...
Sheffield R Jan 2015 - Using R to investigate parasite infections in Asian el...Sheffield R Jan 2015 - Using R to investigate parasite infections in Asian el...
Sheffield R Jan 2015 - Using R to investigate parasite infections in Asian el...Paul Richards
 
How to win $10m - analysing DOTA2 data in R (Sheffield R Users Group - May)
How to win $10m - analysing DOTA2 data in R (Sheffield R Users Group - May)How to win $10m - analysing DOTA2 data in R (Sheffield R Users Group - May)
How to win $10m - analysing DOTA2 data in R (Sheffield R Users Group - May)Paul Richards
 
Sheffield_R_ July meeting - Interacting with R - IDEs, Git and workflow
Sheffield_R_ July meeting - Interacting with R - IDEs, Git and workflowSheffield_R_ July meeting - Interacting with R - IDEs, Git and workflow
Sheffield_R_ July meeting - Interacting with R - IDEs, Git and workflowPaul Richards
 
Plyr, one data analytic strategy
Plyr, one data analytic strategyPlyr, one data analytic strategy
Plyr, one data analytic strategyHadley Wickham
 
Introduction to knitr - May Sheffield R Users group
Introduction to knitr - May Sheffield R Users groupIntroduction to knitr - May Sheffield R Users group
Introduction to knitr - May Sheffield R Users groupPaul Richards
 
WF ED 540, Class Meeting 3 - Introduction to dplyr, 2016
WF ED 540, Class Meeting 3 - Introduction to dplyr, 2016WF ED 540, Class Meeting 3 - Introduction to dplyr, 2016
WF ED 540, Class Meeting 3 - Introduction to dplyr, 2016Penn State University
 
20160611 kintone Café 高知 Vol.3 LT資料
20160611 kintone Café 高知 Vol.3 LT資料20160611 kintone Café 高知 Vol.3 LT資料
20160611 kintone Café 高知 Vol.3 LT資料安隆 沖
 
R Brown-bag seminars : Seminar-8
R Brown-bag seminars : Seminar-8R Brown-bag seminars : Seminar-8
R Brown-bag seminars : Seminar-8Muhammad Nabi Ahmad
 
Paquete ggplot - Potencia y facilidad para generar gráficos en R
Paquete ggplot - Potencia y facilidad para generar gráficos en RPaquete ggplot - Potencia y facilidad para generar gráficos en R
Paquete ggplot - Potencia y facilidad para generar gráficos en RNestor Montaño
 
Análisis espacial con R (asignatura de Master - UPM)
Análisis espacial con R (asignatura de Master - UPM)Análisis espacial con R (asignatura de Master - UPM)
Análisis espacial con R (asignatura de Master - UPM)Vladimir Gutierrez, PhD
 
Learn to use dplyr (Feb 2015 Philly R User Meetup)
Learn to use dplyr (Feb 2015 Philly R User Meetup)Learn to use dplyr (Feb 2015 Philly R User Meetup)
Learn to use dplyr (Feb 2015 Philly R User Meetup)Fan Li
 
WF ED 540, Class Meeting 3 - mutate and summarise, 2016
WF ED 540, Class Meeting 3 - mutate and summarise, 2016WF ED 540, Class Meeting 3 - mutate and summarise, 2016
WF ED 540, Class Meeting 3 - mutate and summarise, 2016Penn State University
 

Destaque (20)

01 Intro
01 Intro01 Intro
01 Intro
 
02 Ddply
02 Ddply02 Ddply
02 Ddply
 
03 Modelling
03 Modelling03 Modelling
03 Modelling
 
Chunked, dplyr for large text files
Chunked, dplyr for large text filesChunked, dplyr for large text files
Chunked, dplyr for large text files
 
Sheffield R Jan 2015 - Using R to investigate parasite infections in Asian el...
Sheffield R Jan 2015 - Using R to investigate parasite infections in Asian el...Sheffield R Jan 2015 - Using R to investigate parasite infections in Asian el...
Sheffield R Jan 2015 - Using R to investigate parasite infections in Asian el...
 
How to win $10m - analysing DOTA2 data in R (Sheffield R Users Group - May)
How to win $10m - analysing DOTA2 data in R (Sheffield R Users Group - May)How to win $10m - analysing DOTA2 data in R (Sheffield R Users Group - May)
How to win $10m - analysing DOTA2 data in R (Sheffield R Users Group - May)
 
Sheffield_R_ July meeting - Interacting with R - IDEs, Git and workflow
Sheffield_R_ July meeting - Interacting with R - IDEs, Git and workflowSheffield_R_ July meeting - Interacting with R - IDEs, Git and workflow
Sheffield_R_ July meeting - Interacting with R - IDEs, Git and workflow
 
Plyr, one data analytic strategy
Plyr, one data analytic strategyPlyr, one data analytic strategy
Plyr, one data analytic strategy
 
Introduction to knitr - May Sheffield R Users group
Introduction to knitr - May Sheffield R Users groupIntroduction to knitr - May Sheffield R Users group
Introduction to knitr - May Sheffield R Users group
 
27 development
27 development27 development
27 development
 
23 data-structures
23 data-structures23 data-structures
23 data-structures
 
WF ED 540, Class Meeting 3 - Introduction to dplyr, 2016
WF ED 540, Class Meeting 3 - Introduction to dplyr, 2016WF ED 540, Class Meeting 3 - Introduction to dplyr, 2016
WF ED 540, Class Meeting 3 - Introduction to dplyr, 2016
 
20160611 kintone Café 高知 Vol.3 LT資料
20160611 kintone Café 高知 Vol.3 LT資料20160611 kintone Café 高知 Vol.3 LT資料
20160611 kintone Café 高知 Vol.3 LT資料
 
Rlecturenotes
RlecturenotesRlecturenotes
Rlecturenotes
 
R Brown-bag seminars : Seminar-8
R Brown-bag seminars : Seminar-8R Brown-bag seminars : Seminar-8
R Brown-bag seminars : Seminar-8
 
Paquete ggplot - Potencia y facilidad para generar gráficos en R
Paquete ggplot - Potencia y facilidad para generar gráficos en RPaquete ggplot - Potencia y facilidad para generar gráficos en R
Paquete ggplot - Potencia y facilidad para generar gráficos en R
 
Análisis espacial con R (asignatura de Master - UPM)
Análisis espacial con R (asignatura de Master - UPM)Análisis espacial con R (asignatura de Master - UPM)
Análisis espacial con R (asignatura de Master - UPM)
 
Learn to use dplyr (Feb 2015 Philly R User Meetup)
Learn to use dplyr (Feb 2015 Philly R User Meetup)Learn to use dplyr (Feb 2015 Philly R User Meetup)
Learn to use dplyr (Feb 2015 Philly R User Meetup)
 
R seminar dplyr package
R seminar dplyr packageR seminar dplyr package
R seminar dplyr package
 
WF ED 540, Class Meeting 3 - mutate and summarise, 2016
WF ED 540, Class Meeting 3 - mutate and summarise, 2016WF ED 540, Class Meeting 3 - mutate and summarise, 2016
WF ED 540, Class Meeting 3 - mutate and summarise, 2016
 

Semelhante a Dplyr and Plyr

Introduction to R
Introduction to RIntroduction to R
Introduction to RStacy Irwin
 
M12 random forest-part01
M12 random forest-part01M12 random forest-part01
M12 random forest-part01Raman Kannan
 
Data Mining & Analytics for U.S. Airlines On-Time Performance
Data Mining & Analytics for U.S. Airlines On-Time Performance Data Mining & Analytics for U.S. Airlines On-Time Performance
Data Mining & Analytics for U.S. Airlines On-Time Performance Mingxuan Li
 
MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709Min-hyung Kim
 
Regression and Classification with R
Regression and Classification with RRegression and Classification with R
Regression and Classification with RYanchang Zhao
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeWim Godden
 
Writing Readable Code with Pipes
Writing Readable Code with PipesWriting Readable Code with Pipes
Writing Readable Code with PipesRsquared Academy
 
SevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrSevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrRomain Francois
 
Data manipulation and visualization in r 20190711 myanmarucsy
Data manipulation and visualization in r 20190711 myanmarucsyData manipulation and visualization in r 20190711 myanmarucsy
Data manipulation and visualization in r 20190711 myanmarucsySmartHinJ
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisUniversity of Illinois,Chicago
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisUniversity of Illinois,Chicago
 
令和から本気出す
令和から本気出す令和から本気出す
令和から本気出すTakashi Kitano
 
Forecasting Revenue With Stationary Time Series Models
Forecasting Revenue With Stationary Time Series ModelsForecasting Revenue With Stationary Time Series Models
Forecasting Revenue With Stationary Time Series ModelsGeoffery Mullings
 
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)Wataru Shito
 
Postgres Conference (PgCon) New York 2019
Postgres Conference (PgCon) New York 2019Postgres Conference (PgCon) New York 2019
Postgres Conference (PgCon) New York 2019Ibrar Ahmed
 
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptxfINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptxdataKarthik
 

Semelhante a Dplyr and Plyr (20)

Introduction to R
Introduction to RIntroduction to R
Introduction to R
 
Introduction to tibbles
Introduction to tibblesIntroduction to tibbles
Introduction to tibbles
 
10. R getting spatial
10.  R getting spatial10.  R getting spatial
10. R getting spatial
 
M12 random forest-part01
M12 random forest-part01M12 random forest-part01
M12 random forest-part01
 
Data Mining & Analytics for U.S. Airlines On-Time Performance
Data Mining & Analytics for U.S. Airlines On-Time Performance Data Mining & Analytics for U.S. Airlines On-Time Performance
Data Mining & Analytics for U.S. Airlines On-Time Performance
 
MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709
 
Regression and Classification with R
Regression and Classification with RRegression and Classification with R
Regression and Classification with R
 
CLIM Undergraduate Workshop: (Attachment) Performing Extreme Value Analysis (...
CLIM Undergraduate Workshop: (Attachment) Performing Extreme Value Analysis (...CLIM Undergraduate Workshop: (Attachment) Performing Extreme Value Analysis (...
CLIM Undergraduate Workshop: (Attachment) Performing Extreme Value Analysis (...
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the code
 
Writing Readable Code with Pipes
Writing Readable Code with PipesWriting Readable Code with Pipes
Writing Readable Code with Pipes
 
SevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrSevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittr
 
Data manipulation and visualization in r 20190711 myanmarucsy
Data manipulation and visualization in r 20190711 myanmarucsyData manipulation and visualization in r 20190711 myanmarucsy
Data manipulation and visualization in r 20190711 myanmarucsy
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency Analysis
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency Analysis
 
令和から本気出す
令和から本気出す令和から本気出す
令和から本気出す
 
Forecasting Revenue With Stationary Time Series Models
Forecasting Revenue With Stationary Time Series ModelsForecasting Revenue With Stationary Time Series Models
Forecasting Revenue With Stationary Time Series Models
 
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
第5回 様々なファイル形式の読み込みとデータの書き出し(解答付き)
 
Big Data Analytics Lab File
Big Data Analytics Lab FileBig Data Analytics Lab File
Big Data Analytics Lab File
 
Postgres Conference (PgCon) New York 2019
Postgres Conference (PgCon) New York 2019Postgres Conference (PgCon) New York 2019
Postgres Conference (PgCon) New York 2019
 
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptxfINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
fINAL Lesson_5_Data_Manipulation_using_R_v1.pptx
 

Mais de Paul Richards

SheffieldR July Meeting - Multiple Imputation with Chained Equations (MICE) p...
SheffieldR July Meeting - Multiple Imputation with Chained Equations (MICE) p...SheffieldR July Meeting - Multiple Imputation with Chained Equations (MICE) p...
SheffieldR July Meeting - Multiple Imputation with Chained Equations (MICE) p...Paul Richards
 
Preparing and submitting a package to CRAN - June Sanderson, Sheffield R User...
Preparing and submitting a package to CRAN - June Sanderson, Sheffield R User...Preparing and submitting a package to CRAN - June Sanderson, Sheffield R User...
Preparing and submitting a package to CRAN - June Sanderson, Sheffield R User...Paul Richards
 
Querying open data with R - Talk at April SheffieldR Users Gp
Querying open data with R - Talk at April SheffieldR Users GpQuerying open data with R - Talk at April SheffieldR Users Gp
Querying open data with R - Talk at April SheffieldR Users GpPaul Richards
 
OrienteeRing - using R to optimise mini mountain marathon routes - Pete Dodd ...
OrienteeRing - using R to optimise mini mountain marathon routes - Pete Dodd ...OrienteeRing - using R to optimise mini mountain marathon routes - Pete Dodd ...
OrienteeRing - using R to optimise mini mountain marathon routes - Pete Dodd ...Paul Richards
 
Phylogeny in R - Bianca Santini Sheffield R Users March 2015
Phylogeny in R - Bianca Santini Sheffield R Users March 2015Phylogeny in R - Bianca Santini Sheffield R Users March 2015
Phylogeny in R - Bianca Santini Sheffield R Users March 2015Paul Richards
 
Intro to ggplot2 - Sheffield R Users Group, Feb 2015
Intro to ggplot2 - Sheffield R Users Group, Feb 2015Intro to ggplot2 - Sheffield R Users Group, Feb 2015
Intro to ggplot2 - Sheffield R Users Group, Feb 2015Paul Richards
 
Introduction to Shiny for building web apps in R
Introduction to Shiny for building web apps in RIntroduction to Shiny for building web apps in R
Introduction to Shiny for building web apps in RPaul Richards
 

Mais de Paul Richards (7)

SheffieldR July Meeting - Multiple Imputation with Chained Equations (MICE) p...
SheffieldR July Meeting - Multiple Imputation with Chained Equations (MICE) p...SheffieldR July Meeting - Multiple Imputation with Chained Equations (MICE) p...
SheffieldR July Meeting - Multiple Imputation with Chained Equations (MICE) p...
 
Preparing and submitting a package to CRAN - June Sanderson, Sheffield R User...
Preparing and submitting a package to CRAN - June Sanderson, Sheffield R User...Preparing and submitting a package to CRAN - June Sanderson, Sheffield R User...
Preparing and submitting a package to CRAN - June Sanderson, Sheffield R User...
 
Querying open data with R - Talk at April SheffieldR Users Gp
Querying open data with R - Talk at April SheffieldR Users GpQuerying open data with R - Talk at April SheffieldR Users Gp
Querying open data with R - Talk at April SheffieldR Users Gp
 
OrienteeRing - using R to optimise mini mountain marathon routes - Pete Dodd ...
OrienteeRing - using R to optimise mini mountain marathon routes - Pete Dodd ...OrienteeRing - using R to optimise mini mountain marathon routes - Pete Dodd ...
OrienteeRing - using R to optimise mini mountain marathon routes - Pete Dodd ...
 
Phylogeny in R - Bianca Santini Sheffield R Users March 2015
Phylogeny in R - Bianca Santini Sheffield R Users March 2015Phylogeny in R - Bianca Santini Sheffield R Users March 2015
Phylogeny in R - Bianca Santini Sheffield R Users March 2015
 
Intro to ggplot2 - Sheffield R Users Group, Feb 2015
Intro to ggplot2 - Sheffield R Users Group, Feb 2015Intro to ggplot2 - Sheffield R Users Group, Feb 2015
Intro to ggplot2 - Sheffield R Users Group, Feb 2015
 
Introduction to Shiny for building web apps in R
Introduction to Shiny for building web apps in RIntroduction to Shiny for building web apps in R
Introduction to Shiny for building web apps in R
 

Último

Software Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsSoftware Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsArshad QA
 
Direct Style Effect Systems - The Print[A] Example - A Comprehension Aid
Direct Style Effect Systems -The Print[A] Example- A Comprehension AidDirect Style Effect Systems -The Print[A] Example- A Comprehension Aid
Direct Style Effect Systems - The Print[A] Example - A Comprehension AidPhilip Schwarz
 
AI Mastery 201: Elevating Your Workflow with Advanced LLM Techniques
AI Mastery 201: Elevating Your Workflow with Advanced LLM TechniquesAI Mastery 201: Elevating Your Workflow with Advanced LLM Techniques
AI Mastery 201: Elevating Your Workflow with Advanced LLM TechniquesVictorSzoltysek
 
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...panagenda
 
The Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdfThe Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdfkalichargn70th171
 
Right Money Management App For Your Financial Goals
Right Money Management App For Your Financial GoalsRight Money Management App For Your Financial Goals
Right Money Management App For Your Financial GoalsJhone kinadey
 
Exploring the Best Video Editing App.pdf
Exploring the Best Video Editing App.pdfExploring the Best Video Editing App.pdf
Exploring the Best Video Editing App.pdfproinshot.com
 
Unlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language ModelsUnlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language Modelsaagamshah0812
 
The Guide to Integrating Generative AI into Unified Continuous Testing Platfo...
The Guide to Integrating Generative AI into Unified Continuous Testing Platfo...The Guide to Integrating Generative AI into Unified Continuous Testing Platfo...
The Guide to Integrating Generative AI into Unified Continuous Testing Platfo...kalichargn70th171
 
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...ICS
 
Introducing Microsoft’s new Enterprise Work Management (EWM) Solution
Introducing Microsoft’s new Enterprise Work Management (EWM) SolutionIntroducing Microsoft’s new Enterprise Work Management (EWM) Solution
Introducing Microsoft’s new Enterprise Work Management (EWM) SolutionOnePlan Solutions
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsAndolasoft Inc
 
Define the academic and professional writing..pdf
Define the academic and professional writing..pdfDefine the academic and professional writing..pdf
Define the academic and professional writing..pdfPearlKirahMaeRagusta1
 
Unveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
Unveiling the Tech Salsa of LAMs with Janus in Real-Time ApplicationsUnveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
Unveiling the Tech Salsa of LAMs with Janus in Real-Time ApplicationsAlberto González Trastoy
 
Azure_Native_Qumulo_High_Performance_Compute_Benchmarks.pdf
Azure_Native_Qumulo_High_Performance_Compute_Benchmarks.pdfAzure_Native_Qumulo_High_Performance_Compute_Benchmarks.pdf
Azure_Native_Qumulo_High_Performance_Compute_Benchmarks.pdfryanfarris8
 
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...Steffen Staab
 
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...OnePlan Solutions
 
5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdfWave PLM
 
introduction-to-automotive Andoid os-csimmonds-ndctechtown-2021.pdf
introduction-to-automotive Andoid os-csimmonds-ndctechtown-2021.pdfintroduction-to-automotive Andoid os-csimmonds-ndctechtown-2021.pdf
introduction-to-automotive Andoid os-csimmonds-ndctechtown-2021.pdfVishalKumarJha10
 

Último (20)

Software Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsSoftware Quality Assurance Interview Questions
Software Quality Assurance Interview Questions
 
Direct Style Effect Systems - The Print[A] Example - A Comprehension Aid
Direct Style Effect Systems -The Print[A] Example- A Comprehension AidDirect Style Effect Systems -The Print[A] Example- A Comprehension Aid
Direct Style Effect Systems - The Print[A] Example - A Comprehension Aid
 
AI Mastery 201: Elevating Your Workflow with Advanced LLM Techniques
AI Mastery 201: Elevating Your Workflow with Advanced LLM TechniquesAI Mastery 201: Elevating Your Workflow with Advanced LLM Techniques
AI Mastery 201: Elevating Your Workflow with Advanced LLM Techniques
 
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
 
The Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdfThe Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdf
 
Right Money Management App For Your Financial Goals
Right Money Management App For Your Financial GoalsRight Money Management App For Your Financial Goals
Right Money Management App For Your Financial Goals
 
Exploring the Best Video Editing App.pdf
Exploring the Best Video Editing App.pdfExploring the Best Video Editing App.pdf
Exploring the Best Video Editing App.pdf
 
Unlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language ModelsUnlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language Models
 
The Guide to Integrating Generative AI into Unified Continuous Testing Platfo...
The Guide to Integrating Generative AI into Unified Continuous Testing Platfo...The Guide to Integrating Generative AI into Unified Continuous Testing Platfo...
The Guide to Integrating Generative AI into Unified Continuous Testing Platfo...
 
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
 
Introducing Microsoft’s new Enterprise Work Management (EWM) Solution
Introducing Microsoft’s new Enterprise Work Management (EWM) SolutionIntroducing Microsoft’s new Enterprise Work Management (EWM) Solution
Introducing Microsoft’s new Enterprise Work Management (EWM) Solution
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.js
 
Define the academic and professional writing..pdf
Define the academic and professional writing..pdfDefine the academic and professional writing..pdf
Define the academic and professional writing..pdf
 
Unveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
Unveiling the Tech Salsa of LAMs with Janus in Real-Time ApplicationsUnveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
Unveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
 
Azure_Native_Qumulo_High_Performance_Compute_Benchmarks.pdf
Azure_Native_Qumulo_High_Performance_Compute_Benchmarks.pdfAzure_Native_Qumulo_High_Performance_Compute_Benchmarks.pdf
Azure_Native_Qumulo_High_Performance_Compute_Benchmarks.pdf
 
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
 
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
 
5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf
 
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICECHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
 
introduction-to-automotive Andoid os-csimmonds-ndctechtown-2021.pdf
introduction-to-automotive Andoid os-csimmonds-ndctechtown-2021.pdfintroduction-to-automotive Andoid os-csimmonds-ndctechtown-2021.pdf
introduction-to-automotive Andoid os-csimmonds-ndctechtown-2021.pdf
 

Dplyr and Plyr

  • 2. PLYR: LOADING DATA myDT <- data.frame( number=1:3, letter=c('a','b','c') ) myDT ## number letter ## 1 1 a ## 2 2 b ## 3 3 c
  • 3. PLYR: LOADING DATA D <- read.csv("TB_burden_countries_2014-11-16.csv") head(names(D)) ## [1] "country" "iso2" "iso3" "iso_numeric" "g_whoregion" ## [6] "year"
  • 6. PLYR: SELECTING COLUMNS head(D[,c('e_prev_100k','e_prev_100k_lo', 'e_prev_100k_hi')]) ## e_prev_100k e_prev_100k_lo e_prev_100k_hi ## 1 306 156 506 ## 2 343 178 562 ## 3 371 189 614 ## 4 392 194 657 ## 5 410 198 697 ## 6 424 199 733
  • 7. PLYR: SUMMARISING ddply(D, .(country), summarise, mid=mean(e_prev_100k) ) ## country mid ## 1 Afghanistan 376.42 ## 2 Albania 29.33 ## 3 Algeria 124.38 ## 4 American Samoa 14.57 ## 5 Andorra 29.92 ## 6 Angola 388.58
  • 8. PLYR: SUMMARISING ddply(D, .(country, year > 2000), summarise, lo=mean(e_prev_100k_lo), hi=mean(e_prev_100k_hi) ) ## country year > 2000 lo hi ## 1 Afghanistan FALSE 193.45 695.00 ## 2 Afghanistan TRUE 181.92 576.23 ## 3 Albania FALSE 15.18 60.27 ## 4 Albania TRUE 11.11 45.38 ## 5 Algeria FALSE 57.00 184.36 ## 6 Algeria TRUE 69.23 223.15
  • 10. PLYR: ADDING COLUMNS mutate(D, country_t = paste0(country,year)) ## country iso2 iso3 iso_numeric g_whoregion year e_pop_num e_prev_100k ## 1 Afghanistan AF AFG 4 EMR 1990 11731193 306 ## 2 Afghanistan AF AFG 4 EMR 1991 12612043 343 ## 3 Afghanistan AF AFG 4 EMR 1992 13811876 371 ## 4 Afghanistan AF AFG 4 EMR 1993 15175325 392 ## 5 Afghanistan AF AFG 4 EMR 1994 16485018 410 ## 6 Afghanistan AF AFG 4 EMR 1995 17586073 424 … ## e_inc_tbhiv_num_hi source_tbhiv c_cdr c_cdr_lo c_cdr_hi country_t ## 1 16 NA 20 15 24 Afghanistan1990 ## 2 20 NA 96 80 110 Afghanistan1991 ## 3 24 NA NA NA NA Afghanistan1992 ## 4 31 NA NA NA NA Afghanistan1993 ## 5 39 NA NA NA NA Afghanistan1994 ## 6 47 NA NA NA NA Afghanistan1995
  • 11. PLYR: ADDING COLUMNS ddply(D,.(),transform, country_t = paste0(country,year)) ## country iso2 iso3 iso_numeric g_whoregion year e_pop_num e_prev_100k ## 1 Afghanistan AF AFG 4 EMR 1990 11731193 306 ## 2 Afghanistan AF AFG 4 EMR 1991 12612043 343 ## 3 Afghanistan AF AFG 4 EMR 1992 13811876 371 ## 4 Afghanistan AF AFG 4 EMR 1993 15175325 392 ## 5 Afghanistan AF AFG 4 EMR 1994 16485018 410 ## 6 Afghanistan AF AFG 4 EMR 1995 17586073 424 … ## e_inc_tbhiv_num_hi source_tbhiv c_cdr c_cdr_lo c_cdr_hi country_t ## 1 16 NA 20 15 24 Afghanistan1990 ## 2 20 NA 96 80 110 Afghanistan1991 ## 3 24 NA NA NA NA Afghanistan1992 ## 4 31 NA NA NA NA Afghanistan1993 ## 5 39 NA NA NA NA Afghanistan1994 ## 6 47 NA NA NA NA Afghanistan1995
  • 12. DPLYR split apply combine summarise mutate Take a data frame, compute summaries, produce a new data frame
  • 13. PIPES data group by country create column: incidence/population summarise: mean rate
  • 14. summarise: mean rate WITHOUT PIPES data group by country create column: incidence/population data data$rate <- incidence/population summarise(group_by(data,country),mean.rate=mean(rate))
  • 15. summarise: mean rate PIPES data group by country create column: incidence/population data data$rate <- incidence/population data %>% group_by(country) %>% summarise(mean.rate=mean(rate))
  • 16. summarise: mean rate PIPES group by country create column: incidence/population data data %>% mutate(rate = incidence/population) %>% group_by(country) %>% summarise(mean.rate=mean(rate))
  • 20. DPLYR: SELECTING D %>% select(e_prev_100k,e_prev_100k_lo,e_prev_100k_hi) ## Source: local data frame [5,120 x 3] ## ## e_prev_100k e_prev_100k_lo e_prev_100k_hi ## 1 306 156 506 ## 2 343 178 562 ## 3 371 189 614 ## 4 392 194 657 ## 5 410 198 697 ## 6 424 199 733 ## 7 438 202 764 ## 8 448 203 788 ## 9 454 204 800 ## 10 446 203 782 ## .. ... ... ...
  • 21. DPLYR: FILTER & SUMMARISE D %>% filter(country == 'Afghanistan') %>% summarise( mid=mean(e_prev_100k), lo=mean(e_prev_100k_lo), hi=mean(e_prev_100k_hi) ) ## Source: local data frame [1 x 3] ## ## mid lo hi ## 1 376.4 187.2 630.7
  • 22. DPLYR: FILTER & SUMMARISE D %>% group_by(country) %>% summarise(mid=mean(e_prev_100k)) ## Source: local data frame [219 x 2] ## ## country mid ## 1 Afghanistan 376.417 ## 2 Albania 29.333 ## 3 Algeria 124.375 ## 4 American Samoa 14.567 ## 5 Andorra 29.921 ## 6 Angola 388.583 ## 7 Anguilla 52.417 ## 8 Antigua and Barbuda 8.725 ## 9 Argentina 55.500 ## 10 Armenia 76.875 ## .. ... ...
  • 23. DPLYR: PLOTTING D %>% arrange(e_prev_100k) %>% mutate(id=row_number()) %>% select(id, e_prev_100k) %>% plot
  • 24. DPLYR: ADDING COLUMNS D %>% mutate(country_t = paste0(country,year)) %>% select(country_t) ## Source: local data frame [5,120 x 1] ## ## country_t ## 1 Afghanistan1990 ## 2 Afghanistan1991 ## 3 Afghanistan1992 ## 4 Afghanistan1993 ## 5 Afghanistan1994 ## 6 Afghanistan1995 ## 7 Afghanistan1996 ## 8 Afghanistan1997 ## 9 Afghanistan1998 ## 10 Afghanistan1999 ## .. ...
  • 25. DPLYR: DATABASES data <- data.frame(x=1:200000000, y=runif(4), z=runif(50)) format(object.size(data), units="GB") ## [1] "3.7 Gb"
  • 26. DPLYR: DATABASES src <- src_sqlite("data.sqlite") data <- tbl(src, "data")
  • 27. DPLYR: DATABASES data %>% summarise(mean(x), max(y), mean(z)) ## Source: sqlite 3.7.17 [data.sqlite] ## From: <derived table> [?? x 3] ## ## mean(x) max(y) mean(z) ## 1 1e+08 0.9008 0.4501 ## .. ... ... ...
  • 28. DPLYR: DATABASES data %>% summarise(mean(x), max(y), mean(z)) ## Source: sqlite 3.7.17 [data.sqlite] ## From: <derived table> [?? x 3] ## ## mean(x) max(y) mean(z) ## 1 1e+08 0.9008 0.4501 ## .. ... ... ... Use data as if it were local data.frame Processing done in the database
  • 29. DPLYR • Similar to data.table & sqldf • (Almost) transparently use a DB for speed • Works with pipes • Lazy
  • 30. POSTSCRIPT: PLYR • Most functions provided by dplyr, except: • **ply, nicer than sapply, tapply, etc.
  • 31. PLYR: SUMMARISING library(plyr) E <- D[with(D, country=='Afghanistan'),] ddply(E, .(country), summarise, mid=mean(e_prev_100k), lo=mean(e_prev_100k_lo), hi=mean(e_prev_100k_hi) ) ## country mid lo hi ## 1 Afghanistan 376.4 187.2 630.7
  • 32. return data frame PLYR: SUMMARISING library(plyr) E <- D[with(D, country=='Afghanistan'),] ddply(E, .(country), summarise, mid=mean(e_prev_100k), lo=mean(e_prev_100k_lo), hi=mean(e_prev_100k_hi) ) take data frame
  • 33. return data frame PLYR: SUMMARISING library(plyr) E <- D[with(D, country=='Afghanistan'),] ddply(E, .(country), summarise, mid=mean(e_prev_100k), lo=mean(e_prev_100k_lo), hi=mean(e_prev_100k_hi) ) take data frame grouping columns input
  • 34. return data frame PLYR: SUMMARISING library(plyr) E <- D[with(D, country=='Afghanistan'),] ddply(E, .(country), summarise, mid=mean(e_prev_100k), lo=mean(e_prev_100k_lo), hi=mean(e_prev_100k_hi) ) take data frame grouping columns input new columns
  • 35. PLYR: READ ALL CSV FILES ldply( list.files(pattern="test.*.csv"), read.csv ) ## X a b ## 1 1 0.005018 0.329372 ## 2 2 0.145043 -1.065981 ## 3 3 0.930621 0.005956 ## 4 4 0.047814 0.271862 ## 5 5 0.916632 -0.630735 ## 6 6 0.746737 0.385569 ## 7 7 0.223346 1.312936 ## 8 8 0.904864 -0.433001 ## 9 9 0.653918 -1.065692 ## 10 10 0.770291 -0.209569 ## 11 11 0.387646 1.820283 ## 12 12 0.361888 -0.164768 ## 13 13 0.799788 -1.141612 ## 14 14 0.960051 -0.368712
  • 36. PLYR: PLOT EACH GROUP d_ply( long, .(Sensor), function(df){ pdf(paste0(df$Sensor[1],".pdf")); plot(df$value); dev.off() })
  • 37. RESOURCES • dplyr vignettes: http://cran.r-project.org/web/packages/dplyr/vignettes • plyr: Stack Overflow, Google