SlideShare a Scribd company logo
1 of 45
Download to read offline
R for Everything
Jared P. Lander
2/45
3/45
4/45
Giants
5/45
Compressed Data Online
6/45
7/45
Create Directory
# See if directory exists 
dir.exists('FootballTemp')
[1] FALSE
# create it 
dir.create('FootballTemp') 
# check again 
dir.exists('FootballTemp')
[1] TRUE
8/45
9/45
Download Files
download.file('http://www.jaredlander.com/data/Football1415.tar.gz',  
              destfile='FootballTemp/football.tar.gz',  
              method='curl')
10/45
Untar
11/45
getXKCD('1168')
12/45
Untar the File
untar('FootballTemp/football.tar.gz', exdir='FootballFiles')
13/45
Did They Extract?
dir('FootballFiles')
[1] "pbp-2014.csv" "pbp-2015.csv"
14/45
Delete Tar
unlink('FootballTemp/football.tar.gz') 
dir('FootballTemp')
character(0)
15/45
Inspect One File
file.info('FootballFiles/pbp-2014.csv')
                               size isdir mode               mtime 
FootballFiles/pbp-2014.csv 10280324 FALSE  666 2016-03-25 00:14:23 
                                         ctime               atime exe 
FootballFiles/pbp-2014.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no
16/45
Inspect All Files
dir('FootballFiles') %>% file.info
             size isdir mode mtime ctime atime  exe 
pbp-2014.csv   NA    NA <NA>  <NA>  <NA>  <NA> <NA> 
pbp-2015.csv   NA    NA <NA>  <NA>  <NA>  <NA> <NA>
17/45
Inspect All Files
dir('FootballFiles', full.names=TRUE) %>% file.info
                               size isdir mode               mtime 
FootballFiles/pbp-2014.csv 10280324 FALSE  666 2016-03-25 00:14:23 
FootballFiles/pbp-2015.csv 10671016 FALSE  666 2016-03-25 00:14:23 
                                         ctime               atime exe 
FootballFiles/pbp-2014.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no 
FootballFiles/pbp-2015.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no
18/45
Better Names
file.rename(from=dir('FootballFiles', full.names=TRUE),  
            to=sprintf('FootballFiles/Football%s.csv', 14:15))
[1] TRUE TRUE
19/45
Better Names
dir('FootballFiles')
[1] "Football14.csv" "Football15.csv"
20/45
Make Copies
dir.create('FootballFiles/Backup') 
file.copy(dir('FootballFiles', full.names=TRUE, pattern='.csv'),  
          sprintf('FootballFiles/Backup/Footballl%s.csv', 14:15))
[1] TRUE TRUE
21/45
Make Copies
dir('FootballFiles', recursive=TRUE)
[1] "Backup/Footballl14.csv" "Backup/Footballl15.csv" "Football14.csv"         
[4] "Football15.csv"        
22/45
Count Columns
count.fields('FootballFiles/Football14.csv', sep=',') %>% head(15)
 [1] 45 45 45 45 15 45 45 45 45 45 45 45 45 45 45
count.fields('FootballFiles/Football15.csv', sep=',') %>% head(15)
 [1] 45 45 45 45 45 45 45 45 45 NA 15 45 45 45 45
23/45
Line Count
system('wc -l FootballFiles/Football14.csv')
45696 FootballFiles/Football14.csv
system('wc -l FootballFiles/Football15.csv')
46278 FootballFiles/Football15.csv
24/45
Reference Files
dataPath <- 'FootballFiles' 
file.path(dataPath, 'Football14.csv')
[1] "FootballFiles/Football14.csv"
file.path(dataPath, 'Football15.csv')
[1] "FootballFiles/Football15.csv"
25/45
Read Data
theFiles <- dir(dataPath, pattern='.csv', full.names=TRUE) 
games <- theFiles %>% map_df(read.csv2, sep=',', header=TRUE, stringsAsFactors=FALSE)
26/45
See the Data
DT::datatable(data=games%>% slice(sample(nrow(games), size=500, replace=FALSE)),  
              rownames=FALSE, 
              options = list( 
                  dom = "rtiS", 
                  scrollY = 400, scrollX=TRUE, 
                  scrollCollapse = TRUE), 
              filter=list(position='top') 
)
27/45
See the Data
Showing 1 to 10 of 500 entries
2016010300 2016-01-03 1 12 31 BUF NYJ
2015120608 2015-12-06 2 15 0 ATL TB
2015122100 2015-12-21 1 15 0 DET NO
2014111610 11/16/2014 1 6 6 DET ARI
2015112904 2015-11-29 2 1 23 IND TB
2015122710 2015-12-27 2 14 7 GB ARI
2014101203 10/12/2014 1 11 20 PIT CLE
2015101102 2015-10-11 2 8 2 CIN SEA
GameId GameDate Quarter Minute Second OffenseTeam DefenseTeam Down ToGo
All All All All All All All All All
28/45
Pass vs Rush
29/45
Focus on One Team's Offense
oneOff <- games %>% 
    filter(OffenseTeam == 'NYG', PlayType %in% c('PASS', 'RUSH')) %>% 
    mutate(PlayType=factor(PlayType, levels=c('RUSH', 'PASS')),  
           Down=factor(Down, levels=c(1, 2, 3, 4)))
30/45
Probability of a Pass
passRushMod <- glm(PlayType ~ Down + ToGo - 1, data=oneOff, family=binomial) 
coefplot(passRushMod, trans=arm::invlogit, title='Probability of Pass')
31/45
Scenarios
# make grid of scenarios 
scenarios <- expand.grid(ToGo=1:15, Down=1:4) %>% as.tbl %>%  
    mutate(Down=factor(Down, levels=c(1, 2, 3, 4))) 
# make prediction based on model 
scenarioPredict <- predict(passRushMod,  
                           newdata=scenarios, type='response', se.fit=TRUE) 
# build confidence intervals 
scenarios <- scenarios %>% mutate(Prediction=scenarioPredict$fit,  
                                  Lower=Prediction - 2*scenarioPredict$se.fit, 
                                  Upper=Prediction + 2*scenarioPredict$se.fit)
32/45
Scenarios
ToGo Down Prediction Lower Upper
1 1 0.2754536 0.2135514 0.3373558
2 1 0.2959441 0.2371832 0.3547051
3 1 0.3172914 0.2621339 0.3724488
4 1 0.3394361 0.2882498 0.3906223
5 1 0.3623061 0.3153154 0.4092968
6 1 0.3858171 0.3430379 0.4285962
knitr::kable(head(scenarios))
33/45
Probability of Pass
ggplot(scenarios, aes(x=ToGo)) + scale_y_continuous(label=scales::percent) + 
    geom_ribbon(aes(ymin=Lower, ymax=Upper), fill='lightgrey') + 
    geom_line(aes(y=Prediction)) + facet_wrap(~Down, nrow=2)
34/45
Get Eli's Stats
eliPage <- read_html('http://www.pro-football-reference.com/players/M/MannEl00.htm')
eliStats <- eliPage %>% html_nodes("#passing") %>%  
    html_table(header=TRUE) %>% getElement(1) 
useful::topleft(eliStats, c=7, r=8)
   Year Age  Tm Pos No.  G GS 
1  2004  23 NYG  qb  10  9  7 
2  2005  24 NYG  QB  10 16 16 
3  2006  25 NYG  QB  10 16 16 
4  2007  26 NYG  QB  10 16 16 
5 2008*  27 NYG  QB  10 16 16 
6  2009  28 NYG  QB  10 16 16 
7  2010  29 NYG  QB  10 16 16 
8 2011*  30 NYG  QB  10 16 16
35/45
36/45
Save Them
dir.create('results') 
ggsave('results/EliPass.png') 
write.table(eliStats, file='results/eliStats.csv', sep=',', row.names=FALSE)
[1] TRUE
[1] TRUE
37/45
38/45
Commit Them
repo <- repository(getwd()) 
add(repo, file.path('results', c('EliPass.png', 'eliStats.csv'))) 
commit(repo, message='Tracking plot and csv') 
push(repo)
39/45
40/45
Email Them
footballResults <- mime( 
    To = "jared@landeranalytics.com", 
    From = "jared@jaredlander.com", 
    Subject = "Eli Results", 
    body = "See the attached graph and data.") %>%  
    attach_file('results/EliPass.png') %>%  
    attach_file('results/eliStats.csv') 
send_message(footballResults)
41/45
Things We've Done
Create Directories
Query Directories
Untar Files
Read XKCD
Delete Files
Get File Info
Move Files
Copy Files
Count Columns
Run System Commands
·
·
·
·
·
·
·
·
·
·
Build File Paths
Read Data
Munge Data
Fit a GLM
Make Predictions
Generate Plots
Save Files
Scrape a Website
Commit and Push to Git
Send an Email
·
·
·
·
·
·
·
·
·
·
42/45
Jared P. Lander
Chief Data Scientist of Lander Analytics
Author of R for Everyone
Adjunct Professor at Columbia University
Organizer of New York Open Statistical Programming (The R) Meetup
Website: http://www.jaredlander.com
·
·
·
·
·
43/45
Packages
rvest
ggplot2
dplyr
purrr
coefplot
magrittr
useful
·
·
·
·
·
·
·
44/45
The Tools
R
RStudio
knitr
Pandoc
ioslides
·
·
·
·
·
45/45

More Related Content

Viewers also liked

Viewers also liked (15)

Dr. Datascience or: How I Learned to Stop Munging and Love Tests
Dr. Datascience or: How I Learned to Stop Munging and Love TestsDr. Datascience or: How I Learned to Stop Munging and Love Tests
Dr. Datascience or: How I Learned to Stop Munging and Love Tests
 
Iterating over statistical models: NCAA tournament edition
Iterating over statistical models: NCAA tournament editionIterating over statistical models: NCAA tournament edition
Iterating over statistical models: NCAA tournament edition
 
Building Scalable Prediction Services in R
Building Scalable Prediction Services in RBuilding Scalable Prediction Services in R
Building Scalable Prediction Services in R
 
What We Learned Building an R-Python Hybrid Predictive Analytics Pipeline
What We Learned Building an R-Python Hybrid Predictive Analytics PipelineWhat We Learned Building an R-Python Hybrid Predictive Analytics Pipeline
What We Learned Building an R-Python Hybrid Predictive Analytics Pipeline
 
Improving Data Interoperability for Python and R
Improving Data Interoperability for Python and RImproving Data Interoperability for Python and R
Improving Data Interoperability for Python and R
 
Julia + R for Data Science
Julia + R for Data ScienceJulia + R for Data Science
Julia + R for Data Science
 
Thinking Small About Big Data
Thinking Small About Big DataThinking Small About Big Data
Thinking Small About Big Data
 
I Don't Want to Be a Dummy! Encoding Predictors for Trees
I Don't Want to Be a Dummy! Encoding Predictors for TreesI Don't Want to Be a Dummy! Encoding Predictors for Trees
I Don't Want to Be a Dummy! Encoding Predictors for Trees
 
Data Science Challenges in Personal Program Analysis
Data Science Challenges in Personal Program AnalysisData Science Challenges in Personal Program Analysis
Data Science Challenges in Personal Program Analysis
 
The Political Impact of Social Penumbras
The Political Impact of Social PenumbrasThe Political Impact of Social Penumbras
The Political Impact of Social Penumbras
 
Reflection on the Data Science Profession in NYC
Reflection on the Data Science Profession in NYCReflection on the Data Science Profession in NYC
Reflection on the Data Science Profession in NYC
 
R Packages for Time-Varying Networks and Extremal Dependence
R Packages for Time-Varying Networks and Extremal DependenceR Packages for Time-Varying Networks and Extremal Dependence
R Packages for Time-Varying Networks and Extremal Dependence
 
Broom: Converting Statistical Models to Tidy Data Frames
Broom: Converting Statistical Models to Tidy Data FramesBroom: Converting Statistical Models to Tidy Data Frames
Broom: Converting Statistical Models to Tidy Data Frames
 
The Feels
The FeelsThe Feels
The Feels
 
High-Performance Python
High-Performance PythonHigh-Performance Python
High-Performance Python
 

More from Work-Bench

Cloud Native Infrastructure Management Solutions Compared
Cloud Native Infrastructure Management Solutions ComparedCloud Native Infrastructure Management Solutions Compared
Cloud Native Infrastructure Management Solutions Compared
Work-Bench
 

More from Work-Bench (8)

2017 Enterprise Almanac
2017 Enterprise Almanac2017 Enterprise Almanac
2017 Enterprise Almanac
 
AI to Enable Next Generation of People Managers
AI to Enable Next Generation of People ManagersAI to Enable Next Generation of People Managers
AI to Enable Next Generation of People Managers
 
Startup Recruiting Workbook: Sourcing and Interview Process
Startup Recruiting Workbook: Sourcing and Interview ProcessStartup Recruiting Workbook: Sourcing and Interview Process
Startup Recruiting Workbook: Sourcing and Interview Process
 
Cloud Native Infrastructure Management Solutions Compared
Cloud Native Infrastructure Management Solutions ComparedCloud Native Infrastructure Management Solutions Compared
Cloud Native Infrastructure Management Solutions Compared
 
Building a Demand Generation Machine at MongoDB
Building a Demand Generation Machine at MongoDBBuilding a Demand Generation Machine at MongoDB
Building a Demand Generation Machine at MongoDB
 
How to Market Your Startup to the Enterprise
How to Market Your Startup to the EnterpriseHow to Market Your Startup to the Enterprise
How to Market Your Startup to the Enterprise
 
Marketing & Design for the Enterprise
Marketing & Design for the EnterpriseMarketing & Design for the Enterprise
Marketing & Design for the Enterprise
 
Playing the Marketing Long Game
Playing the Marketing Long GamePlaying the Marketing Long Game
Playing the Marketing Long Game
 

Recently uploaded

+97470301568>>weed for sale in qatar ,weed for sale in dubai,weed for sale in...
+97470301568>>weed for sale in qatar ,weed for sale in dubai,weed for sale in...+97470301568>>weed for sale in qatar ,weed for sale in dubai,weed for sale in...
+97470301568>>weed for sale in qatar ,weed for sale in dubai,weed for sale in...
Health
 
Abortion pills in Jeddah | +966572737505 | Get Cytotec
Abortion pills in Jeddah | +966572737505 | Get CytotecAbortion pills in Jeddah | +966572737505 | Get Cytotec
Abortion pills in Jeddah | +966572737505 | Get Cytotec
Abortion pills in Riyadh +966572737505 get cytotec
 
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
HyderabadDolls
 
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
ZurliaSoop
 
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
gajnagarg
 
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
gajnagarg
 
In Riyadh ((+919101817206)) Cytotec kit @ Abortion Pills Saudi Arabia
In Riyadh ((+919101817206)) Cytotec kit @ Abortion Pills Saudi ArabiaIn Riyadh ((+919101817206)) Cytotec kit @ Abortion Pills Saudi Arabia
In Riyadh ((+919101817206)) Cytotec kit @ Abortion Pills Saudi Arabia
ahmedjiabur940
 
Sealdah % High Class Call Girls Kolkata - 450+ Call Girl Cash Payment 8005736...
Sealdah % High Class Call Girls Kolkata - 450+ Call Girl Cash Payment 8005736...Sealdah % High Class Call Girls Kolkata - 450+ Call Girl Cash Payment 8005736...
Sealdah % High Class Call Girls Kolkata - 450+ Call Girl Cash Payment 8005736...
HyderabadDolls
 
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
nirzagarg
 
Lecture_2_Deep_Learning_Overview-newone1
Lecture_2_Deep_Learning_Overview-newone1Lecture_2_Deep_Learning_Overview-newone1
Lecture_2_Deep_Learning_Overview-newone1
ranjankumarbehera14
 
Top profile Call Girls In Tumkur [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Tumkur [ 7014168258 ] Call Me For Genuine Models We...Top profile Call Girls In Tumkur [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Tumkur [ 7014168258 ] Call Me For Genuine Models We...
nirzagarg
 

Recently uploaded (20)

+97470301568>>weed for sale in qatar ,weed for sale in dubai,weed for sale in...
+97470301568>>weed for sale in qatar ,weed for sale in dubai,weed for sale in...+97470301568>>weed for sale in qatar ,weed for sale in dubai,weed for sale in...
+97470301568>>weed for sale in qatar ,weed for sale in dubai,weed for sale in...
 
Statistics notes ,it includes mean to index numbers
Statistics notes ,it includes mean to index numbersStatistics notes ,it includes mean to index numbers
Statistics notes ,it includes mean to index numbers
 
Abortion pills in Jeddah | +966572737505 | Get Cytotec
Abortion pills in Jeddah | +966572737505 | Get CytotecAbortion pills in Jeddah | +966572737505 | Get Cytotec
Abortion pills in Jeddah | +966572737505 | Get Cytotec
 
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
 
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
 
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
 
Digital Transformation Playbook by Graham Ware
Digital Transformation Playbook by Graham WareDigital Transformation Playbook by Graham Ware
Digital Transformation Playbook by Graham Ware
 
Digital Advertising Lecture for Advanced Digital & Social Media Strategy at U...
Digital Advertising Lecture for Advanced Digital & Social Media Strategy at U...Digital Advertising Lecture for Advanced Digital & Social Media Strategy at U...
Digital Advertising Lecture for Advanced Digital & Social Media Strategy at U...
 
Vadodara 💋 Call Girl 7737669865 Call Girls in Vadodara Escort service book now
Vadodara 💋 Call Girl 7737669865 Call Girls in Vadodara Escort service book nowVadodara 💋 Call Girl 7737669865 Call Girls in Vadodara Escort service book now
Vadodara 💋 Call Girl 7737669865 Call Girls in Vadodara Escort service book now
 
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
 
Charbagh + Female Escorts Service in Lucknow | Starting ₹,5K To @25k with A/C...
Charbagh + Female Escorts Service in Lucknow | Starting ₹,5K To @25k with A/C...Charbagh + Female Escorts Service in Lucknow | Starting ₹,5K To @25k with A/C...
Charbagh + Female Escorts Service in Lucknow | Starting ₹,5K To @25k with A/C...
 
20240412-SmartCityIndex-2024-Full-Report.pdf
20240412-SmartCityIndex-2024-Full-Report.pdf20240412-SmartCityIndex-2024-Full-Report.pdf
20240412-SmartCityIndex-2024-Full-Report.pdf
 
DATA SUMMIT 24 Building Real-Time Pipelines With FLaNK
DATA SUMMIT 24  Building Real-Time Pipelines With FLaNKDATA SUMMIT 24  Building Real-Time Pipelines With FLaNK
DATA SUMMIT 24 Building Real-Time Pipelines With FLaNK
 
Ranking and Scoring Exercises for Research
Ranking and Scoring Exercises for ResearchRanking and Scoring Exercises for Research
Ranking and Scoring Exercises for Research
 
In Riyadh ((+919101817206)) Cytotec kit @ Abortion Pills Saudi Arabia
In Riyadh ((+919101817206)) Cytotec kit @ Abortion Pills Saudi ArabiaIn Riyadh ((+919101817206)) Cytotec kit @ Abortion Pills Saudi Arabia
In Riyadh ((+919101817206)) Cytotec kit @ Abortion Pills Saudi Arabia
 
Sealdah % High Class Call Girls Kolkata - 450+ Call Girl Cash Payment 8005736...
Sealdah % High Class Call Girls Kolkata - 450+ Call Girl Cash Payment 8005736...Sealdah % High Class Call Girls Kolkata - 450+ Call Girl Cash Payment 8005736...
Sealdah % High Class Call Girls Kolkata - 450+ Call Girl Cash Payment 8005736...
 
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
 
Gomti Nagar & best call girls in Lucknow | 9548273370 Independent Escorts & D...
Gomti Nagar & best call girls in Lucknow | 9548273370 Independent Escorts & D...Gomti Nagar & best call girls in Lucknow | 9548273370 Independent Escorts & D...
Gomti Nagar & best call girls in Lucknow | 9548273370 Independent Escorts & D...
 
Lecture_2_Deep_Learning_Overview-newone1
Lecture_2_Deep_Learning_Overview-newone1Lecture_2_Deep_Learning_Overview-newone1
Lecture_2_Deep_Learning_Overview-newone1
 
Top profile Call Girls In Tumkur [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Tumkur [ 7014168258 ] Call Me For Genuine Models We...Top profile Call Girls In Tumkur [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Tumkur [ 7014168258 ] Call Me For Genuine Models We...
 

R for Everything