SlideShare uma empresa Scribd logo
1 de 121
Baixar para ler offline
Rscript R/master.R 
--port=7137
●
○
○
> rsuite install
Detecting repositories ...
Will use repositories:
CRAN.CRAN = https://mran.microsoft.com/snapshot/2017-10-15
CRAN.CRANextra = http://www.stats.ox.ac.uk/pub/RWin
Other = http://wlog-rsuite.s3.amazonaws.com
Installing RSuite(v0.17x) package ...
installing the source package 'RSuite'
All done.
> rsuite proj start -n spmf
Commands:
update
Checks if newest version of RSuite CLI is installed. If not
installer for newest version is downloaded and installation
is initiated.
install
Install RSuite with all the dependencies.
proj
Use it to manage project, its dependencies, and build
project packages.
repo
Use to manage repositories. e.g. upload packages.
pkgzip
Use to create PKGZIP packages to fill up a remote repository.
version
Show RSuite CLI version.
help
Show this message and exit.
Call 'rsuite [command] help' to get information on acceptable [args].
logs/.gitignore
PARAMETERS
●
●
●
○
○
○
○
●
●
●
●
LogLevel: INFO
N_days: 365
solver_max_iterations: 10
solver_opt_horizon: 8
●
●
○ main
○ if __name__ == "__main__":
predmodel
● ==
● >=
● <=
●
master.R
spmf/libs
packages_import.R
master.R
import_training.R (I)
● import/<session_id>/
● work/<session_id>/
library(predmodel)
# Import and working areas, resolved relative to this script's folder.
# NOTE(review): `script_path`, `args` and `loginfo` are not defined in this
# snippet; presumably the project's script environment provides them - confirm.
import_path <- file.path(script_path, "../import")
work_path <- file.path(script_path, "../work")

# Optional argument: the default below is used when no session id is passed.
session_id <- args$get(
  name = "session_id",
  default = "201711122000",
  required = FALSE
)
loginfo("--> Session id:%s", session_id)

# Per-session working folder. `recursive = TRUE` also creates a missing
# parent work folder instead of failing with only a warning.
session_work <- file.path(work_path, session_id)
if (!dir.exists(session_work)) {
  dir.create(session_work, recursive = TRUE)
}

import_training_data(file.path(import_path, session_id), session_work)
import_training.R (II)
devtools
import_training_data
#' Generate the training data set and store it as CSV
#'
#' Logs the source and target folders, builds a table of 10000 rows with two
#' normally distributed features and a 0/1 response (the first 30% of rows
#' are 1, the rest 0) and writes it as a semicolon-separated file named
#' "training.csv" into `work_path`.
#'
#' @param import_path Source folder; in this implementation it is only logged.
#' @param work_path Folder that receives "training.csv".
#' @return The result of `fwrite()`.
#' @export
import_training_data <- function(import_path, work_path) {
  pkg_loginfo("Importing from %s into %s", import_path, work_path)

  n_rows <- 10000
  n_positive <- round(n_rows * 0.3)

  # Columns are built in the same order as before so the random draws match.
  training <- data.table(
    feature1 = rnorm(n_rows),
    feature2 = rnorm(n_rows),
    resp = rep(c(1, 0), times = c(n_positive, n_rows - n_positive))
  )

  target_file <- file.path(work_path, "training.csv")
  fwrite(x = training, file = target_file, sep = ";")
}
estimate_model.R (I)
●
●
library(predmodel)
# Working area, resolved relative to this script's folder.
work_path <- file.path(script_path, "../work")

# Optional argument with a built-in fallback.
# NOTE(review): this default differs from the "201711122000" default used by
# the import scripts - confirm which session id is intended.
session_id <- args$get(
  name = "session_id",
  required = FALSE,
  default = "201710111655"
)
loginfo("--> Session id:%s", session_id)
session_work <- file.path(work_path, session_id)

h2o.init(max_mem_size = "4g", nthreads = 2)
logdebug("---> H2O started")

# The training file must already be present in the session folder.
train_file <- file.path(session_work, "training.csv")
stopifnot(file.exists(train_file))

# Transform -> estimate -> save, spelled out one step at a time.
train_frame <- transform_training(train_file)
fitted_model <- estimate_model(train_frame, session_id)
save_model(fitted_model, session_work)
transform_training
#' Load the training CSV into H2O
#'
#' Imports the semicolon-separated file (with header row) under the key
#' "train_dt", converts the `resp` column to a factor and re-assigns the
#' result to the same key.
#'
#' @param train_file Path to the training CSV file.
#' @return The frame returned by `h2o.assign()`.
#' @export
transform_training <- function(train_file) {
  frame_key <- "train_dt"

  train_frame <- h2o.importFile(
    path = train_file,
    destination_frame = frame_key,
    parse = TRUE,
    header = TRUE,
    sep = ";"
  )
  train_frame$resp <- as.factor(train_frame$resp)

  train_frame <- h2o.assign(data = train_frame, key = frame_key)
  train_frame
}
estimate_model
#' Fit a gradient boosting model on the training frame
#'
#' Trains an H2O GBM with 10 trees and a learning rate of 0.1, using `resp`
#' as the response and every other column of `dt` as a predictor. The model
#' id is "gbm_" followed by `session_id`.
#'
#' @param dt Training frame that contains a `resp` column.
#' @param session_id Session identifier used to build the model id.
#' @return The fitted model returned by `h2o.gbm()`.
#' @export
estimate_model <- function(dt, session_id) {
  response <- "resp"
  # Keep the response out of the predictor list; `colnames(dt)` alone would
  # pass `resp` as both predictor and target.
  predictors <- setdiff(colnames(dt), response)

  # The call is the last expression, so the model is the function's value.
  h2o.gbm(
    x = predictors,
    y = response,
    training_frame = dt,
    model_id = sprintf("gbm_%s", session_id),
    ntrees = 10,
    learn_rate = 0.1
  )
}
save_model
#' Store a fitted model in the session folder
#'
#' Forwards to `h2o.saveModel()` with `force = TRUE`.
#'
#' @param model Model object to save.
#' @param session_work Target folder for the saved model.
#' @return The result of `h2o.saveModel()`.
#' @export
save_model <- function(model, session_work) {
  overwrite <- TRUE
  h2o.saveModel(model, path = session_work, force = overwrite)
}
import_test.R (I)
● import/<session_id>/
● work/<session_id>/
library(predmodel)
# Import and working areas, resolved relative to this script's folder.
# NOTE(review): `script_path`, `args` and `loginfo` are not defined in this
# snippet; presumably the project's script environment provides them - confirm.
import_path <- file.path(script_path, "../import")
work_path <- file.path(script_path, "../work")

# Optional argument: the default below is used when no session id is passed.
session_id <- args$get(
  name = "session_id",
  default = "201711122000",
  required = FALSE
)
loginfo("--> Session id:%s", session_id)

# Per-session working folder. `recursive = TRUE` also creates a missing
# parent work folder instead of failing with only a warning.
session_work <- file.path(work_path, session_id)
if (!dir.exists(session_work)) {
  dir.create(session_work, recursive = TRUE)
}

import_test_data(file.path(import_path, session_id), session_work)
import_test_data
#' Generate the test data set and store it as CSV
#'
#' Logs the source and target folders, builds a table of 1000 rows with two
#' normally distributed features and writes it as a semicolon-separated
#' file named "test.csv" into `work_path`.
#'
#' @param import_path Source folder; in this implementation it is only logged.
#' @param work_path Folder that receives "test.csv".
#' @return The result of `fwrite()`.
#' @export
import_test_data <- function(import_path, work_path) {
  pkg_loginfo("Importing from %s into %s", import_path, work_path)

  n_rows <- 1000
  # Columns are built in the same order as before so the random draws match.
  test_set <- data.table(
    feature1 = rnorm(n_rows),
    feature2 = rnorm(n_rows)
  )

  target_file <- file.path(work_path, "test.csv")
  fwrite(x = test_set, file = target_file, sep = ";")
}
score_model.R (I)
● work/<score_session_id>
● work/<train_session_id>
● export/<score_session_id>
score_model.R (II)
library(h2o)
library(magrittr)
library(predmodel)
# Working and export areas, resolved relative to this script's folder.
work_path <- file.path(script_path, "../work")
export_path <- file.path(script_path, "../export")

# Both session ids are optional and fall back to the same default value.
train_session_id <- args$get(
  name = "train_session_id",
  required = FALSE,
  default = "201710111655"
)
score_session_id <- args$get(
  name = "score_session_id",
  required = FALSE,
  default = "201710111655"
)
loginfo("--> train session id:%s", train_session_id)
loginfo("--> score session id:%s", score_session_id)

# NOTE(review): `score_session_export` is not read again in the code shown
# here - confirm whether a per-session export folder was intended.
score_session_export <- export_path
train_session_work <- file.path(work_path, train_session_id)
score_session_work <- file.path(work_path, score_session_id)

h2o.init(max_mem_size = "4g", nthreads = 2)
logdebug("---> H2O started")

# Inputs: test data of the scoring session, model of the training session.
test_file <- file.path(score_session_work, "test.csv")
model_file <- file.path(
  train_session_work,
  sprintf("gbm_%s", train_session_id)
)
stopifnot(file.exists(test_file))
stopifnot(file.exists(model_file))

# Load test data, score it, then export the scores.
test_dt <- transform_test(test_file)
scores <- score_model(test_dt = test_dt, model_path = model_file)
export_score(
  scores,
  export_path = export_path,
  score_session_id = score_session_id
)
transform_test
#' Load the test CSV into H2O
#'
#' Imports the semicolon-separated file (with header row) under the key
#' "test_dt".
#'
#' @param test_file Path to the test CSV file.
#' @return The frame returned by `h2o.importFile()`.
#' @export
transform_test <- function(test_file) {
  frame_key <- "test_dt"
  field_sep <- ";"

  h2o.importFile(
    path = test_file,
    destination_frame = frame_key,
    parse = TRUE,
    header = TRUE,
    sep = field_sep
  )
}
score_model
#' Score test data with a saved model
#'
#' Loads the model stored at `model_path` and runs its predictions on
#' `test_dt`.
#'
#' @param test_dt Frame holding the data to score.
#' @param model_path Path of the saved model to load.
#' @return The predictions returned by `h2o.predict()`.
#' @export
score_model <- function(test_dt, model_path) {
  fitted_model <- h2o.loadModel(model_path)
  h2o.predict(fitted_model, test_dt)
}
export_score
#' Append scores to the export file
#'
#' Converts the scores to a data.table, tags every row with the scoring
#' session id and appends the rows to "score.csv" (semicolon-separated) in
#' `export_path`. The header row is written only when the file is created
#' or still empty, so repeated runs never duplicate it.
#'
#' @param score_dt Scores to export; converted with `as.data.table()`.
#' @param score_session_id Session id stored in the `score_session_id` column.
#' @param export_path Folder that holds "score.csv".
#' @return The result of `fwrite()`.
#' @export
export_score <- function(score_dt, score_session_id, export_path) {
  score_dt <- as.data.table(score_dt)

  # set() takes the value from the function argument unambiguously; with
  # `:=` a same-named column in the table would shadow the argument.
  set(score_dt, j = "score_session_id", value = score_session_id)

  out_file <- file.path(export_path, "score.csv")
  # Be explicit about the header instead of relying on the append default.
  needs_header <- !file.exists(out_file) || file.size(out_file) == 0

  fwrite(
    x = score_dt,
    file = out_file,
    sep = ";",
    append = TRUE,
    col.names = needs_header
  )
}
Production
spmf_0.1_001.zip
Production/spmf import export
work
Production/spmf/R
a. Rscript import_training.R
b. Rscript estimate_model.R
c. Rscript import_test.R
d. Rscript score_model.R
Production/spmf/export
print
loginfo("Phase 1 passed")
logdebug("Iter %d done", i)
logtrace("Iter %d done", i)
logwarning("Are you sure?")
logerror("I failed :(")
Packages
pkg_loginfo("Phase 1 passed")
pkg_logdebug("Iter %d done", i)
pkg_logtrace("Iter %d done", i)
pkg_logwarning("Are you sure?")
pkg_logerror("I failed :(")
2017-11-13 13:47:03 INFO::--> Session id:201711122000
2017-11-13 13:47:03 INFO:predmodel:Importing from
C:/Workplace/Sandbox/Production/spmf/R/../import/201711122000 into
C:/Workplace/Sandbox/Production/spmf/R/../work/201711122000
2017-11-13 13:47:14 INFO::--> Session id:201711122000
2017-11-13 13:47:51 INFO::--> Session id:201711131000
2017-11-13 13:47:51 INFO:predmodel:Importing from
C:/Workplace/Sandbox/Production/spmf/R/../import/201711131000 into
C:/Workplace/Sandbox/Production/spmf/R/../work/201711131000
2017-11-13 13:47:57 INFO::--> train session id:201711122000
2017-11-13 13:47:57 INFO::--> score session id:201711131000
LogLevel: INFO
LogLevel: DEBUG
LogLevel: TRACE
import_training.R
tests/test_spmf.R
library(predmodel)
library(testthat)
context("Testing context")

# Sanity checks on basic numeric comparisons.
test_that(desc = "basic numeric comparisons hold", code = {
  expect_true(5 > 3)
  # pi is about 3.14, so the earlier `pi < 3` expectation could never pass.
  expect_true(pi > 3)
})
Large scale machine learning projects with R Suite
Large scale machine learning projects with R Suite
Large scale machine learning projects with R Suite
Large scale machine learning projects with R Suite
Large scale machine learning projects with R Suite

Mais conteúdo relacionado

Mais procurados

Oleksandr Tarasenko "Using Kafka in your python applications"
Oleksandr Tarasenko "Using Kafka in your python applications"Oleksandr Tarasenko "Using Kafka in your python applications"
Oleksandr Tarasenko "Using Kafka in your python applications"Fwdays
 
Using Kafka in your python application - Python fwdays 2020
Using Kafka in your python application - Python fwdays 2020Using Kafka in your python application - Python fwdays 2020
Using Kafka in your python application - Python fwdays 2020Oleksandr Tarasenko
 
Global Interpreter Lock: Episode I - Break the Seal
Global Interpreter Lock: Episode I - Break the SealGlobal Interpreter Lock: Episode I - Break the Seal
Global Interpreter Lock: Episode I - Break the SealTzung-Bi Shih
 
Job Queue in Golang
Job Queue in GolangJob Queue in Golang
Job Queue in GolangBo-Yi Wu
 
用 Go 語言打造多台機器 Scale 架構
用 Go 語言打造多台機器 Scale 架構用 Go 語言打造多台機器 Scale 架構
用 Go 語言打造多台機器 Scale 架構Bo-Yi Wu
 
The Power of Rails 2.3 Engines & Templates
The Power of Rails 2.3 Engines & TemplatesThe Power of Rails 2.3 Engines & Templates
The Power of Rails 2.3 Engines & TemplatesTse-Ching Ho
 
Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;
Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;
Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;Tzung-Bi Shih
 
Últimas atualizações de produtividade no Visual Studio 2017​
Últimas atualizações de produtividade no Visual Studio 2017​Últimas atualizações de produtividade no Visual Studio 2017​
Últimas atualizações de produtividade no Visual Studio 2017​Letticia Nicoli
 
What is new with JavaScript in Gnome: The 2021 edition
What is new with JavaScript in Gnome: The 2021 editionWhat is new with JavaScript in Gnome: The 2021 edition
What is new with JavaScript in Gnome: The 2021 editionIgalia
 
PyCon TW 2017 - PyPy's approach to construct domain-specific language runtime...
PyCon TW 2017 - PyPy's approach to construct domain-specific language runtime...PyCon TW 2017 - PyPy's approach to construct domain-specific language runtime...
PyCon TW 2017 - PyPy's approach to construct domain-specific language runtime...Tsundere Chen
 
Functional and scale performance tests using zopkio
Functional and scale performance tests using zopkio Functional and scale performance tests using zopkio
Functional and scale performance tests using zopkio Marcelo Araujo
 
Install, Compile, Setup, Setting OpenCV 3.2, Visual C++ 2015, Win 64bit,
Install, Compile, Setup, Setting OpenCV 3.2, Visual C++ 2015, Win 64bit, Install, Compile, Setup, Setting OpenCV 3.2, Visual C++ 2015, Win 64bit,
Install, Compile, Setup, Setting OpenCV 3.2, Visual C++ 2015, Win 64bit, Farshid Pirahansiah
 
Server monitoring using grafana and prometheus
Server monitoring using grafana and prometheusServer monitoring using grafana and prometheus
Server monitoring using grafana and prometheusCeline George
 
Golang Project Layout and Practice
Golang Project Layout and PracticeGolang Project Layout and Practice
Golang Project Layout and PracticeBo-Yi Wu
 
Mod06 new development tools
Mod06 new development toolsMod06 new development tools
Mod06 new development toolsPeter Haase
 
"Making OpenCV Code Run Fast," a Presentation from Intel
"Making OpenCV Code Run Fast," a Presentation from Intel"Making OpenCV Code Run Fast," a Presentation from Intel
"Making OpenCV Code Run Fast," a Presentation from IntelEdge AI and Vision Alliance
 
PyHEP 2018: Tools to bind to Python
PyHEP 2018:  Tools to bind to PythonPyHEP 2018:  Tools to bind to Python
PyHEP 2018: Tools to bind to PythonHenry Schreiner
 
Tim Panton - Presentation at Emerging Communications Conference & Awards (eCo...
Tim Panton - Presentation at Emerging Communications Conference & Awards (eCo...Tim Panton - Presentation at Emerging Communications Conference & Awards (eCo...
Tim Panton - Presentation at Emerging Communications Conference & Awards (eCo...eCommConf
 

Mais procurados (20)

Source Plugins
Source PluginsSource Plugins
Source Plugins
 
Oleksandr Tarasenko "Using Kafka in your python applications"
Oleksandr Tarasenko "Using Kafka in your python applications"Oleksandr Tarasenko "Using Kafka in your python applications"
Oleksandr Tarasenko "Using Kafka in your python applications"
 
Using Kafka in your python application - Python fwdays 2020
Using Kafka in your python application - Python fwdays 2020Using Kafka in your python application - Python fwdays 2020
Using Kafka in your python application - Python fwdays 2020
 
Global Interpreter Lock: Episode I - Break the Seal
Global Interpreter Lock: Episode I - Break the SealGlobal Interpreter Lock: Episode I - Break the Seal
Global Interpreter Lock: Episode I - Break the Seal
 
Job Queue in Golang
Job Queue in GolangJob Queue in Golang
Job Queue in Golang
 
用 Go 語言打造多台機器 Scale 架構
用 Go 語言打造多台機器 Scale 架構用 Go 語言打造多台機器 Scale 架構
用 Go 語言打造多台機器 Scale 架構
 
The Power of Rails 2.3 Engines & Templates
The Power of Rails 2.3 Engines & TemplatesThe Power of Rails 2.3 Engines & Templates
The Power of Rails 2.3 Engines & Templates
 
Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;
Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;
Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;
 
Últimas atualizações de produtividade no Visual Studio 2017​
Últimas atualizações de produtividade no Visual Studio 2017​Últimas atualizações de produtividade no Visual Studio 2017​
Últimas atualizações de produtividade no Visual Studio 2017​
 
What is new with JavaScript in Gnome: The 2021 edition
What is new with JavaScript in Gnome: The 2021 editionWhat is new with JavaScript in Gnome: The 2021 edition
What is new with JavaScript in Gnome: The 2021 edition
 
PyCon TW 2017 - PyPy's approach to construct domain-specific language runtime...
PyCon TW 2017 - PyPy's approach to construct domain-specific language runtime...PyCon TW 2017 - PyPy's approach to construct domain-specific language runtime...
PyCon TW 2017 - PyPy's approach to construct domain-specific language runtime...
 
Functional and scale performance tests using zopkio
Functional and scale performance tests using zopkio Functional and scale performance tests using zopkio
Functional and scale performance tests using zopkio
 
Install, Compile, Setup, Setting OpenCV 3.2, Visual C++ 2015, Win 64bit,
Install, Compile, Setup, Setting OpenCV 3.2, Visual C++ 2015, Win 64bit, Install, Compile, Setup, Setting OpenCV 3.2, Visual C++ 2015, Win 64bit,
Install, Compile, Setup, Setting OpenCV 3.2, Visual C++ 2015, Win 64bit,
 
Server monitoring using grafana and prometheus
Server monitoring using grafana and prometheusServer monitoring using grafana and prometheus
Server monitoring using grafana and prometheus
 
Golang Project Layout and Practice
Golang Project Layout and PracticeGolang Project Layout and Practice
Golang Project Layout and Practice
 
Mod06 new development tools
Mod06 new development toolsMod06 new development tools
Mod06 new development tools
 
CI-CD WITH GITLAB WORKFLOW
CI-CD WITH GITLAB WORKFLOWCI-CD WITH GITLAB WORKFLOW
CI-CD WITH GITLAB WORKFLOW
 
"Making OpenCV Code Run Fast," a Presentation from Intel
"Making OpenCV Code Run Fast," a Presentation from Intel"Making OpenCV Code Run Fast," a Presentation from Intel
"Making OpenCV Code Run Fast," a Presentation from Intel
 
PyHEP 2018: Tools to bind to Python
PyHEP 2018:  Tools to bind to PythonPyHEP 2018:  Tools to bind to Python
PyHEP 2018: Tools to bind to Python
 
Tim Panton - Presentation at Emerging Communications Conference & Awards (eCo...
Tim Panton - Presentation at Emerging Communications Conference & Awards (eCo...Tim Panton - Presentation at Emerging Communications Conference & Awards (eCo...
Tim Panton - Presentation at Emerging Communications Conference & Awards (eCo...
 

Semelhante a Large scale machine learning projects with R Suite

Writing and Publishing Puppet Modules - PuppetConf 2014
Writing and Publishing Puppet Modules - PuppetConf 2014Writing and Publishing Puppet Modules - PuppetConf 2014
Writing and Publishing Puppet Modules - PuppetConf 2014Puppet
 
Burn down the silos! Helping dev and ops gel on high availability websites
Burn down the silos! Helping dev and ops gel on high availability websitesBurn down the silos! Helping dev and ops gel on high availability websites
Burn down the silos! Helping dev and ops gel on high availability websitesLindsay Holmwood
 
Nko workshop - node js crud & deploy
Nko workshop - node js crud & deployNko workshop - node js crud & deploy
Nko workshop - node js crud & deploySimon Su
 
Writing and Publishing Puppet Modules
Writing and Publishing Puppet ModulesWriting and Publishing Puppet Modules
Writing and Publishing Puppet ModulesPuppet
 
Lean Php Presentation
Lean Php PresentationLean Php Presentation
Lean Php PresentationAlan Pinstein
 
Railsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshareRailsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slidesharetomcopeland
 
Pyramid Deployment and Maintenance
Pyramid Deployment and MaintenancePyramid Deployment and Maintenance
Pyramid Deployment and MaintenanceJazkarta, Inc.
 
Zero Downtime Deployment with Ansible
Zero Downtime Deployment with AnsibleZero Downtime Deployment with Ansible
Zero Downtime Deployment with AnsibleStein Inge Morisbak
 
Rntb20200805
Rntb20200805Rntb20200805
Rntb20200805t k
 
IR Journal (itscholar.codegency.co.in).pdf
IR Journal (itscholar.codegency.co.in).pdfIR Journal (itscholar.codegency.co.in).pdf
IR Journal (itscholar.codegency.co.in).pdfRahulRoy130127
 
Protractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsProtractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsLudmila Nesvitiy
 
Hello click click boom
Hello click click boomHello click click boom
Hello click click boomsymbian_mgl
 
From Dev to DevOps - Codemotion ES 2012
From Dev to DevOps - Codemotion ES 2012From Dev to DevOps - Codemotion ES 2012
From Dev to DevOps - Codemotion ES 2012Carlos Sanchez
 
PerlDancer for Perlers (FOSDEM 2011)
PerlDancer for Perlers (FOSDEM 2011)PerlDancer for Perlers (FOSDEM 2011)
PerlDancer for Perlers (FOSDEM 2011)xSawyer
 
fog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloudfog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the CloudWesley Beary
 
Dynamic Tracing of your AMP web site
Dynamic Tracing of your AMP web siteDynamic Tracing of your AMP web site
Dynamic Tracing of your AMP web siteSriram Natarajan
 
Let's play with adf 3.0
Let's play with adf 3.0Let's play with adf 3.0
Let's play with adf 3.0Eugenio Romano
 
Тестирование и Django
Тестирование и DjangoТестирование и Django
Тестирование и DjangoMoscowDjango
 
VPN Access Runbook
VPN Access RunbookVPN Access Runbook
VPN Access RunbookTaha Shakeel
 

Semelhante a Large scale machine learning projects with R Suite (20)

Writing and Publishing Puppet Modules - PuppetConf 2014
Writing and Publishing Puppet Modules - PuppetConf 2014Writing and Publishing Puppet Modules - PuppetConf 2014
Writing and Publishing Puppet Modules - PuppetConf 2014
 
Burn down the silos! Helping dev and ops gel on high availability websites
Burn down the silos! Helping dev and ops gel on high availability websitesBurn down the silos! Helping dev and ops gel on high availability websites
Burn down the silos! Helping dev and ops gel on high availability websites
 
Nko workshop - node js crud & deploy
Nko workshop - node js crud & deployNko workshop - node js crud & deploy
Nko workshop - node js crud & deploy
 
Writing and Publishing Puppet Modules
Writing and Publishing Puppet ModulesWriting and Publishing Puppet Modules
Writing and Publishing Puppet Modules
 
Lean Php Presentation
Lean Php PresentationLean Php Presentation
Lean Php Presentation
 
Nativescript angular
Nativescript angularNativescript angular
Nativescript angular
 
Railsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshareRailsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshare
 
Pyramid Deployment and Maintenance
Pyramid Deployment and MaintenancePyramid Deployment and Maintenance
Pyramid Deployment and Maintenance
 
Zero Downtime Deployment with Ansible
Zero Downtime Deployment with AnsibleZero Downtime Deployment with Ansible
Zero Downtime Deployment with Ansible
 
Rntb20200805
Rntb20200805Rntb20200805
Rntb20200805
 
IR Journal (itscholar.codegency.co.in).pdf
IR Journal (itscholar.codegency.co.in).pdfIR Journal (itscholar.codegency.co.in).pdf
IR Journal (itscholar.codegency.co.in).pdf
 
Protractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsProtractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applications
 
Hello click click boom
Hello click click boomHello click click boom
Hello click click boom
 
From Dev to DevOps - Codemotion ES 2012
From Dev to DevOps - Codemotion ES 2012From Dev to DevOps - Codemotion ES 2012
From Dev to DevOps - Codemotion ES 2012
 
PerlDancer for Perlers (FOSDEM 2011)
PerlDancer for Perlers (FOSDEM 2011)PerlDancer for Perlers (FOSDEM 2011)
PerlDancer for Perlers (FOSDEM 2011)
 
fog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloudfog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloud
 
Dynamic Tracing of your AMP web site
Dynamic Tracing of your AMP web siteDynamic Tracing of your AMP web site
Dynamic Tracing of your AMP web site
 
Let's play with adf 3.0
Let's play with adf 3.0Let's play with adf 3.0
Let's play with adf 3.0
 
Тестирование и Django
Тестирование и DjangoТестирование и Django
Тестирование и Django
 
VPN Access Runbook
VPN Access RunbookVPN Access Runbook
VPN Access Runbook
 

Último

Non Text Magic Studio Magic Design for Presentations L&P.pdf
Non Text Magic Studio Magic Design for Presentations L&P.pdfNon Text Magic Studio Magic Design for Presentations L&P.pdf
Non Text Magic Studio Magic Design for Presentations L&P.pdfPratikPatil591646
 
Presentation of project of business person who are success
Presentation of project of business person who are successPresentation of project of business person who are success
Presentation of project of business person who are successPratikSingh115843
 
Statistics For Management by Richard I. Levin 8ed.pdf
Statistics For Management by Richard I. Levin 8ed.pdfStatistics For Management by Richard I. Levin 8ed.pdf
Statistics For Management by Richard I. Levin 8ed.pdfnikeshsingh56
 
IBEF report on the Insurance market in India
IBEF report on the Insurance market in IndiaIBEF report on the Insurance market in India
IBEF report on the Insurance market in IndiaManalVerma4
 
Decoding Movie Sentiments: Analyzing Reviews with Data Analysis model
Decoding Movie Sentiments: Analyzing Reviews with Data Analysis modelDecoding Movie Sentiments: Analyzing Reviews with Data Analysis model
Decoding Movie Sentiments: Analyzing Reviews with Data Analysis modelBoston Institute of Analytics
 
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...Dr Arash Najmaei ( Phd., MBA, BSc)
 
Digital Marketing Plan, how digital marketing works
Digital Marketing Plan, how digital marketing worksDigital Marketing Plan, how digital marketing works
Digital Marketing Plan, how digital marketing worksdeepakthakur548787
 
why-transparency-and-traceability-are-essential-for-sustainable-supply-chains...
why-transparency-and-traceability-are-essential-for-sustainable-supply-chains...why-transparency-and-traceability-are-essential-for-sustainable-supply-chains...
why-transparency-and-traceability-are-essential-for-sustainable-supply-chains...Jack Cole
 
Digital Indonesia Report 2024 by We Are Social .pdf
Digital Indonesia Report 2024 by We Are Social .pdfDigital Indonesia Report 2024 by We Are Social .pdf
Digital Indonesia Report 2024 by We Are Social .pdfNicoChristianSunaryo
 
Role of Consumer Insights in business transformation
Role of Consumer Insights in business transformationRole of Consumer Insights in business transformation
Role of Consumer Insights in business transformationAnnie Melnic
 
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdfEnglish-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdfblazblazml
 
DATA ANALYSIS using various data sets like shoping data set etc
DATA ANALYSIS using various data sets like shoping data set etcDATA ANALYSIS using various data sets like shoping data set etc
DATA ANALYSIS using various data sets like shoping data set etclalithasri22
 
Data Analysis Project Presentation: Unveiling Your Ideal Customer, Bank Custo...
Data Analysis Project Presentation: Unveiling Your Ideal Customer, Bank Custo...Data Analysis Project Presentation: Unveiling Your Ideal Customer, Bank Custo...
Data Analysis Project Presentation: Unveiling Your Ideal Customer, Bank Custo...Boston Institute of Analytics
 
Bank Loan Approval Analysis: A Comprehensive Data Analysis Project
Bank Loan Approval Analysis: A Comprehensive Data Analysis ProjectBank Loan Approval Analysis: A Comprehensive Data Analysis Project
Bank Loan Approval Analysis: A Comprehensive Data Analysis ProjectBoston Institute of Analytics
 

Último (17)

Non Text Magic Studio Magic Design for Presentations L&P.pdf
Non Text Magic Studio Magic Design for Presentations L&P.pdfNon Text Magic Studio Magic Design for Presentations L&P.pdf
Non Text Magic Studio Magic Design for Presentations L&P.pdf
 
Presentation of project of business person who are success
Presentation of project of business person who are successPresentation of project of business person who are success
Presentation of project of business person who are success
 
Statistics For Management by Richard I. Levin 8ed.pdf
Statistics For Management by Richard I. Levin 8ed.pdfStatistics For Management by Richard I. Levin 8ed.pdf
Statistics For Management by Richard I. Levin 8ed.pdf
 
2023 Survey Shows Dip in High School E-Cigarette Use
2023 Survey Shows Dip in High School E-Cigarette Use2023 Survey Shows Dip in High School E-Cigarette Use
2023 Survey Shows Dip in High School E-Cigarette Use
 
IBEF report on the Insurance market in India
IBEF report on the Insurance market in IndiaIBEF report on the Insurance market in India
IBEF report on the Insurance market in India
 
Decoding Movie Sentiments: Analyzing Reviews with Data Analysis model
Decoding Movie Sentiments: Analyzing Reviews with Data Analysis modelDecoding Movie Sentiments: Analyzing Reviews with Data Analysis model
Decoding Movie Sentiments: Analyzing Reviews with Data Analysis model
 
Data Analysis Project: Stroke Prediction
Data Analysis Project: Stroke PredictionData Analysis Project: Stroke Prediction
Data Analysis Project: Stroke Prediction
 
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
6 Tips for Interpretable Topic Models _ by Nicha Ruchirawat _ Towards Data Sc...
 
Digital Marketing Plan, how digital marketing works
Digital Marketing Plan, how digital marketing worksDigital Marketing Plan, how digital marketing works
Digital Marketing Plan, how digital marketing works
 
why-transparency-and-traceability-are-essential-for-sustainable-supply-chains...
why-transparency-and-traceability-are-essential-for-sustainable-supply-chains...why-transparency-and-traceability-are-essential-for-sustainable-supply-chains...
why-transparency-and-traceability-are-essential-for-sustainable-supply-chains...
 
Insurance Churn Prediction Data Analysis Project
Insurance Churn Prediction Data Analysis ProjectInsurance Churn Prediction Data Analysis Project
Insurance Churn Prediction Data Analysis Project
 
Digital Indonesia Report 2024 by We Are Social .pdf
Digital Indonesia Report 2024 by We Are Social .pdfDigital Indonesia Report 2024 by We Are Social .pdf
Digital Indonesia Report 2024 by We Are Social .pdf
 
Role of Consumer Insights in business transformation
Role of Consumer Insights in business transformationRole of Consumer Insights in business transformation
Role of Consumer Insights in business transformation
 
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdfEnglish-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
English-8-Q4-W3-Synthesizing-Essential-Information-From-Various-Sources-1.pdf
 
DATA ANALYSIS using various data sets like shoping data set etc
DATA ANALYSIS using various data sets like shoping data set etcDATA ANALYSIS using various data sets like shoping data set etc
DATA ANALYSIS using various data sets like shoping data set etc
 
Data Analysis Project Presentation: Unveiling Your Ideal Customer, Bank Custo...
Data Analysis Project Presentation: Unveiling Your Ideal Customer, Bank Custo...Data Analysis Project Presentation: Unveiling Your Ideal Customer, Bank Custo...
Data Analysis Project Presentation: Unveiling Your Ideal Customer, Bank Custo...
 
Bank Loan Approval Analysis: A Comprehensive Data Analysis Project
Bank Loan Approval Analysis: A Comprehensive Data Analysis ProjectBank Loan Approval Analysis: A Comprehensive Data Analysis Project
Bank Loan Approval Analysis: A Comprehensive Data Analysis Project
 

Large scale machine learning projects with R Suite

  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 18.
  • 19.
  • 20.
  • 21.
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31. > rsuite install Detecting repositories ... Will use repositories: CRAN.CRAN = https://mran.microsoft.com/snapshot/2017-10-15 CRAN.CRANextra = http://www.stats.ox.ac.uk/pub/RWin Other = http://wlog-rsuite.s3.amazonaws.com Installing RSuite(v0.17x) package ... installing the source package 'RSuite' All done.
  • 32.
  • 33. > rsuite proj start -n spmf
  • 34. Commands: update Checks if newest version of RSuite CLI is installed. If not installer for newest version is downloaded and installation is initiated. install Install RSuite with all the dependencies. proj Use it to manage project, its dependencies, and build project packages. repo Use to manage repositories. e.g. upload packages. pkgzip Use to create PKGZIP packages to fillup remove repository. version Show RSuite CLI version. help Show this message and exit. Call 'rsuite [command] help' to get information on acceptable [args].
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
  • 40.
  • 41.
  • 42.
  • 43.
  • 44.
  • 45.
  • 46.
  • 47.
  • 48.
  • 50.
  • 51.
  • 52.
  • 53.
  • 54.
  • 55.
  • 56.
  • 59.
  • 60. ● ● ○ main ○ if __name__ == "__main__":
  • 61.
  • 62.
  • 63.
  • 64.
  • 65.
  • 67.
  • 70.
  • 74.
  • 75.
  • 76.
  • 77. import_training.R (I) ● import/<session_id>/ ● work/<session_id>/ library(predmodel) import_path <- file.path(script_path, "../import") work_path <- file.path(script_path, "../work") # required session_id <- args$get(name = "session_id", default = "201711122000", required = FALSE) loginfo("--> Session id:%s", session_id) session_work <- file.path(work_path, session_id) if(!dir.exists(session_work)) { dir.create(session_work) } import_training_data(file.path(import_path, session_id), session_work)
  • 79.
  • 81. import_training_data #' @export import_training_data <- function(import_path, work_path) { pkg_loginfo("Importing from %s into %s", import_path, work_path) n <- 10000 dt <- data.table(feature1 = rnorm(n), feature2 = rnorm(n)) m <- round(n*0.3) dt[, resp := c(rep(1, m), rep(0, n - m))] fwrite(x = dt, file = file.path(work_path, "training.csv"), sep = ";") }
  • 82.
  • 83.
  • 84. estimate_model.R (I) ● ●
    # Master script: trains a model on the session's training.csv and stores
    # it in the session work folder. `script_path`, `args`, `loginfo` and
    # `logdebug` are not defined in this snippet -- presumably provided by the
    # R Suite script preamble; confirm.

    # h2o.init() and %>% are used below, so attach their packages explicitly
    # (as score_model.R does) rather than relying on predmodel to expose them.
    library(h2o)
    library(magrittr)
    library(predmodel)

    work_path <- file.path(script_path, "../work")

    # Optional argument: the default below is used when no session_id is
    # supplied (required = FALSE).
    session_id <- args$get(name = "session_id", required = FALSE, default = "201710111655")
    loginfo("--> Session id:%s", session_id)

    session_work <- file.path(work_path, session_id)

    h2o.init(max_mem_size = "4g", nthreads = 2)
    logdebug("---> H2O started")

    # Stop early when the import step has not produced the training file.
    train_file <- file.path(session_work, "training.csv")
    stopifnot(file.exists(train_file))

    train_file %>%
      transform_training() %>%
      estimate_model(session_id) %>%
      save_model(session_work)
  • 85. transform_training
    #' Load the training file into H2O
    #'
    #' Imports the semicolon-separated file (with header) as H2O frame
    #' "train_dt", converts the `resp` column to a factor and stores the
    #' result under the key "train_dt" again.
    #'
    #' @param train_file Path of the training CSV file.
    #' @return The frame returned by `h2o.assign()`.
    #' @export
    transform_training <- function(train_file) {
      frame <- h2o.importFile(
        path = train_file,
        destination_frame = "train_dt",
        parse = TRUE,
        header = TRUE,
        sep = ";"
      )
      frame$resp <- as.factor(frame$resp)
      h2o.assign(data = frame, key = "train_dt")
    }
  • 86. estimate_model
    #' Fit the GBM model of a session
    #'
    #' Trains `h2o.gbm()` with 10 trees and learning rate 0.1 on `dt`, using
    #' `resp` as the response and every other column as a predictor. The model
    #' id is "gbm_<session_id>".
    #'
    #' @param dt Training frame containing a `resp` column.
    #' @param session_id Session id used to build the model id.
    #' @return The fitted model.
    #' @export
    estimate_model <- function(dt, session_id) {
      # The response must not be listed among the predictors; excluding it
      # here avoids relying on h2o to strip it from `x`.
      predictors <- setdiff(colnames(dt), "resp")

      model <- h2o.gbm(x = predictors,
                       y = "resp",
                       training_frame = dt,
                       model_id = sprintf("gbm_%s", session_id),
                       ntrees = 10,
                       learn_rate = 0.1)
      # Return explicitly: ending on the assignment made the value invisible.
      model
    }
  • 87. save_model
    #' Store a model in the session work folder
    #'
    #' Calls `h2o.saveModel()` with `force = TRUE`.
    #'
    #' @param model Model to save.
    #' @param session_work Folder the model is written to.
    #' @return The value returned by `h2o.saveModel()`.
    #' @export
    save_model <- function(model, session_work) {
      h2o.saveModel(
        model,
        path = session_work,
        force = TRUE
      )
    }
  • 88.
  • 89.
  • 90. import_test.R (I) ● import/<session_id>/ ● work/<session_id>/
    # Master script: imports the test data of one session into its work
    # folder. `script_path`, `args` and `loginfo` are not defined in this
    # snippet -- presumably provided by the R Suite script preamble; confirm.
    library(predmodel)

    # Input and working folders sit next to the script folder.
    import_path <- file.path(script_path, "../import")
    work_path <- file.path(script_path, "../work")

    # Optional argument: the default below is used when no session_id is
    # supplied (required = FALSE).
    session_id <- args$get(name = "session_id", default = "201711122000", required = FALSE)
    loginfo("--> Session id:%s", session_id)

    # Each session gets its own work subfolder. recursive = TRUE also creates
    # a missing parent work/ folder instead of only emitting a warning.
    session_work <- file.path(work_path, session_id)
    if (!dir.exists(session_work)) {
      dir.create(session_work, recursive = TRUE)
    }

    import_test_data(file.path(import_path, session_id), session_work)
  • 91. import_test_data
    #' Generate the test data set of a session
    #'
    #' Logs both folders, then builds a table of 1000 rows with two normally
    #' distributed features and writes it as a semicolon-separated `test.csv`
    #' into `work_path`. `import_path` is only logged here.
    #'
    #' @param import_path Folder of the session's import data.
    #' @param work_path Folder the test file is written to.
    #' @return The value of the final `fwrite()` call.
    #' @export
    import_test_data <- function(import_path, work_path) {
      pkg_loginfo("Importing from %s into %s", import_path, work_path)

      row_count <- 1000
      test_set <- data.table(feature1 = rnorm(row_count),
                             feature2 = rnorm(row_count))

      target_file <- file.path(work_path, "test.csv")
      fwrite(x = test_set, file = target_file, sep = ";")
    }
  • 92.
  • 93.
  • 94. score_model.R (I) ● work/<score_session_id> ● work/<train_session_id> ● export/<score_session_id>
  • 95. score_model.R (II)
    # Master script: scores the test data of one session with the model of a
    # (possibly different) training session and appends the result to the
    # export folder. `script_path`, `args`, `loginfo` and `logdebug` are not
    # defined in this snippet -- presumably provided by the R Suite script
    # preamble; confirm.
    library(h2o)
    library(magrittr)
    library(predmodel)

    work_path <- file.path(script_path, "../work")
    export_path <- file.path(script_path, "../export")

    # Optional arguments: the defaults below are used when an id is not
    # supplied (required = FALSE).
    train_session_id <- args$get(name = "train_session_id", required = FALSE, default = "201710111655")
    score_session_id <- args$get(name = "score_session_id", required = FALSE, default = "201710111655")
    loginfo("--> train session id:%s", train_session_id)
    loginfo("--> score session id:%s", score_session_id)

    # All sessions share one export folder. Create it when missing so the
    # final write does not fail on a fresh deployment.
    score_session_export <- export_path
    if (!dir.exists(score_session_export)) {
      dir.create(score_session_export, recursive = TRUE)
    }
    train_session_work <- file.path(work_path, train_session_id)
    score_session_work <- file.path(work_path, score_session_id)

    h2o.init(max_mem_size = "4g", nthreads = 2)
    logdebug("---> H2O started")

    # The test data comes from the scoring session, the model from the
    # training session; stop early when either is missing.
    test_file <- file.path(score_session_work, "test.csv")
    model_file <- file.path(train_session_work, sprintf("gbm_%s", train_session_id))
    stopifnot(file.exists(test_file))
    stopifnot(file.exists(model_file))

    test_dt <- test_file %>%
      transform_test()

    score_model(test_dt = test_dt, model_path = model_file) %>%
      export_score(export_path = score_session_export,
                   score_session_id = score_session_id)
  • 96. transform_test
    #' Load the test file into H2O
    #'
    #' Imports the semicolon-separated file (with header) as H2O frame
    #' "test_dt".
    #'
    #' @param test_file Path of the test CSV file.
    #' @return The frame returned by `h2o.importFile()`.
    #' @export
    transform_test <- function(test_file) {
      test_frame <- h2o.importFile(
        path = test_file,
        destination_frame = "test_dt",
        parse = TRUE,
        header = TRUE,
        sep = ";"
      )
      test_frame
    }
  • 97. score_model
    #' Score test data with a stored model
    #'
    #' Loads the model saved at `model_path` and runs it on `test_dt`.
    #'
    #' @param test_dt Frame to score.
    #' @param model_path Path of the saved model.
    #' @return The prediction frame returned by `h2o.predict()`.
    #' @export
    score_model <- function(test_dt, model_path) {
      h2o.predict(h2o.loadModel(model_path), test_dt)
    }
  • 98. export_score
    #' Append the scores of a session to the export file
    #'
    #' Converts the scores to a data.table, adds a `score_session_id` column
    #' and appends the rows to the semicolon-separated `score.csv` in
    #' `export_path` (`append = TRUE`).
    #'
    #' @param score_dt Scores to export.
    #' @param score_session_id Id of the scoring session, stored with each row.
    #' @param export_path Folder holding `score.csv`.
    #' @return The value of the final `fwrite()` call.
    #' @export
    export_score <- function(score_dt, score_session_id, export_path) {
      scores <- as.data.table(score_dt)
      scores[, score_session_id := score_session_id]

      target_file <- file.path(export_path, "score.csv")
      fwrite(x = scores, file = target_file, sep = ";", append = TRUE)
    }
  • 99.
  • 100.
  • 101.
  • 102.
  • 103.
  • 104.
  • 106. Production/spmf/R a. Rscript import_training.R b. Rscript estimate_model.R c. Rscript import_test.R d. Rscript score_model.R Production/spmf/export
  • 107.
  • 108. print
  • 109.
  • 110. loginfo("Phase 1 passed") logdebug("Iter %d done", i) logtrace("Iter %d done", i) logwarning("Are you sure?") logerror("I failed :(") Packages pkg_loginfo("Phase 1 passed") pkg_logdebug("Iter %d done", i) pkg_logtrace("Iter %d done", i) pkg_logwarning("Are you sure?") pkg_logerror("I failed :(")
  • 111. 2017-11-13 13:47:03 INFO::--> Session id:201711122000 2017-11-13 13:47:03 INFO:predmodel:Importing from C:/Workplace/Sandbox/Production/spmf/R/../import/201711122000 into C:/Workplace/Sandbox/Production/spmf/R/../work/201711122000 2017-11-13 13:47:14 INFO::--> Session id:201711122000 2017-11-13 13:47:51 INFO::--> Session id:201711131000 2017-11-13 13:47:51 INFO:predmodel:Importing from C:/Workplace/Sandbox/Production/spmf/R/../import/201711131000 into C:/Workplace/Sandbox/Production/spmf/R/../work/201711131000 2017-11-13 13:47:57 INFO::--> train session id:201711122000 2017-11-13 13:47:57 INFO::--> score session id:201711131000
  • 114.
  • 115.