令和から本気出す (Getting serious from Reiwa)
> me
$name
[1] "Takashi Kitano"
$twitter
[1] "@kashitan"
$work_in
[1] " " -> " ( 6/30)"
Two months into the new era,
how is everyone doing?
Many of you have probably
made a fresh start.
That's it:
let's do a reading hack with R.
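Not shown on the slides, but for reference, a rough sketch of the packages the code below leans on (most calls are namespace-qualified; RMeCab additionally needs MeCab and a Japanese dictionary installed on the system):

library(dplyr)     # %>% pipe (re-exported from magrittr), tibble(), counts, joins
library(ggplot2)   # aes() is used unqualified in the plotting code
# used via :: throughout: purrr, stringr, tidyr, rlang, glue, knitr,
# tidytext, topicmodels, RMeCab, FactoMineR, ggrepel, lexRankr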
titles <- c(" ", " ",
" ", " ",
"7 ")
books <- dplyr::tibble(
docId = as.character(1:length(titles)),
title = titles,
author = c(rep(" ", 2),
rep(" F ", 2),
" R "))
books %>% knitr::kable()
docId title author
1
2
3 F
4 F
5 7 R
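The slides use a sentence-level data frame sentDf (docId / sectionId / sentenceId / sentence) whose construction is not shown. A minimal sketch of one way to build it, assuming each book is a UTF-8 text file named "<docId>.txt", sections are separated by blank lines, and sentences end with 「。」 (the file names and splitting rules are assumptions, not the author's code):

# Hypothetical input: one plain-text file per book, e.g. "1.txt" ... "5.txt"
sentDf <- purrr::map_dfr(books$docId, function(doc) {
  sections <- readr::read_file(paste0(doc, ".txt")) %>%
    stringr::str_split("\n\n+") %>%      # blank lines separate sections
    purrr::pluck(1)
  purrr::imap_dfr(sections, function(sec, i) {
    sentences <- stringr::str_split(sec, "(?<=。)")[[1]] %>%
      stringr::str_squish() %>%
      purrr::keep(~ nchar(.x) > 0)
    dplyr::tibble(
      docId      = doc,
      sectionId  = sprintf("%s_%04d", doc, i),
      sentenceId = sprintf("%s_%04d_%04d", doc, i, seq_along(sentences)),
      sentence   = sentences)
  })
})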
sentDf %>% head() %>% knitr::kable()
docId sectionId sentenceId sentence
1 1_0006 1_0006_0001 ──
1 1_0006 1_0006_0002
1 1_0006 1_0006_0003
1 1_0006 1_0006_0004
1 1_0006 1_0006_0005
1 1_0006 1_0006_0006
tokenDf <- sentDf %>%
  as.data.frame() %>%
  # morphological analysis with MeCab: one named character vector per
  # sentence, tokens (base forms, mypref = 1) as values and
  # parts of speech (hinshi) as names
  RMeCab::RMeCabDF("sentence", 1) %>%
  purrr::set_names(nm = sentDf$sentenceId) %>%
  # one row per token, with docId / sectionId parsed back out of sentenceId
  purrr::map2_dfr(.x = ., .y = names(.), .f = function(x, y) {
    dplyr::tibble(docId = stringr::str_replace(y, "_.*", ""),
                  sectionId = stringr::str_replace(y, "(.+_.*)_.*", "\\1"),
                  sentenceId = y,
                  token = x,
                  hinshi = names(x))
  })
tokenDf %>% head(n = 10) %>% knitr::kable()
docId sectionId sentenceId token hinshi
1 1_0006 1_0006_0001
1 1_0006 1_0006_0001 ─
1 1_0006 1_0006_0001 ─
1 1_0006 1_0006_0001
1 1_0006 1_0006_0001
1 1_0006 1_0006_0001
1 1_0006 1_0006_0002
1 1_0006 1_0006_0002
1 1_0006 1_0006_0002
1 1_0006 1_0006_0002
https://www.intage.co.jp/glossary/400/
https://www.gastonsanchez.com/visually-enforced/how-to/2012/07/19/Correspondence-Analysis/
https://youtu.be/dE10fBCDWQc
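Before applying it to the book data, a tiny self-contained example of what FactoMineR::CA() returns (the counts here are made up): it decomposes a word-by-document contingency table and exposes the eigenvalues and the row/column coordinates used in the biplots below.

# Toy contingency table: rows = words, columns = documents (made-up counts)
toy <- data.frame(doc1 = c(10, 3, 1),
                  doc2 = c(2, 8, 1),
                  doc3 = c(1, 2, 9),
                  row.names = c("w1", "w2", "w3"))
ca_toy <- FactoMineR::CA(toy, graph = FALSE)
ca_toy$eig          # variance explained by each dimension
ca_toy$row$coord    # row (word) coordinates
ca_toy$col$coord    # column (document) coordinates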
sw <- c(" ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ")
CA_in <- tokenDf %>%
dplyr::filter(hinshi %in% c(" ", " ")) %>%
dplyr::filter(! token %in% sw) %>%
dplyr::count(docId, token) %>%
dplyr::ungroup() %>%
dplyr::inner_join(books, by = "docId") %>%
dplyr::select(token, title, n) %>%
tidyr::spread(key = title, value=n, fill = 0) %>%
dplyr::mutate(max = pmax(!!!rlang::syms(titles))) %>%
dplyr::top_n(n = 60, wt = max) %>%
as.data.frame()
rownames(CA_in) <- CA_in$token
CA_in <- CA_in %>%
select(one_of(titles))
CA_in %>% head() %>% knitr::kable()
7
0 1 0 0 269
17 15 166 431 110
0 0 0 0 444
148 116 1 15 538
103 71 69 94 246
182 175 87 87 236
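A small aside on the reshaping step above: tidyr::spread() has been superseded by pivot_wider(). A sketch of the count-and-widen portion in the newer API (stopword / part-of-speech filters and the top-60 cut omitted; CA_in_wide is just an illustrative name):

CA_in_wide <- tokenDf %>%
  dplyr::count(docId, token) %>%
  dplyr::inner_join(books, by = "docId") %>%
  dplyr::select(token, title, n) %>%
  tidyr::pivot_wider(names_from = title, values_from = n, values_fill = 0)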
CA_out <- FactoMineR::CA(CA_in)  # also draws FactoMineR's default biplot
Prof. Fujimoto, the author of 『対応分析入門』 (An Introduction to Correspondence Analysis),
raised an issue about this, but it has sat untouched for nearly two years...
d <- dplyr::tibble(
x = CA_out$row$coord[, 1],
y = CA_out$row$coord[, 2],
label = rownames(CA_out$row$coord),
type = "row"
) %>%
dplyr::bind_rows(tibble(
x = CA_out$col$coord[, 1],
y = CA_out$col$coord[, 2],
label = rownames(CA_out$col$coord),
type = "col"
))
d %>% head(n = 12) %>% knitr::kable()
x y label type
-0.4651474 -1.4411540 row
0.9067268 0.1419623 row
-0.4639592 -1.4488050 row
-0.5503475 -0.6609927 row
-0.0656860 -0.2595280 row
-0.2077013 0.0100741 row
0.0335994 0.0251759 row
-0.5759668 0.0906504 row
-0.1080142 0.2464559 row
-0.0559726 0.0791701 row
labels <- glue::glue("Dim {axis} ({format(var, digits = 3, nsmall = 1)} %)",
                     axis = c(1, 2), var = CA_out$eig[1:2, 2])
d %>%
ggplot2::ggplot(aes(x = x, y = y, label = label,
shape = type, colour = type)) +
ggplot2::geom_vline(xintercept = 0, linetype = "dashed") +
ggplot2::geom_hline(yintercept = 0, linetype = "dashed") +
ggplot2::geom_point() +
ggrepel::geom_text_repel(family = "HiraMaruProN-W4") +
ggplot2::scale_shape_manual(values = c(17, 16)) +
ggplot2::xlab(labels[1]) +
ggplot2::ylab(labels[2]) +
ggplot2::ggtitle("CA - Biplot") +
ggplot2::theme(legend.position = "none")
Let's do correspondence analysis
with TF-IDF
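As a quick reminder of what tidytext::bind_tf_idf() adds: tf_idf = tf * ln(number of documents / number of documents containing the token), so a token frequent in one book but absent from the others gets a high weight, while a token appearing in every book gets idf = 0. A toy example with made-up counts:

toy_counts <- dplyr::tibble(doc  = c("A", "A", "B"),
                            word = c("cat", "dog", "dog"),
                            n    = c(2L, 1L, 3L))
toy_counts %>%
  tidytext::bind_tf_idf(term = word, document = doc, n = n)
# "cat" occurs only in A: idf = ln(2/1); "dog" occurs in both: idf = 0, so tf_idf = 0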
CA_in <- tokenDf %>%
dplyr::filter(hinshi %in% c(" ", " ")) %>%
dplyr::filter(! token %in% sw) %>%
dplyr::count(docId, token) %>%
dplyr::ungroup() %>%
tidytext::bind_tf_idf(term = token, document = docId, n = n) %>%
dplyr::inner_join(books, by = "docId") %>%
dplyr::select(token, title, tf_idf) %>%
tidyr::spread(key = title, value=tf_idf, fill = 0.0) %>%
dplyr::mutate(tfidf_max = pmax(!!!rlang::syms(titles))) %>%
dplyr::top_n(n = 60, wt = tfidf_max) %>%
as.data.frame()
rownames(CA_in) <- CA_in$token
CA_in <- CA_in %>%
select(one_of(titles))
CA_in %>% head() %>% knitr::kable()
7
0.00E+00 0.0000224 0 0.00E+00 0.0036436
5.91E-05 0.0019978 0 0.00E+00 0.0000151
5.91E-05 0.0019853 0 0.00E+00 0.0000151
0.00E+00 0.0000000 0 0.00E+00 0.0014751
Ⅱ 0.00E+00 0.0000000 0 2.49E-05 0.0008940
0.00E+00 0.0000000 0 0.00E+00 0.0025933
Let's try analyzing
with a topic model
https://www.albert2005.co.jp/knowledge/machine_learning/topic_model/about_topic_model
https://youtu.be/dE10fBCDWQc
LDA_in <- tokenDf %>%
dplyr::filter(hinshi %in% c(" ", " ")) %>%
dplyr::filter(! token %in% sw) %>%
dplyr::group_by(docId, token) %>%
dplyr::count(token) %>%
dplyr::ungroup() %>%
dplyr::group_by(token) %>%
dplyr::mutate(total = sum(n)) %>%
dplyr::ungroup() %>%
dplyr::top_n(n = 5000, wt = total) %>%
dplyr::inner_join(books, by = "docId") %>%
tidytext::cast_dtm(title, token, n)
LDA_out <- topicmodels::LDA(LDA_in, k = 5,
control = list(seed = 123))
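k = 5 here matches the number of books. Not from the slides, but as a rough sanity check you could compare a few values of k by training-set perplexity of the VEM fit (the default in topicmodels); training perplexity tends to keep falling as k grows, so treat it only as a guide:

# Sketch: compare candidate topic counts by perplexity (lower is better)
ks <- c(2, 5, 8)
perp <- sapply(ks, function(k) {
  fit <- topicmodels::LDA(LDA_in, k = k, control = list(seed = 123))
  topicmodels::perplexity(fit)
})
data.frame(k = ks, perplexity = perp)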
LDA_out %>%
tidytext::tidy(matrix = "gamma") %>%
ggplot2::ggplot(aes(factor(topic), gamma)) +
ggplot2::geom_boxplot() +
ggplot2::facet_wrap(~ document) +
ggplot2::theme_minimal(base_family = "HiraMaruProN-W4")
LDA_out %>%
tidytext::tidy() %>%
dplyr::group_by(topic) %>%
dplyr::top_n(n = 10, wt = beta) %>%
dplyr::ungroup() %>%
dplyr::mutate(term = reorder(term, beta)) %>%
ggplot2::ggplot(aes(term, beta, fill = factor(topic))) +
ggplot2::geom_col(show.legend = FALSE) +
ggplot2::facet_wrap(~ topic, scales = "free_y") +
ggplot2::coord_flip() +
ggplot2::theme_minimal(base_family = "HiraMaruProN-W4")
https://logmi.jp/business/articles/156592
Let's summarize the text
https://qiita.com/icoxfog417/items/d06651db10e27220c819
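For English text, lexRankr's one-call interface lexRankr::lexRank() handles sentence parsing, similarity, and ranking on its own; the slides instead use the lower-level sentenceSimil() / lexRankFromSimil() pair, presumably because Japanese sentences first need to be tokenized with MeCab. A minimal sketch of the one-call version on toy English text:

toy_doc <- paste(
  "R is a language for statistical computing.",
  "The tidyverse makes data manipulation in R pleasant.",
  "This sentence is about cooking pasta instead.")
# return the single most central sentence by LexRank score
lexRankr::lexRank(toy_doc, n = 1, continuous = TRUE)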
tokenDf <- tokenDf %>%
dplyr::filter(docId == "1") %>%
dplyr::group_by(sectionId) %>%
tidyr::nest()
tokenDf %>% head()
# A tibble: 6 x 2
sectionId data
<chr> <list>
1 1_0006 <tibble [2,642 × 4]>
2 1_0007 <tibble [1,432 × 4]>
3 1_0009 <tibble [5,928 × 4]>
4 1_0010 <tibble [3,755 × 4]>
5 1_0011 <tibble [5,135 × 4]>
6 1_0013 <tibble [3,440 × 4]>
tokenDf$data[[1]] %>%
head() %>%
knitr::kable()
docId sentenceId token hinshi
1 1_0006_0001
1 1_0006_0001 ─
1 1_0006_0001 ─
1 1_0006_0001
1 1_0006_0001
1 1_0006_0001
tokenDf$data[[2]] %>%
head() %>%
knitr::kable()
docId sentenceId token hinshi
1 1_0007_0001
1 1_0007_0001
1 1_0007_0001
1 1_0007_0001
1 1_0007_0001
1 1_0007_0001
simDf <- tokenDf %>%
  # build the per-section input vectors for lexRankr::sentenceSimil()
  dplyr::mutate(sentenceId = purrr::map(.$data, ~.$sentenceId),
                token = purrr::map(.$data, ~.$token),
                docId = purrr::map2(sectionId, token,
                                    function(x, y) {
                                      rep(x, length(y))
                                    }
                )) %>%
  # pairwise sentence similarities within each section
  dplyr::mutate(simil = purrr::pmap(list(sentenceId, token, docId),
                                    lexRankr::sentenceSimil)) %>%
  select(sectionId, simil)
simDf %>% head()
# A tibble: 6 x 2
sectionId simil
<chr> <list>
1 1_0006 <df[,3] [5,671 × 3]>
2 1_0007 <df[,3] [2,080 × 3]>
3 1_0009 <df[,3] [39,340 × 3]>
4 1_0010 <df[,3] [13,695 × 3]>
5 1_0011 <df[,3] [25,651 × 3]>
6 1_0013 <df[,3] [12,880 × 3]>
simDf$simil[[1]] %>% head() %>% knitr::kable()
sent1 sent2 similVal
1_0006_0001 1_0006_0002 0.5045201
1_0006_0001 1_0006_0003 0.4682931
1_0006_0001 1_0006_0004 0.0000000
1_0006_0001 1_0006_0005 0.5541319
1_0006_0001 1_0006_0006 0.5856045
1_0006_0001 1_0006_0007 0.4752808
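For reference, similVal is the pairwise sentence similarity. LexRank (Erkan & Radev, 2004) defines it as the idf-modified cosine, and assuming sentenceSimil() follows the paper, each value corresponds to:

\mathrm{sim}(x, y) = \frac{\sum_{w \in x \cap y} \mathrm{tf}_{w,x}\, \mathrm{tf}_{w,y}\, \mathrm{idf}_w^{2}}{\sqrt{\sum_{w \in x} (\mathrm{tf}_{w,x}\, \mathrm{idf}_w)^{2}}\; \sqrt{\sum_{w \in y} (\mathrm{tf}_{w,y}\, \mathrm{idf}_w)^{2}}}

where tf_{w,x} is the count of token w in sentence x; a value of 0 (e.g. 1_0006_0001 vs 1_0006_0004 above) means the two sentences share no idf-weighted tokens.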
topNSents <- simDf %>%
  # unpack each similarity data frame into the vectors expected
  # by lexRankr::lexRankFromSimil()
  dplyr::mutate(s1 = purrr::map(.$simil, ~.$sent1),
                s2 = purrr::map(.$simil, ~.$sent2),
                simil = purrr::map(.$simil, ~.$similVal),
                # summary length: 10% of the sentences in each section
                # (sentence ids recovered from the similarity pairs)
                n = purrr::map(.$simil, function(x) {
                  as.integer(ceiling(dplyr::n_distinct(
                    c(as.character(x$sent1), as.character(x$sent2))) * 0.1))
                })) %>%
  # rank sentences by LexRank and keep the top n per section
  dplyr::mutate(topN = purrr::pmap(list(s1, s2, simil, n),
                                   lexRankr::lexRankFromSimil,
                                   threshold = 0.2,
                                   continuous = TRUE))
topNSents %>% head()
# A tibble: 6 x 2
sectionId topN
<chr> <list>
1 1_0006 <df[,2] [11 × 2]>
2 1_0007 <df[,2] [7 × 2]>
3 1_0009 <df[,2] [29 × 2]>
4 1_0010 <df[,2] [17 × 2]>
5 1_0011 <df[,2] [23 × 2]>
6 1_0013 <df[,2] [17 × 2]>
topNSents$topN[[1]] %>% knitr::kable()
sentenceId value
1_0006_0012 0.0110008
1_0006_0025 0.0110065
1_0006_0027 0.0109621
1_0006_0028 0.0110488
1_0006_0033 0.0109785
1_0006_0040 0.0110083
1_0006_0041 0.0110468
1_0006_0047 0.0110727
1_0006_0059 0.0110430
1_0006_0085 0.0110101
1_0006_0086 0.0109732
res <- topNSents %>%
  select(topN) %>%
  tidyr::unnest(cols = topN) %>%
  dplyr::inner_join(sentDf, by = "sentenceId")
res %>% head() %>% knitr::kable()
sentenceId value docId sectionId sentence
1_0006_0012 0.0110008 1 1_0006
1_0006_0025 0.0110065 1 1_0006
1_0006_0027 0.0109621 1 1_0006
1_0006_0028 0.0110488 1 1_0006
1_0006_0033 0.0109785 1 1_0006
1_0006_0040 0.0110083 1 1_0006 ──
sentDf %>%
filter(docId == "1") %>%
group_by(sectionId) %>%
count()
# A tibble: 31 x 2
# Groups: sectionId [31]
sectionId n
<chr> <int>
1 1_0006 107
2 1_0007 65
3 1_0009 281
4 1_0010 166
5 1_0011 227
6 1_0013 161
res %>%
filter(docId == "1") %>%
group_by(sectionId) %>%
count()
# A tibble: 31 x 2
# Groups: sectionId [31]
sectionId n
<chr> <int>
1 1_0006 11
2 1_0007 7
3 1_0009 29
4 1_0010 17
5 1_0011 23
6 1_0013 17
令和から本気出す (Getting serious from Reiwa)