SlideShare uma empresa Scribd logo
1 de 29
Baixar para ler offline
EKAW 2010 • Tutorial T3
  Friday • 15th october 2010


Knowledge Acquisition from Social Networking Sites
Z. Zhang, A.E. Cano, K. Elbedweihy, A.-S. Dadzie
Introduction                                                                          A little housekeeping ...
This exercise is designed to help you ...                                             Folder structure – top level - ekaw-kasna_exercises/
                                                                                      • data
• understand the procedure of knowledge acquisition                                      – data/animalcorpus/
   from social networking sites                                                          – data/examples/
                                                                                         – data/corpora/facebook_data | twitter_data/
• learn to use relevant tools to acquire information and                              • code
   knowledge from social networking sites                                                – facebook/
                                                                                         – twitter/
• create a simple application to demonstrate the                                         – information_extraction/ekawtutorial/ | jatr_v1.0/
   technologies in practice                                                           • external libraries
                                                                                         – lib/


                                                                                      • downloads from tutorial website
                                                                                         http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/
                                                                                         tutorial_prep.html#exercise_downloads
                                                                                         http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/
                                                                                         tutorial_prep.html#third_party_downloads




A little housekeeping ...                                                             A little housekeeping ...
Running the applications                                                              Using ant
• Test internet connection
    – to run facebook and twitter examples

• tested with JDK 1.6
• ant build script
    – build.xml & ekaw.kasna.TestRunner class
    – double-click on starter file for each application and O/S OR
         • may need to modify rights to execute (chmod 755)
    – enter ‘ant’ at console for top level of each source code folder OR
• standalone
    – set up classpath (O/S dependent)
    – call javac with each test class
• IDE
    – create a new application using src folders for each of twitter, facebook + ie
    – set up classpath (IDE dependent)
    – set up application properties and run each main method
A little housekeeping ...                                              Setup
Standalone                                                             facebook and twitter APIs
                                                                       • Documentation:
                                                                          – facebook Graph API:
                                                                                http://developers.facebook.com/docs
                                                                          – twitter API
                                                                                http://apiwiki.twitter.com/Twitter-API-Documentation
                                                                       • Sign up:
                                                                          – facebook: http://www.facebook.com
                                                                          – twitter: https://twitter.com/signup
                                                                       • Libraries
                                                                          – RestFB: http://restfb.com
                                                                          – twitter4j: http://twitter4j.org/en




Setup                                                                  Scenario
 Natural Language Processing and Information Extraction                2010 South Africa World Cup – match summarisation
                                                                       • During the 2010 World Cup tournament in South Africa,
• OpenNLP 1.4  Java toolkit for building NLP and IE applications       twitter and facebook were used extensively as a discussion
                                                                       board for fans to exchange information and opinions about
    – contains pre-built language models to be used by OpenNLP for
      language processing
                                                                       matches;
   http://opennlp.sourceforge.net                                         – hundreds of thousands of messages were generated daily
   http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/                 on the two social networking sites;
     exercise_rscs/ie_models_eng.zip                                      – a large proportion of these messages discuss the match of
                                                                             the day;
• Java Automatic Term Recognition toolkit (JATR)                       • we are interested in analysing these messages
   http://www.dcs.shef.ac.uk/~ziqizhang/resources/tools/
     jatr_v1.0.zip
                                                                          – to understand what are the most popular topics that
                                                                             interest people;
Scenario cont.                                                   Scenario Analysis
2010 South Africa World Cup – match summarisation                Re-cap from the morning session
• To do so we built a “match summarisation” application          • how to identify specific content of interest
   – input - corpus of messages related to a match                  – content retrieval and filtering
   – output - ranked list of representative terms that can be    • how to process the content and make sense of it
   used to summarise corpus content                                 – information extraction
                                                                    – natural language processing
• Using the extracted terms we can analyse what has been
the focus of discussion of the match of the day
• For this very exercise, we study the match between
                                      A Knowledge
England and Germany on the 27th of June 2010.
                                   Acquisition process




Scenario Analysis                                                Corpus Generation
• Input: corpus of messages related to a match                   • Goal: create a corpus of messages
   – we need to pin-point relevant messages on twitter and          – that discuss the match between England and Germany
      facebook                                                         on 27th June 2010
   – using twitter and facebook APIs, we apply content           • Input:
      retrieval and filtering to build this corpus
                                                    Corpus          – twitter API providing access to twitter data
                                                  generation        – facebook API providing access to facebook data
                                                                    – content filtering parameters (the England-Germany
• Output: ranked list of representative terms                          match on 27th June 2010)
   – we apply IE and NLP on the corpus to achieve this goal      • Output
                                           Content                  – corpus of messages related to only the match of interest
                                         analysis by IE
Corpus Generation using twitter




                            twitter




                                                                       Code in: ekaw-kasna_exercises/twitter
                                                                       External libs: lib/twitter4j-core-2.1.6-SNAPSHOT.jar |
                                                                                         log4j-1.2.15.jar




Corpus Generation using twitter                                       Corpus Generation using twitter
 Ex.1 REST API (Analysing the public timeline status)                  Ex.1 REST API
 • Provides methods for fetching data related to:                      • Analyze the structure and content of public
    • Timelines, Status, Users, Members, subscribers, followers,          timeline statuses
       social graphs etc.                                                 – Where was the status tweeted from?
 – Wait! You will need to complete the code for it to                     – Was it a retweet?
    actually do something! - Edit the class:
                                                         Exercise
    ekaw.kasna.twitter.StatusTest
 • Refer to the Twitter4J javadoc to complete the exercises:
     http://twitter4j.org/en/javadoc/index.html

 Try it yourself: run StatusTest.java
Corpus Generation using twitter                                                                                                  Corpus Generation using twitter
  Ex.1 REST API                                                                                                                    Ex.1 REST API
                                                                                                                                   • Answer
                          Twitter twitter = new                                                                                     try{
                          TwitterFactory().getInstance();                                                                             ResponseList<Status>publicTimeline = twitter.getPublicTimeline();
                                                                                                                                      //*TODO Complete exercise and analyse structure and content of each status
    try{                                                                                                                              GeoLocation geoLocation;
         //We request the public timeline, which returns a list of Status                                                             Place place;
      ResponseList<Status> publicTimeline = twitter.getPublicTimeline();                                                              while (it.hasNext()){
          /**                                                                                                                               Status st = it.next();
           * Complete this exercise and analyse the structure and content                                                                   log.info(st.getText());
  of each of the Status.                                                                                                                    log.info(st.getSource());
           * Have a look at the java doc of the Status Class, or just                                                                       if ((geoLocation = st.getGeoLocation()) != null)
  check the available methods in your IDE                                                                                                      log.info(geoLocation.toString());
           */                                                                                                                               if ((place = st.getPlace()) != null) {
       Iterator<Status> it = publicTimeline.iterator();                                                                                       log.info(place.getFullName());
                                                                                                                                              log.info(place.getBoundingBoxCoordinates().toString());
       while (it.hasNext()){
                                                                                                                                            }
           //TODO check what are the info you can get from a Status.
                                                                                                                                      }
       }
                                                                                                                                    } catch (TwitterException e){

  • Try it yourself: edit and run StatusTest.java                                                                                   }
                                                                                                                                      e.printStackTrace();




Corpus Generation using twitter                                                                                                  Corpus Generation using twitter
  Ex.1 REST API
                                                                                                                                 Ex.2 Search API
  • Output  timeline status
 ??????????!!??888888888 RT @nico_news: ???????????????????????????????????????? http://bit.ly/aZcvfl
 <a href="http://twipple.jp/" rel="nofollow">?????/twipple</a>
                                                                                                                                 • Allows interaction with twitter
 Southampton v Tranmere: Preview followed by live coverage of Saturday's game between Southampton and Tranmere in L...
       http://bit.ly/9N802N
                                                                                                                                    search and trends data
 <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>
 Laper gueeee                                                                                                                       – top topics that are currently trending on
 <a href="http://www.snaptu.com" rel="nofollow">Snaptu.com</a>
 ?????????????????????????? / ??????????????????????????
                                                                                                                                       A26:+$*
                                                                                                                                 •! !#*+>/%,+,*#.+*0%33%26"4*7+#.%&,S**
 <a href="http://www.echofon.com/" rel="nofollow">Echofon</a>
 Changing the Language of Oppression http://bit.ly/aXA4w3 #specialneeds
 <a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>
 Are you attending the SuperSwarm at Jewel, Piccadilly tonight? Let's get an idea of numbers via my poll @ www.theprgeek.co.uk      – search,
       #superswarmLDN
 web                                                                                                                                – trends,
 Simon Cowell To Receive Special Emmy Award: October 7, 2010: Music mogul and former American Idol judge Simo... http://
       tinyurl.com/299o5gg                                                                                                          – trends/current, trends/daily, trends/
 <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>
 "Wajahmu seperti bulan" --» ini artinya ngatain kan yah? Org bulan bolong2                                                            weekly
 <a href="http://blackberry.com/twitter" rel="nofollow">Twitter for BlackBerry®</a>
 FM????????????
 <a href="http://stone.com/Twittelator" rel="nofollow">Twittelator</a>
                                                                                                                                 • The Search API supports among
 ???? [????:?????/????????????????????????]559 #colopl_msg
 <a href="http://t.colopl.jp/t/" rel="nofollow">Colotwi</a>
                                                                                                                                    %#.+$,O*#.+*0%33%26"4*%/+$-#%$,*0%$*
 pikiran saya cabangnya banyak, jd pusing sendiri..penuh rasanya ni kepala                                                          (%",#$'()"4*-*5'+$G*,#$6"4*
 <a href="http://m.tweete.net" rel="nofollow">m.tweete.net</a>...
Corpus Generation using twitter                                                      Corpus Generation using twitter
 Ex.3 Search API                                                                       Ex.2 Search API
                                                                                       – Wait! You will need to complete the code for it to actually do
 sinceid:     Specifies the id of the status from which to start the search               something! - Edit the class:
 untilid:     Specifies the id of the status from which to end the search
                                                                                             ekaw.kasna.twitter.QueryTest                                                                           Exercise
 Since:       Statuses produced since a specified date (e.g. 2010-03-10)
 Until:                                                                                       Query query = new Query();
 filter:links Retrieves tweets without links                                                  query.query("football");

 from:        Retrieves statuses from a given user. (e.g. from: fifa)                               // TODO Modify the query object, and search for
 lang:        Retrieves statuses in a given language                                          today's tweets (in english) related to football

 OR           e.g., mentioning Mexico OR France                                                     // TODO Restrict your results to tweets generated
                                                                                              within 300 kilometers of Johannesburg, South Africa
 :)           e.g., containing football with a positive attitude (e.g. football :) )
                                                                                                    // hint: use Query's geoCode method, the
 Negation     e.g., mentioning beer but not root                                              Kilometers unit is given as Query.KILOMETERS
                                                                                                    // hint: South Africa's lat: 26.12, long: 28.2
 Source:      e.g., Containing football entered via TwitterFeed (e.g. news
              source:TwitterFeed)                                                      • Try it yourself: run QueryTest.java




Corpus Generation using twitter                                                      Corpus Generation using twitter
                                                                                       Ex.1 REST API
 Ex.2 Search API
                                                                                       • Output  query request for ‘football’ near ‘Johannesburg’
 – Answer
                                                                                     hits:15
        Query query = new Query();                                                   MQMhlanzi:Total Football 360: Bafana Eager to Keep the Momentum of Winning! http://t.co/xOPTaY9
                                                                                     Benleeds:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or
        query.query("football");                                                           Morecambe or Dagenham and Redbridge?
                                                                                     Tumelo13:Gota admit I miss my NONstop #football convo's wit @Denisao_4 and @GordonTyler8! Haha talk bout nothing but the
                                                                                           #beautifulgame
              //*TODO Modify the query object, and search for                        Tumelo13:RT @Denisao_4: Ey bra @Tumelo13 that's not a sin! That's for the love of football! I approve wow! Let's hope it works :)??
        today's tweets related to football                                                 Amen
                                                                                     Edwardo84:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer
                                                                                     jonerz97:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe
              //*TODO Restrict your results to tweets generated                            or Dagenham and Redbridge?
        within 300 kilometers of Johannesburg, South Africa                          dcocker11:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer
                                                                                     AntimoOsato91:@siasduplessis Oros and The Dutch National Football Team could be good sponsors too! Haha :)
                                                                                     IsaacTeka:#football - EURO 2012 qualifier between Germany and Turkey is gonna be a fierce encounter. #Ozil and #Khedira
              // hint: use Query's geoCode method, the                               applenessuk:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer
                                                                                     johnyrotten:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or
        Kilometers unit is given as Query.KILOMETERS                                       Morecambe or Dagenham and Redbridge?
              // hint: Johannesburg’s lat: 26.12, long: 28.2                         kartikverma:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer
         query.geoCode(new GeoLocation(26.12,28.2),                                  RawRemedy:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or
                                                                                           Morecambe or Dagenham and Redbridge?
        30,Query.KILOMETERS);                                                        TLW1Dan:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer
                                                                                     jopayne:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe
                                                                                           or Dagenham and Redbridge?
Corpus Generation using twitter                                                  Corpus Generation using twitter
 Ex.3 Stream API                                                                  Ex.3 Stream API
                                                                                  Twitter 4j allows you to retrieve streaming samples using the class
RestAPI and SearchAPI only present a limited snapshot of
                                                                                  TwitterStream. For the public timeline you just need basic
a timeline.              During the finals of the 2010 World Cup                  authentication.
                                 the rate of tweets containing the tags
                                 #Spain, #Netherlands, #Germany,                  1.   Create a TwitterStream instance
                                 #Uruguay, was quite high.                                twitterStream = new
                                                                                         TwitterStreamFactory(this).getInstance("yourAcc","yourPass");

                                 Two options:                                            Set a Listener for receiving the event of a status. Your listener should
                                                                                  2.
                                 •! make requests, say, every 2sec                       implement the method public void onStatus(Status status)
                                  through the RestAPI or the Search API,
                                 •! BETTER:                                              twitterStream.setStatusListener(this);
                                      •! start listening to a stream of public
                                                                                  3.     Start Sampling
                                      tweets &
                                                                                         twitterStream.sample();
                                      •! filter according to the tag patterns
                                                                                  4.     Do something with the tweet in your onStatus method




Corpus Generation using twitter                                                  Corpus Generation using twitter
 Ex.3 Stream API                                                                  Ex.3 Stream API
 – Wait! You will need to complete the code for it to actually do                 – Answer
    something! - Edit the class:
    ekaw.kasna.twitter.StreamTest                                                        private void startConsuming() throws TwitterException {
                                                                                            twitterStream.setStatusListener(this);

    private void startConsuming() throws TwitterException {                                 //*TODO Using TwitterStream’s filter method,
       twitterStream.setStatusListener(this);                                            restrict your sampling to collect tweets that include
                                                                                         the words: football, worldcup, final
       //*TODO Using TwitterStream’s filter method,
    restrict your sampling to collect tweets that include                                     String[] filterWords = {"#worldcup", "#WorldCup",
    the words: football, worldcup, final                                                 "#Worldcup", "#WORLDCUP"};
                                                                                                 twitterStream.setStatusListener(this);
        twitterStream.sample();                                                                  twitterStream.filter(0,null,filterWords);
    }                                                                                       twitterStream.sample();
                                                                                         }

 • Try it yourself: run StreamTest.java
Corpus Generation using twitter                                      Corpus Generation using twitter
Additional Exercise: Authentication                                  • Try it yourself!
                                                                        • Authenticating using Oauth
• restrictions to accessing private data!!!                                  • OAuthTest.java
                                                                        • Using the application “Ekaw-Kasna”
• CHANGES SEP 2010                                                      • Login with your twitter account and go to:
     • change to authentication mode for retrieving individuals’        http://twitter.com/apps/new
     status information
     • from a simple username-password to:
          • Oauth-based authentication of registered “applications”




Corpus Generation using twitter                                      Corpus Generation using twitter
                                                                       • Authenticating using Oauth
                                                                          – Running the example requires a PIN
                                                                             • enter the URL at the console in a web browser
                                                                             • to obtain an oauth_token
                                        You will need these two
                                       strings for authenticating




                                                                                                                You will be giving
                                                                                                               authorization to this
                                                                                                               application to access
                                                                                                                 your information
Corpus Generation using twitter                                Corpus Generation using twitter
 • Authenticating using Oauth                                   • Authenticating using Oauth
    – Running the example requires a PIN                           – Running the example requires a PIN
       • enter the URL to obtain an oauth_token                       • enter the URL to obtain an oauth_token
                                                                   – Once you “Allow” authorization you will be provided
    – Once you “Allow” authorization you will be provided             with the PIN:
       with a PIN:                                                 – Enter the PIN to complete authentication
                                            This is the PIN        “YOU ARE AUTHENTICATED!!”
                                              needed to
                                             complete the
                                            authentication




                                                               Corpus Generation using facebook




                          facebook




                                                                Code in: ekaw-kasna_exercises/facebook
                                                                External libs: lib/restfb-1.5.3.jar | log4j-1.2.15.jar
facebook API  Fetching Objects                                  facebook API  Fetching User data
 •! The Graph API                                                 https://graph.facebook.com/facebook
     •! provides facilities for reading and writing data to
     facebook

 •! Each API request starts with the URL:
    https://graph.facebook.com

 •! e.g., data about any object can be found by fetching
    https://graph.facebook.com/objectID
   - objectID is the unique id of this object in the social
 graph

   - e.g., the unique id for a page is its name:
    https://graph.facebook.com/facebook




facebook API  Connections                                       facebook API  Connections

 •! All objects in the facebook social graph are connected via
 relationships (connections)

 •! Fetch connections
    https://graph.facebook.com/objectID/connection_type


 •! e.g., the page’s own posts
    https://graph.facebook.com/facebook/posts
facebook API: Page Connections                                                              facebook API: Filtering Data

feed             The page’s wall                                                              •! Data can be filtered using parameters
picture          The page’s profile picture                                                       •! e.g.,
tagged           The photos, videos, and posts in which this page has been tagged                   -! since, until ---> specify date ranges
links            The page’s posted links
                                                                                                    -! limit ---> specify amount of returned data
photos           The photos this page has uploaded
groups           The groups this page is a member of
albums/videos    The photo albums/videos this page has created
                                                                                                 •! e.g., fetching the feed
statuses         The page’s status updates
                                                                                                      -! within specified dates and
notes            The page’s notes                                                                     -! with a limit of 50
posts            The page’s own posts
                                                                                                 https://graph.facebook.com/worldcup/feed?
                                                                                              since=2010-07-17&until=2010-07-20&limit=50
M&ME&($%         A.+*/-4+o,*7+7@+$,;*i%'*(-"*%"3G*5'+$G*'/*#%*J__*7+7@+$,;*!#*6,*"%#*
                 /%,,6@3+*#%*6#+$-#+*#.$%'4.*#.+*36,#;*R>-7/3+S*.:/,S??4$-/.;0-(+@%%1;(%7?
                 pU9TR=!Qq?7+7@+$,k3676#rJ__*
events           The events this page is attending
checkins         Checkins made by friends of the current session user




facebook API: Filtering Data                                                                facebook API: Finding Objects

                                                                                              •! Search for objects
                                                                                                 https://graph.facebook.com/search?
                                                                                              q=query&type=objectType


                                                     “created_time” is within                    - query ---> what you want to find
                                                     the specified date ranges
                                                                                                 - objectType ---> type of the object (e.g.
                                                                                              facebook post, user)

                                                                                              •! e.g., search all public posts for “2010 world cup”
                                                                                                   https://graph.facebook.com/search?q=2010%20world
                                                                                              %20cup&type=post
facebook API: Finding Objects                           facebook API: Graph API Exercise

                                                          Try it yourself!

                                                          •! Fetch the data about the page worldcup

                                                          •! Get the feed of this page (hint: connection is feed)
                                                              •! this is the wall for the page worldcup

                                                          •! Return only the first 5 messages of this feed
  Posts containing the terms
   “2010” + “world” + “cup”
                                                          •! Search for all pages containing worldcup in the
                                                          page name




facebook API: Graph API Exercise                        facebook API: Graph API Exercise
 •! ANSWERS                                               •! ANSWERS
     •! page worldcup:                                        •! Get the feed (wall) of the page worldcup:
                                                             https://graph.facebook.com/worldcup/feed
          •! fetch https://graph.facebook.com/worldcup
facebook API: Graph API Exercise                                    facebook API: Graph API Exercise
 •! ANSWERS                                                           •! ANSWERS
     •! Return only the first 5 messages of the feed:                     •! Search for all pages containing worldcup in the
    https://graph.facebook.com/worldcup/feed?limit=5
                                                                             page name
                                                                             https://graph.facebook.com/search?q=worldcup&type=page




Client Libraries                                                     RestFB API: World Cup Scenario

 •! Multiple client libraries for facebook API                        •! Exercise:
    http://developers.facebook.com/search?                                get the messages sent on the day of the
 q=User:Client_Libraries
                                                                          England-Germany match - 27th of June 2010
    •! RestFB client library was the first java library to support
                                                                      1.     Search for all pages containing “worldcup”
    the GraphAPI
    •! Other Java libraries now supporting GraphAPI
                                                                      2.     For every page:
        - BatchFB
                                                                              •! Get the messages posted on that day
        - TinyFBGraphClient
                                                                              •! Store the messages to generate your corpus
        - facebook Java Webapp

    •!We use the RestFB client library in this tutorial
RestFB API: DefaultFacebookClient                                                  RestFB API: Searching

 •! DefaultFacebookClient                                                            •! Step 1:
     •! provides methods for reading and writing data
                                                                                             Connection<T>
     to facebook graph                                                                       fetchConnection(String connection,
                                                                                             Class<T> connectionType,
 FacebookClient facebookClient                                                               Parameter... parameters)
 = new DefaultFacebookClient();                        Access public data
                                                                                    FacebookClient facebookClient = new DefaultFacebookClient();

 facebookClient = new                                                               Connection<Page> pageSearch =
 DefaultFacebookClient(ACCESS_TOKEN);                                               facebookClient.fetchConnection("search",Page.class,
                                                                                       Parameter.with("q", "world cup"), Parameter.with("type",
                                                                                       "page"), Parameter.with("limit", "10"));
                                Required to access private
                                 data or edit/publish data
                                                                                                                      https://graph.facebook.com/search?q=world
                                                                                                                              +cup&type=page&limit=10




RestFB API: Searching                                                              RestFB API: return from request - pages
•! returns a list of the first 10 pages about “worldcup”                            •! World Cup Pages
•! For each page, properties returned include:
                                                                                     K9M&%                Q9-&+.(1%                 @<%
    –! 6&O*"-7+O*(-#+4%$GO*0++&O*/6(#'$+,*b                                          `%$3&*a'/*           U%36)(6-",*               J_tY[_YulvI*
                                                                                     `%$3&*a'/*           U$%&'(#,=%#.+$*           [lJJJvYvuItt^lu*
                         4+#Q-#-*<<s*$+#'$",*-*36,#*%0*%@N+(#,*H&+/+"&6"4*%"*#.+*    2%$3&*('/*           F/%$#,=-#.3+)(,*          [lY[Ivl_l^vv_vl*
                                         (%""+()%"*$+5'+,#+&K*
                                                                                     `%$3&*a'/*^_[_*      U$%&'(#,=%#.+$*           ^JIvvtYItvvv*
                                                                                     C'4@G*`%$3&*a'/*     F/%$#,=-#.3+)(,*          [[v^Iv^l^[Il*
 for (Page page : pageSearch.getData()) {                                            ^_[_*`%$3&*a'/*      e"1"%2"*                  [^J_YtltY^_^^tJ*
     System.out.print("Name: " + page.getName());                                    w`ECVQ*aeUd*         a3'@,*                    [^lvttYI^[Iv*
     System.out.print("Category: " + page.getCategory());                            `%$3&*a'/*%"*RFUZ*   F/%$#,=-#.3+)(,*          [v[Jl[lt^_Y_*
     System.out.println("ID: " + page.getId());
                                                                                     `ECVQ*aeU*           F/%$#,=#+-7,*             [^_l_IlvYvv[_Jv*
 }*
                                                                                     ^_[_*`%$3&*a'/*      V%(-3=@',6"+,,*           lvI[[uIIlt[v*
RestFB API: Exercise                                         RestFB API: Exercise

 Try it yourself!                                              ANSWERS
                                                               Connection<Group> groupSearch =
 •! Edit the class SearchTest.java                                facebookClient.fetchConnection(
                                                                        "search", Group.class,
                                                                        Parameter.with("q", "2010 world cup"),
 •! Search for all groups talking about a topic of                      Parameter.with("type", "group"),
                                                                        Parameter.with("limit", "15"));
 interest to you
 •! Get the first 15 groups                                    for (Group group : groupSearch.getData()) {
                                                                  System.out.println("Name: " + group.getName());
 •! For every group:                                              System.out.println("ID: " + group.getId());
                                                               }
      - print name and ID




RestFB API: return from request - groups                     RestFB API: Getting the feed
‘2010 world cup’ groups
  Name                                     ID
                                                               •! Step 2:
  kkkkkkk**x-7-3+1*Ey(6-3*T$%'/*           ^^JJ^[YItu[J*
  ^_[_*W!W9*`ECVQ*aeU*                     [^Y[Iulu_uJ[YJv*
                                                                     Connection<T>
                                                                     fetchConnection(String connection,
  ^_[_*W!W9*`%$3&*a'/*                     ^^_YtlvIYJ*
                                                                     Class<T> connectionType,
  ^_[_*W!W9*`ECVQ*aeU*FEeAn*9WC!a9*        ^I_Ilt[tYJI*
                                                                     Parameter... parameters)
  ^_[_*W60-*`%$3&*a'/*F%'#.*90$61-*        [^_uIl^[[^II[Ju*
  ^_[_*W!W9*`%$3&*a'/*F%'#.*90$6(-*        [[[I_tJvJJ[YYlv*
  ^_[_*W60-*`%$3&*a'/*Q$6"16"4*T-7+*       ^lv[^t[ut_^u*       Connection<Post> myFeed = facebookClient.fetchConnection(
  ^_[_*W!W9*`ECVQ*aeU*FEeAn*9WC!a9*        [_tJ^t^u^J[Jlt_*       "worldcup/feed", Post.class, Parameter.with("since",
  g'"&6-3*^_[_*F'&-0$6(-*^_[_*`%$3&*('/*   [uuv^tvtIlvl*          "2010-06-27T11:00:00"), Parameter.with("until",
                                                                  "2010-06-28T17:00:00"), Parameter.with("limit", "10"));
  !#-36-*<*^_[_*W!W9*`%$3&*a'/*            [tJYlYIlt^^*
  ^_[_<W!W9<`%$3&<a'/*                     [^vlIIll_I[^uIl*
  ^_[_*`%$3&*a'/**                         [[^_uJ^JttlJYYu*
  ^_[_*`%$3&*a'/*                          [ulll^l[vlIl*                                 .:/,S??4$-/.;0-(+@%%1;(%7?2%$3&('/?0++&k
  ^_[_*W!W9*`%$3&*a'/*                     [l_YvttuvuvJYII*                             ,6"(+r^_[_<_v<^IP'")3r^_[_<_v<^tP3676#r^_*
  ^_[_*W!W9*`%$3&*a'/*                     [vl[Y_tt[uIt*
REST API: Getting the feed                                                                                 REST API: return from request - feed
                                                                                                           Try it yourself! - ConnectionsTest.java
•! feed returns all posts written on the specified date
                                                                                                             •!    Message: the english were hoping to play penalties what a waste of their
•! For each post attributes returned include:                                                                      training time
    –! creation time, post name, description….                                                                                 Creation Time: Sun Jun 27 17:45:13 BST 2010
                                                                                                             •!    Message: Deutschland, Deutschland über alles, über alles in der Welt
                                                                                                                             Creation Time: Sun Jun 27 17:29:25 BST 2010
                                                                                                             •!    Message: world cup?? this wasn't a 'football games' but 'fakeball' games!!
 for (Post post : myFeed.getData()) {                                                                              Lampard was scored but the referee was blind....4-1?? congrats to the
                                                                                                                   referees coz they have a massive party tonite to celebrate!! $$$$$$$$$$$$$
    System.out.println("Message: " + post.getMessage());                                                           $$$ wow.... even can makes people blind!!! world cup??? **** off!!!
    System.out.println("\tCreation Time: " +                                                                                Creation Time: Sun Jun 27 17:25:32 BST 2010
        post.getCreatedTime());
                                                                                                             •!    Message: how are we suppose to be patriotic with a team that plays like
    }*                                                                                                             that, none of them deserve the money they get, waste of time..............
                                                                                                                             Creation Time: Sun Jun 27 16:48:06 BST 2010
                                                                                                             •!    Message: john terry on england should get worst defender for the year...he's
                                                                                                                   no good
                                                                                                                             Creation Time: Sun Jun 27 16:42:39 BST 2010




REST API: Post Properties, Connections                                                                    Corpus Generation using facebook
 Properties                                                                                                Additional Exercise: Authentication
id                   The post ID                                                                           •! restrictions to accessing private data!!!
from                 An object containing the ID and name of the user who posted the message
                                                                                                                  •! Access Token required for some methods
to                   A list of the profiles mentioned or targeted in this post
message              The message                                                                                        •! to prevent access (read or write) to private data
picture              If available, a link to the picture included with this post                                        •! e.g., publishing to the facebook social graph
link                 The link attached to this post                                                                •! Biddington provides a good explanation for getting access tokens at:
name                 The name of the link                                                                  http://benbiddington.wordpress.com/2010/04/23/facebook-graph-
caption/description  The caption/description of the link (appears beneath the link name)                   api-getting-access-tokens
source               If available, the source link attached to this post (for e.g., a flash or video file)
icon                 A link to an icon representing the type of this post
attribution          A string indicating which application was used to create this post
                                                                                                           •!*+;4;O*0+#(.*#.+*0$6+"&,*%0*',+$*L1.-&6N-;+3@+&2+6.GM*
9):.,$%              9*36,#*%0*-8-63-@3+*-()%"*"-7+,*-"&*36"1,*H6"(3'&6"4*(%77+")"4O*3616"4*-"&*-"*             •!*#.6,*$+5'6$+,*-'#.+")(-)%"**#%1+"*L>>`a`bO``O;;;M*
                     %/)%"-3*-//<,/+(6D+&*-()%"K*                                                                     https://graph.facebook.com/khadija.elbedweihy/
/#F&$%               A.+*"'7@+$*%0*361+,*%"*#.6,*/%,#*
                                                                                                                      friends&access_token=11585905509...
)(&9-&*:M&%         A.+*)7+*#.+*/%,#*2-,*6"6)-33G*/'@36,.+&*
20*9-&*:M&%         A.+*)7+*%0*#.+*3-,#*(%77+"#*%"*#.6,*/%,#*                 933*/$%/+$)+,*P*
                                                                             (%""+()%",*%0*-*              •!%!(1%#-%1.2($&/D3;;;*
 Connections
                                                                                 cU%,#d*
).MM&,-$%            933*%0*#.+*(%77+"#,*%"*#.6,*/%,#*
facebook API: Fetching User data                       facebook API: Fetching User data
 https://graph.facebook.com/khadija.elbedweihy
                                                         •! fetch specific fields
                                                         https://graph.facebook.com/khadija.elbedweihy?
                                                         fields=id,name,picture




                              Public Data only

                                                           Link to the
                                                             picture

                                                                                      Picture at the
                                                                                        given link




facebook API: Authorization Example                    facebook API: Authorization Example




                                  Access token works
                                  for the authorized
                                       user only


                                                                                          Same access token for a
                                                                                          different user “does not
                                                                                                  work”
facebook API: User Fields                                                            facebook API: User Connections
 id:                    The user’s ID
                                                                                       home:                 The user’s News Feed. Requires the read_stream permission
 first_name:            The user’s first name
                                                                                       feed:                 The user’s wall. Requires the read_stream permission to see
 last_name:             The user’s last name                                                                 non-public posts.
 name:                  The user’s full name                                           tagged:               The photos, videos, and posts in which this user has been
 about                  The user’s blurb that appears under their profile picture                            tagged. Requires the read_stream permission.
 birthday               The user’s birthday                                            posts:                The user’s own posts. Requires the read_stream permission
 work/education         A list of the work/education history from the user’s profile                         to see non-public posts.

 email:                 The proxied or contact email address granted by the user       picture:              The user’s profile picture

 website                A link to the user’s personal website                          friends:              The user’s friends

 hometown               The user’s hometown                                            activities/interests/ The activities/interests/music/books/movies/television listed on
                                                                                       music/books/          the user’s profile
 location               The user’s current location                                    movies/television:
 gender                 The user’s gender                                              likes:                All the pages this user has ‘liked’. Requires the user_likes or
 interested_in          Genders the user is interested in                                                    friend_likes permission.
 meeting_for            Types of relationships the user is seeking                     photos:               The photos this user is tagged in. Requires the
                                                                                                             user_photo_video_tags, friend_photo_video_tags and
 relationship_status    The user’s relationship status
                                                                                                             user_photos or friend_photos permissions.
 religion               The user’s religion




                                                                                      Scenario Analysis
                                                                                      Input: a corpus of messages related to a match
                                                                                           –! we need to pin-point relevant messages on twitter and
                                                                                           facebook
                                                                                           –! using twitter and facebook API, we apply content
                                                                                           retrieval and filtering to build this corpus
                       information extraction                                                                                                               Corpus
                                                                                                                                                          generation

                                                                                      Output: a ranked list of representative terms
                                                                                           –! we apply IE and NLP on the corpus to achieve this goal

                                                                                                                                              Content
                                                                                                                                            analysis by IE
Content Analysis via IE                                     Content Analysis via IE

 •! To analyse the content and extract important terms,      •! To analyse the content and extract important terms,
    we follow these steps                                       we follow these steps:
     –! Natural language analyses of each message               –! Natural language analyses of each message
       •! Tokenisation                                             (tokenisation, POS tagging)        OpenNLP
       •! POS tagging                                           –! Identify candidate information units of interest
    –! Identify candidate information units of interest            (phrase chunking, entity recognition)
       •! phrase chunking
    –! Identify statistically important information             –! Identify statistically important information (term
       •! term recognition
                                                                   recognition)
                                                                                           JATR




Content Analysis: Natural Language Analysis                 Content Analysis: Natural Language Analysis
•! Goal: process natural language text such that specific   •! An example: (located in “data/examples/
   information can be identified                               example1.txt”)
   –! These processes include
      •! Sentence segmentation                                “Rooney fails to end goal drought. Wayne Rooney’s
      •! Tokenisation                                               trip to South Africa 2010 began with high
      •! Part of Speech tagging                                  expectations but he leaves without a single goal
•! Input                                                       scored after three group matches and a 1-4 defeat
    –! a single message                                                          to Germany.”
•! Output
    –! a sequence of POS tagged tokens
Content Analysis: Natural Language Analysis                 Content Analysis: Natural Language Analysis
 •! Sentence segmentation                                       •! Sentence segmentation using OpenNLP
    –! Input: a single message
                                                              /* Input */ (LINE 17)
    –! Output: a list of sentences                            String pathToInput = "../../data/examples/example1.txt";
                                                              String content = "…";

 Rooney fails to end goal drought. | Wayne Rooney's trip to   /* Creates an object of OpenNLP sentence segmentation detector */
 South Africa 2010 began with high expectations but he        SentenceDetector detector = new SentenceDetector("lib/opennlp/models/
                                                                 EnglishSD.bin.gz");
 leaves without a single goal scored after three group
 matches and a 1-4 defeat to Germany.                         /* Call the actual method to identify the end offsets of sentences. */
                                                              int[] result = detector.sentPosDetect(content);

                                                              /* Print out the sentences */ Rooney fails to end goal drought. Wayne Rooney's
 Try it yourself! - SentenceSegmentation.java                 int start=0, i=0;              trip to South Africa 2010 began with high
                                                                                             expectations but he leaves without a single goal
                                                              do {                           scored after three group matches and a 1-4 defeat
                                                              ……                             to Germany.
                                                              } while(start<result[result.length-1]);




Content Analysis: Natural Language Analysis                 Content Analysis: Natural Language Analysis
 •! Tokenisation                                                •! Tokenisation using OpenNLP
    –! Input: a single sentence, or message                     /* Input text message */ (LINE 28)
                                                                String content = "…" // read in the text content from "example1.txt"
    –! Output: a list of tokens
                                                                List<String> sentences = new ArrayList<String>();
                                                                ……
                                                                /* Code for splitting sentences */

  Rooney fails to end goal drought                              /*Creates an object of OpenNLP tokeniser using a pre-built English language
                                                                   model. */
                                                                //change the path accordingly
                                                                String pathToEngTokenisationModel = "lib/opennlp/models/EnglishTok.bin.gz";
  Rooney, fails, to, end, goal, drought, .                      Tokenizer tokeniser = new Tokenizer(pathToEngTokenisationModel);

                                                                /*Tokenise each sentence and print out the result*/

 Try it yourself! - Tokenisation.java                           for(String sentence: sentences){
                                                                    String[] result=tokeniser.tokenize(sentence);
                                                                    for(String tok:result)
                                                                     System.out.println(tok); Rooney fails to end        goal drought.
                                                                 }
Content Analysis: Natural Language Analysis                    Content Analysis: Natural Language Analysis
 •! Part of speech tagging                                         •! POS tagging using OpenNLP
                                                                 /*Input text message*/ (LINE 31)
    –! Input: a list of tokens                                   String content = "…" //read in the text content from example1.txt

    –! Output: a list of tokens with their part of speech tag    List<String> tokens = new ArrayList<String>();
                                                                 /* Code for tokenisation and add the result into the list object above.
                                                                 You do not need to do sentence segmentation in this case. Because the
                                                                    tokenisation will detect sentence boundary as a first step*/

  Rooney, fails, to, end, goal, drought, .
                                                                 /*Creates an object of OpenNLP POS tagger using a pre-built English
                                                                    language model.*/
                                                                  //change the path accordingly
  Rooney/NNP fails/VBZ to/TO end/VB goal/NN drought/             String pathToEngPOSModel = "lib/opennlp/models/tag.bin.gz";
                                                                  /* You MAY specify additionally two parameters for the constructor, i.e.,
  NN ./.                                                            TagDictionary and Dictionary.*/
                                                                 PosTagger tagger = new PosTagger(pathToEngPOSModel, (Dictionary)null);

 Try it yourself! <*UEFA-44+$;N-8-**                             /*Tag the list of tokens and print out the result*/
                                                                 String[] result=tagger.tag(tokens.toArray(new String[0])); goal/NN
                                                                               Rooney/NNP fails/VBZ to/TO end/VB
                                                                               drought/NN ./.
                                                                 for (String tag: result)
                                                                    System.out.println(tag);




Content Analysis – Phrase Chunking                              Content Analysis – Natural Language Analysis
 • Goal: identifying information units that make good              • Phrase chunking
    candidate terms of our interest                                    – Input: a list of POS-tagged tokens
 • In this exercise, we focus on noun phrases                          – Output: a list of phrases (nouns/verb phrases)
     – which often bear important domain-specific
        information                                                  Rooney/NNP fails/VBZ to/TO end/VB goal/NN drought/
                                                                     NN ./.
 • Input
     – POS-tagged tokens
 • Output                                                            Rooney, goal drought
     – Noun phrases
                                                                                                                   Exercise
                                                                   Try it yourself!
                                                                     o edit the class PhraseChunker.java and run
a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,*                                         a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,*
                                                                                      •! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ*
  •! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ*                                                   (LINE 44 in PhraseChunker.java)
                                                                                      int[] result = detector.sentPosDetect(content);
                                                                                      int start = 0, i = 0;
  (LINE 32 in PhraseChunker.java)
                                                                                      do {
  //initialising all required NLP processors. If you get an out of memory
                                                                                            //sentence splitting
  //exception, try increasing your JVM heap space to at least 256MB
                                                                                            String sentence = content.substring(start, result[i]);
  String pathToEngTokenisationModel = "lib/opennlp/models/EnglishTok.bin.gz";
                                                                                             //TODO: tokenization, put tokens in a String array. Hint:
  String pathToEngPOSModel = "lib/opennlp/models/tag.bin.gz";
                                                                                             //Tokenisation.java
  String pathToEngPhraseModel = "lib/opennlp/models/EnglishChunk.bin.gz";
                                                                                              String[] tokens = null;
                                                                                              //TODO: POS tagging, put tags in a String array. Hint: POSTagger.java
  SentenceDetector detector = new SentenceDetector("lib/opennlp/models/
     EnglishSD.bin.gz");                                                                      String[] tags = null;
  Tokenizer tokeniser = new Tokenizer(pathToEngTokenisationModel);                            //This is the method you use to chunk phrases on a list of tokens and
  PosTagger tagger = new PosTagger(pathToEngPOSModel, (Dictionary) null);                     //a list of tags
                                                                                              String[] phrases = chunker.chunk(tokens, tags);
  TreebankChunker chunker = new TreebankChunker(pathToEngPhraseModel);                        //See the result
                                                                                              for(String p:phrases)
                                                                                             System.out.println(p);
                                                                                              ……
                                                                                            start = result[i];
                                                                                            i++;
                                                                                      } while (start < result[result.length - 1]);




a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,*                                         a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,*
•! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ*                                                     •! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ*
(LINE 44 in PhraseChunker.java)                                                        (LINE 78 in PhraseChunker.java)
int[] result = detector.sentPosDetect(content);                                              String npstart = "B-NP";             a%&+*0$%7*36"+*It*%"2-$&,*/$%(+,,+,*
int start = 0, i = 0;                                                                        String vpstart = "B-VP";
                                                                                                                                  #.6,*$+,'3#*-"&*4+"+$-#+,*#.+*$+-3*
do {                                     A.+*$+,'3#*6,*"%#*+>-(#3G*#.+*/.$-,+,*2+*           String npcontinue = "I-NP";
                                                                                                                                  /.$-,+,*
      //sentence splitting               +>/+(#+&O*@'#*-*36,#*%0*c#-4,dO*2.6(.*-$+*          String vpcontinue = "I-VP";
                                         (%77%"3G*',+&*6"*ZVU*/.$-,+*
      String sentence = content.substring(start, result[i]);                                 String other = "O";
     //TODO: tokenization, put tokens in (.'"16"4S* array.
                                          a String                                                String phrase = "";
     String[] tokens=null;                                                                       for (int n = 0; n < tokens.length; n++) {
                                         X<ZU*****C%%"+G     *     *C%%"+G*
     //TODO: POStagging, put tags in a String array. Hint: POSTagger.java                             if (phrases[n].equals(npstart) || phrases[n].equals(vpstart)) {
     String[]–tags = null;
           B “begin”
                                         X<}U******0-63,*                                                 phrase = tokens[n];
     //ThisI is“inside”
             – the method you use to chunk phrases on a list of tokens and                                for (int m = n + 1; m < tokens.length; m++) {
                                         !<}U*******#%    *  *     *0-63,*#%*+"&*
      //a list – “Noun phrase”
           NP of tags                                                                                         if (phrases[m].equals(npcontinue) ||
                                         !<}U*******+"&*
     String[] phrases phrase”
           VP – “Verb = chunker.chunk(tokens, tags);
                                         X<ZU*****4%-3*                                       phrases[m].equals(vpcontinue)) {
     //See the result
                                         !<ZU******&$%'4.# *
     for (int k = 0; k < phrases.length; k++) {
                                                                   *4%-3*&$%'4.#*                                phrase = phrase+" "+tokens[m];
                                                                                                            } else {
         System.out.println(phrases[k] + "\t\t" + tokens[k]);
                                                                                                                 System.out.println("Actual phrase: "+phrase);
     }
                                                                                                                 phrase = "";
     ……
                                                                                                                 break;
      start = result[i];
                                                                                       ...
      i++;
                                                                                             }
} while (start < result[result.length - 1]);
a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,*                                  g%$+*+>+$(6,+,*60*G%'*-$+*6"#+$+,#+&*
•! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ%
   –! A.+*-",2+$b;*                                                             •! C+/+-#*/$+86%',*#-,1,*',6"4*#.+*(%$/',*4+"+$-#+&*
 (LINE 44 in PhraseChunker.java)                                                   ',6"4*#.+*#26:+$*-"&*0-(+@%%1*9U!,*
 int[] result = detector.sentPosDetect(content);
 int start = 0, i = 0;                                                          •! A$GS*
 do {
         //sentence splitting                                                      –! F+"#+"(+*,+47+"#-)%"*
         String sentence = content.substring(start, result[i]);
        //TODO: tokenization, put tokens in a String array.
                                                 B-NP         Rooney
                                                                                   –! A%1+"6,-)%"*
         String[] tokens=tokeniser.tokenize(sentence);
                                                 B-VP
         //TODO: pos tagging, put tags in a String array.
                                                              fails                –! U-$#<%0<,/++(.*#-446"4*
                                                 I-VP         to
         String[] tags = tagger.tag(tokens);
                                                 I-VP on a list of tokens
         //This is the method you use to chunk phrases
                                                              end                  –! U.$-,+*(.'"16"4*
         //and a list of tags                    B-NP         goal
         String[] phrases = chunker.chunk(tokens,I-NP
                                                   tags);     drought
         //See the result                        O        .
         for(String p:phrases)                   Actual phrase: Rooney
       System.out.println(p);                    Actual phrase: fails to end
          ……                                     Actual phrase: goal drought
         start = result[i];
         i++;
 } while (start < result[result.length - 1]);




Next                                                                           Domain Term Recognition

 • To analyse the content and extract important terms,                          • Goal: extract statistically significant terms, which
    we follow these steps                                                          collectively determine the summary of the match
     – Natural language analyses of each message                                • Recap: domain term recognition procedure
        (tokenisation, POS tagging)                                                – NLP processes to identify candidate lexicons, e.g.,
     – Identify candidate information units of interest                              noun-phrases, entities
        (phrase chunking, entity recognition)                                      – Statistical measures to evaluate the significance of
                                                                                      candidate lexicons
     – Identify statistically important information (term
                                                                                      • term frequency, tf-idf, weirdness, glossex, c-value,
        recognition)                                                                     termex
Q%7-6"*A+$7*C+(%4"6)%"*                                                        ]9AC**]-8-*9'#%7-)(*A+$7*C+(%4"6)%"*#%%316#*
 •! R.9/S*+>#$-(#*,#-),)(-33G*,64"6D(-"#*#+$7,O*2.6(.*                          •! ;I!8%%
    (%33+()8+3G*&+#+$76"+*#.+*,'77-$G*%0*#.+*7-#(.*                                –! ]-8-<@-,+&*#%%316#*0%$*&+8+3%/6"4*-"&*#+,)"4*&%7-6"*
 •! 8&)90T**&%7-6"*#+$7*$+(%4"6)%"*/$%(+&'$+*                                         #+$7*$+(%4"6)%"*-34%$6#.7,*
     –! KLJ%0(.)&$$&$%#%*6&+")0G*(-"&6&-#+*3+>6(%",O*+;4;O*                     •! B$&%;I!8%-.%
        "%'"</.$-,+,O*+"))+,*                                                      –! +>#$-(#*&%7-6"*#+$7,*0$%7*-*(%33+()%"*%0*
     –! C-9:$:)9/%M&9$2(&$%#%*+8-3'-#+*#.+*,64"6D(-"(+*%0*                            &%('7+"#,*
        (-"&6&-#+*3+>6(%",*                                                             •! J*,#-#+<%0<#.+<-$#*-34%$6#.7,*67/3+7+"#+&*
         •! #+$7*0$+5'+"(G~*•<6&0~*2+6$&"+,,O*43%,,+>O*(<8-3'+O*                   –! 67/3+7+"#*-&&6)%"-3*-34%$6#.7,*
            #+$7+>*
                                                                                   –! +8-3'-#+**&6{+$+"#*-34%$6#.7,*'"&+$*#.+*,-7+*
                                                                                      0$-7+2%$1*




]9AC**]-8-*9'#%7-)(*A+$7*C+(%4"6)%"*#%%316#*                                  Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*
 •! ;I!8%6,*-*]-8-<@-,+&*#%%316#*0%$*&+8+3%/6"4*-"&*                            •! ;I!8%*
    #+,)"4*&%7-6"*#+$7*$+(%4"6)%"*-34%$6#.7,*                                      –! @-,6(*D(&h2&,)1%M&9$2(&*
                                    !"*#.+*0%33%26"4*+>+$(6,+O*G%'*2633*',+*
 •! B$&%;I!8%-.%                  #.+,+*-34%$6#.7,*-"&*(%7/-$+*#.+*$+,'3#,**       –! J*-&&6)%"-3*,#-#+<%0<#.+<-$#*-34%$6#.7,*67/3+7+"#+&%
     –! +>#$-(#*&%7-6"*#+$7,*0$%7*-*(%33+()%"*%0*                                     •! A+$7*0$+5'+"(G*6"8+$,+*&%('7+"#*0$+5'+"(G*H•<6&0K*
        &%('7+"#,*                                                                    •! a<}-3'+*                      @-(14$%'"&*6,*(%8+$+&*6"*
          •! J*,#-#+<%0<#.+<-$#*-34%$6#.7,*67/3+7+"#+&*                               •! `+6$&"+,,*                    #.+%$G*,36&+,*H€[vl<*[IvK*

     –! 67/3+7+"#*-&&6)%"-3*-34%$6#.7,*                                               •! T3%,,-$G*+>#$-()%"*HT3%,,+>K*
     –! +8-3'-#+**&6{+$+"#*-34%$6#.7,*'"&+$*#.+*,-7+*                                 •! A+$7*+>#$-(#%$*HA+$7+>K*
        0$-7+2%$1*                                                              •! (-"*@+*',+&*-,*-*(%77-"&<36"+*@-,+&*-//36(-)%"*
                                          9&8-"(+&*#%/6(**2633*@+*
                                              (%8+$+&*@$6+zG*
Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*                                         Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*
 •! ^.G%-.%2$&%;I!8%*                                                       •! ^.G%-.%2$&%;I!8%*
     –! V%(-#+*G%'$*]9AC*0%3&+$*                                               –! ,#-$#*#.+*-//36(-)%"O*+;4;O*#.+*0$+5'+"(G*7+-,'$+*
                                                                                 uk.ac.shef.wit.jatr.debug.TestFrequency
     –! (%"D4'$+*G%'$*-//36(-)%"*6"*Z9-(30(.0&(:&$*6"*
        pG%'$=N-#$q?#+,#*                                                      –! 26#.*76"67'7*7+7%$G*•7>J[^7*
        •! N-#$;,G,#+7;"3/rpG%'$=N-#$q?"3/=$+,%'$(+,*                          –! (%/G*N-#$*-"&*3%4Y*/$%/+$)+,*D3+,*#%*G%'$*(3-,,+,*0%3&+$*
          *9((+,,*#%*ZVU*#%%3,*$+5'6$+&*@G*]9AC*                                  -|+$*(3+-"*-"&*$+(%7/63+*
        •! N-#$;,G,#+7;#+$7;7->2%$&,rJ*                                        –! ,++*D3+*L5'6(1,#-$#;#>#M*6"*]9AC*0%3&+$*0%$*-&&6)%"-3*
          *g->67'7*"'7@+$*%0*2%$&,*6"*-*#+$7*                                     6"0%$7-)%"*
        •! N-#$;,G,#+7;#+$7;64"%$+=&646#,r#$'+*
          *a-"*-*#+$7*(%"#-6"*&646#,k    %%
                                                                               –! #+,#*26#.*#.+*#26:+$*-"&*0-(+@%%1*(%$/%$-*
     –! (%/G*/$%/+$)+,*D3+,*#%*pG%'$=N-#$q?(3-,,+,*




Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*                                         Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*
82,,#,+%-"&%-&$-$%G#-"%9,-%                                                82,,#,+%-"&%-&$-$%G#-"%9,-*
•! -"#*,($6/#*,+#*'/*#%*$'"*0$%7*0%3&+$*pG%'$=N-#$q?#+,#*
•! &+0-'3#*-$4'7+"#,*
   –! /-#.=#%=(%$/',*r*0%3&+$*6"*pG%'$=N-#$q?#+,#?w)"Gw*
   –! /-#.=#%=$+0+$+"(+=(%$/',=,#-#,*r*pG%'$=N-#$q?w"3/=$+,%'$(+,?
      @"(='"60$5,;"%$7-3w*
•! #%*',+*-3#+$"-)8+*-$4,*+"#+$*%"+*%$*@%#.*%0*
   –! ant
        -Dpath_to_corpus=alt_corpus_path
        -Dpath_to_reference_corpus_stats=alt_reference_corpus_stats_path



•! %'#/'#*#%*#+,#*0%3&+$*
   –! /-:+$"S*!"#$%&'()*+),*B*9AC=9VTEC!Ang;#>#*
   –! %$*$'"*9VV*#+,#,*@G*(-336"4*LI/+.(#-"M!&$-&(M*
Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*                                              Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*
 •! B,*&($-9,*#,+%-"&%.2-02-*                                                    •! B,*&($-9,*#,+%-"&%0(.)&$$%5%".G%*.&$%#-%G.(Fi*
      –! A.+*/$%(+,,*%0*#.+*-//36(-)%"*6,*3%44+&*6"*cN-#$;3%4d*                      –! 3%%1*-#*$'"HK*7+#.%&*6"*
      –! A.+*$+,'3#,*-$+*%'#/'#*#%*-*D3+*(-33+&**                                       uk.ac.shef.wit.jatr.debug.TestFrequency.java

        *p-34%$6#.7="-7+q=9AC=934%$6#.7;#>#O*+;4;O*
         cF67/3+=#+$7=0$+5'+"(G=9AC=9VTEC!Ang;#>#d*                              Part 1: Extracting candidate terms by NLP9*c,#%/*2%$&d*36,#*6,*
                                                                                                                                     ',+&*#%*$+7%8+*"%6,+*
      –! 2.6(.*(%"#-6",*$-"1+&*36,#*%0*#+$7,*+>#$-(#+&*0$%7*#.+*                 //stop word list
                                                                                                                                     2%$&,O*+;4;O*L#.+MO*L-"&M*
                                                                                 StopList stop = new StopList(true);
         (%$/',O*%"+*#+$7*/+$*36"+S*
                                                                                 //lemmatiser
      2%$3&('/*‚`ECVQaeU*‚`%$3&a'/*‚2%$3&('/*‚`%$3&('/        ***[u^Y;_*
                                                                                 Lemmatiser lemmatizer = new Lemmatiser(); V+77-),-)%"*6,*',+&*#%*
                                                                                 //noun phrase extractor                           "%$7-36,+*#+$7,*#%*#.+6$*
                                                                                                                                 (-"%"6(-3*0%$7,*H,++*#.+%$G*
    A.+*D$,#*#+$7*                                                               CandidateTermExtractor npextractor = new
                                                            A.+*"'7@+$*6,*                                                              ,36&+,*[ll<*[lvK*
        6,*#.+*              A.+*%#.+$*#+$7,*-$+*                                   NounPhraseExtractorOpenNLP(stop, lemmatizer);
                                                            #.+*(-3('3-#+&*
   (-"%"6(-3*0%$7*           #.+*8-$6-"#,*0%'"&*6"*         ,(%$+*0%$*#.-#*      ……
     %0*-33*%0*6#,*               #.+*(%$/',*                    #+$7**                                          ]9AC*',+,*-*&+0-'3#*%/+"<"3/*
      8-$6-"#,**                                                                                                @-,+&*"%'"*/.$-,+*(.'"1+$*#%*
                                                                                                                   +>#$-(#*(-"&6&-#+*#+$7,*




Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*                                              Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC*
 •! B,*&($-9,*#,+%-"&%0(.)&$$%5%".G%*.&$%#-%G.(Fi*                               •! B,*&($-9,*#,+%-"&%0(.)&$$%5%".G%*.&$%#-%G.(Fi*
      –! 3%%1*-#*$'"HK*7+#.%&*6"*                                                                                             U$%(+,,%$,*
                                                                                 Part 1: Extracting candidate terms by NLP cont.
         uk.ac.shef.wit.jatr.debug.TestFrequency.java                            TermFreqCounter npcounter = new TermFreqCounter();$+5'6$+&*0%$*
                                                                                                                                             (%'")"4*#+$7*
                                                                                 WordCounter wordcounter = new WordCounter();
                                                                                                                                              0$+5'+"(6+,*
            Rooney, fails, to, end, goal, drought, .                             //create global resource index builder, which indexes
                                         9*c,#%/*2%$&d*36,#*
 Part 1: Extracting candidate terms by NLP                                          global resources,
 //stop word list                                   6,*',+&*#%*$+7%8+*
                                                        "%6,+*2%$&,*             //such as documents and terms and their relations
 StopList stop = new StopList(true);                                             GlobalResourceIndexBuilder builder = new
 //lemmatiser                                                                       GlobalResourceIndexBuilder();
 Lemmatiser lemmatizer = new Lemmatiser();         V+77-),-)%"*6,*',+&*#%*       //build the global resource index
 //noun phrase extractor                           "%$7-36,+*#+$7,*#%*#.+6$*
                                                                                 GlobalResourceIndex termDocIndex = builder.build(new      W%$*6"&+>6"4*#+$7,*
                                                 (-"%"6(-3*0%$7,*H,++*#.+%$G*
 CandidateTermExtractor npextractor = new                                           CorpusImpl(args[0]), npextractor);                      -"&*&%('7+"#,*
                                                         ,36&+,*[ll<*[lvK*
    NounPhraseExtractorOpenNLP(stop, lemmatizer);                                ….
 ……                                                                                                     !"8%16"4*ZVU*/$%(+,,+,*#%*$+-&*6"*
                                 ]9AC*',+,*-*&+0-'3#*%/+"<"3/*                                           &%('7+"#,O*,+47+"#*,+"#+"(+,O*
                                @-,+&*"%'"*/.$-,+*(.'"1+$*#%*                                            -//3G*#%1+"6,-)%"O*UEF*#-446"4O*
                                   +>#$-(#*(-"&6&-#+*#+$7,*                                                   -"&*/.$-,+*(.'"16"4*
Ekaw2010 tutorial3 practical
Ekaw2010 tutorial3 practical

Mais conteúdo relacionado

Mais procurados

Worm Composting Instructions
Worm Composting InstructionsWorm Composting Instructions
Worm Composting Instructionsx3G9
 
Innovations democra tic-document-veille-slideshare
Innovations democra tic-document-veille-slideshareInnovations democra tic-document-veille-slideshare
Innovations democra tic-document-veille-slideshareidemocratic
 
6.Conocimiento cliente Cuenta Pagos en Linea. (Interlat Group
6.Conocimiento cliente Cuenta Pagos en Linea. (Interlat Group6.Conocimiento cliente Cuenta Pagos en Linea. (Interlat Group
6.Conocimiento cliente Cuenta Pagos en Linea. (Interlat GroupInterlat
 
Mobile is the future: Do you have your strategy?
Mobile is the future: Do you have your strategy?Mobile is the future: Do you have your strategy?
Mobile is the future: Do you have your strategy?Jason Grigsby
 
Buku panduan pengelolaan-e-journal
Buku panduan pengelolaan-e-journalBuku panduan pengelolaan-e-journal
Buku panduan pengelolaan-e-journalCoky Fauzi Alfi
 
QUIETING THE ECHOES - a case study for creatives
QUIETING THE ECHOES - a case study for creativesQUIETING THE ECHOES - a case study for creatives
QUIETING THE ECHOES - a case study for creativesLet's Make Great!
 
CompTIA IT - Skills Gaps Study International
CompTIA IT - Skills Gaps Study InternationalCompTIA IT - Skills Gaps Study International
CompTIA IT - Skills Gaps Study InternationalAssespro Nacional
 
Guia Cursos Formación General
Guia Cursos Formación GeneralGuia Cursos Formación General
Guia Cursos Formación Generalitik consultoria
 
Public international-law-notesp
Public international-law-notespPublic international-law-notesp
Public international-law-notespAmita Pradhan
 
الفروقات الفردية بين الطلاب كيف نفهمها
الفروقات الفردية بين الطلاب  كيف نفهمهاالفروقات الفردية بين الطلاب  كيف نفهمها
الفروقات الفردية بين الطلاب كيف نفهمهاEladeb Marouan
 

Mais procurados (19)

Worm Composting Instructions
Worm Composting InstructionsWorm Composting Instructions
Worm Composting Instructions
 
Innovations democra tic-document-veille-slideshare
Innovations democra tic-document-veille-slideshareInnovations democra tic-document-veille-slideshare
Innovations democra tic-document-veille-slideshare
 
Suitcase magazine
Suitcase magazineSuitcase magazine
Suitcase magazine
 
6.Conocimiento cliente Cuenta Pagos en Linea. (Interlat Group
6.Conocimiento cliente Cuenta Pagos en Linea. (Interlat Group6.Conocimiento cliente Cuenta Pagos en Linea. (Interlat Group
6.Conocimiento cliente Cuenta Pagos en Linea. (Interlat Group
 
Ipad gump
Ipad gumpIpad gump
Ipad gump
 
Vietnamese favorite celebrities
Vietnamese favorite celebritiesVietnamese favorite celebrities
Vietnamese favorite celebrities
 
Site 6 orientacao 2
Site 6 orientacao 2Site 6 orientacao 2
Site 6 orientacao 2
 
Mobile is the future: Do you have your strategy?
Mobile is the future: Do you have your strategy?Mobile is the future: Do you have your strategy?
Mobile is the future: Do you have your strategy?
 
Buku panduan pengelolaan-e-journal
Buku panduan pengelolaan-e-journalBuku panduan pengelolaan-e-journal
Buku panduan pengelolaan-e-journal
 
QUIETING THE ECHOES - a case study for creatives
QUIETING THE ECHOES - a case study for creativesQUIETING THE ECHOES - a case study for creatives
QUIETING THE ECHOES - a case study for creatives
 
Brinch hansen
Brinch hansenBrinch hansen
Brinch hansen
 
CompTIA IT - Skills Gaps Study International
CompTIA IT - Skills Gaps Study InternationalCompTIA IT - Skills Gaps Study International
CompTIA IT - Skills Gaps Study International
 
Urban Agriculture Australia & Canberra City Farm
Urban Agriculture Australia & Canberra City FarmUrban Agriculture Australia & Canberra City Farm
Urban Agriculture Australia & Canberra City Farm
 
Travel & Lifestyle
Travel & LifestyleTravel & Lifestyle
Travel & Lifestyle
 
Guia Cursos Formación General
Guia Cursos Formación GeneralGuia Cursos Formación General
Guia Cursos Formación General
 
Public international-law-notesp
Public international-law-notespPublic international-law-notesp
Public international-law-notesp
 
Ph 3
Ph 3Ph 3
Ph 3
 
YouTube popularity in Vietnam
YouTube popularity in VietnamYouTube popularity in Vietnam
YouTube popularity in Vietnam
 
الفروقات الفردية بين الطلاب كيف نفهمها
الفروقات الفردية بين الطلاب  كيف نفهمهاالفروقات الفردية بين الطلاب  كيف نفهمها
الفروقات الفردية بين الطلاب كيف نفهمها
 

Destaque

Stretching the Life of Twitter Classifiers with Time-Stamped Semantic Graphs
Stretching the Life of Twitter Classifiers with Time-Stamped Semantic GraphsStretching the Life of Twitter Classifiers with Time-Stamped Semantic Graphs
Stretching the Life of Twitter Classifiers with Time-Stamped Semantic GraphsAmparo Elizabeth Cano Basave
 
Pedir Servir Traer
Pedir  Servir  TraerPedir  Servir  Traer
Pedir Servir Traernrodriguez
 
Harnessing Linked Knowledge Sources for Topic Classification in Social Media
Harnessing Linked Knowledge Sources for Topic Classification in Social MediaHarnessing Linked Knowledge Sources for Topic Classification in Social Media
Harnessing Linked Knowledge Sources for Topic Classification in Social MediaAmparo Elizabeth Cano Basave
 
Product CEO vs The World
Product CEO vs The WorldProduct CEO vs The World
Product CEO vs The WorldTariq Krim
 
Detecting child grooming behaviour patterns on social media
Detecting child grooming behaviour patterns on social mediaDetecting child grooming behaviour patterns on social media
Detecting child grooming behaviour patterns on social mediaAmparo Elizabeth Cano Basave
 
Representing, Proving and Sharing Trustworthiness of Web Resources Using Vera...
Representing, Proving and Sharing Trustworthiness of Web Resources Using Vera...Representing, Proving and Sharing Trustworthiness of Web Resources Using Vera...
Representing, Proving and Sharing Trustworthiness of Web Resources Using Vera...Amparo Elizabeth Cano Basave
 
A Study of the Impact of Persuasive Argumentation in Political Debates
A Study of the Impact of Persuasive Argumentation in Political DebatesA Study of the Impact of Persuasive Argumentation in Political Debates
A Study of the Impact of Persuasive Argumentation in Political DebatesAmparo Elizabeth Cano Basave
 
Volatile Classification of Point of Interests based on Social Activity Streams
Volatile Classification of Point of Interests based on Social Activity StreamsVolatile Classification of Point of Interests based on Social Activity Streams
Volatile Classification of Point of Interests based on Social Activity StreamsAmparo Elizabeth Cano Basave
 
Sensing 
Presence
(PreSense)
Ontology
–
 
User 
Modelling
 in 
the 
Semantic ...
Sensing 
Presence
(PreSense)
Ontology
–
 
User 
Modelling
 in 
the 
Semantic ...Sensing 
Presence
(PreSense)
Ontology
–
 
User 
Modelling
 in 
the 
Semantic ...
Sensing 
Presence
(PreSense)
Ontology
–
 
User 
Modelling
 in 
the 
Semantic ...Amparo Elizabeth Cano Basave
 
Units Of Measurement Spanish
Units Of  Measurement  SpanishUnits Of  Measurement  Spanish
Units Of Measurement Spanishnrodriguez
 
Introduction to Biometric lectures... Prepared by Dr.Abbas
Introduction to Biometric lectures... Prepared by Dr.AbbasIntroduction to Biometric lectures... Prepared by Dr.Abbas
Introduction to Biometric lectures... Prepared by Dr.AbbasBasra University, Iraq
 
Reflexive Verb Intro
Reflexive Verb IntroReflexive Verb Intro
Reflexive Verb Intronrodriguez
 
El Modo Imperativo Updated
El Modo Imperativo UpdatedEl Modo Imperativo Updated
El Modo Imperativo Updatednrodriguez
 

Destaque (16)

Locklear
LocklearLocklear
Locklear
 
Stretching the Life of Twitter Classifiers with Time-Stamped Semantic Graphs
Stretching the Life of Twitter Classifiers with Time-Stamped Semantic GraphsStretching the Life of Twitter Classifiers with Time-Stamped Semantic Graphs
Stretching the Life of Twitter Classifiers with Time-Stamped Semantic Graphs
 
Pedir Servir Traer
Pedir  Servir  TraerPedir  Servir  Traer
Pedir Servir Traer
 
Does sizematter
Does sizematterDoes sizematter
Does sizematter
 
Violence det ijcnlp13-slideshare
Violence det ijcnlp13-slideshareViolence det ijcnlp13-slideshare
Violence det ijcnlp13-slideshare
 
Harnessing Linked Knowledge Sources for Topic Classification in Social Media
Harnessing Linked Knowledge Sources for Topic Classification in Social MediaHarnessing Linked Knowledge Sources for Topic Classification in Social Media
Harnessing Linked Knowledge Sources for Topic Classification in Social Media
 
Product CEO vs The World
Product CEO vs The WorldProduct CEO vs The World
Product CEO vs The World
 
Detecting child grooming behaviour patterns on social media
Detecting child grooming behaviour patterns on social mediaDetecting child grooming behaviour patterns on social media
Detecting child grooming behaviour patterns on social media
 
Representing, Proving and Sharing Trustworthiness of Web Resources Using Vera...
Representing, Proving and Sharing Trustworthiness of Web Resources Using Vera...Representing, Proving and Sharing Trustworthiness of Web Resources Using Vera...
Representing, Proving and Sharing Trustworthiness of Web Resources Using Vera...
 
A Study of the Impact of Persuasive Argumentation in Political Debates
A Study of the Impact of Persuasive Argumentation in Political DebatesA Study of the Impact of Persuasive Argumentation in Political Debates
A Study of the Impact of Persuasive Argumentation in Political Debates
 
Volatile Classification of Point of Interests based on Social Activity Streams
Volatile Classification of Point of Interests based on Social Activity StreamsVolatile Classification of Point of Interests based on Social Activity Streams
Volatile Classification of Point of Interests based on Social Activity Streams
 
Sensing Presence (PreSense) Ontology – User Modelling in the Semantic ...
Sensing 
Presence
(PreSense)
Ontology
–
 
User 
Modelling
 in 
the 
Semantic ...Sensing 
Presence
(PreSense)
Ontology
–
 
User 
Modelling
 in 
the 
Semantic ...
Sensing 
Presence
(PreSense)
Ontology
–
 
User 
Modelling
 in 
the 
Semantic ...
 
Units Of Measurement Spanish
Units Of  Measurement  SpanishUnits Of  Measurement  Spanish
Units Of Measurement Spanish
 
Introduction to Biometric lectures... Prepared by Dr.Abbas
Introduction to Biometric lectures... Prepared by Dr.AbbasIntroduction to Biometric lectures... Prepared by Dr.Abbas
Introduction to Biometric lectures... Prepared by Dr.Abbas
 
Reflexive Verb Intro
Reflexive Verb IntroReflexive Verb Intro
Reflexive Verb Intro
 
El Modo Imperativo Updated
El Modo Imperativo UpdatedEl Modo Imperativo Updated
El Modo Imperativo Updated
 

Semelhante a Ekaw2010 tutorial3 practical

Ugas, Kohtala: Clarifying the role of design within the Framework for Strateg...
Ugas, Kohtala: Clarifying the role of design within the Framework for Strateg...Ugas, Kohtala: Clarifying the role of design within the Framework for Strateg...
Ugas, Kohtala: Clarifying the role of design within the Framework for Strateg...outiugas
 
Eiropas Brīvprātīgā darba gada 2011 ceļojošās tūres Latvijas posma programma
Eiropas Brīvprātīgā darba gada 2011 ceļojošās tūres Latvijas posma programmaEiropas Brīvprātīgā darba gada 2011 ceļojošās tūres Latvijas posma programma
Eiropas Brīvprātīgā darba gada 2011 ceļojošās tūres Latvijas posma programmaBibliotēku portāls
 
Start-up: FanpageTrender.pl - pomiar działań marketingowych na Facebooku
Start-up: FanpageTrender.pl - pomiar działań marketingowych na FacebookuStart-up: FanpageTrender.pl - pomiar działań marketingowych na Facebooku
Start-up: FanpageTrender.pl - pomiar działań marketingowych na Facebookucendoo1
 
Fanpage Trender
Fanpage TrenderFanpage Trender
Fanpage TrenderCendoo
 
8 Steps to Maximize Your Financial Aid Package
8 Steps to Maximize Your Financial Aid Package8 Steps to Maximize Your Financial Aid Package
8 Steps to Maximize Your Financial Aid PackageBrian Davidson
 
The Case for B2B Social Media: Womma Webinar
The Case for B2B Social Media: Womma WebinarThe Case for B2B Social Media: Womma Webinar
The Case for B2B Social Media: Womma WebinarSandra Fathi
 
SVG - Scalable Vector Graphics: Uma Introdução Prática
SVG - Scalable Vector Graphics: Uma Introdução PráticaSVG - Scalable Vector Graphics: Uma Introdução Prática
SVG - Scalable Vector Graphics: Uma Introdução PráticaHelder da Rocha
 
الفروقات الفردية بين الطلاب كيف نفهمها
الفروقات الفردية بين الطلاب  كيف نفهمهاالفروقات الفردية بين الطلاب  كيف نفهمها
الفروقات الفردية بين الطلاب كيف نفهمهاAla Qunaibi
 
Projet Sponsoring Paris Football Club
Projet Sponsoring Paris Football ClubProjet Sponsoring Paris Football Club
Projet Sponsoring Paris Football ClubJim Totime
 
Ebook isu isu sentral islam liberal
Ebook isu isu sentral islam liberalEbook isu isu sentral islam liberal
Ebook isu isu sentral islam liberalAbu Muhammad
 
Nearby Startup Pitch for SUU 2013 conference
Nearby Startup Pitch for SUU 2013 conferenceNearby Startup Pitch for SUU 2013 conference
Nearby Startup Pitch for SUU 2013 conferenceAdam Nemeth
 
Презентация Антона Скорнякова в ВШЭ
Презентация Антона Скорнякова в ВШЭПрезентация Антона Скорнякова в ВШЭ
Презентация Антона Скорнякова в ВШЭIamclever.ru
 
Pictet perspectives september 2011
Pictet   perspectives september 2011Pictet   perspectives september 2011
Pictet perspectives september 2011PrivateBanker.ro
 

Semelhante a Ekaw2010 tutorial3 practical (20)

Ugas, Kohtala: Clarifying the role of design within the Framework for Strateg...
Ugas, Kohtala: Clarifying the role of design within the Framework for Strateg...Ugas, Kohtala: Clarifying the role of design within the Framework for Strateg...
Ugas, Kohtala: Clarifying the role of design within the Framework for Strateg...
 
Thai Alcoholic Beverages Regulations 2011
Thai Alcoholic Beverages Regulations 2011Thai Alcoholic Beverages Regulations 2011
Thai Alcoholic Beverages Regulations 2011
 
Eiropas Brīvprātīgā darba gada 2011 ceļojošās tūres Latvijas posma programma
Eiropas Brīvprātīgā darba gada 2011 ceļojošās tūres Latvijas posma programmaEiropas Brīvprātīgā darba gada 2011 ceļojošās tūres Latvijas posma programma
Eiropas Brīvprātīgā darba gada 2011 ceļojošās tūres Latvijas posma programma
 
Start-up: FanpageTrender.pl - pomiar działań marketingowych na Facebooku
Start-up: FanpageTrender.pl - pomiar działań marketingowych na FacebookuStart-up: FanpageTrender.pl - pomiar działań marketingowych na Facebooku
Start-up: FanpageTrender.pl - pomiar działań marketingowych na Facebooku
 
Fanpage Trender
Fanpage TrenderFanpage Trender
Fanpage Trender
 
Ph 35
Ph 35Ph 35
Ph 35
 
Intro h
Intro hIntro h
Intro h
 
Ph 38
Ph 38Ph 38
Ph 38
 
8 Steps to Maximize Your Financial Aid Package
8 Steps to Maximize Your Financial Aid Package8 Steps to Maximize Your Financial Aid Package
8 Steps to Maximize Your Financial Aid Package
 
The Case for B2B Social Media: Womma Webinar
The Case for B2B Social Media: Womma WebinarThe Case for B2B Social Media: Womma Webinar
The Case for B2B Social Media: Womma Webinar
 
SVG - Scalable Vector Graphics: Uma Introdução Prática
SVG - Scalable Vector Graphics: Uma Introdução PráticaSVG - Scalable Vector Graphics: Uma Introdução Prática
SVG - Scalable Vector Graphics: Uma Introdução Prática
 
الفروقات الفردية بين الطلاب كيف نفهمها
الفروقات الفردية بين الطلاب  كيف نفهمهاالفروقات الفردية بين الطلاب  كيف نفهمها
الفروقات الفردية بين الطلاب كيف نفهمها
 
Projet Sponsoring Paris Football Club
Projet Sponsoring Paris Football ClubProjet Sponsoring Paris Football Club
Projet Sponsoring Paris Football Club
 
Ph 37
Ph 37Ph 37
Ph 37
 
Ebook isu isu sentral islam liberal
Ebook isu isu sentral islam liberalEbook isu isu sentral islam liberal
Ebook isu isu sentral islam liberal
 
Nearby Startup Pitch for SUU 2013 conference
Nearby Startup Pitch for SUU 2013 conferenceNearby Startup Pitch for SUU 2013 conference
Nearby Startup Pitch for SUU 2013 conference
 
Medical student perspectives on evidence
Medical student perspectives on evidenceMedical student perspectives on evidence
Medical student perspectives on evidence
 
Презентация Антона Скорнякова в ВШЭ
Презентация Антона Скорнякова в ВШЭПрезентация Антона Скорнякова в ВШЭ
Презентация Антона Скорнякова в ВШЭ
 
Pictet perspectives september 2011
Pictet   perspectives september 2011Pictet   perspectives september 2011
Pictet perspectives september 2011
 
Informe Final
Informe FinalInforme Final
Informe Final
 

Ekaw2010 tutorial3 practical

  • 1. EKAW 2010 • Tutorial T3 Friday • 15th october 2010 Knowledge Acquisition from Social Networking Sites Z. Zhang, A.E. Cano, K. Elbedweihy, A.-S. Dadzie
  • 2. !"#$%&'()%"* 9*36:3+*.%',+1++/6"4*;;;* !"#$%&'&()#$&%#$%*&$#+,&*%-.%"&/0%1.2%333% 4./*&(%$-(2)-2(&%5%-.0%/&6&/%7%+1-2<1-,"-=+>+$(6,+,?* •! &-#-* •! '"&+$,#-"&*#.+*/$%(+&'$+*%0*1"%23+&4+*-(5'6,6)%"* –! data/animalcorpus/! 0$%7*,%(6-3*"+#2%$16"4*,6#+,* –! data/examples/! –! data/corpora/facebook_data | twitter_data/* •! 3+-$"*#%*',+*$+3+8-"#*#%%3,*#%*-(5'6$+*6"0%$7-)%"*-"&* •! (%&+* 1"%23+&4+*0$%7*,%(6-3*"+#2%$16"4*,6#+,* –! facebook/! –! twitter/! •! ($+-#+*-*,67/3+*-//36(-)%"*#%*&+7%",#$-#+*#.+* –! information_extraction/ekawtutorial/ | jatr_v1.0/* #+(."%3%46+,*6"*/$-()(+* •! +>#+$"-3*36@$-$6+,* –! lib/! •! &%2"3%-&,*0$%7*#'#%$6-3*2+@,6#+* http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/ tutorial_prep.html#exercise_downloads http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/ tutorial_prep.html#third_party_downloads 9*36:3+*.%',+1++/6"4*;;;* 9*36:3+*.%',+1++/6"4*;;;* 82,,#,+%-"&%900/#)9:.,$% B$#,+%9,-% •! A+,#*6"#+$"+#*(%""+()%"* –! #%*$'"*0-(+@%%1*-"&*#26:+$*+>-7/3+,* •! #+,#+&*26#.*;<=%>3?% •! 9,-*@'63&*,($6/#* –! @'63&;>73*B*+1-2;1-,"-;A+,#C'""+$*(3-,,** –! &%'@3+<(36(1*%"*,#-$#+$*D3+*0%$*+-(.*-//36(-)%"*-"&*E?F*EC * •! 7-G*"++&*#%*7%&60G*$64.#,*#%*+>+('#+*H(.7%&*IJJK* –! +"#+$*L-"#M*-#*(%",%3+*0%$*#%/*3+8+3*%0*+-(.*,%'$(+*(%&+*0%3&+$*EC* •! $-9,*9/.,&% –! ,+#*'/*(3-,,/-#.*HE?F*&+/+"&+"#K* –! (-33*N-8-(*26#.*+-(.*#+,#*(3-,,* •! @<A% –! ($+-#+*-*"+2*-//36(-)%"*',6"4*,$(*0%3&+$,*0%$*+-(.*%0*#26:+$O*0-(+@%%1*P*6+* –! ,+#*'/*(3-,,/-#.*H!QR*&+/+"&+"#K* –! ,+#*'/*-//36(-)%"*/$%/+$)+,*-"&*$'"*+-(.*7-6"*7+#.%&*
  • 3. 9*36:3+*.%',+1++/6"4*;;;* F+#'/* C-9,*9/.,&% D9)&E..F%9,*%-G#H&(%IJ@$% •! Q%('7+"#-)%"S* –! 0-(+@%%1*T$-/.*9U!S** http://developers.facebook.com/docs –! #26:+$*9U!** http://apiwiki.twitter.com/Twitter-API-Documentation •! F64"*'/S* –! 0-(+@%%1S*http://www.facebook.com –! #26:+$S*https://twitter.com/signup •! V6@$-$6+,* –! C+,#WXS*http://restfb.com* –! #26:+$YNS*http://twitter4j.org/en F+#'/* F(+"-$6%* K9-2(9/%L9,+29+&%J(.)&$$#,+%9,*%@,D.(M9:.,%A'-(9):.,% NO>O%C.2-"%ID(#)9%P.(/*%Q20%5%M9-)"%$2MM9(#$9:.,% •!*Q'$6"4*#.+*^_[_*`%$3&*a'/*#%'$"-7+"#*6"*F%'#.*90$6(-O* •!*E/+"ZVU*[;Y**]-8-*#%%316#*0%$*@'63&6"4*ZVU*-"&*!R*-//36(-)%",* #26:+$*-"&*0-(+@%%1*2+$+*',+&*+>#+",68+3G*-,*-*&6,(',,6%"* @%-$&*0%$*0-",*#%*+>(.-"4+*6"0%$7-)%"*-"&*%/6"6%",*-@%'#* –! (%"#-6",*/$+<@'63#*3-"4'-4+*7%&+3,*#%*@+*',+&*@G*E/+"ZVU*0%$* 3-"4'-4+*/$%(+,,6"4* 7-#(.+,b* http://opennlp.sourceforge.net –! *.'"&$+&,*%0*#.%',-"&,*%0*7+,,-4+,*2+$+*4+"+$-#+&*&-63G* http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/ %"*#.+*#2%*,%(6-3*"+#2%$16"4*,6#+,b* exercise_rscs/ie_models_eng.zip –! *-*3-$4+*/$%/%$)%"*%0*#.+,+*7+,,-4+,*&6,(',,*#.+*7-#(.*%0* #.+*&-Gb* •!*]-8-*9'#%7-)(*A+$7*C+(%4"6)%"*#%%316#*H]9ACK* •!*2+*-$+*6"#+$+,#+&*6"*-"-3G,6"4*#.+,+*7+,,-4+,** http://www.dcs.shef.ac.uk/~ziqizhang/resources/tools/ jatr_v1.0.zip –! #%*'"&+$,#-"&*2.-#*-$+*#.+*7%,#*/%/'3-$*#%/6(,*#.-#* 6"#+$+,#*/+%/3+b*
  • 4. F(+"-$6%*(%"#;* F(+"-$6%*9"-3G,6,* NO>O%C.2-"%ID(#)9%P.(/*%Q20%5%M9-)"%$2MM9(#$9:.,% 8&7)90%D(.M%-"&%M.(,#,+%$&$$#.,% •!*A%*&%*,%*2+*@'63#*-*c7-#(.*,'77-$6,-)%"d*-//36(-)%"* •!*.%2*#%*6&+")0G*,/+(6D(*(%"#+"#*%0*6"#+$+,#* –!%#,02-*<*).(02$%.D%M&$$9+&$%$+3-#+&*#%*-*7-#(.* –!*(%"#+"#*$+#$6+8-3*-"&*D3#+$6"4* –!%.2-02-*<*$-"1+&*36,#*%0*(&0(&$&,-9:6&%-&(M$%#.-#*(-"*@+* •!*.%2*#%*/$%(+,,*#.+*(%"#+"#*-"&*7-1+*,+",+*%0*6#* ',+&*#%*,'77-$6,+*(%$/',*(%"#+"#* –!*6"0%$7-)%"*+>#$-()%"** –!*"-#'$-3*3-"4'-4+*/$%(+,,6"4** •!*e,6"4*#.+*+>#$-(#+&*#+$7,*2+*(-"*-"-3G,+*2.-#*.-,*@++"* #.+*0%(',*%0*&6,(',,6%"*%0*#.+*7-#(.*%0*#.+*&-G* •!*W%$*#.6,*8+$G*+>+$(6,+O*2+*,#'&G*#.+*7-#(.*@+#2++"* 9*f"%23+&4+* A,+/9,*%9,*%R&(M9,1%.,%-"&%NS-"%.D%;2,&%NO>O;* 9(5'6,6)%"*/$%(+,,* F(+"-$6%*9"-3G,6,* a%$/',*T+"+$-)%"* •!%@,02-T%).(02$%.D%M&$$9+&$%$+3-#+&*#%*-*7-#(.* •! R.9/S*($+-#+*-*(%$/',*%0*7+,,-4+,** –! *2+*"++&*#%*/6"</%6"#*$+3+8-"#*7+,,-4+,*%"*#26:+$*-"&* –! #.-#* &6,(',,* #.+* 7-#(.* @+#2++"* R"43-"&* -"&* T+$7-"G* 0-(+@%%1* %"*^I#.*]'"+*^_[_* –! %',6"4%-G#H&(%-"&%D9)&E..F%9U!,U%2+*-//3G*).,-&,-% •! @,02-S* (&-(#&69/%9,*%V/-&(#,+*#%*@'63&*#.6,*(%$/',* a%$/',* –! #26:+$*9U!*/$%86&6"4*-((+,,*#%*#26:+$*&-#-* 4+"+$-)%"* –! 0-(+@%%1*9U!*/$%86&6"4*-((+,,*#%*0-(+@%%1*&-#-* –! (%"#+"#* D3#+$6"4* /-$-7+#+$,* H#.+* R"43-"&<T+$7-"G* •!%W2-02-T%$-"1+&*36,#*%0*(&0(&$&,-9:6&%-&(M$%* 7-#(.*%"*^I#.*]'"+*^_[_K* –! 2+*-//3G*@A%9,*%KLJ%%"*#.+*(%$/',*#%*-(.6+8+*#.6,*4%-3% •! W2-02-%% a%"#+"#* –! (%$/',*%0*7+,,-4+,*$+3-#+&*#%*%"3G*#.+*7-#(.*%0*6"#+$+,#* -"-3G,6,*@G*!R*
  • 5. a%$/',*T+"+$-)%"*',6"4*#26:+$* -G#H&(% a%&+*6"S*ekaw-kasna_exercises/twitter R>#+$"-3*36@,S*lib/twitter4j-core-2.1.6-SNAPSHOT.jar | log4j-1.2.15.jar a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* A'3>%8AC!%IJ@%XI,9/1$#,+%-"&%02E/#)%:M&/#,&%$-9-2$Y% A'3>%8AC!%IJ@% •! U$%86&+,*7+#.%&,*0%$*0+#(.6"4*&-#-*$+3-#+&*#%S** •! 9"-3Gj+*#.+*,#$'(#'$+*-"&*(%"#+"#*%0*/'@36(* •! *A67+36"+,O*F#-#',O*e,+$,O*g+7@+$,O*,'@,($6@+$,O*0%33%2+$,O* )7+36"+*,#-#',+,* ,%(6-3*4$-/.,*+#(;* –! `.+$+*2-,*#.+*,#-#',*#2++#+&*0$%7k* –! P9#-h*i%'*2633*"++&*#%*(%7/3+#+*#.+*(%&+*0%$*6#*#%* –! `-,*6#*-*$+#2++#k -(#'-33G*&%*,%7+#.6"4h*<*R&6#*#.+*(3-,,S* R>+$(6,+* ekaw.kasna.twitter.StatusTest •! C+0+$*#%*#.+*A26:+$Y]*N-8-&%(*#%*(%7/3+#+*#.+*+>+$(6,+,S* *http://twitter4j.org/en/javadoc/index.html !(1%#-%1.2($&/DS*$'"*C-9-2$!&$-3Z969%
  • 6. a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* A'3>%8AC!%IJ@% A'3>%8AC!%IJ@% •! 9",2+$ Twitter twitter = new try{ TwitterFactory().getInstance(); ResponseList<Status>publicTimeline = twitter.getPublicTimeline(); //*TODO Complete exercise and analyse structure and content of each status try{ GeoLocation geoLocation; //We request the public timeline, which returns a list of Status Place place; ResponseList<Status> publicTimeline = twitter.getPublicTimeline(); while (it.hasNext()){ /** Status st = it.next(); * Complete this exercise and analyse the structure and content log.info(st.getText()); of each of the Status. log.info(st.getSource()); * Have a look at the java doc of the Status Class, or just if ((geoLocation = st.getGeoLocation()) != null) check the available methods in your IDE log.info(geoLocation.toString()); */ if ((place = st.getPlace()) != null) { Iterator<Status> it = publicTimeline.iterator(); log.info(place.getFullName()); log.info(place.getBoundingBoxCoordinates().toString()); while (it.hasNext()){ } //TODO check what are the info you can get from a Status. } } } catch (TwitterException e){ •! !(1%#-%1.2($&/DS*+&6#*-"&*$'"*C-9-2$!&$-3Z969% } e.printStackTrace(); a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* A'3>%8AC!%IJ@% A'3N%C&9()"%IJ@% •! E'#/'#**)7+36"+*,#-#', ??????????!!??888888888 RT @nico_news: ???????????????????????????????????????? http://bit.ly/aZcvfl <a href="http://twipple.jp/" rel="nofollow">?????/twipple</a> •! 933%2,*6"#+$-()%"*26#.*#26:+$* Southampton v Tranmere: Preview followed by live coverage of Saturday's game between Southampton and Tranmere in L... http://bit.ly/9N802N $&9()"*-"&*-(&,*$*&-#-* <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a> Laper gueeee –! #%/*#%/6(,*#.-#*-$+*('$$+"#3G*#$+"&6"4*%"* <a href="http://www.snaptu.com" rel="nofollow">Snaptu.com</a> ?????????????????????????? / ?????????????????????????? A26:+$* •! 
!#*+>/%,+,*#.+*0%33%26"4*7+#.%&,S** <a href="http://www.echofon.com/" rel="nofollow">Echofon</a> Changing the Language of Oppression http://bit.ly/aXA4w3 #specialneeds <a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a> Are you attending the SuperSwarm at Jewel, Piccadilly tonight? Let's get an idea of numbers via my poll @ www.theprgeek.co.uk –! ,+-$(.O** #superswarmLDN web –! #$+"&,O** Simon Cowell To Receive Special Emmy Award: October 7, 2010: Music mogul and former American Idol judge Simo... http:// tinyurl.com/299o5gg –! #$+"&,?('$$+"#O*#$+"&,?&-63GO*#$+"&,? <a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a> "Wajahmu seperti bulan" --» ini artinya ngatain kan yah? Org bulan bolong2 2++13G* <a href="http://blackberry.com/twitter" rel="nofollow">Twitter for BlackBerry®</a> FM???????????? <a href="http://stone.com/Twittelator" rel="nofollow">Twittelator</a> •! A.+*F+-$(.*9U!*,'//%$#,*-7%"4* ???? [????:?????/????????????????????????]559 #colopl_msg <a href="http://t.colopl.jp/t/" rel="nofollow">Colotwi</a> %#.+$,O*#.+*0%33%26"4*%/+$-#%$,*0%$* pikiran saya cabangnya banyak, jd pusing sendiri..penuh rasanya ni kepala (%",#$'()"4*-*5'+$G*,#$6"4* <a href="http://m.tweete.net" rel="nofollow">m.tweete.net</a>...
  • 7. a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* A'3[%C&9()"%IJ@% A'3N%C&9()"%IJ@% –! P9#-h*i%'*2633*"++&*#%*(%7/3+#+*#.+*(%&+*0%$*6#*#%*-(#'-33G*&%* $#,)&#*T% F/+(6D+,*#.+*6&*%0*#.+*,#-#',*0$%7*2.6(.*#%*,#-$#*#.+*,+-$(.* ,%7+#.6"4h*<*R&6#*#.+*(3-,,S* 2,:/#*T% F/+(6D+,*#.+*6&*%0*#.+*,#-#',*0$%7*2.6(.*#%*+"&*#.+*,+-$(.* ekaw.kasna.twitter.QueryTest R>+$(6,+* C#,)&T% F#-#',+,*/$%&'(+&*,6"(+*-*,/+(6D+&*&-#+*H+;4;*^_[_<_l<[_K* B,:/T% Query query = new Query(); V/-&(T/#,F$% C+#$6+8+,*#2++#,*26#.%'#*36"1,* query.query("football"); D(.MT% C+#$6+8+,*,#-#',+,*0$%7*-*468+"*',+$;*H+;4;*0$%7S*D0-K* //*TODO Modify the query object, and search for /9,+T% C+#$6+8+,*,#-#',+,*6"*-*468+"*3-"4'-4+* today's tweets (in english) related to football W8% +;4;O*7+")%"6"4*g+>6(%*EC*W$-"(+* //*TODO Restrict your results to tweets generated within 300 kilometers of Johannesburg, South Africa T%Y% +;4;O*(%"#-6"6"4*0%%#@-33*26#.*-*/%,6)8+*-m#'&+*H+;4;*0%%#@-33*SK*K* // hint: use Query's geoCode method, the K&+9:.,% +;4;O*7+")%"6"4*@++$*@'#*"%#*$%%#* Kilometers unit is given as Query.KILOMETERS // hint: South Africa's lat: 26.12, long: 28.2 C.2()&T% +;4;O*a%"#-6"6"4*0%%#@-33*+"#+$+&*86-*A26:+$W++&*H+;4;*"+2,* ,%'$(+SA26:+$W++&K* •! !(1%#-%1.2($&/DS*$'"*]2&(1!&$-3Z969% a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* A'3>%8AC!%IJ@% A'3N%C&9()"%IJ@% •! E'#/'#**5'+$G*$+5'+,#*0%$*L0%%#@-33M*"+-$*L]%.-""+,@'$4M –! I,$G&(% hits:15 Query query = new Query(); MQMhlanzi:Total Football 360: Bafana Eager to Keep the Momentum of Winning! http://t.co/xOPTaY9 Benleeds:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or query.query("football"); Morecambe or Dagenham and Redbridge? Tumelo13:Gota admit I miss my NONstop #football convo's wit @Denisao_4 and @GordonTyler8! Haha talk bout nothing but the #beautifulgame //*TODO Modify the query object, and search for Tumelo13:RT @Denisao_4: Ey bra @Tumelo13 that's not a sin! 
That's for the love of football! I approve wow! Let's hope it works :)?? today's tweets related to football Amen Edwardo84:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer jonerz97:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe //*TODO Restrict your results to tweets generated or Dagenham and Redbridge? within 300 kilometers of Johannesburg, South Africa dcocker11:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer AntimoOsato91:@siasduplessis Oros and The Dutch National Football Team could be good sponsors too! Haha :) IsaacTeka:#football - EURO 2012 qualifier between Germany and Turkey is gonna be a fierce encounter. #Ozil and #Khedira // hint: use Query's geoCode method, the applenessuk:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer johnyrotten:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Kilometers unit is given as Query.KILOMETERS Morecambe or Dagenham and Redbridge? // hint: Johannesburg’s lat: 26.12, long: 28.2 kartikverma:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer query.geoCode(new GeoLocation(26.12,28.2), RawRemedy:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe or Dagenham and Redbridge? 30,Query.KILOMETERS); TLW1Dan:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer jopayne:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe or Dagenham and Redbridge?
  • 8. a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* A'3[%C-(&9M%IJ@% A'3[%C-(&9M%IJ@% Twitter 4j allows you to retrieve streaming samples using the class RestAPI and SearchAPI only present a limited snapshot of TwitterStream. For the public timeline you just need basic a timeline. During the finals of the 2010 World Cup authentication. the rate of tweets containing the tags #Spain, #Netherlands, #Germany, [*** Create a TwitterStream instance #Uruguay, was quite high. twitterStream = new TwitterStreamFactory(this).getInstance("yourAcc","yourPass"); Two options: Set a Listener for receiving the event of a status. Your listener should ^* •! make requests, say, every 2sec implement the method public void onStatus(Status status) through the RestAPI or the Search API, •! BETTER: twitterStream.setStatusListener(this); •! start listening to a stream of public l*** Start Sampling tweets & twitterStream.sample(); •! filter according to the tag patterns Y* Do something with the tweet in your onStatus method a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* A'3[%C-(&9M%IJ@% A'3[%C-(&9M%IJ@% –! P9#-h*i%'*2633*"++&*#%*(%7/3+#+*#.+*(%&+*0%$*6#*#%*-(#'-33G*&%* –! I,$G&( ,%7+#.6"4h*<*R&6#*#.+*(3-,,S* ekaw.kasna.twitter.StreamTest private void startConsuming() throws TwitterException { twitterStream.setStatusListener(this); private void startConsuming() throws TwitterException { //*TODO Using TwitterStream’s filter method, twitterStream.setStatusListener(this); restrict your sampling to collect tweets that include the words: football, worldcup, final //*TODO Using TwitterStream’s filter method, restrict your sampling to collect tweets that include String[] filterWords = {"#worldcup", "#WorldCup", the words: football, worldcup, final "#Worldcup", "#WORLDCUP"}; twitterStream.setStatusListener(this); twitterStream.sample(); twitterStream.filter(0,null,filterWords); } twitterStream.sample(); } •! !(1%#-%1.2($&/DS*$'"*C-(&9M!&$-3Z969%
  • 9. a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* I**#:.,9/%A'&()#$&T%I2-"&,:)9:.,% •! Try it yourself! •! Authenticating using Oauth •!*$+,#$6()%",*#%*-((+,,6"4*/$68-#+*&-#-hhh* •! OAuthTest.java •! Using the application “Ekaw-Kasna” •!%Q^IKRAC%CAJ%NO>O** •! Login with your twitter account and go to: •!*(.-"4+*#%*-'#.+")(-)%"*7%&+*0%$*$+#$6+86"4*6"&686&'-3,M* http://twitter.com/apps/new ,#-#',*6"0%$7-)%"* •!0$%7*-*,67/3+*',+$"-7+</-,,2%$&*#%S* •! W92-"7E9$&*%92-"&,:)9:.,*%0*$+46,#+$+&*c-//36(-)%",d* a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* •! I2-"&,:)9:,+%2$#,+%W92-"% –! C'""6"4*#.+*+>-7/3+*$+5'6$+,*-*U!Z* •! +"#+$*#.+*eCV*-#*#.+*(%",%3+*6"*-*2+@*@$%2,+$* •! #%*%@#-6"*-"*%-'#.=#%1+"* i%'*2633*"++&*#.+,+*#2%* ,#$6"4,*0%$*-'#.+")(-)"4** i%'*2633*@+*4686"4* -'#.%$6j-)%"*#%*#.6,* -//36(-)%"*#%*-((+,,* G%'$*6"0%$7-)%"*
  • 10. a%$/',*T+"+$-)%"*',6"4*#26:+$* a%$/',*T+"+$-)%"*',6"4*#26:+$* •! I2-"&,:)9:,+%2$#,+%W92-"% •! I2-"&,:)9:,+%2$#,+%W92-"% –! C'""6"4*#.+*+>-7/3+*$+5'6$+,*-*U!Z* –! C'""6"4*#.+*+>-7/3+*$+5'6$+,*-*U!Z* •! +"#+$*#.+*eCV*#%*%@#-6"*-"*%-'#.=#%1+"** •! +"#+$*#.+*eCV*#%*%@#-6"*-"*%-'#.=#%1+"** –! E"(+*G%'*c933%2d*-'#.%$6j-)%"*G%'*2633*@+*/$%86&+&* –! E"(+*G%'*c933%2d*-'#.%$6j-)%"*G%'*2633*@+*/$%86&+&* 26#.*#.+*U!ZS* 26#.*-*U!ZS* –! R"#+$*#.+*U!Z*#%*(%7/3+#+*-'#.+")(-)%"* A.6,*6,*#.+*U!Z* ciEe*9CR*9eAnRZA!a9ARQhhd* "++&+&*#%* (%7/3+#+*#.+* -'#.+")(-)%"* a%$/',*T+"+$-)%"*',6"4*0-(+@%%1* D9)&E..F% a%&+*6"S*ekaw-kasna_exercises/facebook R>#+$"-3*36@,S*lib/restfb-1.5.3.jar | log4j-1.2.15.jar
  • 11. facebook API – Fetching Objects facebook API – Fetching User data • The Graph API https://graph.facebook.com/facebook • provides facilities for reading and writing data to facebook • Each API request starts with the URL: https://graph.facebook.com • e.g., data about any object can be found by fetching https://graph.facebook.com/objectID - objectID is the unique id of this object in the social graph - e.g., the unique id for a page is its name: https://graph.facebook.com/facebook facebook API – Connections facebook API – Connections • All objects in the facebook social graph are connected via relationships (connections) • Fetch connections https://graph.facebook.com/objectID/connection_type • e.g., the page’s own posts https://graph.facebook.com/facebook/posts
  • 12. 0-(+@%%1*9U!**U-4+*a%""+()%",* 0-(+@%%1*9U!**W63#+$6"4*Q-#-* D&&*% A.+*/-4+M,*2-33* •! Data can be filtered using parameters 0#)-2(&% A.+*/-4+M,*/$%D3+*/6(#'$+* •! e.g., -9++&*% A.+*/.%#%,O*86&+%,O*-"&*/%,#,*6"*2.6(.*#.6,*/-4+*.-,*@++"*#-44+&* -! since, until ---> specify date ranges /#,F$% A.+*/-4+o,*/%,#+&*36"1,* -! limit ---> specify amount of returned data 0".-.$% A.+*/.%#%,*#.6,*/-4+*.-,*'/3%-&+&* +(.20$% A.+*4$%'/,*#.6,*/-4+*6,*-*7+7@+$*%0* 9/E2M$_6#*&.$% A.+*/.%#%*-3@'7,?86&+%,**#.6,*/-4+*.-,*($+-#+&* •! e.g., fetching the feed $-9-2$&$% A.+*/-4+o,*,#-#',*'/&-#+,* -! within specified dates and ,.-&$% A.+*/-4+o,*"%#+,* -! with a limit of 50 0.$-$% A.+*/-4+o,*%2"*/%,#,* https://graph.facebook.com/worldcup/feed? since=2010-07-17&until=2010-07-20&limit=50 M&ME&($% A.+*/-4+o,*7+7@+$,;*i%'*(-"*%"3G*5'+$G*'/*#%*J__*7+7@+$,;*!#*6,*"%#* /%,,6@3+*#%*6#+$-#+*#.$%'4.*#.+*36,#;*R>-7/3+S*.:/,S??4$-/.;0-(+@%%1;(%7? pU9TR=!Qq?7+7@+$,k3676#rJ__* &6&,-$% A.+*+8+"#,*#.6,*/-4+*6,*-:+"&6"4* )"&)F#,$% a.+(16",*7-&+*@G*0$6+"&,*%0*#.+*('$$+"#*,+,,6%"*',+$* 0-(+@%%1*9U!**W63#+$6"4*Q-#-* 0-(+@%%1*9U!**W6"&6"4*E@N+(#,** •! Search for objects https://graph.facebook.com/search? q=query&type=objectType c($+-#+&=)7+d*6,*26#.6"* - query ---> what you want to find #.+*,/+(6D+&*&-#+*$-"4+,* - objectType ---> type of the object (e.g. facebook post, user) •! e.g., search all public posts for “2010 world cup” https://graph.facebook.com/search?q=2010%20world %20cup&type=post
  • 13. 0-(+@%%1*9U!**W6"&6"4*E@N+(#,** 0-(+@%%1*9U!**T$-/.*9U!*R>+$(6,+* Try it yourself! •! Fetch the data about the page worldcup •! Get the feed of this page (hint: connection is feed) •! this is the wall for the page worldcup •! Return only the first 5 messages of this feed U%,#,*(%"#-6"6"4*#.+*#+$7,** c^_[_d*B*c2%$3&d*B*c('/d* •! Search for all pages containing worldcup in the page name 0-(+@%%1*9U!**T$-/.*9U!*R>+$(6,+* 0-(+@%%1*9U!**T$-/.*9U!*R>+$(6,+* •! ANSWERS •! ANSWERS •! page worldcup: •! Get the feed (wall) of the page worldcup: https://graph.facebook.com/worldcup/feed •! fetch https://graph.facebook.com/worldcup
  • 14. facebook API – Graph API Exercise facebook API – Graph API Exercise • ANSWERS • ANSWERS • Return only the first 5 messages of the feed: • Search for all pages containing worldcup in the https://graph.facebook.com/worldcup/feed?limit=5 page name https://graph.facebook.com/search?q=worldcup&type=page Client Libraries RestFB API – World Cup Scenario • Multiple client libraries for facebook API • Exercise: http://developers.facebook.com/search? get the messages sent on the day of the q=User:Client_Libraries England-Germany match - 27th of June 2010 • RestFB client library was the first java library to support 1. Search for all pages containing “worldcup” the GraphAPI • Other Java libraries now supporting GraphAPI 2. For every page: - BatchFB • Get the messages posted on that day - TinyFBGraphClient • Store the messages to generate your corpus - facebook Java Webapp • We use the RestFB client library in this tutorial
  • 15. C+,#WX*9U!**Q+0-'3#0-(+@%%1a36+"#** C+,#WX*9U!**F+-$(.6"4* •! DefaultfacebookClient •! Step 1: •! provides methods for reading and writing data Connection<T> to facebook graph fetchConnection(String connection, Class<T> connectionType, FacebookClient facebookClient Parameter... parameters) = new DefaultfacebookClient(); 9((+,,*/'@36(*&-#-* facebookClient facebookClient = new DefaultfacebookClient(); facebookClient = new Connection<Page> pageSearch = DefaultfacebookClient(ACCESS_TOKEN); facebookClient.fetchConnection("search",Page.class, Parameter.with("q", "world cup"), Parameter.with("type", "page"), Parameter.with("limit", "10")); C+5'6$+&*#%*-((+,,*/$68-#+* &-#-*%$*+&6#?/'@36,.*&-#-* .:/,S??4$-/.;0-(+@%%1;(%7?,+-$(.k5r2%$3& B('/P#G/+r/-4+P3676#r[_* C+,#WX*9U!**F+-$(.6"4* C+,#WX*9U!**$+#'$"*0$%7*$+5'+,#*<*/-4+,* •! $+#'$",*-*36,#*%0*#.+*D$,#*[_*/-4+,*-@%'#*c2%$3&('/d* •! World Cup Pages •! W%$*+-(.*/-4+O*/$%/+$)+,*$+#'$"+&*6"(3'&+S* K9M&% Q9-&+.(1% @<% –! 6&O*"-7+O*(-#+4%$GO*0++&O*/6(#'$+,*b `%$3&*a'/* U%36)(6-",* J_tY[_YulvI* `%$3&*a'/* U$%&'(#,=%#.+$* [lJJJvYvuItt^lu* 4+#Q-#-*<<s*$+#'$",*-*36,#*%0*%@N+(#,*H&+/+"&6"4*%"*#.+* 2%$3&*('/* F/%$#,=-#.3+)(,* [lY[Ivl_l^vv_vl* (%""+()%"*$+5'+,#+&K* `%$3&*a'/*^_[_* U$%&'(#,=%#.+$* ^JIvvtYItvvv* C'4@G*`%$3&*a'/* F/%$#,=-#.3+)(,* [[v^Iv^l^[Il* for (Page page : pageSearch.getData()) { ^_[_*`%$3&*a'/* e"1"%2"* [^J_YtltY^_^^tJ* System.out.print("Name: " + page.getName()); w`ECVQ*aeUd* a3'@,* [^lvttYI^[Iv* System.out.print("Category: " + page.getCategory()); `%$3&*a'/*%"*RFUZ* F/%$#,=-#.3+)(,* [v[Jl[lt^_Y_* System.out.println("ID: " + page.getId()); `ECVQ*aeU* F/%$#,=#+-7,* [^_l_IlvYvv[_Jv* }* ^_[_*`%$3&*a'/* V%(-3=@',6"+,,* lvI[[uIIlt[v*
  • 16. C+,#WX*9U!**R>+$(6,+* C+,#WX*9U!**R>+$(6,+* Try it yourself! ANSWERS Connection<Group> groupSearch = •! Edit the class SearchTest.java facebookClient.fetchConnection( "search", Group.class, Parameter.with("q", "2010 world cup"), •! Search for all groups talking about a topic of Parameter.with("type", "group"), Parameter.with("limit", "15")); interest to you •! Get the first 15 groups for (Group group : groupSearch.getData()) { System.out.println("Name: " + group.getName()); •! For every group: System.out.println("ID: " + group.getId()); } - print name and ID C+,#WX*9U!**$+#'$"*0$%7*$+5'+,#*<*4$%'/,* C+,#WX*9U!**T+m"4*#.+*0++&* ‘2010 world cup’ groups K9M&% @<% •! Step 2: kkkkkkk**x-7-3+1*Ey(6-3*T$%'/* ^^JJ^[YItu[J* ^_[_*W!W9*`ECVQ*aeU* [^Y[Iulu_uJ[YJv* Connection<T> fetchConnection(String connection, ^_[_*W!W9*`%$3&*a'/* ^^_YtlvIYJ* Class<T> connectionType, ^_[_*W!W9*`ECVQ*aeU*FEeAn*9WC!a9* ^I_Ilt[tYJI* Parameter... parameters) ^_[_*W60-*`%$3&*a'/*F%'#.*90$61-* [^_uIl^[[^II[Ju* ^_[_*W!W9*`%$3&*a'/*F%'#.*90$6(-* [[[I_tJvJJ[YYlv* ^_[_*W60-*`%$3&*a'/*Q$6"16"4*T-7+* ^lv[^t[ut_^u* Connection<Post> myFeed = facebookClient.fetchConnection( ^_[_*W!W9*`ECVQ*aeU*FEeAn*9WC!a9* [_tJ^t^u^J[Jlt_* "worldcup/feed", Post.class, Parameter.with("since", g'"&6-3*^_[_*F'&-0$6(-*^_[_*`%$3&*('/* [uuv^tvtIlvl* "2010-06-27T11:00:00"), Parameter.with("until", "2010-06-28T17:00:00"), Parameter.with("limit", "10")); !#-36-*<*^_[_*W!W9*`%$3&*a'/* [tJYlYIlt^^* ^_[_<W!W9<`%$3&<a'/* [^vlIIll_I[^uIl* ^_[_*`%$3&*a'/** [[^_uJ^JttlJYYu* ^_[_*`%$3&*a'/* [ulll^l[vlIl* .:/,S??4$-/.;0-(+@%%1;(%7?2%$3&('/?0++&k ^_[_*W!W9*`%$3&*a'/* [l_YvttuvuvJYII* ,6"(+r^_[_<_v<^IP'")3r^_[_<_v<^tP3676#r^_* ^_[_*W!W9*`%$3&*a'/* [vl[Y_tt[uIt*
  • 17. CRFA*9U!**T+m"4*#.+*0++&* CRFA*9U!**$+#'$"*0$%7*$+5'+,#*<*0++&* Try it yourself! - ConnectionsTest.java •! 0++&*$+#'$",*-33*/%,#,*2$6:+"*%"*#.+*,/+(6D+&*&-#+* •! Message: the english were hoping to play penalties what a waste of their •! W%$*+-(.*/%,#*-:$6@'#+,*$+#'$"+&*6"(3'&+S* training time –! ($+-)%"*)7+O*/%,#*"-7+O*&+,($6/)%"b;* Creation Time: Sun Jun 27 17:45:13 BST 2010 •! Message: Deutschland, Deutschland über alles, über alles in der Welt Creation Time: Sun Jun 27 17:29:25 BST 2010 •! Message: world cup?? this wasn't a 'football games' but 'fakeball' games!! for (Post post : myFeed.getData()) { Lampard was scored but the referee was blind....4-1?? congrats to the referees coz they have a massive party tonite to celebrate!! $$$$$$$$$$$$$ System.out.println("Message: " + post.getMessage()); $$$ wow.... even can makes people blind!!! world cup??? **** off!!! System.out.println("tCreation Time" + Creation Time: Sun Jun 27 17:25:32 BST 2010 post.getCreatedTime()); •! Message: how are we suppose to be patriotic with a team that plays like }* that, none of them deserve the money they get, waste of time.............. Creation Time: Sun Jun 27 16:48:06 BST 2010 •! 
Message: john terry on england should get worst defender for the year...he's no good Creation Time: Sun Jun 27 16:42:39 BST 2010 CRFA*9U!**U%,#*U$%/+$)+,O*a%""+()%",* a%$/',*T+"+$-)%"*',6"4*0-(+@%%1* Properties I**#:.,9/%A'&()#$&T%I2-"&,:)9:.,% #*% A.+*/%,#*!Q* •!*$+,#$6()%",*#%*-((+,,6"4*/$68-#+*&-#-hhh* D(.M% 9"*%@N+(#*(%"#-6"6"4*#.+*!Q*-"&*"-7+*%0*#.+*',+$*2.%*/%,#+&*#.+*7+,,-4+* •!*9((+,,*A%1+"*$+5'6$+&*0%$*,%7+*7+#.%&,* -.% 9*36,#*%0*#.+*/$%D3+,*7+")%"+&*%$*#-$4+#+&*6"*#.6,*/%,#* M&$$9+&% A.+*7+,,-4+* •!#%*/$+8+"#*-((+,,*H$+-&*%$*2$6#+K*#%*/$68-#+*&-#-* 0#)-2(&% !0*-8-63-@3+O*-*36"1*#%*#.+*/6(#'$+*6"(3'&+&*26#.*#.6,*/%,#* •!+;4;O*/'@36,.6"4*#%*#.+*0-(+@%%1*,%(6-3*4$-/.* /#,F% A.+*36"1*-:-(.+&*#%*#.6,*/%,#* •!*X6&&6"4#%"*/$%86&+,*-*4%%&*+>/3-"-)%"*0%$*4+m"4*-((+,,*#%1+",*-#S* ,9M&% A.+*"-7+*%0*#.+*36"1* http://benbiddington.wordpress.com/2010/04/23/facebook-graph- )90:.,_*&$)(#0:.,% A.+*(-/)%"?&+,($6/)%"**%0*#.+*36"1*H-//+-$,*@+"+-#.*#.+*36"1*"-7+K* api-getting-access-tokens $.2()&% !0*-8-63-@3+O*#.+*,%'$(+*36"1*-:-(.+&*#%*#.6,*/%,#*H0%$*+;4;O*-*z-,.*%$*86&+%*D3+K* #).,% 9*36"1*#%*-"*6(%"*$+/$+,+")"4*#.+*#G/+*%0*#.6,*/%,#* 9H(#E2:.,% 9*,#$6"4*6"&6(-)"4*2.6(.*-//36(-)%"*2-,*',+&*#%*($+-#+*#.6,*/%,#* •!*+;4;O*0+#(.*#.+*0$6+"&,*%0*',+$*L1.-&6N-;+3@+&2+6.GM* 9):.,$% 9*36,#*%0*-8-63-@3+*-()%"*"-7+,*-"&*36"1,*H6"(3'&6"4*(%77+")"4O*3616"4*-"&*-"* •!*#.6,*$+5'6$+,*-'#.+")(-)%"**#%1+"*L>>`a`bO``O;;;M* %/)%"-3*-//<,/+(6D+&*-()%"K* https://graph.facebook.com/khadija.elbedweihy/ /#F&$% A.+*"'7@+$*%0*361+,*%"*#.6,*/%,#* friends&access_token=11585905509... )(&9-&*:M&% A.+*)7+*#.+*/%,#*2-,*6"6)-33G*/'@36,.+&* 20*9-&*:M&% A.+*)7+*%0*#.+*3-,#*(%77+"#*%"*#.6,*/%,#* 933*/$%/+$)+,*P* (%""+()%",*%0*-* •!%!(1%#-%1.2($&/D3;;;* Connections cU%,#d* ).MM&,-$% 933*%0*#.+*(%77+"#,*%"*#.6,*/%,#*
  • 18. facebook API Fetching User data facebook API Fetching User data https://graph.facebook.com/khadija.elbedweihy • fetch specific fields https://graph.facebook.com/khadija.elbedweihy?fields=id,name,picture Public Data only Link to the picture Picture at the given link facebook API Authorization Example facebook API Authorization Example Access token works for the authorized user only Same access token for a different user "does not work"
  • 19. facebook API User Fields facebook API User Connections id: The user's ID home: The user's News Feed. Requires the read_stream permission first_name: The user's first name feed: The user's wall. Requires the read_stream permission to see last_name: The user's last name non-public posts. name: The user's full name tagged: The photos, videos, and posts in which this user has been about The user's blurb that appears under their profile picture tagged. Requires the read_stream permission. birthday The user's birthday posts: The user's own posts. Requires the read_stream user_likes or interested_in Genders the user is interested in friend_likes permission. meeting_for Types of relationships the user is seeking photos: The photos this user is tagged in. Requires the user_photo_video_tags, friend_photo_video_tags and relationship_status The user's relationship status user_photos or friend_photos permissions. religion The user's religion Scenario Analysis Input: a corpus of messages related to a match – we need to pin-point relevant messages on twitter and facebook – using twitter and facebook API, we apply content retrieval and filtering to build this corpus information extraction Corpus generation Output: a ranked list of representative terms – we apply IE and NLP on the corpus to achieve this goal Content analysis by IE
  • 20. Content Analysis via IE Content Analysis via IE • To analyse the content and extract important terms, • To analyse the content and extract important terms, we follow these steps we follow these steps: – Natural language analyses of each message – Natural language analyses of each message • Tokenisation (tokenisation, POS tagging) OpenNLP • POS tagging – Identify candidate information units of interest – Identify candidate information units of interest (phrase chunking, entity recognition) • phrase chunking – Identify statistically important information – Identify statistically important information (term • term recognition recognition) JATR Content Analysis Natural Language Analysis Content Analysis Natural Language Analysis • Goal: process natural language text such that specific • An example: (located in "data/examples/ information can be identified example1.txt") – These processes include • Sentence segmentation "Rooney fails to end goal drought. Wayne Rooney's • Tokenisation trip to South Africa 2010 began with high • Part of Speech tagging expectations but he leaves without a single goal • Input scored after three group matches and a 1-4 defeat – a single message to Germany." • Output – a sequence of POS tagged tokens
  • 21. a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* •! C&,-&,)&%$&+M&,-9:.,% •! C&,-&,)&%$&+M&,-9:.,%2$#,+%W0&,KLJ* –! @,02-S*-*,6"43+*7+,,-4+* /* Input */ (LINE 17) –! W2-02-S*-*36,#*%0*,+"#+"(+,* String pathToInput = "../../data/examples/example1.txt"; String content = "…"; Rooney fails to end goal drought. | Wayne Rooney's trip to /* Creates an object of OpenNLP sentence segmentation detector */ South Africa 2010 began with high expectations but he SentenceDetector detector = new SentenceDetector("lib/opennlp/models/ EnglishSD.bin.gz"); leaves without a single goal scored after three group matches and a 1-4 defeat to Germany. /* Call the actual method to identify the end offsets of sentences. */ int[] result = detector.sentPosDetect(content); /* Print out the sentences */ Rooney fails to end goal drought. Wayne Rooney's Try it yourself! <*F+"#+"(+F+47+"#-)%";N-8-** int start=0, i=0; trip to South Africa 2010 began with high expectations but he leaves without a single goal do { scored after three group matches and a 1-4 defeat …… to Germany. } while(start<result[result.length-1]); a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* •! !.F&,#$9:.,% •! !.F&,#$9:.,%2$#,+%W0&,KLJ –! !"/'#S*-*,6"43+*,+"#+"(+O*%$*7+,,-4+* /* Input text message */ (LINE 28) String content = "…" // read in the text content from "example1.txt" –! E'#/'#S*-*36,#*%0*#%1+",* List<String> sentences = new ArrayList<String>(); …… /* Code for splitting sentences */ Rooney fails to end goal drought /*Creates an object of OpenNLPtokeniser using a pre-built English language model. */ //change the path accordingly String pathToEngTokenisationModel = "lib/opennlp/models/EnglishTok.bin.gz"; Rooney, fails, to, end, goal, drought, . Tokenizertokeniser tokeniser = new Tokenizer(pathToEngTokenisationModel); /*Tokenise each sentence and print out the result*/ Try it yourself! 
<*A%1+"6,-)%";N-8-** for(String sentence: sentences){ String[] result=tokeniser.tokenize(sentence); for(String tok:result) System.out.println(tok); Rooney fails to end goal drought. }
  • 22. a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* •! J9(-%.D%$0&&)"%-9++#,+% •! JWC%-9++#,+%2$#,+%W0&,KLJ* /*Input text message*/ (LINE 31) –! @,02-S*-*36,#*%0*#%1+",* String content = "…" //read in the text content from example1.txt –! W2-02-S*-*36,#*%0*#%1+",*26#.*#.+6$*/-$#*%0*,/++(.*#-4* List<String> tokens = new ArrayList<String>(); /* Code for tokenisation and add the result into the list object above. You do not need to do sentence segmentation in this case. Because the tokenisation will detect sentence boundary as a first step*/ Rooney, fails, to, end, goal, drought, . /*Creates an object of OpenNLP POS tagger using a pre-built English language model.*/ //change the path accordingly Rooney/NNP fails/VBZ to/TO end/VB goal/NN drought/ String pathToEngPOSModel = "lib/opennlp/models/tag.bin.gz"; /* You MAY specify additionally two parameters for the constructor, i.e., NN ./. TagDicionary and Dictionary.*/ PosTagger tagger = new PosTagger(pathToEngPOSModel, (Dictionary)null); Try it yourself! <*UEFA-44+$;N-8-** /*Tag the list of tokens and print out the result*/ String[] result=tagger.tag(tokens.toArray(new String[0])); goal/NN Rooney/NNP fails/VBZ to/TO end/VB drought/NN ./. for (String tag: result) System.out.println(tag); a%"#+"#*9"-3G,6,**U.$-,+*a.'"16"4* a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* •! R.9/S*6&+")0G6"4*6"0%$7-)%"*'"6#,*#.-#*7-1+*4%%&* •! J"(9$&%)"2,F#,+% (-"&6&-#+*#+$7,*%0*%'$*6"#+$+,#* –! @,02-S*-*36,#*%0*JWC7-9++&*%-.F&,$% •! !"*#.6,*+>+$(6,+O*2+*0%(',*%"*,.2,%0"(9$&$% –! W2-02-S*-*36,#*%0*/.$-,+,*H"%'",?8+$@*/.$-,+,K* –! 2.6(.*%|+"*@+-$*67/%$#-"#*&%7-6"<,/+(6D(* 6"0%$7-)%"* Rooney/NNP fails/VBZ to/TO end/VB goal/NN drought/ NN ./. •! @,02-* –! UEF<#-44+&*#%1+",* •! W2-02-* Rooney, goal drought –! Z%'"*/.$-,+,* R>+$(6,+* Try it yourself! %*+&6#*#.+*(3-,,*U.$-,+a.'"1+$;N-8-*-"&*$'"*
  • 23. a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* •! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ* •! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ* (LINE 44 in PhraseChunker.java) int[] result = detector.sentPosDetect(content); int start = 0, i = 0; (LINE 32 in PhraseChunker.java) do { //initilising all required NLP processors, If you get an out of memory //sentence splitting //exception, try increasing your JVM heap space to at least 256MB String sentence = content.substring(start, result[i]); String pathToEngTokenisationModel = "lib/opennlp/models/EnglishTok.bin.gz"; //TODO: tokenization, put tokens in a String array. Hint: String pathToEngPOSModel = "lib/opennlp/models/tag.bin.gz"; //Tokenisation.java String pathToEngPhraseModel = "lib/opennlp/models/EnglishChunk.bin.gz"; String[] tokens = null; //TODO: POS tagging, put tags in a String array. Hint: POSTagger.java SentenceDetector detector = new SentenceDetector("lib/opennlp/models/ EnglishSD.bin.gz"); String[] tags = null; Tokenizertokeniser = new Tokenizer(pathToEngTokenisationModel); //This is the method you use to chunk phrases on a list of tokens and PosTagger tagger = new PosTagger(pathToEngPOSModel, (Dictionary) null); //a list of tags String[] phrases = chunker.chunk(tokens, tags); TreebankChunkerchunker = new TreebankChunker(pathToEngPhraseModel); //See the result for(String p:phrases) System.out.println(p); …… start = result[i]; i++; } while (start < result[result.length - 1]); a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* •! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ* •! 
J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ* (LINE 44 in PhraseChunker.java) (LINE 78 in PhraseChunker.java) int[] result = detector.sentPosDetect(content); String npstart = "B-NP"; a%&+*0$%7*36"+*It*%"2-$&,*/$%(+,,+,* int start = 0, i = 0; String vpstart = "B-VP"; #.6,*$+,'3#*-"&*4+"+$-#+,*#.+*$+-3* do { A.+*$+,'3#*6,*"%#*+>-(#3G*#.+*/.$-,+,*2+* String npcontinue = "I-NP"; /.$-,+,* //sentence splitting +>/+(#+&O*@'#*-*36,#*%0*c#-4,dO*2.6(.*-$+* String vpcontinue = "I-VP"; (%77%"3G*',+&*6"*ZVU*/.$-,+* String sentence = content.substring(start, result[i]); String other = "O"; //TODO: tokenization, put tokens in (.'"16"4S* array. a String String phrase = ""; String[] tokens=null; for (int n = 0; n < tokens.length; n++) { X<ZU*****C%%"+G * *C%%"+G* //TODO: POStagging, put tags in a String array. Hint: POSTagger.java if (phrases[n].equals(npstart) || phrases[n].equals(vpstart)) { String[]–tags = null; B “begin” X<}U******0-63,* phrase = tokens[n]; //ThisI is“inside” – the method you use to chunk phrases on a list of tokens and for (int m = n + 1; m < tokens.length; m++) { !<}U*******#% * * *0-63,*#%*+"&* //a list – “Noun phrase” NP of tags if (phrases[m].equals(npcontinue) || !<}U*******+"&* String[] phrases phrase” VP – “Verb = chunker.chunk(tokens, tags); X<ZU*****4%-3* phrases[m].equals(vpcontinue)) { //See the result !<ZU******&$%'4.# * for (int k = 0; k < phrases.length; k++) { *4%-3*&$%'4.#* phrase = phrase+" "+tokens[m]; } else { System.out.println(phrases[k] + "tt" + tokens[k]); System.out.println("Actual phrase: "+phrase); } phrase = ""; …… break; start = result[i]; ... i++; } } while (start < result[result.length - 1]);
  • 24. a%"#+"#*9"-3G,6,**Z-#'$-3*V-"4'-4+*9"-3G,6,* g%$+*+>+$(6,+,*60*G%'*-$+*6"#+$+,#+&* •! J"(9$&%)"2,F#,+%2$#,+%W0&,KLJ% –! A.+*-",2+$b;* •! C+/+-#*/$+86%',*#-,1,*',6"4*#.+*(%$/',*4+"+$-#+&* (LINE 44 in PhraseChunker.java) ',6"4*#.+*#26:+$*-"&*0-(+@%%1*9U!,* int[] result = detector.sentPosDetect(content); int start = 0, i = 0; •! A$GS* do { //sentence splitting –! F+"#+"(+*,+47+"#-)%"* String sentence = content.substring(start, result[i]); //TODO: tokenization, put tokens in a String array. B-NP Rooney –! A%1+"6,-)%"* String[] tokens=tokeniser.tokenize(sentence); B-VP //TODO: pos tagging, put tags in a String array. fails –! U-$#<%0<,/++(.*#-446"4* I-VP to String[] tags = tagger.tag(tokens); I-VP on a list of tokens //This is the method you use to chunk phrases end –! U.$-,+*(.'"16"4* //and a list of tags B-NP goal String[] phrases = chunker.chunk(tokens,I-NP tags); drought //See the result O . for(String p:phrases) Actual phrase: Rooney System.out.println(p); Actual phrase: fails to end …… Actual phrase: goal drought start = result[i]; i++; } while (start < result[result.length - 1]); Z+>#* Q%7-6"*A+$7*C+(%4"6)%"* •! !.%9,9/1$&%-"&%).,-&,-%9,*%&'-(9)-%#M0.(-9,-%-&(M$U% •! R.9/S*+>#$-(#*,#-),)(-33G*,64"6D(-"#*#+$7,O*2.6(.* G&%D.//.G%-"&$&%$-&0$% (%33+()8+3G*&+#+$76"+*#.+*,'77-$G*%0*#.+*7-#(.* –! Z-#'$-3*3-"4'-4+*-"-3G,+,*%0*+-(.*7+,,-4+* •! 8&)90T**&%7-6"*#+$7*$+(%4"6)%"*/$%(+&'$+* H#%1+"6,-)%"O*UEF*#-446"4K* –! KLJ%0(.)&$$&$%#%*6&+")0G*(-"&6&-#+*3+>6(%",O*+;4;O* –! !&+")0G*(-"&6&-#+*6"0%$7-)%"*'"6#,*%0*6"#+$+,#* "%'"</.$-,+,O*+"))+,* H/.$-,+*(.'"16"4O*+")#G*$+(%4"6)%"K* –! C-9:$:)9/%M&9$2(&$%#%*+8-3'-#+*#.+*,64"6D(-"(+*%0* (-"&6&-#+*3+>6(%",* –! !&+")0G*,#-),)(-33G*67/%$#-"#*6"0%$7-)%"*H#+$7* •! #+$7*0$+5'+"(G~*•<6&0~*2+6$&"+,,O*43%,,+>O*(<8-3'+O* $+(%4"6)%"K* #+$7+>*
  • 25. Q%7-6"*A+$7*C+(%4"6)%"* ]9AC**]-8-*9'#%7-)(*A+$7*C+(%4"6)%"*#%%316#* •! R.9/S*+>#$-(#*,#-),)(-33G*,64"6D(-"#*#+$7,O*2.6(.* •! ;I!8%% (%33+()8+3G*&+#+$76"+*#.+*,'77-$G*%0*#.+*7-#(.* –! ]-8-<@-,+&*#%%316#*0%$*&+8+3%/6"4*-"&*#+,)"4*&%7-6"* •! 8&)90T**&%7-6"*#+$7*$+(%4"6)%"*/$%(+&'$+* #+$7*$+(%4"6)%"*-34%$6#.7,* –! KLJ%0(.)&$$&$%#%*6&+")0G*(-"&6&-#+*3+>6(%",O*+;4;O* •! B$&%;I!8%-.% "%'"</.$-,+,O*+"))+,* –! +>#$-(#*&%7-6"*#+$7,*0$%7*-*(%33+()%"*%0* –! C-9:$:)9/%M&9$2(&$%#%*+8-3'-#+*#.+*,64"6D(-"(+*%0* &%('7+"#,* (-"&6&-#+*3+>6(%",* •! J*,#-#+<%0<#.+<-$#*-34%$6#.7,*67/3+7+"#+&* •! #+$7*0$+5'+"(G~*•<6&0~*2+6$&"+,,O*43%,,+>O*(<8-3'+O* –! 67/3+7+"#*-&&6)%"-3*-34%$6#.7,* #+$7+>* –! +8-3'-#+**&6{+$+"#*-34%$6#.7,*'"&+$*#.+*,-7+* 0$-7+2%$1* ]9AC**]-8-*9'#%7-)(*A+$7*C+(%4"6)%"*#%%316#* Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* •! ;I!8%6,*-*]-8-<@-,+&*#%%316#*0%$*&+8+3%/6"4*-"&* •! ;I!8%* #+,)"4*&%7-6"*#+$7*$+(%4"6)%"*-34%$6#.7,* –! @-,6(*D(&h2&,)1%M&9$2(&* !"*#.+*0%33%26"4*+>+$(6,+O*G%'*2633*',+* •! B$&%;I!8%-.% #.+,+*-34%$6#.7,*-"&*(%7/-$+*#.+*$+,'3#,** –! J*-&&6)%"-3*,#-#+<%0<#.+<-$#*-34%$6#.7,*67/3+7+"#+&% –! +>#$-(#*&%7-6"*#+$7,*0$%7*-*(%33+()%"*%0* •! A+$7*0$+5'+"(G*6"8+$,+*&%('7+"#*0$+5'+"(G*H•<6&0K* &%('7+"#,* •! a<}-3'+* @-(14$%'"&*6,*(%8+$+&*6"* •! J*,#-#+<%0<#.+<-$#*-34%$6#.7,*67/3+7+"#+&* •! `+6$&"+,,* #.+%$G*,36&+,*H€[vl<*[IvK* –! 67/3+7+"#*-&&6)%"-3*-34%$6#.7,* •! T3%,,-$G*+>#$-()%"*HT3%,,+>K* –! +8-3'-#+**&6{+$+"#*-34%$6#.7,*'"&+$*#.+*,-7+* •! A+$7*+>#$-(#%$*HA+$7+>K* 0$-7+2%$1* •! (-"*@+*',+&*-,*-*(%77-"&<36"+*@-,+&*-//36(-)%"* 9&8-"(+&*#%/6(**2633*@+* (%8+$+&*@$6+zG*
  • 26. Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* •! ^.G%-.%2$&%;I!8%* •! ^.G%-.%2$&%;I!8%* –! V%(-#+*G%'$*]9AC*0%3&+$* –! ,#-$#*#.+*-//36(-)%"O*+;4;O*#.+*0$+5'+"(G*7+-,'$+* uk.ac.shef.wit.jatr.debug.TestFrequency –! (%"D4'$+*G%'$*-//36(-)%"*6"*Z9-(30(.0&(:&$*6"* pG%'$=N-#$q?#+,#* –! 26#.*76"67'7*7+7%$G*•7>J[^7* •! N-#$;,G,#+7;"3/rpG%'$=N-#$q?"3/=$+,%'$(+,* –! (%/G*N-#$*-"&*3%4Y*/$%/+$)+,*D3+,*#%*G%'$*(3-,,+,*0%3&+$* *9((+,,*#%*ZVU*#%%3,*$+5'6$+&*@G*]9AC* -|+$*(3+-"*-"&*$+(%7/63+* •! N-#$;,G,#+7;#+$7;7->2%$&,rJ* –! ,++*D3+*L5'6(1,#-$#;#>#M*6"*]9AC*0%3&+$*0%$*-&&6)%"-3* *g->67'7*"'7@+$*%0*2%$&,*6"*-*#+$7* 6"0%$7-)%"* •! N-#$;,G,#+7;#+$7;64"%$+=&646#,r#$'+* *a-"*-*#+$7*(%"#-6"*&646#,k %% –! #+,#*26#.*#.+*#26:+$*-"&*0-(+@%%1*(%$/%$-* –! (%/G*/$%/+$)+,*D3+,*#%*pG%'$=N-#$q?(3-,,+,* Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* 82,,#,+%-"&%-&$-$%G#-"%9,-% 82,,#,+%-"&%-&$-$%G#-"%9,-* •! -"#*,($6/#*,+#*'/*#%*$'"*0$%7*0%3&+$*pG%'$=N-#$q?#+,#* •! &+0-'3#*-$4'7+"#,* –! /-#.=#%=(%$/',*r*0%3&+$*6"*pG%'$=N-#$q?#+,#?w)"Gw* –! /-#.=#%=$+0+$+"(+=(%$/',=,#-#,*r*pG%'$=N-#$q?w"3/=$+,%'$(+,? @"(='"60$5,;"%$7-3w* •! #%*',+*-3#+$"-)8+*-$4,*+"#+$*%"+*%$*@%#.*%0* –! ant -Dpath_to_corpus=alt_corpus_path -Dpath_to_reference_corpus_stats=alt_reference_corpus_stats_path •! %'#/'#*#%*#+,#*0%3&+$* –! /-:+$"S*!"#$%&'()*+),*B*9AC=9VTEC!Ang;#>#* –! %$*$'"*9VV*#+,#,*@G*(-336"4*LI/+.(#-"M!&$-&(M*
  • 27. Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* •! B,*&($-9,*#,+%-"&%.2-02-* •! B,*&($-9,*#,+%-"&%0(.)&$$%5%".G%*.&$%#-%G.(Fi* –! A.+*/$%(+,,*%0*#.+*-//36(-)%"*6,*3%44+&*6"*cN-#$;3%4d* –! 3%%1*-#*$'"HK*7+#.%&*6"* –! A.+*$+,'3#,*-$+*%'#/'#*#%*-*D3+*(-33+&** uk.ac.shef.wit.jatr.debug.TestFrequency.java *p-34%$6#.7="-7+q=9AC=934%$6#.7;#>#O*+;4;O* cF67/3+=#+$7=0$+5'+"(G=9AC=9VTEC!Ang;#>#d* Part 1: Extracting candidate terms by NLP9*c,#%/*2%$&d*36,#*6,* ',+&*#%*$+7%8+*"%6,+* –! 2.6(.*(%"#-6",*$-"1+&*36,#*%0*#+$7,*+>#$-(#+&*0$%7*#.+* //stop word list 2%$&,O*+;4;O*L#.+MO*L-"&M* StopList stop = new StopList(true); (%$/',O*%"+*#+$7*/+$*36"+S* //lemmatiser 2%$3&('/*‚`ECVQaeU*‚`%$3&a'/*‚2%$3&('/*‚`%$3&('/ ***[u^Y;_* Lemmatiser lemmatizer = new Lemmatiser(); V+77-),-)%"*6,*',+&*#%* //noun phrase extractor "%$7-36,+*#+$7,*#%*#.+6$* (-"%"6(-3*0%$7,*H,++*#.+%$G* A.+*D$,#*#+$7* CandidateTermExtractornpextractor = new A.+*"'7@+$*6,* ,36&+,*[ll<*[lvK* 6,*#.+* A.+*%#.+$*#+$7,*-$+* NounPhraseExtractorOpenNLP(stop, lemmatizer); #.+*(-3('3-#+&* (-"%"6(-3*0%$7* #.+*8-$6-"#,*0%'"&*6"* ,(%$+*0%$*#.-#* …… %0*-33*%0*6#,* #.+*(%$/',* #+$7** ]9AC*',+,*-*&+0-'3#*%/+"<"3/* 8-$6-"#,** @-,+&*"%'"*/.$-,+*(.'"1+$*#%* +>#$-(#*(-"&6&-#+*#+$7,* Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* Q%7-6"*A+$7*C+(%4"6)%"*',6"4*]9AC* •! B,*&($-9,*#,+%-"&%0(.)&$$%5%".G%*.&$%#-%G.(Fi* •! B,*&($-9,*#,+%-"&%0(.)&$$%5%".G%*.&$%#-%G.(Fi* –! 3%%1*-#*$'"HK*7+#.%&*6"* U$%(+,,%$,* Part 1: Extracting candidate terms by NLP cont. uk.ac.shef.wit.jatr.debug.TestFrequency.java TermFreqCounter npcounter = new TermFreqCounter();$+5'6$+&*0%$* (%'")"4*#+$7* WordCounter wordcounter = new WordCounter(); 0$+5'+"(6+,* Rooney, fails, to, end, goal, drought, . 
//create global resource index builder, which indexes 9*c,#%/*2%$&d*36,#* Part 1: Extracting candidate terms by NLP global resources, //stop word list 6,*',+&*#%*$+7%8+* "%6,+*2%$&,* //such as documents and terms and their relations StopList stop = new StopList(true); GlobalResourceIndexBuilder builder = new //lemmatiser GlobalResourceIndexBuilder(); Lemmatiserlemmatizer = new Lemmatiser(); V+77-),-)%"*6,*',+&*#%* //build the global resource index //noun phrase extractor "%$7-36,+*#+$7,*#%*#.+6$* GlobalResourceIndex termDocIndex = builder.build(new W%$*6"&+>6"4*#+$7,* (-"%"6(-3*0%$7,*H,++*#.+%$G* CandidateTermExtractornpextractor = new CorpusImpl(args[0]), npextractor); -"&*&%('7+"#,* ,36&+,*[ll<*[lvK* NounPhraseExtractorOpenNLP(stop, lemmatizer); …. …… !"8%16"4*ZVU*/$%(+,,+,*#%*$+-&*6"* ]9AC*',+,*-*&+0-'3#*%/+"<"3/* &%('7+"#,O*,+47+"#*,+"#+"(+,O* @-,+&*"%'"*/.$-,+*(.'"1+$*#%* -//3G*#%1+"6,-)%"O*UEF*#-446"4O* +>#$-(#*(-"&6&-#+*#+$7,* -"&*/.$-,+*(.'"16"4*