SlideShare uma empresa Scribd logo
1 de 31
2010/06/24
                       
kaneko.satoko(at)ocha.ac.jp 
                   
 
    Bioconductor(Biostrings)        
            (p distance)        
                                            
Bioconductor Biostrings                                   
Biostrings 
> source("http://www.bioconductor.org/biocLite.R") 
> biocLite("Biostrings")    #         1               




> library(Biostrings)     #   R                               
Bioconductor/Biostrings                              1 
> ls("package:Biostrings")        #Biostrings                       

> x <- "CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT" 
> DNAString(x)     #DNA           
  54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> s <- DNAString(x)       #   s DNA        (x)    
> length(s)   
[1] 54          #      s    DNA                           54 

> length(x)     
[1] 1      #       x                                      1 
Bioconductor/Biostrings                      2 
 54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> alphabetFrequency(s, baseOnly=TRUE)    #            
      A  C  G  T other 
[1,] 12 12 15 15     0 

> reverseComplement(s)       #       
  54-letter "DNAString" instance 
seq: AGCATCGATCAGCTAGCATCGATCAGCTAGCTAGCTAGCTAGCTACTACGTACG 

> dna2rna(s)             #RNA    (T -> U) 
  54-letter "RNAString" instance 
seq: CGUACGUAGUAGCUAGCUAGCUAGCUAGCUGAUCGAUGCUAGCUGAUCGAUGCU 
Bioconductor/Biostrings                      3 
 54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> m1 <- matchPattern("GCTA", s)  #                       
> m1 
  Views on a 54-letter DNAString subject 
subject: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 
views: 
    start end width 
[1]    12  15     4 [GCTA] 
[2]    16  19     4 [GCTA] 
[3]    20  23     4 [GCTA] 
[4]    24  27     4 [GCTA] 
[5]    38  41     4 [GCTA] 
1
   Makorin1 
   22.61kb 
   (CDS:1446bp) 

  Makorin1‐p1 
  1592bp 
                   region A         region B            region C 

Makorin1‐p1   Makorin1                         processed pseudogene         
Makorin1‐p1 regionB                        Makorin1 regionB                            
Makorin1  mRNA                                              regionB          mRNA  
                                                                        regionC   
                                 

regionB                                                                                            
                                                                     
                                                    regionC                                    
regionB                                    

          regionB regionC                                                   
                                                     
1         

    Makorin1 
    22.61kb 
    (CDS:1446bp) 

    Makorin1‐p1 
    1592bp 
                    region A     region B            region C 

       
1) Makorin1‐p1                      Makorin1                                      

2)                       Makorin1 mRNA                                                
    Makorin1‐p1 Makorin1                                        Makorin1‐p1                       
    Makorin1                                                 

3) Makorin1 mRNA                                                                          
     Makorin1‐p1                     (regionB                                                 
                                                )                         

    regionB regionC                                                           
                                                         
 –             1‐


                                      

 (Null hypothesis)            




                      
                          
                                  
                                          
 –         2‐
                           2                                    

                               
     
         
                   

                  
                       

                      
               


         False negative                         False positive        
2
                                         Makorin1‐p1                         
 Makorin1‐p1                        ortholog rat                                   
 Makorin1‐p1                                                                                    
 Mus musculus domesticus                             5              
                                                                 Subgenus
                      M. booduga
                     M. fragilicauda    India+Lao/Thai
                                           booduga
                    M. terricolor
             1.5    M. macedonicus
       4.3   mya    M. spicilegus
       mya
                     M. spretus
                     M. musculus castaneus
                                                    Palearctic
                                                                 Mus
                                                    musculus
                     M. m. domesticus
                     M. m. molossinus
                     M. caroli
                    M. cookii                   Southeast Asia
                    M. cervicolor                cervicolor
                     M. pahari Coelomys
                       M. mattheyi Nannomys
                   M. platythrix Pyromys
                    Apodemus agrarius
                             Micromys minutus
                      Rattus norvegicus                          (from Suzuki et al. 2004 Mol. Phylogenet. Evol.
0.01                                                                                    33:626-646, Figure 1, 4.)
3
                                                                

       
                                 
                     Makorin1‐p1.fasta 
                                                     
 Mus musculus domesticus
 dom
                 [Macintosh HD/        /tg03/bin]
 Mus musculus molossinus
 mol
                      
 Mus musculus castaneus
       cas
                                              Makorin1‐p1.fasta                          
 Mus musculus musculus
        mus
           regionB 1‐617, regionC 618‐1256        
 Mus spretus
                  spr
 Mus caroli
                   car

p distance        
         2                          (number of differences)/                                  
             alignment                       
dom    CCTGCCCCAA ATGTCAGATC ACATCTCACT TTGTCATTCC AAGTAATCAC TGGGTGGAGT
spr1   .......... ...C...... .......... .......... ......GT.. ..........
car1   .......... ...C.GA... ......A... ..T....... ....G.GT.. .........G

dom‐spr1: 3/60 = 0.05 
dom‐car1: 9/60 = 0.15 
spr1‐car1: 6/60 =  0.10          
region B region C                                                   
regionB regionC                                    number of differences      p distance                       
                  region B                          bp
             region C                           bp
pair 
         number of differences
               p distance
   number of differences
               p distance
 dom – mol
 dom – cas
 dom – mus
 dom – spr
 dom – car
  mol ‐ cas
 mol – mus
 mol – spr
 mol – car
 cas – mus
  cas – spr
  cas – car
 mus – spr 
 mus – car
  spr – car
region B region C                                                  
        Makorin1‐p1                      (region B, regionC)            
                                    p distance             

      
1) domesticus          Biostrings DNAString            
2) B              
3) B                     
4) C              
5) C                     
6) domesticus                      DNAString          B         C            
7)                                        
8)                                              4                               
Biostrings                        p distance                                          1
library(Biostrings)  #R                                            

#Makorin1-p1.fasta domesticus              ""                               
# DNA              dom               
> dom <- DNAString("") 

#dom          1           617                  domB            
> domB <‐ substring(dom,1,617) 

#domB                           lengthB         (p distance                                )        
> lengthB <‐ length(domB) 

#dom          618          1256                  domC                  
> domC <‐ substring(dom, 618,1256) 

#domC                           lengthC         (p distance                                )        
> lengthC <‐ length(domC) 

#                                                                                  
#        lengthB                                                               
> lengthB 
[1] 617 
Biostrings                        p distance                       2
#Makorin1‐p1.fasta molossinus         ””                     
# DNA              mol             
> mol <‐ DNAString("") 

#mol         1        617                  molB      
> molB <‐ substring(mol,1,617) 

#mol         618          1256               molC        
> molC <‐ substring(mol, 618,1256) 


#                         castaneus(cas), musculus(mus), spretus(spr), caroli(car) 
#                                                  number of differences p distance  
#                            
Biostrings                                 p distance           3
> x <‐ domB 
> y <‐ molB 

#      x       y(    domB molB)                          
>  comp<‐ c(compareStrings(x,y)) 

#               ?                          ?    
> subt <‐ gsub("(['?'])", "", comp) 

#subt DNA                ide            
> ide <‐ DNAString(subt) 

#ide           len        
> len <‐ length(ide) 

#x y                         dif            
> dif <- (lengthB - len) 
> dif       #x y                                             

#regionB  p distance                
> pdis <‐ dif/lengthB 
> pdis  #p distance                                4             
Biostrings                           p distance                       4
CotEditor                                   pdistanceB.R         
[Macintosh HD/          /tg03/bin]                    

comp <‐ c(compareStrings(x,y)) 
                                            2       lengthB lengthC         
subt <‐ gsub("(['?'])", "", comp) 
                                             pdistanceC.R      bin              
ide <‐ DNAString(subt) 
len <‐ length(ide) 
dif <‐ (lengthB ‐ len) 
pdis <‐ dif/lengthB 

                                               x y           
> x <‐ 
> y <‐ 
> source("/Users/tg03/bin/pdistanceB.R") 
                           bin                        
> source("pdistanceB.R") 
                2                 
> dif 
> pdis 
region B region C                                              (           )
                 region B                    617  bp
     region C                 639  bp
pair 
        number of differences
      p distance
    number of differences
   p distance
dom – mol
             6
                  0.010
                  7               0.011 
dom – cas
             6
                  0.010
                  7
              0.011 
dom – mus
             8
                  0.013
                  8
              0.013 
dom – spr
            16
                  0.026
               14
                0.022 
dom – car
            30
                  0.049
               39
                0.061 
mol – cas
             0
                     0
                   0
                0 
mol – mus
             4
                  0.006
                  1
              0.002 
mol – spr
            14
                  0.023
               17
                0.027  
mol – car
            28
                  0.045
               38
                0.059
cas – mus
             4
                  0.006
                  1
              0.002 
cas – spr
            14
                  0.023
               17
                0.027
cas – car
            28
                  0.045
               38
                0.059
mus – spr 
           14
                  0.023
               18
                0.028
mus – car
            28
                  0.045
               39
                0.061
                                                                                              
spr – car
            32
                  0.052
               37
                0.058
 1
1) regionB p distance x                   x         
> x <‐ c(x         ) 

2) regionC   p distance y                 y             
> y <‐ c(y           ) 

3)                                x   y                         

4) plot()                
> plot(x,y,xlim=c(            ,       ), ylim=c(           ,       )) 
 1            
1) regionB p distance x                          x        
> x <‐ c(0.010, 0.010, 0.013, 0.026, 0.049, 0, 0.006 ,0.023, 0.045, 0.006 , 0.023 , 0.045, 
0.023, 0.045, 0.052 ) 

2) regionC p distance y                          y          
> y <‐ c(0.011, 0.011, 0.013, 0.022, 0.061, 0, 0.002, 0.027, 0.059, 0.002, 0.027, 0.059, 
0.028, 0.061, 0.058 ) 

3) max()                                            x    y                                   
> max(x) 
[1] 0.052  

> max(y) 
[1] 0.061 

4) plot()                  
> plot(x,y,xlim=c(0,0.065), ylim=c(0,0.065)) 
2
4’)                                              
> plot(x,y,xlab='regionB',ylab='regionC', xlim=c(0,0.065), ylim=c(0,0.065)) 




regionB regionC    p distance                                 
regionB regionC                                                                 
                         
                                               
1
                                                           
                                                    (d)            
                                             

                                                 
                                                               


                      (x3,y3)

                      d3
                                 d4
(x1,y1)
                    (x4,y4)
    d1
        d2

           (x2,y2)
2
                                                                                  

                                          
                           



                                                                          
                                                                              



> xdev <- (x-mean(x))    # x                    
> ydev <- (y-mean(y))    # y                    
> bmul <- xdev*ydev      # x y                      
> bnum <- sum(bmul)      # x y                                  (   ) 
> bsqu <- xdev^2         # x                2  
> bden <- sum(bsqu)      #  x                2         (   ) 
> b <- bnum/bden         #      (   ) 
> b 
[1] 1.317939 
3
                                                  
                                              
                                 




> a1 <- sum(y)/length(y) 
> a2 <- b*(sum(x)/length(x)) 
> a <- a1-a2 
> a 
[1] -0.003636326 


> abline (a,b)   
#a b                      y = a + bx      
regionB regionC                             y=x                     
                           regionB regionC              
          y=-0.0036+1.3x                            
(y=-0.0036+1.3x   y=x                                  ) 

      regionB regionC                           
                                                             
 
p distance                                             1
‐pdis_line.R‐   

library("Biostrings"); 
 x <‐""
dom <- "[domesticus              ]"; 
mol <‐ "[molossinus            ]"; 
cas <‐ "[castaneus          ]"; 
mus <‐ "[musculus            ]"; 
spr <‐ "[spretus         ]"; 
car <‐ "[caroli        ]";
 
seqs     <‐ c(dom,mol,cas,mus,spr,car);
seqnames <‐ c("dom","mol","cas","mus","spr","car");
nseqs <‐ length(seqs);
npoints <‐ length(x); 
x = vector(length=npoints); 
y = vector(length=npoints); 
k = 0;  
                    
 
p distance                                        2
for (i1 in 1:(nseqs-1)){
  for (i2 in (i1+1):nseqs ){
    k = k + 1; 
#    cat(sprintf("%d %d\n",i1,i2));
    seq1 = DNAString(seqs[i1]);
    seq2 = DNAString(seqs[i2]);
    seq_b1 = substring( seq1, 1,   617 );
    seq_c1 = substring( seq1, 618, 1256 );
    seq_b2 = substring( seq2, 1,   617 );
    seq_c2 = substring( seq2, 618, 1256 );
    len_b  = length( seq_b1 );
    cmp_b  = c(compareStrings(seq_b1,seq_b2));
    sub_b  = gsub("(['?'])","",cmp_b);
    subt_b = DNAString(sub_b);
    dif_b  = length(subt_b);
    n_b    = len_b - dif_b;
    pdis_b = n_b / len_b; 

                  
 
p distance                                                 3
‐pdis_line.R‐ 

 x[k]   = pdis_b;
     len_c  = length( seq_c1 );
     cmp_c  = c(compareStrings(seq_c1,seq_c2));
     sub_c  = gsub("(['?'])","",cmp_c);
     subt_c = DNAString(sub_c);
     dif_c  = length(subt_c);
     n_c    = len_c - dif_c;
     pdis_c = n_c / len_c;
     y[k]   = pdis_c;
     cat(sprintf('%s %s %d %g %g\n',seqnames[i1],seqnames[i2],k,pdis_b,pdis_c)); 
   }
 }
 xdev <‐ x‐mean(x);
 ydev <‐ y‐mean(y);
 b    <‐ sum(xdev*ydev)/sum(xdev*xdev);
 a    <‐ mean(y) ‐ b*mean(x);
  
 cat(sprintf('a=%g, b=%g\n',a,b));
 
p distance                                           4
pdis_line.R         
                                                           




         R                    
                                              
                
(                      bin            path                    ) 

                        (p distance              )                  
 
                     
 

         

Mais conteúdo relacionado

Destaque

100513_homology_search(ensembl)
100513_homology_search(ensembl)100513_homology_search(ensembl)
100513_homology_search(ensembl)ocha_kaneko
 
100610_blastclustalw
100610_blastclustalw100610_blastclustalw
100610_blastclustalwocha_kaneko
 
100701_statistics3
100701_statistics3100701_statistics3
100701_statistics3ocha_kaneko
 
100617_statistics1
100617_statistics1100617_statistics1
100617_statistics1ocha_kaneko
 

Destaque (6)

100513_homology_search(ensembl)
100513_homology_search(ensembl)100513_homology_search(ensembl)
100513_homology_search(ensembl)
 
100520_dotplot
100520_dotplot100520_dotplot
100520_dotplot
 
090601-dotplot
090601-dotplot090601-dotplot
090601-dotplot
 
100610_blastclustalw
100610_blastclustalw100610_blastclustalw
100610_blastclustalw
 
100701_statistics3
100701_statistics3100701_statistics3
100701_statistics3
 
100617_statistics1
100617_statistics1100617_statistics1
100617_statistics1
 

Mais de ocha_kaneko

100506-unix-ensembl
100506-unix-ensembl100506-unix-ensembl
100506-unix-ensemblocha_kaneko
 
100422-intro,setup
100422-intro,setup100422-intro,setup
100422-intro,setupocha_kaneko
 
090622_blast-clustalw
090622_blast-clustalw090622_blast-clustalw
090622_blast-clustalwocha_kaneko
 
090615-TogoWS SOAP
090615-TogoWS SOAP090615-TogoWS SOAP
090615-TogoWS SOAPocha_kaneko
 
090608-TogoWS REST
090608-TogoWS REST090608-TogoWS REST
090608-TogoWS RESTocha_kaneko
 
090518_unix-ensembl
090518_unix-ensembl090518_unix-ensembl
090518_unix-ensemblocha_kaneko
 
090511-intro, setup
090511-intro, setup090511-intro, setup
090511-intro, setupocha_kaneko
 

Mais de ocha_kaneko (8)

100506-unix-ensembl
100506-unix-ensembl100506-unix-ensembl
100506-unix-ensembl
 
100422-intro,setup
100422-intro,setup100422-intro,setup
100422-intro,setup
 
Statistics_R
Statistics_RStatistics_R
Statistics_R
 
090622_blast-clustalw
090622_blast-clustalw090622_blast-clustalw
090622_blast-clustalw
 
090615-TogoWS SOAP
090615-TogoWS SOAP090615-TogoWS SOAP
090615-TogoWS SOAP
 
090608-TogoWS REST
090608-TogoWS REST090608-TogoWS REST
090608-TogoWS REST
 
090518_unix-ensembl
090518_unix-ensembl090518_unix-ensembl
090518_unix-ensembl
 
090511-intro, setup
090511-intro, setup090511-intro, setup
090511-intro, setup
 

Último

SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptx
SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptxSKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptx
SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptxAmanpreet Kaur
 
Making communications land - Are they received and understood as intended? we...
Making communications land - Are they received and understood as intended? we...Making communications land - Are they received and understood as intended? we...
Making communications land - Are they received and understood as intended? we...Association for Project Management
 
Spellings Wk 3 English CAPS CARES Please Practise
Spellings Wk 3 English CAPS CARES Please PractiseSpellings Wk 3 English CAPS CARES Please Practise
Spellings Wk 3 English CAPS CARES Please PractiseAnaAcapella
 
How to Manage Global Discount in Odoo 17 POS
How to Manage Global Discount in Odoo 17 POSHow to Manage Global Discount in Odoo 17 POS
How to Manage Global Discount in Odoo 17 POSCeline George
 
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...christianmathematics
 
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...Nguyen Thanh Tu Collection
 
ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.MaryamAhmad92
 
Understanding Accommodations and Modifications
Understanding  Accommodations and ModificationsUnderstanding  Accommodations and Modifications
Understanding Accommodations and ModificationsMJDuyan
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfAdmir Softic
 
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptxMaritesTamaniVerdade
 
microwave assisted reaction. General introduction
microwave assisted reaction. General introductionmicrowave assisted reaction. General introduction
microwave assisted reaction. General introductionMaksud Ahmed
 
psychiatric nursing HISTORY COLLECTION .docx
psychiatric  nursing HISTORY  COLLECTION  .docxpsychiatric  nursing HISTORY  COLLECTION  .docx
psychiatric nursing HISTORY COLLECTION .docxPoojaSen20
 
Activity 01 - Artificial Culture (1).pdf
Activity 01 - Artificial Culture (1).pdfActivity 01 - Artificial Culture (1).pdf
Activity 01 - Artificial Culture (1).pdfciinovamais
 
1029 - Danh muc Sach Giao Khoa 10 . pdf
1029 -  Danh muc Sach Giao Khoa 10 . pdf1029 -  Danh muc Sach Giao Khoa 10 . pdf
1029 - Danh muc Sach Giao Khoa 10 . pdfQucHHunhnh
 
Micro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdfMicro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdfPoh-Sun Goh
 
Application orientated numerical on hev.ppt
Application orientated numerical on hev.pptApplication orientated numerical on hev.ppt
Application orientated numerical on hev.pptRamjanShidvankar
 
Unit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptxUnit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptxVishalSingh1417
 

Último (20)

Asian American Pacific Islander Month DDSD 2024.pptx
Asian American Pacific Islander Month DDSD 2024.pptxAsian American Pacific Islander Month DDSD 2024.pptx
Asian American Pacific Islander Month DDSD 2024.pptx
 
SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptx
SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptxSKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptx
SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptx
 
Making communications land - Are they received and understood as intended? we...
Making communications land - Are they received and understood as intended? we...Making communications land - Are they received and understood as intended? we...
Making communications land - Are they received and understood as intended? we...
 
Spellings Wk 3 English CAPS CARES Please Practise
Spellings Wk 3 English CAPS CARES Please PractiseSpellings Wk 3 English CAPS CARES Please Practise
Spellings Wk 3 English CAPS CARES Please Practise
 
Spatium Project Simulation student brief
Spatium Project Simulation student briefSpatium Project Simulation student brief
Spatium Project Simulation student brief
 
How to Manage Global Discount in Odoo 17 POS
How to Manage Global Discount in Odoo 17 POSHow to Manage Global Discount in Odoo 17 POS
How to Manage Global Discount in Odoo 17 POS
 
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
 
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
 
ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.
 
Understanding Accommodations and Modifications
Understanding  Accommodations and ModificationsUnderstanding  Accommodations and Modifications
Understanding Accommodations and Modifications
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdf
 
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
 
microwave assisted reaction. General introduction
microwave assisted reaction. General introductionmicrowave assisted reaction. General introduction
microwave assisted reaction. General introduction
 
psychiatric nursing HISTORY COLLECTION .docx
psychiatric  nursing HISTORY  COLLECTION  .docxpsychiatric  nursing HISTORY  COLLECTION  .docx
psychiatric nursing HISTORY COLLECTION .docx
 
Activity 01 - Artificial Culture (1).pdf
Activity 01 - Artificial Culture (1).pdfActivity 01 - Artificial Culture (1).pdf
Activity 01 - Artificial Culture (1).pdf
 
1029 - Danh muc Sach Giao Khoa 10 . pdf
1029 -  Danh muc Sach Giao Khoa 10 . pdf1029 -  Danh muc Sach Giao Khoa 10 . pdf
1029 - Danh muc Sach Giao Khoa 10 . pdf
 
Micro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdfMicro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdf
 
Application orientated numerical on hev.ppt
Application orientated numerical on hev.pptApplication orientated numerical on hev.ppt
Application orientated numerical on hev.ppt
 
Unit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptxUnit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptx
 
Mehran University Newsletter Vol-X, Issue-I, 2024
Mehran University Newsletter Vol-X, Issue-I, 2024Mehran University Newsletter Vol-X, Issue-I, 2024
Mehran University Newsletter Vol-X, Issue-I, 2024
 

100624_statistics2

  • 1. 2010/06/24   kaneko.satoko(at)ocha.ac.jp   
  • 2.     Bioconductor(Biostrings)     (p distance)      
  • 3. Bioconductor Biostrings   Biostrings  > source("http://www.bioconductor.org/biocLite.R")  > biocLite("Biostrings")    # 1   > library(Biostrings)   # R  
  • 4. Bioconductor/Biostrings  1  > ls(“package:Biostrings”)    #Biostrings   > x <‐ "CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT"  > DNAString(x)   #DNA     54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > s <‐ DNAString(x)    # s DNA (x)   > length(s)    [1] 54     # s  DNA 54  > length(x)    [1] 1    # x 1 
  • 5. Bioconductor/Biostrings  2   54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > alphabetFrequency(s, baseOnly=TRUE)  #         A  C  G  T other  [1,] 12 12 15 15     0  > reverseComplement(s)   #     54‐le>er "DNAString" instance  seq: AGCATCGATCAGCTAGCATCGATCAGCTAGCTAGCTAGCTAGCTACTACGTACG  > dna2rna(s)      #RNA (T ‐>U)    54‐le>er "RNAString" instance  seq: CGUACGUAGUAGCUAGCUAGCUAGCUAGCUGAUCGAUGCUAGCUGAUCGAUGCU 
  • 6. Bioconductor/Biostrings  3   54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > m1 <‐ matchPa>ern(“GCTA”, s)  #   > m1    Views on a 54‐le>er DNAString subject  subject: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  views:      start end width  [1]    12  15     4 [GCTA]  [2]    16  19     4 [GCTA]  [3]    20  23     4 [GCTA]  [4]    24  27     4 [GCTA]  [5]    38  41     4 [GCTA] 
  • 7. 1 Makorin1  22.61kb  (CDS:1446bp)  Makorin1‐p1  1592bp  region A   region B   region C  Makorin1‐p1 Makorin1 processed pseudogene   Makorin1‐p1 regionB Makorin1 regionB   Makorin1  mRNA regionB mRNA   regionC     regionB     regionC   regionB   regionB regionC    
  • 8. 1 Makorin1  22.61kb  (CDS:1446bp)  Makorin1‐p1  1592bp  region A   region B   region C    1) Makorin1‐p1 Makorin1   2)  Makorin1 mRNA       Makorin1‐p1 Makorin1 Makorin1‐p1       Makorin1   3) Makorin1 mRNA        Makorin1‐p1 (regionB   )    regionB regionC      
  • 9.  – 1‐   (Null hypothesis)          
  • 10.  – 2‐ 2   False negative  False positive  
  • 11. 2 Makorin1‐p1   Makorin1‐p1 ortholog rat   Makorin1‐p1   Mus musculus domes3cus 5   Subgenus M. booduga M. fragilicauda India+Lao/Thai booduga M. terricolor 1.5 M. macedonicus 4.3 mya M. spicilegus mya M. spretus M. musculus castaneus Palearctic Mus musculus M. m. domesticus M. m. molossinus M. caroli M. cookii Southeast Asia M. cervicolor cervicolor M. pahari Coelomys M. mattheyi Nannomys M. platythrix Pyromys Apodemus agrarius Micromys minutus Rattus norvegicus (from Suzuki et al. 2004 Mol. Phylogenet. Evol. 0.01 33:626-646, Figure 1, 4.)
  • 12. 3   Makorin1‐p1.fasta    Mus musculus domes3cus dom [Macintosh HD/ /tg03/bin] Mus musculus molossinus mol   Mus musculus castaneus cas Makorin1‐p1.fasta   Mus musculus musculus mus regionB 1‐617, regionC 618‐1256   Mus spretus spr Mus caroli car p distance   2 (number of differences)/   alignment   dom CCTGCCCCAA ATGTCAGATC ACATCTCACT TTGTCATTCC AAGTAATCAC TGGGTGGAGT spr1 .......... ...C...... .......... .......... ......GT.. .......... car1 .......... ...C.GA... ......A... ..T....... ....G.GT.. .........G dom‐spr1: 3/60 = 0.05  dom‐car1: 9/60 = 0.15  spr1‐car1: 6/60 =  0.10   
  • 13. region B region C regionB regionC number of differences p distance   region B                          bp region C                           bp pair  number of differences p distance number of differences p distance dom – mol dom – cas dom – mus dom – spr dom – car mol ‐ cas mol – mus mol – spr mol – car cas – mus cas – spr cas – car mus – spr  mus – car spr – car
  • 14. region B region C Makorin1‐p1 (region B, regionC)   p distance     1) domesticus Biostrings DNAString   2) B   3) B   4) C   5) C   6) domesticus DNAString B C   7)    8)  4  
  • 15. Biostrings p distance 1 library(Biostrings)  #R   #Makorin1‐p1.fasta domesticus ””   # DNA dom   > dom <‐ DNAString("")  #dom 1 617 domB   > domB <‐ substring(dom,1,617)  #domB lengthB (p distance )   > lengthB <‐ length(domB)  #dom 618 1256 domC   > domC <‐ substring(dom, 618,1256)  #domC lengthC (p distance )   > lengthC <‐ length(domC)  #   # lengthB   > lengthB  [1] 617 
  • 16. Biostrings p distance 2 #Makorin1‐p1.fasta molossinus ””   # DNA mol   > mol <‐ DNAString("")  #mol 1 617 molB   > molB <‐ substring(mol,1,617)  #mol 618 1256 molC   > molC <‐ substring(mol, 618,1256)  # castaneus(cas), musculus(mus), spretus(spr), caroli(car)  # number of differences p distance   #  
  • 17. Biostrings p distance 3 > x <‐ domB  > y <‐ molB  # x y( domB molB)   >  comp<‐ c(compareStrings(x,y))  # ? ?   > subt <‐ gsub("(['?'])", "", comp)  #subt DNA ide   > ide <‐ DNAString(subt)  #ide len   > len <‐ length(ide)  #x y dif   > dif <‐ (lengthB – len )  > dif   #x y   #regionB  p distance   > pdis <‐ dif/lengthB  > pdis  #p distance 4  
  • 18. Biostrings p distance 4 CotEditor pdistanceB.R   [Macintosh HD/ /tg03/bin]   comp <‐ c(compareStrings(x,y))  2 lengthB lengthC   subt <‐ gsub("(['?'])", "", comp)  pdistanceC.R bin   ide <‐ DNAString(subt)  len <‐ length(ide)  dif <‐ (lengthB ‐ len)  pdis <‐ dif/lengthB  x y   > x <‐  > y <‐  > source("/Users/tg03/bin/pdistanceB.R")  bin   > source("pdistanceB.R")  2   > dif  > pdis 
  • 19. region B region C ( ) region B                    617  bp region C                 639  bp pair  number of differences p distance number of differences p distance dom – mol 6 0.010 7   0.011  dom – cas 6 0.010 7 0.011  dom – mus 8 0.013 8 0.013  dom – spr 16 0.026 14 0.022  dom – car 30 0.049 39 0.061  mol – cas 0 0 0 0  mol – mus 4 0.006 1 0.002  mol – spr 14 0.023 17 0.027   mol – car 28 0.045 38 0.059 cas – mus 4 0.006 1 0.002  cas – spr 14 0.023 17 0.027 cas – car 28 0.045 38 0.059 mus – spr  14 0.023 18 0.028 mus – car 28 0.045 39 0.061 spr – car 32 0.052 37 0.058
  • 20.  1 1) regionB p distance x x   > x <‐ c(x )  2) regionC p distance y y   > y <‐ c(y )  3) x y   4) plot()   > plot(x,y,xlim=c( , ), ylim=c( , )) 
  • 21.  1  1) regionB p distance x x   > x <‐ c(0.010, 0.010, 0.013, 0.026, 0.049, 0, 0.006 ,0.023, 0.045, 0.006 , 0.023 , 0.045,  0.023, 0.045, 0.052 )  2) regionC p distance y y   > y <‐ c(0.011, 0.011, 0.013, 0.022, 0.061, 0, 0.002, 0.027, 0.059, 0.002, 0.027, 0.059,  0.028, 0.061, 0.058 )  3) max() x y   > max(x)  [1] 0.052   > max(y)  [1] 0.061  4) plot()   > plot(x,y,xlim=c(0,0.065), ylim=c(0,0.065)) 
  • 22. 2 4’)    > plot(x,y,xlab='regionB',ylab='regionC', xlim=c(0,0.065), ylim=c(0,0.065))  regionB regionC p distance   regionB regionC      
  • 23. 1   (d)         (x3,y3) d3 d4 (x1,y1) (x4,y4) d1 d2 (x2,y2)
  • 24. 2           > xdev <‐ (x‐mean(x))  # x   > ydev <‐ (y‐mean(y))  # y   > bmul<‐ xdev*ydev  # x y   > bnum <‐ sum(bmul)  # x y ( )  > bsqu <‐ xdev^2     # x 2   > bden <‐ sum(bsqu)  #  x 2 ( )  > b <‐ bnum/bden    #      ( )  > b  [1] 1.317939 
  • 25. 3       > a1 <‐ sum(y)/length(y)  > a2 <‐ b*(sum(x)/length(x))  > a <‐ a1‐a2  [1] ‐0.003636326  > abline (a,b)    #a b y = a + bx   
  • 26. regionB regionC y=x   regionB regionC   y=‐0.0036+1.3x   (y=‐0.0036+1.3x y=x )  regionB regionC    
  • 27.   p distance 1 ‐pdis_line.R‐    library("Biostrings");   x <‐"" dom <‐ "[domesticus ]";  mol <‐ "[molossinus ]";  cas <‐ "[castaneus ]";  mus <‐ "[musculus ]";  spr <‐ "[spretus ]";  car <‐ "[caroli ]";   seqs     <‐ c(dom,mol,cas,mus,spr,car); seqnames <‐ c("dom","mol","cas","mus","spr","car"); nseqs <‐ length(seqs); npoints <‐ length(x);  x = vector(length=npoints);  y = vector(length=npoints);  k = 0;    
  • 28.   p distance 2 for (i1 in 1:(nseqs‐1)){   for (i2 in (i1+1):nseqs ){     k = k + 1;  #    cat(sprintf("%d %d\n",i1,i2));     seq1 = DNAString(seqs[i1]);     seq2 = DNAString(seqs[i2]);     seq_b1 = substring( seq1, 1,   617 );     seq_c1 = substring( seq1, 618, 1256 );     seq_b2 = substring( seq2, 1,   617 );     seq_c2 = substring( seq2, 618, 1256 );     len_b  = length( seq_b1 );     cmp_b  = c(compareStrings(seq_b1,seq_b2));     sub_b  = gsub("(['?'])","",cmp_b);     subt_b = DNAString(sub_b);     dif_b  = length(subt_b);     n_b    = len_b ‐ dif_b;     pdis_b = n_b / len_b;   
  • 29.   p distance 3 ‐pdis_line.R‐  x[k]   = pdis_b;     len_c  = length( seq_c1 );     cmp_c  = c(compareStrings(seq_c1,seq_c2));     sub_c  = gsub("(['?'])","",cmp_c);     subt_c = DNAString(sub_c);     dif_c  = length(subt_c);     n_c    = len_c ‐ dif_c;     pdis_c = n_c / len_c;     y[k]   = pdis_c;     cat(sprintf('%s %s %d %g %g\n',seqnames[i1],seqnames[i2],k,pdis_b,pdis_c));    } } xdev <‐ x‐mean(x); ydev <‐ y‐mean(y); b    <‐ sum(xdev*ydev)/sum(xdev*xdev); a    <‐ mean(y) ‐ b*mean(x);   cat(sprintf('a=%g, b=%g\n',a,b));
  • 30.   p distance 4 pdis_line.R      R       ( bin path )  (p distance )  
  • 31.