SlideShare uma empresa Scribd logo
1 de 24
Baixar para ler offline
google-snappy   
     (machy)
google-snappy                          
Snappy is a compression/decompression library. It does not aim for maximum
compression, or compatibility with any other compression library; instead,
it aims for very high speeds and reasonable compression. For instance,
compared to the fastest mode of zlib, Snappy is an order of magnitude faster
for most inputs, but the resulting compressed files are anywhere from 20% to
100% bigger. (For more information, see "Performance", below.)

                                                   README                      

•  snappy           /                          snappy


                                        zlib
                    20    100%
      1                                               ”Performance”
•                               1.0.3
• 
   http://code.google.com/p/snappy/
•  google-gflags




•  google-gflags, google-snappy

     WARNING: Compiled with assertions enabled, will be slow.

     ./configure CXXFLAGS="-g -O2 -DNDEBUG" --with-gflags
     --with-gflags       gflags
     configure                       gflags
snappy_unittest                             
•                        snappy_unittest

• 
     ./snappy_unittest
• 
     ./snappy_unittest -run_microbenchmarks=false -write_compressed aaa.txt
                 aaa.txt.comp
• 
   ./snappy_unittest -run_microbenchmarks=false -write_uncompressed aaa.txt.comp
                          aaa.txt.comp.uncomp
•  zlib
   ./snappy_unittest -run_microbenchmarks=false -zlib testdata/*
•  snappy




                      
                    
                
            alice29.txt
                              
   149KB
            html_x_4
      400KB   HTML
                  400KB
             urls.10K
     1       URL     
              686KB
      baddata1.snappy
                         (?)
       27KB
            house.jpg
             
                      124KB
zlib                                                         
                  
                   
         
                
          
                      snappy
              99.1 MB/s
    293.3 MB/s
   59.8 %
   alice29.txt
       zlib fastest
        20.7 MB/s
     81.5 MB/s
   42.8 %
                      zlib default
         6.6 MB/s
     90.4 MB/s
   35.8 %
                      snappy
             230.2 MB/s
    557.9 MB/s
   23.6 %
   html_x_4
          zlib fastest
        45.6 MB/s
    154.5 MB/s
   16.5 %
                      zlib default
       20.7 MB/s
    177.7 MB/s
   13.0 %
                      snappy
             132.6 MB/s
    411.2 MB/s
   50.9 %
    urls.10K
         zlib fastest
        24.7 MB/s
     94.8 MB/s
   36.1 %
                      zlib default
        12.2 MB/s
    102.4 MB/s
   31.7 %
                      snappy
             137.5 MB/s    1068.7 MB/s
   97.0 %
baddata1.snappy
      zlib fastest
        12.3 MB/s
     57.0 MB/s
   84.1 %
                      zlib default
        10.8 MB/s
     58.9 MB/s
   83.4 %
                      snappy
             933.7 MB/s
   7271.6 MB/s
   99.9 %
   house.jpg
         zlib fastest
        11.9 MB/s
     89.6 MB/s
   99.6 %
                      zlib default
        11.5 MB/s
    122.4 MB/s
   99.6 %
zlib                                                               
                  
                   
             
                  
          
                      snappy
                  99.1 MB/s
      293.3 MB/s
   59.8 %
   alice29.txt
       zlib fastest
            20.7 MB/s
       81.5 MB/s
   42.8 %
                      zlib default
             6.6 MB/s
       90.4 MB/s
   35.8 %
                      snappy
                230.2 MB/s
       557.9 MB/s
   23.6 %
   html_x_4
          zlib fastest
            45.6 MB/s
      154.5 MB/s
   16.5 %
                      zlib default
           20.7 MB/s
      177.7 MB/s
   13.0 %
                      snappy
                132.6 MB/s
       411.2 MB/s
   50.9 %
    urls.10K
         zlib fastest
            24.7 MB/s
       94.8 MB/s
   36.1 %
                      zlib default
            12.2 MB/s
      102.4 MB/s
   31.7 %
                      snappy
                137.5 MB/s      1068.7 MB/s
    97.0 %
baddata1.snappy
      zlib fastest
            12.3 MB/s
       57.0 MB/s
   84.1 %
                                              zlib fastest(level=1) 5
                      zlib default
                                           3.5  10.8 MB/s
        58.9 MB/s
   83.4 %
                      snappy
             1.2 933.7 MB/s
                                                1.4           7271.6 MB/s
                                                                      
      99.9 %
   house.jpg
         zlib fastest
            11.9 MB/s
       89.6 MB/s
   99.6 %
                      zlib default
            11.5 MB/s
      122.4 MB/s
   99.6 %
zlib                                                             
                  
                   
         
                    
          
                      snappy
              99.1 MB/s
        293.3 MB/s
   59.8 %
   alice29.txt
       zlib fastest
        20.7 MB/s
         81.5 MB/s
   42.8 %
                      zlib default
         6.6 MB/s
         90.4 MB/s
   35.8 %
                      snappy
             230.2 MB/s
        557.9 MB/s
   23.6 %
                                           snappy
   html_x_4
          zlib fastest
        45.6 MB/s
        154.5 MB/s
   16.5 %
                      zlib default
       20.7 MB/s
   
    177.7 MB/s
   13.0 %
                      snappy
             132.6 MB/s
        411.2 MB/s
   50.9 %
    urls.10K
         zlib fastest
        24.7 MB/s
         94.8 MB/s
   36.1 %
                      zlib default
        12.2 MB/s
        102.4 MB/s
   31.7 %
                      snappy
             137.5 MB/s        1068.7 MB/s
   97.0 %
baddata1.snappy
      zlib fastest
        12.3 MB/s
         57.0 MB/s
   84.1 %
                      zlib default
        10.8 MB/s
         58.9 MB/s
   83.4 %
                      snappy
             933.7 MB/s
       7271.6 MB/s
   99.9 %
   house.jpg
         zlib fastest
        11.9 MB/s
         89.6 MB/s
   99.6 %
                      zlib default
        11.5 MB/s
        122.4 MB/s
   99.6 %
lzo                                                             
                  
                 
               
                
           
                      snappy
                  85.9 MB/s
    259.7 MB/s
    59.8 %
   alice29.txt
                      lzo
                     90.6 MB/s
    178.2 MB/s
    57.8 %
                      snappy
              206.7 MB/s
       463.1 MB/s
    23.6 %
   html_x_4
                      lzo
                 203.3 MB/s
       421.6 MB/s
    21.8 %
                      snappy
              119.4 MB/s
       363.2 MB/s
    50.9 %
    urls.10K
                      lzo
                 125.3 MB/s
       308.5 MB/s
    49.3 %
                      snappy
              109.6 MB/s       1048.1 MB/s
    97.0 %
baddata1.snappy
                      lzo
                 353.4 MB/s
      2267.1 MB/s
   100.4 %
                      snappy
              846.4 MB/s
      6642.0 MB/s
    99.9 %
   house.jpg
                      lzo
                 672.6 MB/s
      2024.4 MB/s
   100.3 %


                                hadoop               lzo
           snappy                                           snappy
                                         lzo
snappy       zlib       



zlib(deflate)

         
          
               
   




snappy
government_of_the_people,_by_the_people,_for_the_people



government_of_the_people,_by[15,13]for[16,11]

                             15        13   

HAHAHAHAHA...


HA[2,8]...
18bit




15bit
snappy             
                           LITERAL
                                                 




         LITERAL       

   
        

                             11byte

                           64byte
           4byte   
                                       64byte
snappy             
                           LITERAL
                                                 




            Byte                       
         LITERAL       

   
        

                             11byte

                           64byte
           4byte   
                                       64byte
snappy                                                      
               16KB      fragment             



                                          




      fragment (16KB)
              fragment (16KB)
   fragment



fragment
snappy                                                          2 
             (     8192        )            byte                            

4byte                                                     

government_of_the_people,_by_the_people,_for_the_people
                  13 : Hash(“f_th”)=7
             

        0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
        6
       0
 4
             1
 13
              5
 8
 10
 11
   3

government_of_the_people,_by_the_people,_for_the_people
                   14 : Hash(“_the”)=9
            

        0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
        6
       0
 4
             1
 13
     14
 5
 8
 10
 11
        3
snappy                                                    3 
                                                   

government_of_the_people,_by_the_people,_for_the_people

                                  29 : Hash(“_the”)=9
      0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
       6
 23
 0
 4
 27
 26
 22
 13
 24
 14
 25
 15
 10
 28
 16
 20
     =13
government_of_the_people,_by_the_people,_for_the_people

[government_of_the_people,_by][15,13]
                                                    32
            
government_of_the_people,_by_the_people,_for_the_people
static inline uint32 HashBytes(uint32 bytes, int shift) {
   // Multiplicative hash: scale the input word by a fixed odd constant and
   // discard the low `shift` bits of the product, keeping only the high bits.
   const uint32 multiplier = 0x1e35a7bd;
   const uint32 product = bytes * multiplier;
   return product >> shift;
}
static inline uint32 HashBytes(uint32 bytes, int shift) {
   // Multiplicative hash: scale the input word by a fixed odd constant and
   // discard the low `shift` bits of the product, keeping only the high bits.
   const uint32 multiplier = 0x1e35a7bd;
   const uint32 product = bytes * multiplier;
   return product >> shift;
}

                   4byte
    4byte                         32bit
                         CPU                               
    // UNALIGNED_LOAD32: read a uint32 from an arbitrary address.
    // Two implementations are selected at compile time by target architecture.
    #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
    // On the listed architectures the value is read directly by dereferencing
    // the pointer as a const uint32 *.
    #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
    #else
    // On every other target, copy the bytes into a local with memcpy instead
    // of dereferencing a uint32 pointer.
    inline uint32 UNALIGNED_LOAD32(const void *p) {
      uint32 t;
      memcpy(&t, p, sizeof t);
      return t;
    }
    #endif  // closes the architecture check; the original snippet left it unterminated
static inline uint32 HashBytes(uint32 bytes, int shift) {
   // Multiplicative hash: scale the input word by a fixed odd constant and
   // discard the low `shift` bits of the product, keeping only the high bits.
   const uint32 multiplier = 0x1e35a7bd;
   const uint32 product = bytes * multiplier;
   return product >> shift;
}
shift
               8192     (2   13   )       32-13=19
              8192
JPEG

(~5% performance, ~0.1% density)       

...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz...


...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz...
                       32                  2       

...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz...


...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz...
                 32                    1           
16KB                               1008
•              4byte

• 


•        CPU

• 
• 



     (   zlib   deflateBound(),compressBound()
                         )
•  snappy


• 
•  16KB     fragment
                   CPU

•                             (zlib
                         )

Mais conteúdo relacionado

Destaque

snappyについて
snappyについてsnappyについて
snappyについてmoai kids
 
Gamification: o conceito de jogo na rede social Foursquare
Gamification: o conceito de jogo na rede social FoursquareGamification: o conceito de jogo na rede social Foursquare
Gamification: o conceito de jogo na rede social FoursquareCarolina Cruz
 
[Assignment 7.1] Market reshearch - Audit vs panel
[Assignment 7.1] Market reshearch - Audit vs panel[Assignment 7.1] Market reshearch - Audit vs panel
[Assignment 7.1] Market reshearch - Audit vs panelHung Van
 
Report of servay
Report of servayReport of servay
Report of servayAllenChuah
 
Life of an Fluentd event
Life of an Fluentd eventLife of an Fluentd event
Life of an Fluentd eventKiyoto Tamura
 
Basics Of Surveying
Basics Of SurveyingBasics Of Surveying
Basics Of Surveyingstooty s
 
Presentation of surveying
Presentation of surveyingPresentation of surveying
Presentation of surveyingShaker Ullah
 

Destaque (9)

snappyについて
snappyについてsnappyについて
snappyについて
 
Gamification: o conceito de jogo na rede social Foursquare
Gamification: o conceito de jogo na rede social FoursquareGamification: o conceito de jogo na rede social Foursquare
Gamification: o conceito de jogo na rede social Foursquare
 
Servay
ServayServay
Servay
 
[Assignment 7.1] Market reshearch - Audit vs panel
[Assignment 7.1] Market reshearch - Audit vs panel[Assignment 7.1] Market reshearch - Audit vs panel
[Assignment 7.1] Market reshearch - Audit vs panel
 
Report of servay
Report of servayReport of servay
Report of servay
 
Life of an Fluentd event
Life of an Fluentd eventLife of an Fluentd event
Life of an Fluentd event
 
Basics Of Surveying
Basics Of SurveyingBasics Of Surveying
Basics Of Surveying
 
Surveying
Surveying Surveying
Surveying
 
Presentation of surveying
Presentation of surveyingPresentation of surveying
Presentation of surveying
 

Último

Generative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfGenerative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfIngrid Airi González
 
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...Wes McKinney
 
A Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersA Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersNicole Novielli
 
Time Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsTime Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsNathaniel Shimoni
 
Potential of AI (Generative AI) in Business: Learnings and Insights
Potential of AI (Generative AI) in Business: Learnings and InsightsPotential of AI (Generative AI) in Business: Learnings and Insights
Potential of AI (Generative AI) in Business: Learnings and InsightsRavi Sanghani
 
Moving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfMoving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfLoriGlavin3
 
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesHow to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesThousandEyes
 
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical InfrastructureVarsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructureitnewsafrica
 
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc
 
MuleSoft Online Meetup Group - B2B Crash Course: Release SparkNotes
MuleSoft Online Meetup Group - B2B Crash Course: Release SparkNotesMuleSoft Online Meetup Group - B2B Crash Course: Release SparkNotes
MuleSoft Online Meetup Group - B2B Crash Course: Release SparkNotesManik S Magar
 
Glenn Lazarus- Why Your Observability Strategy Needs Security Observability
Glenn Lazarus- Why Your Observability Strategy Needs Security ObservabilityGlenn Lazarus- Why Your Observability Strategy Needs Security Observability
Glenn Lazarus- Why Your Observability Strategy Needs Security Observabilityitnewsafrica
 
QCon London: Mastering long-running processes in modern architectures
QCon London: Mastering long-running processes in modern architecturesQCon London: Mastering long-running processes in modern architectures
QCon London: Mastering long-running processes in modern architecturesBernd Ruecker
 
Design pattern talk by Kaya Weers - 2024 (v2)
Design pattern talk by Kaya Weers - 2024 (v2)Design pattern talk by Kaya Weers - 2024 (v2)
Design pattern talk by Kaya Weers - 2024 (v2)Kaya Weers
 
2024 April Patch Tuesday
2024 April Patch Tuesday2024 April Patch Tuesday
2024 April Patch TuesdayIvanti
 
Emixa Mendix Meetup 11 April 2024 about Mendix Native development
Emixa Mendix Meetup 11 April 2024 about Mendix Native developmentEmixa Mendix Meetup 11 April 2024 about Mendix Native development
Emixa Mendix Meetup 11 April 2024 about Mendix Native developmentPim van der Noll
 
How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.Curtis Poe
 
Testing tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesTesting tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesKari Kakkonen
 
Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...itnewsafrica
 
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS:  6 Ways to Automate Your Data IntegrationBridging Between CAD & GIS:  6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integrationmarketing932765
 
Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Hiroshi SHIBATA
 

Último (20)

Generative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfGenerative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdf
 
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
 
A Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersA Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software Developers
 
Time Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsTime Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directions
 
Potential of AI (Generative AI) in Business: Learnings and Insights
Potential of AI (Generative AI) in Business: Learnings and InsightsPotential of AI (Generative AI) in Business: Learnings and Insights
Potential of AI (Generative AI) in Business: Learnings and Insights
 
Moving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfMoving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdf
 
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesHow to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
 
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical InfrastructureVarsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
 
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
 
MuleSoft Online Meetup Group - B2B Crash Course: Release SparkNotes
MuleSoft Online Meetup Group - B2B Crash Course: Release SparkNotesMuleSoft Online Meetup Group - B2B Crash Course: Release SparkNotes
MuleSoft Online Meetup Group - B2B Crash Course: Release SparkNotes
 
Glenn Lazarus- Why Your Observability Strategy Needs Security Observability
Glenn Lazarus- Why Your Observability Strategy Needs Security ObservabilityGlenn Lazarus- Why Your Observability Strategy Needs Security Observability
Glenn Lazarus- Why Your Observability Strategy Needs Security Observability
 
QCon London: Mastering long-running processes in modern architectures
QCon London: Mastering long-running processes in modern architecturesQCon London: Mastering long-running processes in modern architectures
QCon London: Mastering long-running processes in modern architectures
 
Design pattern talk by Kaya Weers - 2024 (v2)
Design pattern talk by Kaya Weers - 2024 (v2)Design pattern talk by Kaya Weers - 2024 (v2)
Design pattern talk by Kaya Weers - 2024 (v2)
 
2024 April Patch Tuesday
2024 April Patch Tuesday2024 April Patch Tuesday
2024 April Patch Tuesday
 
Emixa Mendix Meetup 11 April 2024 about Mendix Native development
Emixa Mendix Meetup 11 April 2024 about Mendix Native developmentEmixa Mendix Meetup 11 April 2024 about Mendix Native development
Emixa Mendix Meetup 11 April 2024 about Mendix Native development
 
How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.
 
Testing tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesTesting tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examples
 
Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...
 
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS:  6 Ways to Automate Your Data IntegrationBridging Between CAD & GIS:  6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
 
Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024
 

Snappy servay

  • 1. google-snappy (machy)
  • 2. google-snappy Snappy is a compression/decompression library. It does not aim for maximum compression, or compatibility with any other compression library; instead, it aims for very high speeds and reasonable compression. For instance, compared to the fastest mode of zlib, Snappy is an order of magnitude faster for most inputs, but the resulting compressed files are anywhere from 20% to 100% bigger. (For more information, see "Performance", below.) README •  snappy / snappy zlib 20 100% 1 ”Performance”
  • 3. •  1.0.3 •  http://code.google.com/p/snappy/ •  google-gflags •  google-gflags, google-snappy WARNING: Compiled with assertions enabled, will be slow. ./configure CXXFLAGS=“-g -O2 –DNDEBUG” --with-gflags --with-gflags gflags configure gflags
  • 4. snappy_unittest •  snappy_unittest •  ./snappy_unittest •  ./snappy_unittest -run_microbenchmarks=false - write_compressed aaa.txt aaa.txt.comp •  ./snappy_unittest -run_microbenchmarks=false - write_uncompressed aaa.txt.comp aaa.txt.comp.uncomp •  zlib ./snappy_unittest -run_microbenchmarks=false -zlib testdata/*
  • 5. •  snappy alice29.txt 149KB html_x_4 400KB HTML 400KB urls.10K 1 URL 686KB baddata1.snappy (?) 27KB house.jpg 124KB
  • 6. zlib snappy 99.1 MB/s 293.3 MB/s 59.8 % alice29.txt zlib fastest 20.7 MB/s 81.5 MB/s 42.8 % zlib default 6.6 MB/s 90.4 MB/s 35.8 % snappy 230.2 MB/s 557.9 MB/s 23.6 % html_x_4 zlib fastest 45.6 MB/s 154.5 MB/s 16.5 % zlib default 20.7 MB /s 177.7 MB/s 13.0 % snappy 132.6 MB/s 411.2 MB/s 50.9 % urls.10K zlib fastest 24.7 MB/s 94.8 MB/s 36.1 % zlib default 12.2 MB/s 102.4 MB/s 31.7 % snappy 137.5 MB/s 1068.7 MB/s 97.0 % baddata1.snappy zlib fastest 12.3 MB/s 57.0 MB/s 84.1 % zlib default 10.8 MB/s 58.9 MB/s 83.4 % snappy 933.7 MB/s 7271.6 MB/s 99.9 % house.jpg zlib fastest 11.9 MB/s 89.6 MB/s 99.6 % zlib default 11.5 MB/s 122.4 MB/s 99.6 %
  • 7. zlib snappy 99.1 MB/s 293.3 MB/s 59.8 % alice29.txt zlib fastest 20.7 MB/s 81.5 MB/s 42.8 % zlib default 6.6 MB/s 90.4 MB/s 35.8 % snappy 230.2 MB/s 557.9 MB/s 23.6 % html_x_4 zlib fastest 45.6 MB/s 154.5 MB/s 16.5 % zlib default 20.7 MB /s 177.7 MB/s 13.0 % snappy 132.6 MB/s 411.2 MB/s 50.9 % urls.10K zlib fastest 24.7 MB/s 94.8 MB/s 36.1 % zlib default 12.2 MB/s 102.4 MB/s 31.7 % snappy 137.5 MB/s 1068.7 MB/s 97.0 % baddata1.snappy zlib fastest 12.3 MB/s 57.0 MB/s 84.1 % zlib fastest(level=1) 5 zlib default 3.510.8 MB/s 58.9 MB/s 83.4 % snappy 1.2 933.7 MB/s 1.4 7271.6 MB/s 99.9 % house.jpg zlib fastest 11.9 MB/s 89.6 MB/s 99.6 % zlib default 11.5 MB/s 122.4 MB/s 99.6 %
  • 8. zlib snappy 99.1 MB/s 293.3 MB/s 59.8 % alice29.txt zlib fastest 20.7 MB/s 81.5 MB/s 42.8 % zlib default 6.6 MB/s 90.4 MB/s 35.8 % snappy 230.2 MB/s 557.9 MB/s 23.6 % snappy html_x_4 zlib fastest 45.6 MB/s 154.5 MB/s 16.5 % zlib default 20.7 MB /s 177.7 MB/s 13.0 % snappy 132.6 MB/s 411.2 MB/s 50.9 % urls.10K zlib fastest 24.7 MB/s 94.8 MB/s 36.1 % zlib default 12.2 MB/s 102.4 MB/s 31.7 % snappy 137.5 MB/s 1068.7 MB/s 97.0 % baddata1.snappy zlib fastest 12.3 MB/s 57.0 MB/s 84.1 % zlib default 10.8 MB/s 58.9 MB/s 83.4 % snappy 933.7 MB/s 7271.6 MB/s 99.9 % house.jpg zlib fastest 11.9 MB/s 89.6 MB/s 99.6 % zlib default 11.5 MB/s 122.4 MB/s 99.6 %
  • 9. lzo snappy 85.9 MB/s 259.7 MB/s 59.8 % alice29.txt lzo 90.6 MB/s 178.2 MB/s 57.8 % snappy 206.7 MB/s 463.1 MB/s 23.6 % html_x_4 lzo 203.3 MB/s 421.6 MB/s 21.8 % snappy 119.4 MB/s 363.2 MB/s 50.9 % urls.10K lzo 125.3 MB/s 308.5 MB/s 49.3 % snappy 109.6 MB/s 1048.1 MB/s 97.0 % baddata1.snappy lzo 353.4 MB/s 2267.1 MB/s 100.4 % snappy 846.4 MB/s 6642.0 MB/s 99.9 % house.jpg lzo 672.6 MB/s 2024.4 MB/s 100.3 % hadoop lzo snappy snappy lzo
  • 10. snappy zlib zlib(deflate) snappy
  • 13. snappy LITERAL LITERAL 11byte 64byte 4byte 64byte
  • 14. snappy LITERAL Byte LITERAL 11byte 64byte 4byte 64byte
  • 15. snappy 16KB fragment fragment (16KB) fragment (16KB) fragment fragment
  • 16. snappy 2 ( 8192 ) byte 4byte government_of_the_people,_by_the_people,_for_the_people 13 : Hash(“f_th”)=7 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 6 0 4 1 13 5 8 10 11 3 government_of_the_people,_by_the_people,_for_the_people 14 : Hash(“_the”)=9 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 6 0 4 1 13 14 5 8 10 11 3
  • 17. snappy 3 government_of_the_people,_by_the_people,_for_the_people 29 : Hash(“_the”)=9 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 6 23 0 4 27 26 22 13 24 14 25 15 10 28 16 20 =13 government_of_the_people,_by_the_people,_for_the_people [government_of_the_people,_by][15,13] 32 government_of_the_people,_by_the_people,_for_the_people
  • 18. static inline uint32 HashBytes(uint32 bytes, int shift) { uint32 kMul = 0x1e35a7bd; return (bytes * kMul) >> shift; }
  • 19. static inline uint32 HashBytes(uint32 bytes, int shift) { uint32 kMul = 0x1e35a7bd; return (bytes * kMul) >> shift; } 4byte 4byte 32bit CPU #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) #else inline uint32 UNALIGNED_LOAD32(const void *p) { uint32 t; memcpy(&t, p, sizeof t); return t; }
  • 20. static inline uint32 HashBytes(uint32 bytes, int shift) { uint32 kMul = 0x1e35a7bd; return (bytes * kMul) >> shift; } shift 8192 (2 13 ) 32-13=19 8192
  • 21. JPEG (~5% performance, ~0.1% density) ...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz... ...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz... 32 2 ...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz... ...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz... 32 1 16KB 1008
  • 22. •  4byte •  •  CPU • 
  • 23. •  ( zlib deflateBound(),compressBound() )
  • 24. •  snappy •  •  16KB fragment CPU •  (zlib )