SlideShare uma empresa Scribd logo
1 de 42
Java
(      Unicode)

             2011 4    16
            twitter: @zaki50
Who am I

•          YAMAZAKI Makoto(twitter: @zaki50)
•   Android

    •
    •   StickyShortcut
•           Java
•           (CharacterSet)
    (Encoding)

•

• UTF-16

• UTF-8
•   Unicode 6.0
    11
•
• US ASCII
• Shift JIS
• JIS X 0208
• UCS-2
• UCS-4
•
                        Unicode

    • UTF-8
    • UTF-16
    • UTF-32
    • ...
               (   : US ASCII, Shift JIS)
Unicode
Unicode
•



• Xerox              Microsoft, Apple, Sun
    Microsystems, HP, JUST System
          The Unicode Consortium

•         iso10646
Unicode iso10646


•

•
    Unicode
Unicode
•       The Unicode Consortium
    ( http://www.unicode.org/ )

•



•
•       Unicode




    •

    •
(            , Ligature)
•



    •   (U+3075) +   (U+309A) =

    •
                (             (U+3077) )
Unicode

• Unicode




  Unicode
• NFC(Normalization Form C)
 • NFD(Normalization Form D)
 • NFKC(Normalization Form KC)
 • NFKD(Normalization Form KD)


C(Composition,   )/D(Decomposition,   )
          K(Compatibility,   )
•      NFC

• MacOS X          (HFS+)
             NFD

• NFKC, NFKD
Unicode
• C(Composition)




• D(Decomposition)
Unicode
• K(Compatibility)


         1




     :       (U+3000)        (U+0020)
             (     )    (5   )
Unicode

     K        K




C




D
Eclipse
UTF-16
UTF-16

•

    • Java   String

    • Windows
CodePoint                       UTF-16




U+0000-U+FFFF xxxx xxxx xxxx xxxx       xxxx xxxx xxxx xxxx



 U+010000-U      0000 0000 000u uuuu   1101 10ww wwxx xxxx
 +0010FFFF       xxxx xxxx xxxx xxxx    1101 11xx xxxx xxxx

                                           x,u,w ∈ {0,1}
                                         wwww = uuuuu - 1
UTF-16


•   2

•   2
• 1     16       16bit * 2

 • 1         16bit                16
                      20bit

 • U+D800-U+DFFF
        (11bit                )

 • 0xD800-0xDBFF, 0xDC00-0xDFFF
• UTF-16       2       1


       2

U+3000 = 0x3000       0x30, 0x00   UTF-16BE
U+3000 = 0x3000       0x00, 0x30   UTF-16LE
BOM(byte order mark)
•                  U+FEFF



    • U+FFFE

    •                    U+FEFF
               ZERO WIDTH NON-
        BREAKING SPACE
UTF-8
CodePoint                          bit                         bit

   U+00-U+7F                         0xxx xxxx                 7bits

U+0080-U+07FF                   110y yyyx 10xx xxxx            11bits

U+0800-U+FFFF           1110 yyyy 10yx xxxx (10xx xxxx) * 1    16bits

 U+010000-U+1FFFFF      1111 0yyy 10yy xxxx (10xx xxxx) * 2    21bits

U+00200000-U+03FFFFFF   1111 10yy 10yy yxxx (10xx xxxx) * 3    26bits

U+04000000-U+7FFFFFFF   1111 110y 10yy yyxx (10xx xxxx) * 4    31bits

                  x,y ∈ {0,1}                                  y    1
UTF-8
• US ASCII                US
    ASCII

•

    • 0x80-0xbf   2   ,



• 2
•



    •
• UTF-8    1




•         BOM
BOM(byte order mark)
•                   0xEF, 0xBB, 0xBF



• byte order mark



• UTF-8
1-6          2^31
UTF-8
         (1-4)        (2^21)


UTF-16   2-4     2^16-2*2^10+2^20



UTF-32    4         2^16+2^20
•             (CharacterSet)       (Encoding)



•             1



•             1
    (UTF-16                    )     2
• The Unicode Consortium(http://www.unicode.org/)
  • Unicode 6.0.0(http://www.unicode.org/versions/Unicode6.0.0/)
• Wikipedia
  • Unicode(http://ja.wikipedia.org/wiki/Unicode)
  • UTF-8(http://ja.wikipedia.org/wiki/UTF-8)
  • UTF-16(http://ja.wikipedia.org/wiki/UTF-16)
• Unicode                 (http://homepage1.nifty.com/nomenclator/
  unicode/normalization.htm)
(   )
1
                                   Unicode
package org.zakky.gudanama;

import java.text.Normalizer;

/**
 * Prints the code points of a sample string before and after each of the
 * four Unicode normalization forms (NFC, NFD, NFKC, NFKD).
 *
 * <p>The sample consists of U+03D3, U+03D4 and U+1E9B, three characters that
 * have both a canonical and a compatibility decomposition, so every form
 * produces a visibly different code point sequence.
 */
public class NormalizeMain {

    /**
     * Runs the demonstration and writes one line per form to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Source text: U+03D3, U+03D4, U+1E9B (written as escapes so the
        // file does not depend on the source encoding).
        final String src = "\u03d3\u03d4\u1e9b";
        printCodePoints(src);

        // Expected: U+03D3 U+03D4 U+1E9B (already composed)
        System.out.print("NFC: ");
        printCodePoints(Normalizer.normalize(src, Normalizer.Form.NFC));

        // Expected: U+03D2 U+0301 U+03D2 U+0308 U+017F U+0307
        System.out.print("NFD: ");
        printCodePoints(Normalizer.normalize(src, Normalizer.Form.NFD));

        // Expected: U+038E U+03AB U+1E61
        System.out.print("NFKC: ");
        printCodePoints(Normalizer.normalize(src, Normalizer.Form.NFKC));

        // Expected: U+03A5 U+0301 U+03A5 U+0308 U+0073 U+0307
        System.out.print("NFKD: ");
        printCodePoints(Normalizer.normalize(src, Normalizer.Form.NFKD));
    }

    /**
     * Prints every code point of {@code str} in {@code U+XXXX} notation,
     * separated by single spaces and followed by a line break.
     *
     * <p>Surrogate pairs are reported as one supplementary code point; an
     * unpaired surrogate is reported as its own value instead of failing.
     *
     * @param str the text to dump; must not be {@code null}
     */
    private static void printCodePoints(String str) {
        final StringBuilder sb = new StringBuilder();
        int index = 0;
        while (index < str.length()) {
            // codePointAt never reads past the end of the string, unlike a
            // manual charAt(index + 1) after a high surrogate.
            final int codePoint = str.codePointAt(index);
            if (sb.length() != 0) {
                sb.append(' ');
            }
            // The U+ notation is hexadecimal, at least four digits wide.
            sb.append("U+").append(String.format("%04X", codePoint));
            index += Character.charCount(codePoint);
        }
        System.out.println(sb);
    }
}
2

package org.zakky.gudanama;

/**
 * Shows how one supplementary character is stored in a Java {@code String}
 * as a UTF-16 surrogate pair: two {@code char} values, one code point.
 */
public class SurrogatePairMain {

    /**
     * Prints the length, code point count, code point value and the
     * high/low surrogate classification of each {@code char} of the sample.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // U+20BB7 is outside the Basic Multilingual Plane, so it needs a
        // surrogate pair. It is written with escapes so the literal survives
        // any source encoding (a single-char literal here would make
        // charAt(1) throw).
        final String str = "\uD842\uDFB7";
        final char first = str.charAt(0);
        final char second = str.charAt(1);
        final int codePoint = Character.toCodePoint(first, second);

        System.out.println("str: " + str);
        System.out.println("length: " + str.length());
        System.out.println("code point count: " + str.codePointCount(0, str.length()));
        System.out.println("code point of str: "
                + codePoint + "(U+" + Integer.toHexString(codePoint) + ")");
        System.out.println("str[0] is high surrogate: " + Character.isHighSurrogate(first));
        System.out.println("str[0] is low surrogate: " + Character.isLowSurrogate(first));
        System.out.println("str[1] is high surrogate: " + Character.isHighSurrogate(second));
        System.out.println("str[1] is low surrogate: " + Character.isLowSurrogate(second));
        // Re-assembling the two chars yields the original character again.
        System.out.println(new String(new char[] {first, second}));
    }
}
3
                         BOM
package org.zakky.gudanama;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;

/**
 * Demonstrates skipping a leading byte order mark (U+FEFF) when reading
 * decoded text: standard input is read as UTF-8 and echoed to standard
 * output without the BOM.
 */
public class BomMain {

    /**
     * Copies standard input to standard output, dropping a leading BOM.
     *
     * @param args ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        // A real reader is required here; a null placeholder would fail on
        // the first read. System.in is intentionally not closed.
        final Reader r = skipBom(new java.io.InputStreamReader(
                System.in, java.nio.charset.StandardCharsets.UTF_8));

        final StringBuilder text = new StringBuilder();
        final char[] chunk = new char[4096];
        int count;
        while ((count = r.read(chunk)) != -1) {
            text.append(chunk, 0, count);
        }
        System.out.print(text);
        System.out.flush();
    }

    /**
     * Returns a reader positioned just after a leading BOM, if there is one.
     *
     * @param in the reader to inspect; its first character is consumed
     * @return {@code in} itself when it was empty or started with a BOM,
     *         otherwise a {@link PushbackReader} that yields the consumed
     *         character again
     * @throws IOException if reading from {@code in} fails
     */
    private static Reader skipBom(Reader in) throws IOException {
        final int first = in.read();
        // On end of stream there is nothing to push back; unread(-1) would
        // inject a bogus character into the stream.
        if (first == -1 || isBom(first)) {
            return in;
        }
        final PushbackReader pushbackReader = new PushbackReader(in);
        pushbackReader.unread(first);
        return pushbackReader;
    }

    /** Tells whether {@code codepoint} is the byte order mark U+FEFF. */
    private static boolean isBom(int codepoint) {
        return codepoint == 0xfeff;
    }
}

Mais conteúdo relacionado

Mais procurados

MongoDBで作るソーシャルデータ新解析基盤
MongoDBで作るソーシャルデータ新解析基盤MongoDBで作るソーシャルデータ新解析基盤
MongoDBで作るソーシャルデータ新解析基盤Takahiro Inoue
 
Inside MongoDB: the Internals of an Open-Source Database
Inside MongoDB: the Internals of an Open-Source DatabaseInside MongoDB: the Internals of an Open-Source Database
Inside MongoDB: the Internals of an Open-Source DatabaseMike Dirolf
 
The Ring programming language version 1.8 book - Part 59 of 202
The Ring programming language version 1.8 book - Part 59 of 202The Ring programming language version 1.8 book - Part 59 of 202
The Ring programming language version 1.8 book - Part 59 of 202Mahmoud Samir Fayed
 
Reverse Engineering a (M)MORPG
Reverse Engineering a (M)MORPGReverse Engineering a (M)MORPG
Reverse Engineering a (M)MORPGAntonin Beaujeant
 
Zend Framework 1 + Doctrine 2
Zend Framework 1 + Doctrine 2Zend Framework 1 + Doctrine 2
Zend Framework 1 + Doctrine 2Ralph Schindler
 
MongoDB Performance Tuning
MongoDB Performance TuningMongoDB Performance Tuning
MongoDB Performance TuningPuneet Behl
 
03 standard class library
03 standard class library03 standard class library
03 standard class libraryeleksdev
 
The Ring programming language version 1.6 book - Part 28 of 189
The Ring programming language version 1.6 book - Part 28 of 189The Ring programming language version 1.6 book - Part 28 of 189
The Ring programming language version 1.6 book - Part 28 of 189Mahmoud Samir Fayed
 
The Ring programming language version 1.10 book - Part 64 of 212
The Ring programming language version 1.10 book - Part 64 of 212The Ring programming language version 1.10 book - Part 64 of 212
The Ring programming language version 1.10 book - Part 64 of 212Mahmoud Samir Fayed
 
MongoDB全機能解説2
MongoDB全機能解説2MongoDB全機能解説2
MongoDB全機能解説2Takahiro Inoue
 
Patterns for slick database applications
Patterns for slick database applicationsPatterns for slick database applications
Patterns for slick database applicationsSkills Matter
 
Beyond Breakpoints: Advanced Debugging with XCode
Beyond Breakpoints: Advanced Debugging with XCodeBeyond Breakpoints: Advanced Debugging with XCode
Beyond Breakpoints: Advanced Debugging with XCodeAijaz Ansari
 
The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189Mahmoud Samir Fayed
 
The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212Mahmoud Samir Fayed
 
The Ring programming language version 1.5.2 book - Part 52 of 181
The Ring programming language version 1.5.2 book - Part 52 of 181The Ring programming language version 1.5.2 book - Part 52 of 181
The Ring programming language version 1.5.2 book - Part 52 of 181Mahmoud Samir Fayed
 
Python 내장 함수
Python 내장 함수Python 내장 함수
Python 내장 함수용 최
 

Mais procurados (20)

MongoDBで作るソーシャルデータ新解析基盤
MongoDBで作るソーシャルデータ新解析基盤MongoDBで作るソーシャルデータ新解析基盤
MongoDBで作るソーシャルデータ新解析基盤
 
Inside MongoDB: the Internals of an Open-Source Database
Inside MongoDB: the Internals of an Open-Source DatabaseInside MongoDB: the Internals of an Open-Source Database
Inside MongoDB: the Internals of an Open-Source Database
 
The Ring programming language version 1.8 book - Part 59 of 202
The Ring programming language version 1.8 book - Part 59 of 202The Ring programming language version 1.8 book - Part 59 of 202
The Ring programming language version 1.8 book - Part 59 of 202
 
Reverse Engineering a (M)MORPG
Reverse Engineering a (M)MORPGReverse Engineering a (M)MORPG
Reverse Engineering a (M)MORPG
 
Zend Framework 1 + Doctrine 2
Zend Framework 1 + Doctrine 2Zend Framework 1 + Doctrine 2
Zend Framework 1 + Doctrine 2
 
Clojure Deep Dive
Clojure Deep DiveClojure Deep Dive
Clojure Deep Dive
 
MongoDB Performance Tuning
MongoDB Performance TuningMongoDB Performance Tuning
MongoDB Performance Tuning
 
Intro
IntroIntro
Intro
 
03 standard class library
03 standard class library03 standard class library
03 standard class library
 
The Ring programming language version 1.6 book - Part 28 of 189
The Ring programming language version 1.6 book - Part 28 of 189The Ring programming language version 1.6 book - Part 28 of 189
The Ring programming language version 1.6 book - Part 28 of 189
 
The Ring programming language version 1.10 book - Part 64 of 212
The Ring programming language version 1.10 book - Part 64 of 212The Ring programming language version 1.10 book - Part 64 of 212
The Ring programming language version 1.10 book - Part 64 of 212
 
MongoDB全機能解説2
MongoDB全機能解説2MongoDB全機能解説2
MongoDB全機能解説2
 
Intro22
Intro22Intro22
Intro22
 
Patterns for slick database applications
Patterns for slick database applicationsPatterns for slick database applications
Patterns for slick database applications
 
Beyond Breakpoints: Advanced Debugging with XCode
Beyond Breakpoints: Advanced Debugging with XCodeBeyond Breakpoints: Advanced Debugging with XCode
Beyond Breakpoints: Advanced Debugging with XCode
 
The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189
 
The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212
 
3 1-1
3 1-13 1-1
3 1-1
 
The Ring programming language version 1.5.2 book - Part 52 of 181
The Ring programming language version 1.5.2 book - Part 52 of 181The Ring programming language version 1.5.2 book - Part 52 of 181
The Ring programming language version 1.5.2 book - Part 52 of 181
 
Python 내장 함수
Python 내장 함수Python 내장 함수
Python 내장 함수
 

Semelhante a ぐだ生 Java入門第三回(文字コードの話)(Keynote版)

スマートフォン勉強会@関東 #11 どう考えてもdisconなものをiPhoneに移植してみた
スマートフォン勉強会@関東 #11 どう考えてもdisconなものをiPhoneに移植してみたスマートフォン勉強会@関東 #11 どう考えてもdisconなものをiPhoneに移植してみた
スマートフォン勉強会@関東 #11 どう考えてもdisconなものをiPhoneに移植してみたTaro Matsuzawa
 
Let’s talk about microbenchmarking
Let’s talk about microbenchmarkingLet’s talk about microbenchmarking
Let’s talk about microbenchmarkingAndrey Akinshin
 
On Beyond (PostgreSQL) Data Types
On Beyond (PostgreSQL) Data TypesOn Beyond (PostgreSQL) Data Types
On Beyond (PostgreSQL) Data TypesJonathan Katz
 
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321Teddy Hsiung
 
C Programming Training in Ambala ! Batra Computer Centre
C Programming Training in Ambala ! Batra Computer CentreC Programming Training in Ambala ! Batra Computer Centre
C Programming Training in Ambala ! Batra Computer Centrejatin batra
 
JCConf 2020 - New Java Features Released in 2020
JCConf 2020 - New Java Features Released in 2020JCConf 2020 - New Java Features Released in 2020
JCConf 2020 - New Java Features Released in 2020Joseph Kuo
 
Text Rendering Tech
Text Rendering TechText Rendering Tech
Text Rendering TechDavid Ding
 
Getting started cpp full
Getting started cpp   fullGetting started cpp   full
Getting started cpp fullVõ Hòa
 
Java Simple Programs
Java Simple ProgramsJava Simple Programs
Java Simple ProgramsUpender Upr
 
PVS-Studio, a solution for resource intensive applications development
PVS-Studio, a solution for resource intensive applications developmentPVS-Studio, a solution for resource intensive applications development
PVS-Studio, a solution for resource intensive applications developmentOOO "Program Verification Systems"
 
Arduino coding class part ii
Arduino coding class part iiArduino coding class part ii
Arduino coding class part iiJonah Marrs
 
Games no Windows (FATEC 2015)
Games no Windows (FATEC 2015)Games no Windows (FATEC 2015)
Games no Windows (FATEC 2015)Fabrício Catae
 
Abstracting Vector Architectures in Library Generators: Case Study Convolutio...
Abstracting Vector Architectures in Library Generators: Case Study Convolutio...Abstracting Vector Architectures in Library Generators: Case Study Convolutio...
Abstracting Vector Architectures in Library Generators: Case Study Convolutio...ETH Zurich
 
bpftrace - Tracing Summit 2018
bpftrace - Tracing Summit 2018bpftrace - Tracing Summit 2018
bpftrace - Tracing Summit 2018AlastairRobertson9
 
Design Patterns - Compiler Case Study - Hands-on Examples
Design Patterns - Compiler Case Study - Hands-on ExamplesDesign Patterns - Compiler Case Study - Hands-on Examples
Design Patterns - Compiler Case Study - Hands-on ExamplesGanesh Samarthyam
 
Score (smart contract for icon)
Score (smart contract for icon) Score (smart contract for icon)
Score (smart contract for icon) Doyun Hwang
 
C Code and the Art of Obfuscation
C Code and the Art of ObfuscationC Code and the Art of Obfuscation
C Code and the Art of Obfuscationguest9006ab
 

Semelhante a ぐだ生 Java入門第三回(文字コードの話)(Keynote版) (20)

スマートフォン勉強会@関東 #11 どう考えてもdisconなものをiPhoneに移植してみた
スマートフォン勉強会@関東 #11 どう考えてもdisconなものをiPhoneに移植してみたスマートフォン勉強会@関東 #11 どう考えてもdisconなものをiPhoneに移植してみた
スマートフォン勉強会@関東 #11 どう考えてもdisconなものをiPhoneに移植してみた
 
Let’s talk about microbenchmarking
Let’s talk about microbenchmarkingLet’s talk about microbenchmarking
Let’s talk about microbenchmarking
 
On Beyond (PostgreSQL) Data Types
On Beyond (PostgreSQL) Data TypesOn Beyond (PostgreSQL) Data Types
On Beyond (PostgreSQL) Data Types
 
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
 
C Programming Training in Ambala ! Batra Computer Centre
C Programming Training in Ambala ! Batra Computer CentreC Programming Training in Ambala ! Batra Computer Centre
C Programming Training in Ambala ! Batra Computer Centre
 
JCConf 2020 - New Java Features Released in 2020
JCConf 2020 - New Java Features Released in 2020JCConf 2020 - New Java Features Released in 2020
JCConf 2020 - New Java Features Released in 2020
 
Text Rendering Tech
Text Rendering TechText Rendering Tech
Text Rendering Tech
 
Getting started cpp full
Getting started cpp   fullGetting started cpp   full
Getting started cpp full
 
Java Simple Programs
Java Simple ProgramsJava Simple Programs
Java Simple Programs
 
PVS-Studio, a solution for resource intensive applications development
PVS-Studio, a solution for resource intensive applications developmentPVS-Studio, a solution for resource intensive applications development
PVS-Studio, a solution for resource intensive applications development
 
Arduino coding class part ii
Arduino coding class part iiArduino coding class part ii
Arduino coding class part ii
 
Games no Windows (FATEC 2015)
Games no Windows (FATEC 2015)Games no Windows (FATEC 2015)
Games no Windows (FATEC 2015)
 
Abstracting Vector Architectures in Library Generators: Case Study Convolutio...
Abstracting Vector Architectures in Library Generators: Case Study Convolutio...Abstracting Vector Architectures in Library Generators: Case Study Convolutio...
Abstracting Vector Architectures in Library Generators: Case Study Convolutio...
 
bpftrace - Tracing Summit 2018
bpftrace - Tracing Summit 2018bpftrace - Tracing Summit 2018
bpftrace - Tracing Summit 2018
 
Design Patterns - Compiler Case Study - Hands-on Examples
Design Patterns - Compiler Case Study - Hands-on ExamplesDesign Patterns - Compiler Case Study - Hands-on Examples
Design Patterns - Compiler Case Study - Hands-on Examples
 
Libtcc and gwan
Libtcc and gwanLibtcc and gwan
Libtcc and gwan
 
Libtcc and gwan
Libtcc and gwanLibtcc and gwan
Libtcc and gwan
 
Score (smart contract for icon)
Score (smart contract for icon) Score (smart contract for icon)
Score (smart contract for icon)
 
C Code and the Art of Obfuscation
C Code and the Art of ObfuscationC Code and the Art of Obfuscation
C Code and the Art of Obfuscation
 
Parameters
ParametersParameters
Parameters
 

Mais de Makoto Yamazaki

20150425 DroidKaigi つかえるGradleプロジェクトの作り方
20150425 DroidKaigi つかえるGradleプロジェクトの作り方20150425 DroidKaigi つかえるGradleプロジェクトの作り方
20150425 DroidKaigi つかえるGradleプロジェクトの作り方Makoto Yamazaki
 
Custom lintcheckをつくろう
Custom lintcheckをつくろうCustom lintcheckをつくろう
Custom lintcheckをつくろうMakoto Yamazaki
 
20120516 第7回ウフィカ社内ハンズオン Git基礎
20120516 第7回ウフィカ社内ハンズオン Git基礎20120516 第7回ウフィカ社内ハンズオン Git基礎
20120516 第7回ウフィカ社内ハンズオン Git基礎Makoto Yamazaki
 
ICS ホットトピック
ICS ホットトピックICS ホットトピック
ICS ホットトピックMakoto Yamazaki
 
DevQuiz 2011 の模範解答 Android編
DevQuiz 2011 の模範解答 Android編DevQuiz 2011 の模範解答 Android編
DevQuiz 2011 の模範解答 Android編Makoto Yamazaki
 
USB Host APIで遊んでみた
USB Host APIで遊んでみたUSB Host APIで遊んでみた
USB Host APIで遊んでみたMakoto Yamazaki
 
20110619 live view ideathon_logcatonliveview
20110619 live view ideathon_logcatonliveview20110619 live view ideathon_logcatonliveview
20110619 live view ideathon_logcatonliveviewMakoto Yamazaki
 
I/O 2011 報告会 ADKで遊んでみた
I/O 2011 報告会 ADKで遊んでみたI/O 2011 報告会 ADKで遊んでみた
I/O 2011 報告会 ADKで遊んでみたMakoto Yamazaki
 
ぐだ生 Java入門第ニ回(synchronized and lock)
ぐだ生 Java入門第ニ回(synchronized and lock)ぐだ生 Java入門第ニ回(synchronized and lock)
ぐだ生 Java入門第ニ回(synchronized and lock)Makoto Yamazaki
 
ぐだ生 Java入門第ニ回(synchronized and lock)
ぐだ生 Java入門第ニ回(synchronized and lock)ぐだ生 Java入門第ニ回(synchronized and lock)
ぐだ生 Java入門第ニ回(synchronized and lock)Makoto Yamazaki
 
ぐだ生 Java入門第一回(equals hash code_tostring)
ぐだ生 Java入門第一回(equals hash code_tostring)ぐだ生 Java入門第一回(equals hash code_tostring)
ぐだ生 Java入門第一回(equals hash code_tostring)Makoto Yamazaki
 
20110326 ネットプリントの紹介
20110326 ネットプリントの紹介20110326 ネットプリントの紹介
20110326 ネットプリントの紹介Makoto Yamazaki
 
20110109 abc2010w gingerbread_api_storage
20110109 abc2010w gingerbread_api_storage20110109 abc2010w gingerbread_api_storage
20110109 abc2010w gingerbread_api_storageMakoto Yamazaki
 

Mais de Makoto Yamazaki (13)

20150425 DroidKaigi つかえるGradleプロジェクトの作り方
20150425 DroidKaigi つかえるGradleプロジェクトの作り方20150425 DroidKaigi つかえるGradleプロジェクトの作り方
20150425 DroidKaigi つかえるGradleプロジェクトの作り方
 
Custom lintcheckをつくろう
Custom lintcheckをつくろうCustom lintcheckをつくろう
Custom lintcheckをつくろう
 
20120516 第7回ウフィカ社内ハンズオン Git基礎
20120516 第7回ウフィカ社内ハンズオン Git基礎20120516 第7回ウフィカ社内ハンズオン Git基礎
20120516 第7回ウフィカ社内ハンズオン Git基礎
 
ICS ホットトピック
ICS ホットトピックICS ホットトピック
ICS ホットトピック
 
DevQuiz 2011 の模範解答 Android編
DevQuiz 2011 の模範解答 Android編DevQuiz 2011 の模範解答 Android編
DevQuiz 2011 の模範解答 Android編
 
USB Host APIで遊んでみた
USB Host APIで遊んでみたUSB Host APIで遊んでみた
USB Host APIで遊んでみた
 
20110619 live view ideathon_logcatonliveview
20110619 live view ideathon_logcatonliveview20110619 live view ideathon_logcatonliveview
20110619 live view ideathon_logcatonliveview
 
I/O 2011 報告会 ADKで遊んでみた
I/O 2011 報告会 ADKで遊んでみたI/O 2011 報告会 ADKで遊んでみた
I/O 2011 報告会 ADKで遊んでみた
 
ぐだ生 Java入門第ニ回(synchronized and lock)
ぐだ生 Java入門第ニ回(synchronized and lock)ぐだ生 Java入門第ニ回(synchronized and lock)
ぐだ生 Java入門第ニ回(synchronized and lock)
 
ぐだ生 Java入門第ニ回(synchronized and lock)
ぐだ生 Java入門第ニ回(synchronized and lock)ぐだ生 Java入門第ニ回(synchronized and lock)
ぐだ生 Java入門第ニ回(synchronized and lock)
 
ぐだ生 Java入門第一回(equals hash code_tostring)
ぐだ生 Java入門第一回(equals hash code_tostring)ぐだ生 Java入門第一回(equals hash code_tostring)
ぐだ生 Java入門第一回(equals hash code_tostring)
 
20110326 ネットプリントの紹介
20110326 ネットプリントの紹介20110326 ネットプリントの紹介
20110326 ネットプリントの紹介
 
20110109 abc2010w gingerbread_api_storage
20110109 abc2010w gingerbread_api_storage20110109 abc2010w gingerbread_api_storage
20110109 abc2010w gingerbread_api_storage
 

Último

How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerThousandEyes
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Servicegiselly40
 
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024The Digital Insurer
 
Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024The Digital Insurer
 
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptxEIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptxEarley Information Science
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...apidays
 
What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?Antenna Manufacturer Coco
 
Artificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and MythsArtificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and MythsJoaquim Jorge
 
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking MenDelhi Call girls
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024The Digital Insurer
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationMichael W. Hawkins
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...Martijn de Jong
 
From Event to Action: Accelerate Your Decision Making with Real-Time Automation
From Event to Action: Accelerate Your Decision Making with Real-Time AutomationFrom Event to Action: Accelerate Your Decision Making with Real-Time Automation
From Event to Action: Accelerate Your Decision Making with Real-Time AutomationSafe Software
 
Handwritten Text Recognition for manuscripts and early printed texts
Handwritten Text Recognition for manuscripts and early printed textsHandwritten Text Recognition for manuscripts and early printed texts
Handwritten Text Recognition for manuscripts and early printed textsMaria Levchenko
 
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking MenDelhi Call girls
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreternaman860154
 
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptxHampshireHUG
 
[2024]Digital Global Overview Report 2024 Meltwater.pdf
[2024]Digital Global Overview Report 2024 Meltwater.pdf[2024]Digital Global Overview Report 2024 Meltwater.pdf
[2024]Digital Global Overview Report 2024 Meltwater.pdfhans926745
 
Strategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a FresherStrategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a FresherRemote DBA Services
 

Último (20)

How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected Worker
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Service
 
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024
 
Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024
 
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptxEIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
 
What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?
 
Artificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and MythsArtificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and Myths
 
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day Presentation
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...
 
From Event to Action: Accelerate Your Decision Making with Real-Time Automation
From Event to Action: Accelerate Your Decision Making with Real-Time AutomationFrom Event to Action: Accelerate Your Decision Making with Real-Time Automation
From Event to Action: Accelerate Your Decision Making with Real-Time Automation
 
Handwritten Text Recognition for manuscripts and early printed texts
Handwritten Text Recognition for manuscripts and early printed textsHandwritten Text Recognition for manuscripts and early printed texts
Handwritten Text Recognition for manuscripts and early printed texts
 
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreter
 
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
 
[2024]Digital Global Overview Report 2024 Meltwater.pdf
[2024]Digital Global Overview Report 2024 Meltwater.pdf[2024]Digital Global Overview Report 2024 Meltwater.pdf
[2024]Digital Global Overview Report 2024 Meltwater.pdf
 
Strategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a FresherStrategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a Fresher
 

ぐだ生 Java入門第三回(文字コードの話)(Keynote版)

  • 1. Java ( Unicode) 2011 4 16 twitter: @zaki50
  • 2. Who am I • YAMAZAKI Makoto(twitter: @zaki50) • Android • • StickyShortcut • Java
  • 3. (CharacterSet) (Encoding) • • UTF-16 • UTF-8
  • 4.
  • 5. Unicode 6.0 11 •
  • 6. • US ASCII • Shift JIS • JIS X 0208 • UCS-2 • UCS-4
  • 7. Unicode • UTF-8 • UTF-16 • UTF-32 • ... ( : US ASCII, Shift JIS)
  • 9. Unicode • • Xerox Microsoft, Apple, Sun Microsystems, HP, JUST System The Unicode Consortium • iso10646
  • 11. Unicode • The Unicode Consortium ( http://www.unicode.org/ ) • •
  • 12. Unicode • •
  • 13.
  • 14. ( , Ligature) • • (U+3075) + (U+309A) = • ( (U+3077) )
  • 16. • NFC(Normalization Form C) • NFD(Normalization Form D) • NFKC(Normalization Form KC) • NFKD(Normalization Form KD) C(Composition, )/D(Decomposition, ) K(Compatibility, )
  • 17. NFC • MacOS X (HFS+) NFD • NFKC, NFKD
  • 19. Unicode • K(Compatibility) 1 : (U+3000) (U+0020) ( ) (5 )
  • 20. Unicode K K C D
  • 23. UTF-16 • • Java String • Windows
  • 24. CodePoint UTF-16 U+0000-U+FFFF xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx U+010000-U 0000 0000 000u uuuu 1101 10ww wwxx xxxx +0010FFFF xxxx xxxx xxxx xxxx 1101 11xx xxxx xxxx x,u,w ∈ {0,1} wwww = uuuuu - 1
  • 25. UTF-16 • 2 • 2
  • 26. • 1 16 16bit * 2 • 1 16bit 16 20bit • U+D800-U+DFFF (11bit ) • 0xD800-0xDBFF, 0xDC00-0xDFFF
  • 27. • UTF-16 2 1 2 U+3000 = 0x3000 0x30, 0x00 UTF-16BE U+3000 = 0x3000 0x00, 0x30 UTF-16LE
  • 28. BOM(byte order mark) • U+FEFF • U+FFFE • U+FEFF ZERO WIDTH NO-BREAK SPACE
  • 29. UTF-8
  • 30. CodePoint bit bit U+00-U+7F 0xxx xxxx 7bits U+0080-U+07FF 110y yyyx 10xx xxxx 11bits U+0800-U+FFFF 1110 yyyy 10yx xxxx (10xx xxxx) * 1 16bits U+010000-U+1FFFFF 1111 0yyy 10yy xxxx (10xx xxxx) * 2 21bits U+00200000-U+03FFFFFF 1111 10yy 10yy yxxx (10xx xxxx) * 3 26bits U+04000000-U+7FFFFFFF 1111 110y 10yy yyxx (10xx xxxx) * 4 31bits x,y ∈ {0,1} y 1
  • 31. UTF-8 • US ASCII US ASCII • • 0x80-0xbf 2 , • 2
  • 32.
  • 33. • UTF-8 1 • BOM
  • 34. BOM(byte order mark) • 0xEF, 0xBB, 0xBF • byte order mark • UTF-8
  • 35. 1-6 2^31 UTF-8 (1-4) (2^21) UTF-16 2-4 2^16-2*2^10+2^20 UTF-32 4 2^16+2^20
  • 36.
  • 37. (CharacterSet) (Encoding) • 1 • 1 (UTF-16 ) 2
  • 38. • The Unicode Consortium(http://www.unicode.org/) • Unicode 6.0.0(http://www.unicode.org/versions/Unicode6.0.0/) • Wikipedia • Unicode(http://ja.wikipedia.org/wiki/Unicode) • UTF-8(http://ja.wikipedia.org/wiki/UTF-8) • UTF-16(http://ja.wikipedia.org/wiki/UTF-16) • Unicode (http://homepage1.nifty.com/nomenclator/unicode/normalization.htm)
  • 39. ( )
  • 40. 1 Unicode package org.zakky.gudanama; import java.text.Normalizer; public class NormalizeMain { private static void printCodePoints(String str) { public static void main(String[] args) { StringBuilder sb = new StringBuilder(); // [ ϓ ], [ ϔ ], [ ẛ ] int index = 0; final String src = "u03d3u03d4u1e9b"; while(index < str.length()) { printCodePoints(src); char c = str.charAt(index); sb.append("U+"); // [ ϓ ], [ ϔ ], [ ẛ ] if (Character.isHighSurrogate(c)) { System.out.print("NFC: "); sb.append(Character.toCodePoint(c, str.charAt(index+1))); printCodePoints(Normalizer.normalize(src, Normalizer.Form.NFC)); index++; // [ ϒ ], [ ́ ], [ ϒ ], [ ̈ ], [ ſ ], [ ̇ ] } else { System.out.print("NFD: "); sb.append((int) c); printCodePoints(Normalizer.normalize(src, Normalizer.Form.NFD)); } // [ Ύ ], [ Ϋ ], [ ṡ ] index++; System.out.print("NFKC: "); sb.append(' '); printCodePoints(Normalizer.normalize(src, Normalizer.Form.NFKC)); } // [ Υ ], [ ́ ], [ Υ ], [ ̈ ], [ s ], [ ̇ ] if (sb.length() != 0) { System.out.print("NFKD: "); sb.setLength(sb.length() - 1); printCodePoints(Normalizer.normalize(src, Normalizer.Form.NFKD)); } System.out.println(sb); } } }
  • 41. 2 package org.zakky.gudanama; public class SurrogatePairMain { public static void main(String[] args) { final String str = " "; System.out.println("str: " + str); System.out.println("length: " + str.length()); System.out.println("code point count" + str.codePointCount(0, str.length())); System.out.println("code point of str: " + Character.toCodePoint(str.charAt(0), str.charAt(1)) + "(U+" + Integer.toHexString(Character.toCodePoint(str.charAt(0), str.charAt(1))) + ")"); System.out.println("str[0] is high surrogate: " + Character.isHighSurrogate(str.charAt(0))); System.out.println("str[0] is low surrogate: " + Character.isLowSurrogate(str.charAt(0))); System.out.println("str[1] is high surrogate: " + Character.isHighSurrogate(str.charAt(1))); System.out.println("str[1] is low surrogate: " + Character.isLowSurrogate(str.charAt(1))); System.out.println(new String(new char[] {str.charAt(0), str.charAt(1) })); } }
  • 42. 3 BOM package org.zakky.gudanama; import java.io.IOException; import java.io.PushbackReader; import java.io.Reader; public class BomMain { public static void main(String[] args) throws IOException { Reader r = null; // skip bom int first = r.read(); if (!isBom(first)) { PushbackReader pushbackReader = new PushbackReader(r); pushbackReader.unread(first); r = pushbackReader; } } private static boolean isBom(int codepoint) { return codepoint == 0xfeff; } }

Notas do Editor

  1. \n
  2. \n
  3. \n
  4. \n
  5. \n
  6. \n
  7. \n
  8. \n
  9. \n
  10. \n
  11. \n
  12. \n
  13. \n
  14. \n
  15. \n
  16. \n
  17. \n
  18. \n
  19. \n
  20. \n
  21. \n
  22. \n
  23. \n
  24. \n
  25. \n
  26. \n
  27. \n
  28. \n
  29. \n
  30. \n
  31. \n
  32. \n
  33. \n
  34. \n
  35. \n
  36. \n
  37. \n
  38. \n