SlideShare uma empresa Scribd logo
1 de 35
Processing XML and Spreadsheet in Go
续 日
Gopher China Conference
Beijing 2021 6/26 - 6/27
Self Introduction
The author of Excelize, a Go language spreadsheet library. Familiar
with Go programming, middleware, and big data solutions.
Working Experiences
Alibaba Group - Software Engineer
Baidu Inc. - Software Engineer
Qihoo 360 – Server-side Software Engineer
GitHub: @xuri
Twitter: @xurime
Blog: https://xuri.me
Agenda
Serialize and Deserialize 01
• Document Object Model
• Event-driven (Simple API for XML)
• Serialize and Deserialize Control
Handle Complex XML 02
• Partial Load
• Namespace & Entity
• Ser/Deserialize Idempotence
High Performance Processing 03
• XML Schema Definition
• DOM or SAX
OOXML Spreadsheets 04
• Excel XML Specification
• Charset Encoding
• Streaming I/O
Serialize and Deserialize
Document Object Model
<?xml version="1.0" encoding="utf-8"?>
<Person>
<Name>Tom</Name>
<Email where="home">
<Addr>tom@example.com</Addr>
</Email>
</Person>
type Person struct {
Name string
Email struct {
Where string `xml:"where,attr"`
Addr string
}
}
encoding/xml
var p Person
if err := xml.Unmarshal([]byte(data), &p); err != nil {
fmt.Println(err)
}
fmt.Printf("%+v\n", p)
// {Name:Tom Email:{Where:home Addr:tom@example.com}}
XML Finite State Machine
0
start start tag
NAME
TEXT
equal
end tag
value
value
end value
COMMENT
version
blank/enter
letter
digit
<
?
?>
-->
!--
= " "
' '
>
blank
/
letter
>
blank
Unmarshal
Unmarshal
NewDecoder
Decoder.unmarshal Decoder.switchToReader
unmarshalPath
unmarshalAttr unmarshalInterface
unmarshalTextInterface Decoder.RawToken
Decoder.pushElement Decoder.pushNs
encoding/xml
marshal.go
typeinfo.go
xml.go
read.go
example & test
Go XML Parser
type Decoder struct {
Strict bool
AutoClose []string
Entity map[string]string
CharsetReader func(charset string, input io.Reader) (io.Reader, error)
DefaultSpace string
r io.ByteReader
t TokenReader
buf bytes.Buffer
saved *bytes.Buffer
stk *stack
free *stack
needClose bool
toClose Name
nextToken Token
nextByte int
ns map[string]string
err error
line int
offset int64
unmarshalDepth int
}
encoding/xml:xml.go
StartElement
EndElement
CharData
Comment
ProcInst
Directive
Event-driven (Simple API for XML)
decoder := xml.NewDecoder(strings.NewReader(data))
for {
token, _ := decoder.Token()
if token == nil {
break
}
switch element := token.(type) {
case xml.StartElement:
fmt.Printf("%+v\n", element)
case xml.EndElement:
fmt.Printf("%+v\n", element)
}
}
<?xml version="1.0" encoding="utf-8"?>
<Person>
<Name>Tom</Name>
<Email where="home">
<Addr>tom@example.com</Addr>
</Email>
</Person>
{Name:{Space: Local:Person} Attr:[]}
{Name:{Space: Local:Name} Attr:[]}
{Name:{Space: Local:Name}}
{Name:{Space: Local:Email} Attr:[{Name:{Space: Local:where} Value:home}]}
{Name:{Space: Local:Addr} Attr:[]}
{Name:{Space: Local:Addr}}
{Name:{Space: Local:Email}}
{Name:{Space: Local:Person}}
Serialize and Deserialize Control
switch flag {
case "attr":
finfo.flags |= fAttr
case "cdata":
finfo.flags |= fCDATA
case "chardata":
finfo.flags |= fCharData
case "innerxml":
finfo.flags |= fInnerXML
case "comment":
finfo.flags |= fComment
case "any":
finfo.flags |= fAny
case "omitempty":
finfo.flags |= fOmitEmpty
}
encoding/xml:typeinfo.go
type Person struct {
Name string
Email struct {
Where string `xml:"where,attr,omitempty"`
Addr string
}
}
attribute with the field name in the XML element
written as character data, not as an XML element
written as character data wrapped in one or more <![CDATA[ ... ]]> tags
written verbatim, not subject to the usual marshaling procedure
unmatched rule, maps the sub-element to that struct field
omitted if the field value is empty
Partial Load
<?xml version="1.0" encoding="utf-8"?>
<Person>
<Name>Tom</Name>
<Email>
<Addr>tom@example.com</Addr>
</Email>
</Person>
type Person struct {
Name string
Email partialXML
}
type partialXML struct {
Content string `xml:",innerxml"`
}
var p Person
err := xml.Unmarshal([]byte(data), &p)
if err != nil {
fmt.Println(err)
}
fmt.Printf("%+v\n", p)
{Name:Tom Email:{Content:
<Addr>tom@example.com</Addr>
}}
Handle Complex XML
Datatypes
Go Datatypes XML Datatypes
string
anyType, ENTITY,ID, IDREF, NCName,
NMTOKEN, Name, anyURI, duration,
language, normalizedString, string, token,
xml:lang, xml:space, xml:base,xml:id
[]string ENTITIES, IDREFS, NMTOKENS, NOTATION
xml.Name QName
[]byte base64Binary, hexBinary, unsignedByte
bool boolean
byte byte
float64 decimal, double, float,
int64
int, integer, long, negativeInteger,
nonNegativeInteger, nonPositiveInteger,
positiveInteger, short
uint64 unsignedInt, unsignedLong, unsignedShort
time.Time
date, dateTime, gDay, gMonth, gMonthDay,
gYear, gYearMonth,time
anyType
anySimpleType
all complex types
gYearMonth gYear gMonthDay gDay gMonth
date
time
dateTime
duration
boolean base64Binary hexBinary float double anyURI QName NOTATION
decimal
string
normalizedString integer
token long nonNegativeInteger
nonPositiveInteger
language Name NMTOKEN negativeInteger int unsignedLong positiveInteger
NCName NMTOKENS short unsignedInt
ID IDREF ENTITY
ENTITIES
IDREFS
byte unsignedShort
unsignedByte
ur-types
built-in primitive types
built-in derived types
complex types
derived by restriction
Derived by list
Derived by extension or
restriction
Entity
XML Entity
exp := `<!ENTITY\s+([^\s]+)\s+"([^"]+)">`
entities := map[string]string{}
d := xml.NewDecoder(strings.NewReader(input))
var rEntity = regexp.MustCompile(exp)
for {
tok, err := d.Token()
if err != nil {
break
}
dir, ok := tok.(xml.Directive)
if !ok {
continue
}
fmt.Println(string(dir))
for _, m := range rEntity.FindAllSubmatch(dir,-1) {
entities[string(m[1])] = string(m[2])
}
}
fmt.Println("entities", entities)
type Person struct {
XMLName xml.Name `xml:"person"`
Name string `xml:"name"`
Address string `xml:"address"`
}
entities map[name:Tom email:tom@example.com]
Get Entity
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE person[
<!ENTITY name "Tom">
<!ENTITY email "tom@example.com">
]>
<person>
<name>&name;</name>
<address>&email;</address>
</person>
Entity
XML Entity
d = xml.NewDecoder(strings.NewReader(input))
d.Strict = false
d.Entity = entities
err := d.Decode(&v)
if err != nil {
fmt.Printf("error: %v", err)
return
}
fmt.Printf("%+v\n", v)
{XMLName:{Space: Local:person} Name:Tom Address:tom@example.com}
Decode with Entity
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE person[
<!ENTITY name "Tom">
<!ENTITY email "tom@example.com">
]>
<person>
<name>&name;</name>
<address>&email;</address>
</person>
type Person struct {
XMLName xml.Name `xml:"person"`
Name string `xml:"name"`
Address string `xml:"address"`
}
Namespace & Ser/Deserialize Idempotence
<?xml version="1.0" encoding="utf-8"?>
<person
xmlns="http://example.com/default"
xmlns:m="http://example.com/main"
xmlns:h="http://example.com/home"
xmlns:w="http://example.com/work">
<name>Tom</name>
<m:email h:addr="HOME" w:addr="WORK" />
</person>
type Person struct {
XMLName xml.Name `xml:"http://example.com/default person"`
Name string `xml:"name"`
Email struct {
XMLName xml.Name `xml:"http://example.com/main email"`
HomeAddr string `xml:"http://example.com/home addr,attr"`
WorkAddr string `xml:"http://example.com/work addr,attr"`
} // TAG NOT HERE: `xml:"email"`
}
<person xmlns="http://example.com/default">
<name>Tom</name>
<email xmlns="http://example.com/main"
xmlns:home="http://example.com/home"
home:addr="HOME"
xmlns:work="http://example.com/work"
work:addr="WORK"></email>
</person>
Inline Namespace Declare
Root Element
NS Missing!
Namespace Local Name
Ser/Deserialize Idempotence
encoding/xml:xml.go
type Token interface{}
type EndElement struct {
Name Name
}
type Name struct {
Space, Local string
}
type Attr struct {
Name Name
Value string
}
type StartElement struct {
Name Name
Attr []Attr
}
// getRootEleAttr returns the attributes of the first start element (the
// root element) read from the given XML decoder. It returns nil when the
// token stream ends, or a read fails, before any start element is seen.
func getRootEleAttr(d *xml.Decoder) []xml.Attr {
	for {
		// The error is not inspected: the scan stops as soon as no token
		// is returned.
		tok, _ := d.Token()
		if tok == nil {
			return nil
		}
		if root, ok := tok.(xml.StartElement); ok {
			return root.Attr
		}
	}
}
Ser/Deserialize Idempotence
<?xml version="1.0" encoding="utf-8"?>
<person
xmlns="http://example.com/default"
xmlns:m="http://example.com/main"
xmlns:h="http://example.com/home"
xmlns:w="http://example.com/work">
<name>Tom</name>
<m:email h:addr="HOME" w:addr="WORK" />
</person>
decoder := xml.NewDecoder(strings.NewReader(data))
marshalXML := ""
for {
token, _ := decoder.Token()
if token == nil {
break
}
switch element := token.(type) {
case xml.StartElement:
for _, attr := range element.Attr {
if element.Name.Local == "person" {
colon := ""
if attr.Name.Space != "" {
colon = ":"
}
marshalXML += fmt.Sprintf("%s%s%s=\"%s\" ",
attr.Name.Space, colon,
attr.Name.Local, attr.Value)
}
}
}
}
fmt.Printf("<person %s>\n", marshalXML)
<person xmlns="http://example.com/default"
xmlns:m="http://example.com/main"
xmlns:h="http://example.com/home"
xmlns:w="http://example.com/work" >
<name>Tom</name>
<email xmlns="http://example.com/main"
xmlns:home="http://example.com/home"
home:addr="HOME"
xmlns:work="http://example.com/work"
work:addr="WORK"></email>
</person>
<?xml version="1.0" encoding="utf-8"?>
High Performance Processing
XML Components Data Model
<?xml version="1.0"?>
<note xmlns:m="http://example.com/main">
<to>Tom</to>
<from>Bob</from>
<heading>Reminder</heading>
<m:body>Don't forget me this weekend!</m:body>
</note>
<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:m="http://example.com/main">
<xs:import namespace="http://example.com/main" schemaLocation="shared.xsd"/>
<xs:element name="note">
<xs:complexType>
<xs:sequence>
<xs:element name="to" type="xs:string"/>
<xs:element name="from" type="xs:string"/>
<xs:element name="heading" type="xs:string"/>
<xs:element ref="m:body"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
<?xml version="1.0"?>
<xs:schema
xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="body" type="xs:string"/>
</xs:schema>
shared.xsd
XML Components Data Model
type Note struct {
XMLName xml.Name `xml:"note"`
To string `xml:"to"`
From string `xml:"from"`
Heading string `xml:"heading"`
Body string `xml:"http://example.com/main body"`
}
<?xml version="1.0"?>
<note xmlns:m="http://example.com/main">
<to>Tom</to>
<from>Bob</from>
<heading>Reminder</heading>
<m:body>Don't forget me this weekend!</m:body>
</note>
XML Components Data Model
XSD: XML Schema Definition Process
is a named component and has two additional
properties - name and target namespace
is an un-named component
cmd
parser
NSResolver
proto
generator
Language Code
Attribute Attribute Group ComplexType Element Enumeration FractionDigits
Pattern
SimpleType
Schema
List
Length
Import
Include
Group
Restriction TotalDigits Union WhiteSpace MaxLength MinLength MinExclusive
Attribute Attribute Group FieldName FieldType
ComplexType
Element
Group
SimpleType
Generator
SAX Parser
Schema
Notation Declaration
system identifier
public identifier
Element Declaration
scope
value constraint
nillable
substitution group affiliation
substitution group exclusions
disallowed substitutions
abstract
Simple Type Declaration
facets
final
variety
Attribute Declaration
scope
value constraint
Identity-constraint Declaration
identity-constraint category
selector
fields
referenced key
Complex Type Declaration
derivation method
final
abstract
prohibited substitutions
Model Group Definition
Attribute Group Definition
Attribute Use
required
value constraint
Wildcard
namespace constraint
process contents
Particle
min occurs
max occurs
Model Group
compositor
notation declarations attribute declarations
type definitions
element declarations
attribute group definitions
model group definitions
type
definitions
identity-constraint
definitions
content type
type
definition
type
definition
term
content type
base type
definition
base
type
definition
base
type
definition
attribute
uses
attribute
wildcard
term
term
particles
model
group
attribute
wildcard
type
definition
attribute definitions
attribute uses
https://github.com/xuri/xgen
SAX or DOM
SAX Parser DOM Parser
Simple API for XML Parsing Document Object Model
Event-based parser Stays in a tree structure
Low memory usage High memory usage
Best for the larger size of XML files Best for the smaller sizes of files
Read-only Insert or delete nodes
Backward navigation is not possible Backward and forward search is possible
Only a small part of the XML file is loaded in memory It loads the whole XML document in memory
OOXML Spreadsheets
OOXML ISO/IEC 29500 ECMA-376 Specification
SVG
SOAP
ODF
OOXML
C#
C++/CLI
MIME
XSLT
XHTML
DITA
0
200
400
600
800
1000
1200
0 1000 2000 3000 4000 5000 6000 7000
Technical
Committee
effort
(days)
Specification length (pages)
Specification Speed
OOXML Specification
WordprocessingML SpreadsheetML PresentationML
CustomML
Vocabularies
DrawingML Bibliography
Metadata Equations
VML (legacy)
Relationships Content Types Digital Signatures
Markup Languages
Open Packaging Convention
ZIP XML + Unicode
Core Technologies
OOXML Specification
WordprocessingML SpreadsheetML PresentationML
CustomML
Vocabularies
DrawingML Bibliography
Metadata Equations
VML (legacy)
Relationships Content Types Digital Signatures
Markup Languages
Open Packaging Convention
ZIP XML + Unicode
Core Technologies
Drawing 9918
Excel 6328
OPC 3147
Typical XML of the Cell Over 10589
Elements & Attributes
Shared Strings
Calc Chain
XML Maps
Styles
Theme
Sheets
Table
Chart Pivot Table
Pivot Cache
Pivot Records
Workbook
<c r="D3" s="7" t="s">
<v>0</v>
</c>
<xf numFmtId="0" fontId="4" fillId="2" borderId="2" xfId="1" applyBorder="1" />
<font>
<sz val="11"/>
<color theme="0"/>
<name val="Calibri"/>
<scheme val="minor"/>
</font>
<fill>
<patternFill patternType="solid">
<fgColor theme="4"/>
<bgColor theme="4"/>
</patternFill>
</fill>
<a:theme xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">
<a:themeElements>
<a:clrScheme name="Office">
<a:dk1>
<a:sysClr val="windowText" />
</a:dk1>
<a:lt1>
<a:sysClr val="window" />
</a:lt1>
<a:dk2>
<a:srgbClr val="1F497D"/>
</a:dk2>
<a:lt2>
<a:srgbClr val="FAF3E8"/>
</a:lt2>
<a:accent1>
<a:srgbClr val="5C93B4"/>
</a:accent1>
<cellStyleXfs count="12">
<xf numFmtId="0" fontId="0" fillId="0" borderId="0"/>
<xf numFmtId="0" fontId="4" fillId="2" borderId="0" applyBorder="1" applyAlignment="1" applyProtection="1"/>
</protection>
</xf>
<cellStyles count="2">
<cellStyle name="Accent1" xfId="1" builtinId="29"/>
B C D E
Q1 Q2
Revenue 412.52
Start
(built-in) <border>
<left style="thick">
<color auto="1"/>
</left>
<right style="thick">
<color auto="1"/>
</right>
<top style="thick">
<color auto="1"/>
</top>
<bottom style="thick">
<color auto="1"/>
</bottom>
<diagonal/>
</border>
Charset Encoding
<?xml version='1.0' encoding='character encoding' standalone='yes|no'?>
import (
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
)
decoder = xml.NewDecoder(strings.NewReader(data))
decoder.CharsetReader = charset.NewReaderLabel
Ref: https://encoding.spec.whatwg.org
Name Labels
UTF-8 6
Legacy single-byte encodings 168
Legacy multi-byte Chinese
(simplified) encodings
10
Legacy multi-byte Chinese
(traditional) encodings
5
Legacy multi-byte Japanese
encodings
13
Legacy multi-byte Korean
encodings
10
Legacy miscellaneous encodings 16
228 Labels
// CharsetReader is described by its original comment as a decoder from all
// code pages to UTF-8: it scans charmap.All for an encoding and wraps input
// with that encoding's decoder.
//
// NOTE(review): as excerpted here the loop does not compile on its own —
// item is assigned with = but never declared, and sm and nm are not defined
// anywhere in this snippet. Presumably they are the entry's name and the
// requested charset label; confirm against the full source.
// NOTE(review): the xml.Decoder.CharsetReader hook is declared as
// func(charset string, input io.Reader) (io.Reader, error), so this
// single-result function cannot be assigned to that field directly.
func CharsetReader(charset string, input io.Reader) io.Reader {
	// enc stays nil unless the loop below finds a match.
	var enc encoding.Encoding
	for i := range charmap.All {
		item = charmap.All[i]
		// Case-insensitive comparison; there is no break, so the last
		// matching entry wins.
		if strings.EqualFold(sm, nm) {
			enc = item
		}
	}
	// NOTE(review): when nothing matched, enc is a nil interface and this
	// method call panics.
	return enc.NewDecoder().Reader(input)
}
Custom Charset Reader
Streaming I/O
Common Package Parts
Package
Relationships
Core Properties
Digital Signatures
Specific Format Parts
Office Document
Part
Relationships
XML Part
XML Part
Part
Rels
Etc…
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet
xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006">
<dimension ref="B2"/>
<sheetViews>
<sheetView tabSelected="1" workbookViewId="0" />
</sheetViews>
<sheetFormatPr baseColWidth="10" defaultRowHeight="16" />
<sheetData>
<row r="2">
<c r="B2">
<v>123</v>
</c>
</row>
</sheetData>
<pageMargins left="0.7" right="0.7" />
</worksheet>
A B C
1
2 123
3
4
Set Row
<sheetData>
<row r="2">
<c r="B2">
<v>123</v>
</c>
</row>
</sheetData>
// writeCell serializes one worksheet cell into buf as a <c> element.
//
// Attributes are emitted in a fixed order: an optional xml:<name> attribute
// taken from c.XMLSpace (only when its value is non-empty), the r attribute
// from c.R (always), s from c.S when it is non-zero, and t from c.T when it
// is non-empty. The element then contains an <f> child when c.F is non-nil
// and a <v> child when c.V is non-empty. Write errors are discarded.
func writeCell(buf *bufferedWriter, c xlsxC) {
	// Opening tag and its attributes.
	_, _ = buf.WriteString(`<c`)
	if space := c.XMLSpace; space.Value != "" {
		fmt.Fprintf(buf, ` xml:%s="%s"`, space.Name.Local, space.Value)
	}
	fmt.Fprintf(buf, ` r="%s"`, c.R)
	if style := c.S; style != 0 {
		fmt.Fprintf(buf, ` s="%d"`, style)
	}
	if cellType := c.T; cellType != "" {
		fmt.Fprintf(buf, ` t="%s"`, cellType)
	}
	_, _ = buf.WriteString(`>`)

	// Children: formula first, then value.
	if c.F != nil {
		writeEscapedElement(buf, `<f>`, []byte(c.F.Content), `</f>`)
	}
	if c.V != "" {
		writeEscapedElement(buf, `<v>`, []byte(c.V), `</v>`)
	}
	_, _ = buf.WriteString(`</c>`)
}

// writeEscapedElement writes open, then text passed through xml.EscapeText,
// then end, discarding write errors.
func writeEscapedElement(buf *bufferedWriter, open string, text []byte, end string) {
	_, _ = buf.WriteString(open)
	_ = xml.EscapeText(buf, text)
	_, _ = buf.WriteString(end)
}
type StreamWriter struct {
File *File
Sheet string
SheetID int
worksheet *xlsxWorksheet
rawData bufferedWriter
mergeCellsCount int
mergeCells string
tableParts string
}
Flush
func (sw *StreamWriter) Flush() error {
_, _ = sw.rawData.WriteString(`</sheetData>`)
bulkAppendFields(&sw.rawData, sw.worksheet, 8, 15)
if sw.mergeCellsCount > 0 {
sw.mergeCells = fmt.Sprintf(`<mergeCells
count="%d">%s</mergeCells>`, sw.mergeCellsCount, sw.mergeCells)
}
_, _ = sw.rawData.WriteString(sw.mergeCells)
bulkAppendFields(&sw.rawData, sw.worksheet, 17, 38)
_, _ = sw.rawData.WriteString(sw.tableParts)
bulkAppendFields(&sw.rawData, sw.worksheet, 40, 40)
_, _ = sw.rawData.WriteString(`</worksheet>`)
if err := sw.rawData.Flush(); err != nil {
return err
}
// ...
}
type StreamWriter struct {
File *File
Sheet string
SheetID int
worksheet *xlsxWorksheet
rawData bufferedWriter
mergeCellsCount int
mergeCells string
tableParts string
}
Generate XML Part
Save Spreadsheet
func (f *File) WriteToBuffer() (*bytes.Buffer, error) {
buf := new(bytes.Buffer)
zw := zip.NewWriter(buf)
f.calcChainWriter()
f.commentsWriter()
f.contentTypesWriter()
f.drawingsWriter()
f.vmlDrawingWriter()
f.workBookWriter()
f.workSheetWriter()
f.relsWriter()
f.sharedStringsWriter()
f.styleSheetWriter()
for path, stream := range f.streams {
// Save stream data
stream.rawData.Close()
}
for path, content := range f.XLSX {
// Save preserve data
}
}
XML Part to ZIP
Common Package Parts
Package
Relationships
Specific Format Parts
Office Document
Part
Relationships
Style / Theme / Calc Chain
Chart / PivotTable / Comments
Part
Rels
Etc…
Worksheets / SST
Workbook
Performance
102400 Row x 50 Columns, 6 Chars / Cell
0 5 10 15 20 25 30
Excelize 2.3.1@9316028 Streaming Write
go1.15.2 darwin/amd64
Excelize 2.3.1@9316028
go1.15.2 darwin/amd64
Time Cost (s)
Less is better
5.12 Million Cells
0 200 400 600 800 1000 1200 1400 1600 1800
Excelize 2.3.1@9316028 Streaming Write
go1.15.2 darwin/amd64
Excelize 2.3.1@9316028
go1.15.2 darwin/amd64
Memory Usage (MB)
Less is better
https://github.com/xuri/excelize
Processing XML and Spreadsheet in Go
Gopher China Conference
Beijing 2021 6/26 - 6/27
续日

Mais conteúdo relacionado

Mais procurados

[2D1]Elasticsearch 성능 최적화
[2D1]Elasticsearch 성능 최적화[2D1]Elasticsearch 성능 최적화
[2D1]Elasticsearch 성능 최적화NAVER D2
 
Event In JavaScript
Event In JavaScriptEvent In JavaScript
Event In JavaScriptShahDhruv21
 
Introduction to xml
Introduction to xmlIntroduction to xml
Introduction to xmlGtu Booker
 
Modern API Security with JSON Web Tokens
Modern API Security with JSON Web TokensModern API Security with JSON Web Tokens
Modern API Security with JSON Web TokensJonathan LeBlanc
 
Elasticsearch 엘라스틱서치 (검색서비스) 에 대해 알아보자.txt
Elasticsearch 엘라스틱서치 (검색서비스) 에 대해 알아보자.txtElasticsearch 엘라스틱서치 (검색서비스) 에 대해 알아보자.txt
Elasticsearch 엘라스틱서치 (검색서비스) 에 대해 알아보자.txt용진 조
 
Introduction to OpenID Connect
Introduction to OpenID Connect Introduction to OpenID Connect
Introduction to OpenID Connect Nat Sakimura
 
フラットなPHPからフレームワークへ
フラットなPHPからフレームワークへフラットなPHPからフレームワークへ
フラットなPHPからフレームワークへMasao Maeda
 
aggregation and indexing with suitable example using MongoDB.
aggregation and indexing with suitable example using MongoDB.aggregation and indexing with suitable example using MongoDB.
aggregation and indexing with suitable example using MongoDB.bhavesh lande
 
Introduction to Mongodb execution plan and optimizer
Introduction to Mongodb execution plan and optimizerIntroduction to Mongodb execution plan and optimizer
Introduction to Mongodb execution plan and optimizerMydbops
 
IBM: Hey FIDO, Meet Passkey!.pptx
IBM: Hey FIDO, Meet Passkey!.pptxIBM: Hey FIDO, Meet Passkey!.pptx
IBM: Hey FIDO, Meet Passkey!.pptxFIDO Alliance
 
MongoDB .local Toronto 2019: Tips and Tricks for Effective Indexing
MongoDB .local Toronto 2019: Tips and Tricks for Effective IndexingMongoDB .local Toronto 2019: Tips and Tricks for Effective Indexing
MongoDB .local Toronto 2019: Tips and Tricks for Effective IndexingMongoDB
 
Implementing Microservices with NATS
Implementing Microservices with NATSImplementing Microservices with NATS
Implementing Microservices with NATSNATS
 
Secure Coding 101 - OWASP University of Ottawa Workshop
Secure Coding 101 - OWASP University of Ottawa WorkshopSecure Coding 101 - OWASP University of Ottawa Workshop
Secure Coding 101 - OWASP University of Ottawa WorkshopPaul Ionescu
 
Secure Coding principles by example: Build Security In from the start - Carlo...
Secure Coding principles by example: Build Security In from the start - Carlo...Secure Coding principles by example: Build Security In from the start - Carlo...
Secure Coding principles by example: Build Security In from the start - Carlo...Codemotion
 
Building your First gRPC Service
Building your First gRPC ServiceBuilding your First gRPC Service
Building your First gRPC ServiceJessie Barnett
 

Mais procurados (20)

[2D1]Elasticsearch 성능 최적화
[2D1]Elasticsearch 성능 최적화[2D1]Elasticsearch 성능 최적화
[2D1]Elasticsearch 성능 최적화
 
Android local sockets in native code
Android local sockets in native code Android local sockets in native code
Android local sockets in native code
 
Event In JavaScript
Event In JavaScriptEvent In JavaScript
Event In JavaScript
 
Introduction to xml
Introduction to xmlIntroduction to xml
Introduction to xml
 
Pentesting jwt
Pentesting jwtPentesting jwt
Pentesting jwt
 
Modern API Security with JSON Web Tokens
Modern API Security with JSON Web TokensModern API Security with JSON Web Tokens
Modern API Security with JSON Web Tokens
 
Elasticsearch 엘라스틱서치 (검색서비스) 에 대해 알아보자.txt
Elasticsearch 엘라스틱서치 (검색서비스) 에 대해 알아보자.txtElasticsearch 엘라스틱서치 (검색서비스) 에 대해 알아보자.txt
Elasticsearch 엘라스틱서치 (검색서비스) 에 대해 알아보자.txt
 
CSS selectors
CSS selectorsCSS selectors
CSS selectors
 
Introduction to OpenID Connect
Introduction to OpenID Connect Introduction to OpenID Connect
Introduction to OpenID Connect
 
フラットなPHPからフレームワークへ
フラットなPHPからフレームワークへフラットなPHPからフレームワークへ
フラットなPHPからフレームワークへ
 
Json
JsonJson
Json
 
aggregation and indexing with suitable example using MongoDB.
aggregation and indexing with suitable example using MongoDB.aggregation and indexing with suitable example using MongoDB.
aggregation and indexing with suitable example using MongoDB.
 
Introduction to Mongodb execution plan and optimizer
Introduction to Mongodb execution plan and optimizerIntroduction to Mongodb execution plan and optimizer
Introduction to Mongodb execution plan and optimizer
 
IBM: Hey FIDO, Meet Passkey!.pptx
IBM: Hey FIDO, Meet Passkey!.pptxIBM: Hey FIDO, Meet Passkey!.pptx
IBM: Hey FIDO, Meet Passkey!.pptx
 
MongoDB .local Toronto 2019: Tips and Tricks for Effective Indexing
MongoDB .local Toronto 2019: Tips and Tricks for Effective IndexingMongoDB .local Toronto 2019: Tips and Tricks for Effective Indexing
MongoDB .local Toronto 2019: Tips and Tricks for Effective Indexing
 
Javascript
JavascriptJavascript
Javascript
 
Implementing Microservices with NATS
Implementing Microservices with NATSImplementing Microservices with NATS
Implementing Microservices with NATS
 
Secure Coding 101 - OWASP University of Ottawa Workshop
Secure Coding 101 - OWASP University of Ottawa WorkshopSecure Coding 101 - OWASP University of Ottawa Workshop
Secure Coding 101 - OWASP University of Ottawa Workshop
 
Secure Coding principles by example: Build Security In from the start - Carlo...
Secure Coding principles by example: Build Security In from the start - Carlo...Secure Coding principles by example: Build Security In from the start - Carlo...
Secure Coding principles by example: Build Security In from the start - Carlo...
 
Building your First gRPC Service
Building your First gRPC ServiceBuilding your First gRPC Service
Building your First gRPC Service
 

Semelhante a Processing XML and Spreadsheet data in Go (20)

Xml11
Xml11Xml11
Xml11
 
Xml Presentation-1
Xml Presentation-1Xml Presentation-1
Xml Presentation-1
 
Xml
XmlXml
Xml
 
Xml overview
Xml overviewXml overview
Xml overview
 
XML.ppt
XML.pptXML.ppt
XML.ppt
 
Xml
XmlXml
Xml
 
Xml
XmlXml
Xml
 
Internet and Web Technology (CLASS-7) [XML and AJAX] | NIC/NIELIT Web Technology
Internet and Web Technology (CLASS-7) [XML and AJAX] | NIC/NIELIT Web TechnologyInternet and Web Technology (CLASS-7) [XML and AJAX] | NIC/NIELIT Web Technology
Internet and Web Technology (CLASS-7) [XML and AJAX] | NIC/NIELIT Web Technology
 
Xml
XmlXml
Xml
 
2310 b 12
2310 b 122310 b 12
2310 b 12
 
XXE
XXEXXE
XXE
 
Xxe
XxeXxe
Xxe
 
Understanding XML DOM
Understanding XML DOMUnderstanding XML DOM
Understanding XML DOM
 
Xml
XmlXml
Xml
 
Xml intro1
Xml intro1Xml intro1
Xml intro1
 
Dtd
DtdDtd
Dtd
 
Creating Domain Specific Languages in Python
Creating Domain Specific Languages in PythonCreating Domain Specific Languages in Python
Creating Domain Specific Languages in Python
 
XML-Unit 1.ppt
XML-Unit 1.pptXML-Unit 1.ppt
XML-Unit 1.ppt
 
WEB PROGRAMMING
WEB PROGRAMMINGWEB PROGRAMMING
WEB PROGRAMMING
 
Simple xml in .net
Simple xml in .netSimple xml in .net
Simple xml in .net
 

Último

一比一原版田纳西大学毕业证如何办理
一比一原版田纳西大学毕业证如何办理一比一原版田纳西大学毕业证如何办理
一比一原版田纳西大学毕业证如何办理F
 
Russian Call girls in Abu Dhabi 0508644382 Abu Dhabi Call girls
Russian Call girls in Abu Dhabi 0508644382 Abu Dhabi Call girlsRussian Call girls in Abu Dhabi 0508644382 Abu Dhabi Call girls
Russian Call girls in Abu Dhabi 0508644382 Abu Dhabi Call girlsMonica Sydney
 
20240507 QFM013 Machine Intelligence Reading List April 2024.pdf
20240507 QFM013 Machine Intelligence Reading List April 2024.pdf20240507 QFM013 Machine Intelligence Reading List April 2024.pdf
20240507 QFM013 Machine Intelligence Reading List April 2024.pdfMatthew Sinclair
 
Best SEO Services Company in Dallas | Best SEO Agency Dallas
Best SEO Services Company in Dallas | Best SEO Agency DallasBest SEO Services Company in Dallas | Best SEO Agency Dallas
Best SEO Services Company in Dallas | Best SEO Agency DallasDigicorns Technologies
 
Meaning of On page SEO & its process in detail.
Meaning of On page SEO & its process in detail.Meaning of On page SEO & its process in detail.
Meaning of On page SEO & its process in detail.krishnachandrapal52
 
在线制作约克大学毕业证(yu毕业证)在读证明认证可查
在线制作约克大学毕业证(yu毕业证)在读证明认证可查在线制作约克大学毕业证(yu毕业证)在读证明认证可查
在线制作约克大学毕业证(yu毕业证)在读证明认证可查ydyuyu
 
pdfcoffee.com_business-ethics-q3m7-pdf-free.pdf
pdfcoffee.com_business-ethics-q3m7-pdf-free.pdfpdfcoffee.com_business-ethics-q3m7-pdf-free.pdf
pdfcoffee.com_business-ethics-q3m7-pdf-free.pdfJOHNBEBONYAP1
 
20240509 QFM015 Engineering Leadership Reading List April 2024.pdf
20240509 QFM015 Engineering Leadership Reading List April 2024.pdf20240509 QFM015 Engineering Leadership Reading List April 2024.pdf
20240509 QFM015 Engineering Leadership Reading List April 2024.pdfMatthew Sinclair
 
Vip Firozabad Phone 8250092165 Escorts Service At 6k To 30k Along With Ac Room
Vip Firozabad Phone 8250092165 Escorts Service At 6k To 30k Along With Ac RoomVip Firozabad Phone 8250092165 Escorts Service At 6k To 30k Along With Ac Room
Vip Firozabad Phone 8250092165 Escorts Service At 6k To 30k Along With Ac Roommeghakumariji156
 
哪里办理美国迈阿密大学毕业证(本硕)umiami在读证明存档可查
哪里办理美国迈阿密大学毕业证(本硕)umiami在读证明存档可查哪里办理美国迈阿密大学毕业证(本硕)umiami在读证明存档可查
哪里办理美国迈阿密大学毕业证(本硕)umiami在读证明存档可查ydyuyu
 
一比一原版(Curtin毕业证书)科廷大学毕业证原件一模一样
一比一原版(Curtin毕业证书)科廷大学毕业证原件一模一样一比一原版(Curtin毕业证书)科廷大学毕业证原件一模一样
一比一原版(Curtin毕业证书)科廷大学毕业证原件一模一样ayvbos
 
Abu Dhabi Escorts Service 0508644382 Escorts in Abu Dhabi
Abu Dhabi Escorts Service 0508644382 Escorts in Abu DhabiAbu Dhabi Escorts Service 0508644382 Escorts in Abu Dhabi
Abu Dhabi Escorts Service 0508644382 Escorts in Abu DhabiMonica Sydney
 
APNIC Updates presented by Paul Wilson at ARIN 53
APNIC Updates presented by Paul Wilson at ARIN 53APNIC Updates presented by Paul Wilson at ARIN 53
APNIC Updates presented by Paul Wilson at ARIN 53APNIC
 
Indian Escort in Abu DHabi 0508644382 Abu Dhabi Escorts
Indian Escort in Abu DHabi 0508644382 Abu Dhabi EscortsIndian Escort in Abu DHabi 0508644382 Abu Dhabi Escorts
Indian Escort in Abu DHabi 0508644382 Abu Dhabi EscortsMonica Sydney
 
Call girls Service in Ajman 0505086370 Ajman call girls
Call girls Service in Ajman 0505086370 Ajman call girlsCall girls Service in Ajman 0505086370 Ajman call girls
Call girls Service in Ajman 0505086370 Ajman call girlsMonica Sydney
 
原版制作美国爱荷华大学毕业证(iowa毕业证书)学位证网上存档可查
原版制作美国爱荷华大学毕业证(iowa毕业证书)学位证网上存档可查原版制作美国爱荷华大学毕业证(iowa毕业证书)学位证网上存档可查
原版制作美国爱荷华大学毕业证(iowa毕业证书)学位证网上存档可查ydyuyu
 
Ballia Escorts Service Girl ^ 9332606886, WhatsApp Anytime Ballia
Ballia Escorts Service Girl ^ 9332606886, WhatsApp Anytime BalliaBallia Escorts Service Girl ^ 9332606886, WhatsApp Anytime Ballia
Ballia Escorts Service Girl ^ 9332606886, WhatsApp Anytime Balliameghakumariji156
 
Trump Diapers Over Dems t shirts Sweatshirt
Trump Diapers Over Dems t shirts SweatshirtTrump Diapers Over Dems t shirts Sweatshirt
Trump Diapers Over Dems t shirts Sweatshirtrahman018755
 
Russian Escort Abu Dhabi 0503464457 Abu DHabi Escorts
Russian Escort Abu Dhabi 0503464457 Abu DHabi EscortsRussian Escort Abu Dhabi 0503464457 Abu DHabi Escorts
Russian Escort Abu Dhabi 0503464457 Abu DHabi EscortsMonica Sydney
 

Último (20)

一比一原版田纳西大学毕业证如何办理
一比一原版田纳西大学毕业证如何办理一比一原版田纳西大学毕业证如何办理
一比一原版田纳西大学毕业证如何办理
 
Russian Call girls in Abu Dhabi 0508644382 Abu Dhabi Call girls
Russian Call girls in Abu Dhabi 0508644382 Abu Dhabi Call girlsRussian Call girls in Abu Dhabi 0508644382 Abu Dhabi Call girls
Russian Call girls in Abu Dhabi 0508644382 Abu Dhabi Call girls
 
20240507 QFM013 Machine Intelligence Reading List April 2024.pdf
20240507 QFM013 Machine Intelligence Reading List April 2024.pdf20240507 QFM013 Machine Intelligence Reading List April 2024.pdf
20240507 QFM013 Machine Intelligence Reading List April 2024.pdf
 
Best SEO Services Company in Dallas | Best SEO Agency Dallas
Best SEO Services Company in Dallas | Best SEO Agency DallasBest SEO Services Company in Dallas | Best SEO Agency Dallas
Best SEO Services Company in Dallas | Best SEO Agency Dallas
 
Meaning of On page SEO & its process in detail.
Meaning of On page SEO & its process in detail.Meaning of On page SEO & its process in detail.
Meaning of On page SEO & its process in detail.
 
在线制作约克大学毕业证(yu毕业证)在读证明认证可查
在线制作约克大学毕业证(yu毕业证)在读证明认证可查在线制作约克大学毕业证(yu毕业证)在读证明认证可查
在线制作约克大学毕业证(yu毕业证)在读证明认证可查
 
pdfcoffee.com_business-ethics-q3m7-pdf-free.pdf
pdfcoffee.com_business-ethics-q3m7-pdf-free.pdfpdfcoffee.com_business-ethics-q3m7-pdf-free.pdf
pdfcoffee.com_business-ethics-q3m7-pdf-free.pdf
 
20240509 QFM015 Engineering Leadership Reading List April 2024.pdf
20240509 QFM015 Engineering Leadership Reading List April 2024.pdf20240509 QFM015 Engineering Leadership Reading List April 2024.pdf
20240509 QFM015 Engineering Leadership Reading List April 2024.pdf
 
Vip Firozabad Phone 8250092165 Escorts Service At 6k To 30k Along With Ac Room
Vip Firozabad Phone 8250092165 Escorts Service At 6k To 30k Along With Ac RoomVip Firozabad Phone 8250092165 Escorts Service At 6k To 30k Along With Ac Room
Vip Firozabad Phone 8250092165 Escorts Service At 6k To 30k Along With Ac Room
 
哪里办理美国迈阿密大学毕业证(本硕)umiami在读证明存档可查
哪里办理美国迈阿密大学毕业证(本硕)umiami在读证明存档可查哪里办理美国迈阿密大学毕业证(本硕)umiami在读证明存档可查
哪里办理美国迈阿密大学毕业证(本硕)umiami在读证明存档可查
 
一比一原版(Curtin毕业证书)科廷大学毕业证原件一模一样
一比一原版(Curtin毕业证书)科廷大学毕业证原件一模一样一比一原版(Curtin毕业证书)科廷大学毕业证原件一模一样
一比一原版(Curtin毕业证书)科廷大学毕业证原件一模一样
 
Abu Dhabi Escorts Service 0508644382 Escorts in Abu Dhabi
Abu Dhabi Escorts Service 0508644382 Escorts in Abu DhabiAbu Dhabi Escorts Service 0508644382 Escorts in Abu Dhabi
Abu Dhabi Escorts Service 0508644382 Escorts in Abu Dhabi
 
APNIC Updates presented by Paul Wilson at ARIN 53
APNIC Updates presented by Paul Wilson at ARIN 53APNIC Updates presented by Paul Wilson at ARIN 53
APNIC Updates presented by Paul Wilson at ARIN 53
 
Indian Escort in Abu DHabi 0508644382 Abu Dhabi Escorts
Indian Escort in Abu DHabi 0508644382 Abu Dhabi EscortsIndian Escort in Abu DHabi 0508644382 Abu Dhabi Escorts
Indian Escort in Abu DHabi 0508644382 Abu Dhabi Escorts
 
call girls in Anand Vihar (delhi) call me [🔝9953056974🔝] escort service 24X7
call girls in Anand Vihar (delhi) call me [🔝9953056974🔝] escort service 24X7call girls in Anand Vihar (delhi) call me [🔝9953056974🔝] escort service 24X7
call girls in Anand Vihar (delhi) call me [🔝9953056974🔝] escort service 24X7
 
Call girls Service in Ajman 0505086370 Ajman call girls
Call girls Service in Ajman 0505086370 Ajman call girlsCall girls Service in Ajman 0505086370 Ajman call girls
Call girls Service in Ajman 0505086370 Ajman call girls
 
原版制作美国爱荷华大学毕业证(iowa毕业证书)学位证网上存档可查
原版制作美国爱荷华大学毕业证(iowa毕业证书)学位证网上存档可查原版制作美国爱荷华大学毕业证(iowa毕业证书)学位证网上存档可查
原版制作美国爱荷华大学毕业证(iowa毕业证书)学位证网上存档可查
 
Ballia Escorts Service Girl ^ 9332606886, WhatsApp Anytime Ballia
Ballia Escorts Service Girl ^ 9332606886, WhatsApp Anytime BalliaBallia Escorts Service Girl ^ 9332606886, WhatsApp Anytime Ballia
Ballia Escorts Service Girl ^ 9332606886, WhatsApp Anytime Ballia
 
Trump Diapers Over Dems t shirts Sweatshirt
Trump Diapers Over Dems t shirts SweatshirtTrump Diapers Over Dems t shirts Sweatshirt
Trump Diapers Over Dems t shirts Sweatshirt
 
Russian Escort Abu Dhabi 0503464457 Abu DHabi Escorts
Russian Escort Abu Dhabi 0503464457 Abu DHabi EscortsRussian Escort Abu Dhabi 0503464457 Abu DHabi Escorts
Russian Escort Abu Dhabi 0503464457 Abu DHabi Escorts
 

Processing XML and Spreadsheet data in Go

  • 1. Processing XML and Spreadsheet in Go 续 日 Gopher China Conference Beijing 2021 6/26 - 6/27
  • 2. Self Introduction The author of the Excelize - Go language spreadsheet library. Familiar with Go language programming, middleware, and big data solution. Working Experiences Alibaba Group - Software Engineer Baidu Inc. - Software Engineer Qihoo 360 – Server-side Software Engineer GitHub: @xuri Twitter: @xurime Blog: https://xuri.me
  • 3. Agenda Serialize and Deserialize 01 • Document Object Model • Event-driven (Simple API for XML) • Serialize and Deserialize Control Handle Complex XML 02 • Partial Load • Namespace & Entity • Ser/Deserialize Idempotence High Performance Processing 03 • XML Schema Definition • DOM or SAX OOXML Spreadsheets 04 • Excel XML Specification • Charset Encoding • Streaming I/O
  • 5. Document Object Model <?xml version="1.0" encoding="utf-8"?> <Person> <Name>Tom</Name> <Email where="home"> <Addr>tom@example.com</Addr> </Email> </Person> type Person struct { Name string Email struct { Where string `xml:"where,attr"` Addr string } } encoding/xml var p Person if err := xml.Unmarshal([]byte(data), &p); err != nil { fmt.Println(err) } fmt.Printf("%+v\n", p) // {Name:Tom Email:{Where:home Addr:tom@example.com}}
  • 6. XML Finite State Machine 0 start start tag NAME TEXT equal end tag value value end value COMMENT version blank/enter letter digit < ? ?> --> !-- = " " ' ' > blank / letter > blank
  • 7. Unmarshal Unmarshal NewDecoder Decoder.unmarshal Decoder.switchToReader unmarshalPath unmarshalAttr unmarshalInterface unmarshalTextInterface Decoder.RawToken Decoder.pushElement Decoder.pushNs encoding/xml marshal.go typeinfo.go xml.go read.go example & test
  • 8. Go XML Parser type Decoder struct { Strict bool AutoClose []string Entity map[string]string CharsetReader func(charset string, input io.Reader) (io.Reader, error) DefaultSpace string r io.ByteReader t TokenReader buf bytes.Buffer saved *bytes.Buffer stk *stack free *stack needClose bool toClose Name nextToken Token nextByte int ns map[string]string err error line int offset int64 unmarshalDepth int } encoding/xml:xml.go StartElement EndElement CharData Comment ProcInst Directive
  • 9. Event-driven (Simple API for XML) decoder := xml.NewDecoder(strings.NewReader(data)) for { token, _ := decoder.Token() if token == nil { break } switch element := token.(type) { case xml.StartElement: fmt.Printf("%+v\n", element) case xml.EndElement: fmt.Printf("%+v\n", element) } } <?xml version="1.0" encoding="utf-8"?> <Person> <Name>Tom</Name> <Email where="home"> <Addr>tom@example.com</Addr> </Email> </Person> {Name:{Space: Local:Person} Attr:[]} {Name:{Space: Local:Name} Attr:[]} {Name:{Space: Local:Name}} {Name:{Space: Local:Email} Attr:[{Name:{Space: Local:where} Value:home}]} {Name:{Space: Local:Addr} Attr:[]} {Name:{Space: Local:Addr}} {Name:{Space: Local:Email}} {Name:{Space: Local:Person}}
  • 10. Serialize and Deserialize Control switch flag { case "attr": finfo.flags |= fAttr case "cdata": finfo.flags |= fCDATA case "chardata": finfo.flags |= fCharData case "innerxml": finfo.flags |= fInnerXML case "comment": finfo.flags |= fComment case "any": finfo.flags |= fAny case "omitempty": finfo.flags |= fOmitEmpty } encoding/xml:typeinfo.go type Person struct { Name string Email struct { Where string `xml:"where,attr,omitempty"` Addr string } } attribute with the field name in the XML element written as character data, not as an XML element written as character data wrapped in one or more <![CDATA[ ... ]]> tags written verbatim, not subject to the usual marshaling procedure unmatched rule, maps the sub-element to that struct field omitted if the field value is empty
  • 11. Partial Load <?xml version="1.0" encoding="utf-8"?> <Person> <Name>Tom</Name> <Email> <Addr>tom@example.com</Addr> </Email> </Person> type Person struct { Name string Email partialXML } type partialXML struct { Content string `xml:",innerxml"` } var p Person err := xml.Unmarshal([]byte(data), &p) if err != nil { fmt.Println(err) } fmt.Printf("%+v\n", p) {Name:Tom Email:{Content: <Addr>tom@example.com</Addr> }}
  • 13. Datatypes Go Datatypes XML Datatypes string anyType, ENTITY,ID, IDREF, NCName, NMTOKEN, Name, anyURI, duration, language, normalizedString, string, token, xml:lang, xml:space, xml:base,xml:id []string ENTITIES, IDREFS, NMTOKENS, NOTATION xml.Name QName []byte base64Binary, hexBinary, unsignedByte bool boolean byte byte float64 decimal, double, float, int64 int, integer, long, negativeInteger, nonNegativeInteger, nonPositiveInteger, positiveInteger, short uint64 unsignedInt, unsignedLong, unsignedShort time.Time date, dateTime, gDay, gMonth, gMonthDay, gYear, gYearMonth,time anyType anySimpleType all complex types gYearMonth gYear gMonthDay gDay gMonth date time dateTime duration boolean base64Binary hexBinary float double anyURI QName NOTATION decimal string normalizedString integer token long nonNegativeInteger nonPositiveInteger language Name NMTOKEN negativeInteger int unsignedLong positiveInteger NCName NMTOKENS short unsignedInt ID IDREF ENTITY ENTITIES IDREFS byte unsignedShort unsignedByte ur types built-in primitive types built-in derived types complex types derived by restriction Derived by list Derived by extension or restriction
  • 14. Entity XML Entity exp := `<!ENTITY\s+([^\s]+)\s+"([^"]+)">` entities := map[string]string{} d := xml.NewDecoder(strings.NewReader(input)) var rEntity = regexp.MustCompile(exp) for { tok, err := d.Token() if err != nil { break } dir, ok := tok.(xml.Directive) if !ok { continue } fmt.Println(string(dir)) for _, m := range rEntity.FindAllSubmatch(dir,-1) { entities[string(m[1])] = string(m[2]) } } fmt.Println("entities", entities) type Person struct { XMLName xml.Name `xml:"person"` Name string `xml:"name"` Address string `xml:"address"` } entities map[name:Tom email:tom@example.com] Get Entity <?xml version="1.0" encoding="utf-8"?> <!DOCTYPE person[ <!ENTITY name "Tom"> <!ENTITY email "tom@example.com"> ]> <person> <name>&name;</name> <address>&email;</address> </person>
  • 15. Entity XML Entity d = xml.NewDecoder(strings.NewReader(input)) d.Strict = false d.Entity = entities err := d.Decode(&v) if err != nil { fmt.Printf("error: %v", err) return } fmt.Printf("%+v\n", v) {XMLName:{Space: Local:company} Name:Jack Address:Tom} Decode with Entity <?xml version="1.0" encoding="utf-8"?> <!DOCTYPE person[ <!ENTITY name "Tom"> <!ENTITY email "tom@example.com"> ]> <person> <name>&name;</name> <address>&email;</address> </person> type Person struct { XMLName xml.Name `xml:"person"` Name string `xml:"name"` Address string `xml:"address"` }
  • 16. Namespace & Ser/Deserialize Idempotence <?xml version="1.0" encoding="utf-8"?> <person xmlns="http://example.com/default" xmlns:m="http://example.com/main" xmlns:h="http://example.com/home" xmlns:w="http://example.com/work"> <name>Tom</name> <m:email h:addr="HOME" w:addr="WORK" /> </person> type Person struct { XMLName xml.Name `xml:"http://example.com/default person"` Name string `xml:"name"` Email struct { XMLName xml.Name `xml:"http://example.com/main email"` HomeAddr string `xml:"http://example.com/home addr,attr"` WorkAddr string `xml:"http://example.com/work addr,attr"` } // TAG NOT HERE: `xml:"email"` } <person xmlns="http://example.com/default"> <name>Tom</name> <email xmlns="http://example.com/main" xmlns:home="http://example.com/home" home:addr="HOME" xmlns:work="http://example.com/work" work:addr="WORK"></email> </person> Inline Namespace Declare Root Element NS Missing! Namespace Local Name
  • 17. Ser/Deserialize Idempotence encoding/xml:xml.go type Token interface{} type EndElement struct { Name Name } type Name struct { Space, Local string } type Attr struct { Name Name Value string } type StartElement struct { Name Name Attr []Attr } // getRootEleAttr extract root element attributes by // given XML decoder. func getRootEleAttr(d *xml.Decoder) []xml.Attr { tokenIdx := 0 for { token, _ := d.Token() if token == nil { break } switch startElement := token.(type) { case xml.StartElement: tokenIdx++ if tokenIdx == 1 { return startElement.Attr } } } return nil }
  • 18. Ser/Deserialize Idempotence <?xml version="1.0" encoding="utf-8"?> <person xmlns="http://example.com/default" xmlns:m="http://example.com/main" xmlns:h="http://example.com/home" xmlns:w="http://example.com/work"> <name>Tom</name> <m:email h:addr="HOME" w:addr="WORK" /> </person> decoder := xml.NewDecoder(strings.NewReader(data)) marshalXML := "" for { token, _ := decoder.Token() if token == nil { break } switch element := token.(type) { case xml.StartElement: for _, attr := range element.Attr { if element.Name.Local == "person" { colon := "" if attr.Name.Space != "" { colon = ":" } marshalXML += fmt.Sprintf("%s%s%s=\"%s\" ", attr.Name.Space, colon, attr.Name.Local, attr.Value) } } } } fmt.Printf("<person %s>\n", marshalXML) <person xmlns="http://example.com/default" xmlns:m="http://example.com/main" xmlns:h="http://example.com/home" xmlns:w="http://example.com/work" > <name>Tom</name> <email xmlns="http://example.com/main" xmlns:home="http://example.com/home" home:addr="HOME" xmlns:work="http://example.com/work" work:addr="WORK"></email> </person> <?xml version="1.0" encoding="utf-8"?>
  • 20. XML Components Data Model <?xml version="1.0"?> <note xmlns:m="http://example.com/main"> <to>Tom</to> <from>Bob</from> <heading>Reminder</heading> <m:body>Don't forget me this weekend!</m:body> </note> <?xml version="1.0"?> <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:m="http://example.com/main"> <xsd:import namespace="http://example.com/main" schemaLocation="shared.xsd"/> <xs:element name="note"> <xs:complexType> <xs:sequence> <xs:element name="to" type="xs:string"/> <xs:element name="from" type="xs:string"/> <xs:element name="heading" type="xs:string"/> <xs:element name="m:body" use="required"/> </xs:sequence> </xs:complexType> </xs:element> </xs:schema> <?xml version="1.0"?> <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="body" type="xs:string"/> </xs:schema> shared.xsd
  • 21. XML Components Data Model type Note struct { XMLName xml.Name `xml:"note"` To string `xml:"to"` From string `xml:"from"` Heading string `xml:"heading"` Body string `xml:"http://example.com/main body"` } <?xml version="1.0"?> <note xmlns:m="http://example.com/main"> <to>Tom</to> <from>Bob</from> <heading>Reminder</heading> <m:body>Don't forget me this weekend!</m:body> </note>
  • 22. XML Components Data Model XSD: XML Schema Definition Process is a named component and has two additional properties - name and target namespace is an un-named component cmd parser NSResolver proto generator Language Code Attribute Attribute Group ComplexType Element Enumeration FractionDigits Pattern SimpleType Schema List Length Import Include Group Restriction TotalDigits Union WhiteSpace MaxLength MinLength MinExclusive Attribute Attribute Group FieldName FieldType ComplexType Element Group SimpleType Generator SAX Parser Schema Notation Declaration system identifier public identifier Element Declaration scope value constraint nillable substitution group affiliation substitution group exclusions disallowed substitutions abstract Simple Type Declaration facets final variety Attribute Declaration scope value constraint Identity-constraint Declaration identity-constraint category selector fields referenced key Complex Type Declaration derivation method final abstract prohibited substitutions Model Group Definition Attribute Group Definition Attribute Use required value constraint Wildcard namespace constraint process contents Particle min occurs max occurs Model Group compositor notation declarations attribute declarations type definitions element declarations attribute group definitions model group definitions type definitions identity-constraint definitions content type type definition type definition term content type base type definition base type definition base type definition attribute uses attribute wildcard term term particles model group attribute wildcard type definition attribute definitions attribute uses https://github.com/xuri/xgen
  • 23. SAX or DOM SAX Parser DOM Parser Simple API for XML Parsing Document Object Model Event-based parser Stays in a tree structure Low memory usage High memory usage Best for the larger size of XML files Best for the smaller sizes of files Read-only Insert or delete nodes Backward navigation is not possible Backward and forward search is possible A small part of the XML file is only loaded in memory It loads whole XML documents in memory
  • 25. OOXML ISO/IEC 29500 ECMA-376 Specification SVG SOAP ODF OOXML C# C++/CLI MIME XSLT XHTML DITA 0 200 400 600 800 1000 1200 0 1000 2000 3000 4000 5000 6000 7000 Technical Committee effort (days) Specification length (pages) Specification Speed
  • 26. OOXML Specification WordprocessingML SpreadsheetML PresentationML CustomML Vocabularies DrawingML Bibliography Metadata Equations VML (legacy) Relationships Content Types Digital Signatures Markup Languages Open Packaging Convention ZIP XML + Unicode Core Technologies
  • 27. OOXML Specification WordprocessingML SpreadsheetML PresentationML CustomML Vocabularies DrawingML Bibliography Metadata Equations VML (legacy) Relationships Content Types Digital Signatures Markup Languages Open Packaging Convention ZIP XML + Unicode Core Technologies Drawing 9918 Excel 6328 OPC 3147
  • 28. Typical XML of the Cell Over 10589 Elements & Attributes Shared Strings Calc Chain XML Maps Styles Theme Sheets Table Chart Pivot Table Pivot Cache Pivot Records Workbook <c r="D3" s="7" t="s"> <v>0</v> </c> <xf numFmtId="0" fontId="4" fillId="2" borderId="2" xfId="1" applyBorder="1" /> <font> <sz val="11"/> <color theme="0"/> <name val="Calibri"/> <scheme val="minor"/> </font> <fill> <patternFill patternType="solid"> <fgColor theme="4"/> <bgColor theme="4"/> </patternFill> </fill> <a:theme xmlns:a="http://schemas.openxmlformats.org <a:themeElements> <a:clrScheme name="Office"> <a:dk1> <a:sysClr val="windowText" /> </a:dk1> <a:lt1> <a:sysClr val="window" /> </a:lt1> <a:dk2> <a:srgbClr val="1F497D"/> </a:dk2> <a:lt2> <a:srgbClr val="FAF3E8"/> </a:lt2> <a:accent1> <a:srgbClr val="5C93B4"/> </a:accent1> <cellStyleXfs count="12"> <xf numFmtId="0" fontId="0" fillId="0" borderId="0"/> <xf numFmtId="0" fontId="4" fillId="2" borderId="0" applyBorder="1" applyAlignment="1" applyProtection="1"/> </protection> </xf> <cellStyles count="2"> <cellStyle name="Accent1" xfId="1" builtinId="29"/> B C D E Q1 Q2 Revenue 412.52 Start (built-in) <border> <left style="thick"> <color auto="1"/> </left> <right style="thick"> <color auto="1"/> </right> <top style="thick"> <color auto="1"/> </top> <bottom style="thick"> <color auto="1"/> </bottom> <diagonal/> </border>
  • 29. Charset Encoding <?xml version='1.0' encoding='character encoding' standalone='yes|no'?> import ( "golang.org/x/net/html/charset" "golang.org/x/text/encoding" "golang.org/x/text/encoding/charmap" ) decoder = xml.NewDecoder(strings.NewReader(data)) decoder.CharsetReader = charset.NewReaderLabel Ref: https://encoding.spec.whatwg.org Name Labels UTF-8 6 Legacy single-byte encodings 168 Legacy multi-byte Chinese (simplified) encodings 10 Legacy multi-byte Chinese (traditional) encodings 5 Legacy multi-byte Japanese encodings 13 Legacy multi-byte Korean encodings 10 Legacy miscellaneous encodings 16 228 Labels // CharsetReader Decoder from all codepages to UTF-8 func CharsetReader(charset string, input io.Reader) io.Reader { var enc encoding.Encoding for i := range charmap.All { item = charmap.All[i] if strings.EqualFold(sm, nm) { enc = item } } return enc.NewDecoder().Reader(input) } Custom Charset Reader
  • 30. Streaming I/O Common Package Parts Package Relationships Core Properties Digital Signatures Specific Format Parts Office Document Part Relationships XML Part XML Part Part Rels Etc… <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"> <dimension ref="B2"/> <sheetViews> <sheetView tabSelected="1" workbookViewId="0" /> </sheetViews> <sheetFormatPr baseColWidth="10" defaultRowHeight="16" /> <sheetData> <row r="2"> <c r="B2"> <v>123</v> </c> </row> </sheetData> <pageMargins left="0.7" right="0.7" /> </worksheet> A B C 1 2 123 3 4
  • 31. Set Row <sheetData> <row r="2"> <c r="B2"> <v>123</v> </c> </row> </sheetData> func writeCell(buf *bufferedWriter, c xlsxC) { _, _ = buf.WriteString(`<c`) if c.XMLSpace.Value != "" { fmt.Fprintf(buf, ` xml:%s="%s"`, c.XMLSpace.Name.Local, c.XMLSpace.Value) } fmt.Fprintf(buf, ` r="%s"`, c.R) if c.S != 0 { fmt.Fprintf(buf, ` s="%d"`, c.S) } if c.T != "" { fmt.Fprintf(buf, ` t="%s"`, c.T) } _, _ = buf.WriteString(`>`) if c.F != nil { _, _ = buf.WriteString(`<f>`) _ = xml.EscapeText(buf, []byte(c.F.Content)) _, _ = buf.WriteString(`</f>`) } if c.V != "" { _, _ = buf.WriteString(`<v>`) _ = xml.EscapeText(buf, []byte(c.V)) _, _ = buf.WriteString(`</v>`) } _, _ = buf.WriteString(`</c>`) } type StreamWriter struct { File *File Sheet string SheetID int worksheet *xlsxWorksheet rawData bufferedWriter mergeCellsCount int mergeCells string tableParts string }
  • 32. Flush func (sw *StreamWriter) Flush() error { _, _ = sw.rawData.WriteString(`</sheetData>`) bulkAppendFields(&sw.rawData, sw.worksheet, 8, 15) if sw.mergeCellsCount > 0 { sw.mergeCells = fmt.Sprintf(`<mergeCells count="%d">%s</mergeCells>`, sw.mergeCellsCount, sw.mergeCells) } _, _ = sw.rawData.WriteString(sw.mergeCells) bulkAppendFields(&sw.rawData, sw.worksheet, 17, 38) _, _ = sw.rawData.WriteString(sw.tableParts) bulkAppendFields(&sw.rawData, sw.worksheet, 40, 40) _, _ = sw.rawData.WriteString(`</worksheet>`) if err := sw.rawData.Flush(); err != nil { return err } // ... } type StreamWriter struct { File *File Sheet string SheetID int worksheet *xlsxWorksheet rawData bufferedWriter mergeCellsCount int mergeCells string tableParts string } Generate XML Part
  • 33. Save Spreadsheet func (f *File) WriteToBuffer() (*bytes.Buffer, error) { buf := new(bytes.Buffer) zw := zip.NewWriter(buf) f.calcChainWriter() f.commentsWriter() f.contentTypesWriter() f.drawingsWriter() f.vmlDrawingWriter() f.workBookWriter() f.workSheetWriter() f.relsWriter() f.sharedStringsWriter() f.styleSheetWriter() for path, stream := range f.streams { // Save stream data stream.rawData.Close() } for path, content := range f.XLSX { // Save preserve data } } XML Part to ZIP Common Package Parts Package Relationships Specific Format Parts Office Document Part Relationships Style / Theme / Calc Chain Chart / PivotTable / Comments Part Rels Etc… Worksheets / SST Workbook
  • 34. Performance 102400 Row x 50 Columns, 6 Chars / Cell 0 5 10 15 20 25 30 Excelize 2.3.1@9316028 Streaming Write go1.15.2 darwin/amd64 Excelize 2.3.1@9316028 go1.15.2 darwin/amd64 Time Cost (s) Less is better 5.12 Million Cells 0 200 400 600 800 1000 1200 1400 1600 1800 Excelize 2.3.1@9316028 Streaming Write go1.15.2 darwin/amd64 Excelize 2.3.1@9316028 go1.15.2 darwin/amd64 Memory Usage (MB) Less is better https://github.com/xuri/excelize
  • 35. Processing XML and Spreadsheet in Go Gopher China Conference Beijing 2021 6/26 - 6/27 续日