package xml

import "encoding/xml"

Package xml implements a simple XML 1.0 parser that understands XML name spaces.

Example (CustomMarshalXML)
package main

import (
	"encoding/xml"
	"fmt"
	"log"
	"strings"
)

type Animal int

const (
	Unknown Animal = iota
	Gopher
	Zebra
)

func (a *Animal) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	var s string
	if err := d.DecodeElement(&s, &start); err != nil {
		return err
	}
	switch strings.ToLower(s) {
	default:
		*a = Unknown
	case "gopher":
		*a = Gopher
	case "zebra":
		*a = Zebra
	}

	return nil
}

func (a Animal) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
	var s string
	switch a {
	default:
		s = "unknown"
	case Gopher:
		s = "gopher"
	case Zebra:
		s = "zebra"
	}
	return e.EncodeElement(s, start)
}

func main() {
	blob := `
	<animals>
		<animal>gopher</animal>
		<animal>armadillo</animal>
		<animal>zebra</animal>
		<animal>unknown</animal>
		<animal>gopher</animal>
		<animal>bee</animal>
		<animal>gopher</animal>
		<animal>zebra</animal>
	</animals>`
	var zoo struct {
		Animals []Animal `xml:"animal"`
	}
	if err := xml.Unmarshal([]byte(blob), &zoo); err != nil {
		log.Fatal(err)
	}

	census := make(map[Animal]int)
	for _, animal := range zoo.Animals {
		census[animal] += 1
	}

	fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras:  %d\n* Unknown: %d\n",
		census[Gopher], census[Zebra], census[Unknown])

}

Output:

Zoo Census:
* Gophers: 3
* Zebras:  2
* Unknown: 3
Example (TextMarshalXML)
package main

import (
	"encoding/xml"
	"fmt"
	"log"
	"strings"
)

type Size int

const (
	Unrecognized Size = iota
	Small
	Large
)

func (s *Size) UnmarshalText(text []byte) error {
	switch strings.ToLower(string(text)) {
	default:
		*s = Unrecognized
	case "small":
		*s = Small
	case "large":
		*s = Large
	}
	return nil
}

func (s Size) MarshalText() ([]byte, error) {
	var name string
	switch s {
	default:
		name = "unrecognized"
	case Small:
		name = "small"
	case Large:
		name = "large"
	}
	return []byte(name), nil
}

func main() {
	blob := `
	<sizes>
		<size>small</size>
		<size>regular</size>
		<size>large</size>
		<size>unrecognized</size>
		<size>small</size>
		<size>normal</size>
		<size>small</size>
		<size>large</size>
	</sizes>`
	var inventory struct {
		Sizes []Size `xml:"size"`
	}
	if err := xml.Unmarshal([]byte(blob), &inventory); err != nil {
		log.Fatal(err)
	}

	counts := make(map[Size]int)
	for _, size := range inventory.Sizes {
		counts[size] += 1
	}

	fmt.Printf("Inventory Counts:\n* Small:        %d\n* Large:        %d\n* Unrecognized: %d\n",
		counts[Small], counts[Large], counts[Unrecognized])

}

Output:

Inventory Counts:
* Small:        3
* Large:        2
* Unrecognized: 3

Index

Examples

Constants

const (
	// Header is a generic XML header suitable for use with the output of [Marshal].
	// This is not automatically added to any output of this package,
	// it is provided as a convenience.
	Header = `<?xml version="1.0" encoding="UTF-8"?>` + "\n"
)

Variables

var HTMLAutoClose []string = htmlAutoClose

HTMLAutoClose is the set of HTML elements that should be considered to close automatically.

See the [Decoder.Strict] and [Decoder.Entity] fields' documentation.

var HTMLEntity map[string]string = htmlEntity

HTMLEntity is an entity map containing translations for the standard HTML entity characters.

See the [Decoder.Strict] and [Decoder.Entity] fields' documentation.

Functions

func Escape

func Escape(w io.Writer, s []byte)

Escape is like EscapeText but omits the error return value. It is provided for backwards compatibility with Go 1.0. Code targeting Go 1.1 or later should use EscapeText.

func EscapeText

func EscapeText(w io.Writer, s []byte) error

EscapeText writes to w the properly escaped XML equivalent of the plain text data s.

func Marshal

func Marshal(v any) ([]byte, error)

Marshal returns the XML encoding of v.

Marshal handles an array or slice by marshaling each of the elements. Marshal handles a pointer by marshaling the value it points at or, if the pointer is nil, by writing nothing. Marshal handles an interface value by marshaling the value it contains or, if the interface value is nil, by writing nothing. Marshal handles all other data by writing one or more XML elements containing the data.

The name for the XML elements is taken from, in order of preference:

The XML element for a struct contains marshaled elements for each of the exported fields of the struct, with these exceptions:

If a field uses a tag "a>b>c", then the element c will be nested inside parent elements a and b. Fields that appear next to each other that name the same parent will be enclosed in one XML element.

If the XML name for a struct field is defined by both the field tag and the struct's XMLName field, the names must match.

See MarshalIndent for an example.

Marshal will return an error if asked to marshal a channel, function, or map.

func MarshalIndent

func MarshalIndent(v any, prefix, indent string) ([]byte, error)

MarshalIndent works like Marshal, but each XML element begins on a new indented line that starts with prefix and is followed by one or more copies of indent according to the nesting depth.

Example
package main

import (
	"encoding/xml"
	"fmt"
	"os"
)

func main() {
	type Address struct {
		City, State string
	}
	type Person struct {
		XMLName   xml.Name `xml:"person"`
		Id        int      `xml:"id,attr"`
		FirstName string   `xml:"name>first"`
		LastName  string   `xml:"name>last"`
		Age       int      `xml:"age"`
		Height    float32  `xml:"height,omitempty"`
		Married   bool
		Address
		Comment string `xml:",comment"`
	}

	v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42}
	v.Comment = " Need more details. "
	v.Address = Address{"Hanga Roa", "Easter Island"}

	output, err := xml.MarshalIndent(v, "  ", "    ")
	if err != nil {
		fmt.Printf("error: %v\n", err)
	}

	os.Stdout.Write(output)
}

Output:

  <person id="13">
      <name>
          <first>John</first>
          <last>Doe</last>
      </name>
      <age>42</age>
      <Married>false</Married>
      <City>Hanga Roa</City>
      <State>Easter Island</State>
      <!-- Need more details. -->
  </person>

func Unmarshal

func Unmarshal(data []byte, v any) error

Unmarshal parses the XML-encoded data and stores the result in the value pointed to by v, which must be an arbitrary struct, slice, or string. Well-formed data that does not fit into v is discarded.

Because Unmarshal uses the reflect package, it can only assign to exported (upper case) fields. Unmarshal uses a case-sensitive comparison to match XML element names to tag values and struct field names.

Unmarshal maps an XML element to a struct using the following rules. In the rules, the tag of a field refers to the value associated with the key 'xml' in the struct field's tag (see the example above).

If Unmarshal encounters a field type that implements the Unmarshaler interface, Unmarshal calls its UnmarshalXML method to produce the value from the XML element. Otherwise, if the value implements encoding.TextUnmarshaler, Unmarshal calls that value's UnmarshalText method.

Unmarshal maps an XML element to a string or []byte by saving the concatenation of that element's character data in the string or []byte. The saved []byte is never nil.

Unmarshal maps an attribute value to a string or []byte by saving the value in the string or slice.

Unmarshal maps an attribute value to an Attr by saving the attribute, including its name, in the Attr.

Unmarshal maps an XML element or attribute value to a slice by extending the length of the slice and mapping the element or attribute to the newly created value.

Unmarshal maps an XML element or attribute value to a bool by setting it to the boolean value represented by the string. Whitespace is trimmed and ignored.

Unmarshal maps an XML element or attribute value to an integer or floating-point field by setting the field to the result of interpreting the string value in decimal. There is no check for overflow. Whitespace is trimmed and ignored.

Unmarshal maps an XML element to a Name by recording the element name.

Unmarshal maps an XML element to a pointer by setting the pointer to a freshly allocated value and then mapping the element to that value.

A missing element or empty attribute value will be unmarshaled as a zero value. If the field is a slice, a zero value will be appended to the field. Otherwise, the field will be set to its zero value.

Example

This example demonstrates unmarshaling an XML excerpt into a value with some preset fields. Note that the Phone field isn't modified and that the XML <Company> element is ignored. Also, the Groups field is assigned considering the element path provided in its tag.

package main

import (
	"encoding/xml"
	"fmt"
)

func main() {
	type Email struct {
		Where string `xml:"where,attr"`
		Addr  string
	}
	type Address struct {
		City, State string
	}
	type Result struct {
		XMLName xml.Name `xml:"Person"`
		Name    string   `xml:"FullName"`
		Phone   string
		Email   []Email
		Groups  []string `xml:"Group>Value"`
		Address
	}
	v := Result{Name: "none", Phone: "none"}

	data := `
		<Person>
			<FullName>Grace R. Emlin</FullName>
			<Company>Example Inc.</Company>
			<Email where="home">
				<Addr>gre@example.com</Addr>
			</Email>
			<Email where='work'>
				<Addr>gre@work.com</Addr>
			</Email>
			<Group>
				<Value>Friends</Value>
				<Value>Squash</Value>
			</Group>
			<City>Hanga Roa</City>
			<State>Easter Island</State>
		</Person>
	`
	err := xml.Unmarshal([]byte(data), &v)
	if err != nil {
		fmt.Printf("error: %v", err)
		return
	}
	fmt.Printf("XMLName: %#v\n", v.XMLName)
	fmt.Printf("Name: %q\n", v.Name)
	fmt.Printf("Phone: %q\n", v.Phone)
	fmt.Printf("Email: %v\n", v.Email)
	fmt.Printf("Groups: %v\n", v.Groups)
	fmt.Printf("Address: %v\n", v.Address)
}

Output:

XMLName: xml.Name{Space:"", Local:"Person"}
Name: "Grace R. Emlin"
Phone: "none"
Email: [{home gre@example.com} {work gre@work.com}]
Groups: [Friends Squash]
Address: {Hanga Roa Easter Island}

Types

type Attr

type Attr struct {
	Name  Name
	Value string
}

An Attr represents an attribute in an XML element (Name=Value).

type CharData

type CharData []byte

A CharData represents XML character data (raw text), in which XML escape sequences have been replaced by the characters they represent.

func (CharData) Copy

func (c CharData) Copy() CharData

Copy creates a new copy of CharData.

type Comment

type Comment []byte

A Comment represents an XML comment of the form <!--comment-->. The bytes do not include the <!-- and --> comment markers.

func (Comment) Copy

func (c Comment) Copy() Comment

Copy creates a new copy of Comment.

type Decoder

type Decoder struct {
	// Strict defaults to true, enforcing the requirements
	// of the XML specification.
	// If set to false, the parser allows input containing common
	// mistakes:
	//	* If an element is missing an end tag, the parser invents
	//	  end tags as necessary to keep the return values from Token
	//	  properly balanced.
	//	* In attribute values and character data, unknown or malformed
	//	  character entities (sequences beginning with &) are left alone.
	//
	// Setting:
	//
	//	d.Strict = false
	//	d.AutoClose = xml.HTMLAutoClose
	//	d.Entity = xml.HTMLEntity
	//
	// creates a parser that can handle typical HTML.
	//
	// Strict mode does not enforce the requirements of the XML name spaces TR.
	// In particular it does not reject name space tags using undefined prefixes.
	// Such tags are recorded with the unknown prefix as the name space URL.
	Strict bool

	// When Strict == false, AutoClose indicates a set of elements to
	// consider closed immediately after they are opened, regardless
	// of whether an end element is present.
	AutoClose []string

	// Entity can be used to map non-standard entity names to string replacements.
	// The parser behaves as if these standard mappings are present in the map,
	// regardless of the actual map content:
	//
	//	"lt": "<",
	//	"gt": ">",
	//	"amp": "&",
	//	"apos": "'",
	//	"quot": `"`,
	Entity map[string]string

	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// non-UTF-8 charset into UTF-8. If CharsetReader is nil or
	// returns an error, parsing stops with an error. One of the
	// CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)

	// DefaultSpace sets the default name space used for unadorned tags,
	// as if the entire XML stream were wrapped in an element containing
	// the attribute xmlns="DefaultSpace".
	DefaultSpace string
	// contains filtered or unexported fields
}

A Decoder represents an XML parser reading a particular input stream. The parser assumes that its input is encoded in UTF-8.

func NewDecoder

func NewDecoder(r io.Reader) *Decoder

NewDecoder creates a new XML parser reading from r. If r does not implement io.ByteReader, NewDecoder will do its own buffering.

func NewTokenDecoder

func NewTokenDecoder(t TokenReader) *Decoder

NewTokenDecoder creates a new XML parser using an underlying token stream.

func (*Decoder) Decode

func (d *Decoder) Decode(v any) error

Decode works like Unmarshal, except it reads the decoder stream to find the start element.

func (*Decoder) DecodeElement

func (d *Decoder) DecodeElement(v any, start *StartElement) error

DecodeElement works like Unmarshal except that it takes a pointer to the start XML element to decode into v. It is useful when a client reads some raw XML tokens itself but also wants to defer to Unmarshal for some elements.

func (*Decoder) InputOffset

func (d *Decoder) InputOffset() int64

InputOffset returns the input stream byte offset of the current decoder position. The offset gives the location of the end of the most recently returned token and the beginning of the next token.

func (*Decoder) InputPos

func (d *Decoder) InputPos() (line, column int)

InputPos returns the line of the current decoder position and the 1 based input position of the line. The position gives the location of the end of the most recently returned token.

func (*Decoder) RawToken

func (d *Decoder) RawToken() (Token, error)

RawToken is like Decoder.Token but does not verify that start and end elements match and does not translate name space prefixes to their corresponding URLs.

func (*Decoder) Skip

func (d *Decoder) Skip() error

Skip reads tokens until it has consumed the end element matching the most recent start element already consumed, skipping nested structures. It returns nil if it finds an end element matching the start element; otherwise it returns an error describing the problem.

func (*Decoder) Token

func (d *Decoder) Token() (Token, error)

Token returns the next XML token in the input stream. At the end of the input stream, Token returns nil, io.EOF.

Slices of bytes in the returned token data refer to the parser's internal buffer and remain valid only until the next call to Token. To acquire a copy of the bytes, call CopyToken or the token's Copy method.

Token expands self-closing elements such as <br> into separate start and end elements returned by successive calls.

Token guarantees that the StartElement and EndElement tokens it returns are properly nested and matched: if Token encounters an unexpected end element or EOF before all expected end elements, it will return an error.

If [Decoder.CharsetReader] is called and returns an error, the error is wrapped and returned.

Token implements XML name spaces as described by https://www.w3.org/TR/REC-xml-names/. Each of the Name structures contained in the Token has the Space set to the URL identifying its name space when known. If Token encounters an unrecognized name space prefix, it uses the prefix as the Space rather than report an error.

type Directive

type Directive []byte

A Directive represents an XML directive of the form <!text>. The bytes do not include the <! and > markers.

func (Directive) Copy

func (d Directive) Copy() Directive

Copy creates a new copy of Directive.

type Encoder

type Encoder struct {
	// contains filtered or unexported fields
}

An Encoder writes XML data to an output stream.

Example
package main

import (
	"encoding/xml"
	"fmt"
	"os"
)

func main() {
	type Address struct {
		City, State string
	}
	type Person struct {
		XMLName   xml.Name `xml:"person"`
		Id        int      `xml:"id,attr"`
		FirstName string   `xml:"name>first"`
		LastName  string   `xml:"name>last"`
		Age       int      `xml:"age"`
		Height    float32  `xml:"height,omitempty"`
		Married   bool
		Address
		Comment string `xml:",comment"`
	}

	v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42}
	v.Comment = " Need more details. "
	v.Address = Address{"Hanga Roa", "Easter Island"}

	enc := xml.NewEncoder(os.Stdout)
	enc.Indent("  ", "    ")
	if err := enc.Encode(v); err != nil {
		fmt.Printf("error: %v\n", err)
	}

}

Output:

  <person id="13">
      <name>
          <first>John</first>
          <last>Doe</last>
      </name>
      <age>42</age>
      <Married>false</Married>
      <City>Hanga Roa</City>
      <State>Easter Island</State>
      <!-- Need more details. -->
  </person>

func NewEncoder

func NewEncoder(w io.Writer) *Encoder

NewEncoder returns a new encoder that writes to w.

func (*Encoder) Close

func (enc *Encoder) Close() error

Close the Encoder, indicating that no more data will be written. It flushes any buffered XML to the underlying writer and returns an error if the written XML is invalid (e.g. by containing unclosed elements).

func (*Encoder) Encode

func (enc *Encoder) Encode(v any) error

Encode writes the XML encoding of v to the stream.

See the documentation for Marshal for details about the conversion of Go values to XML.

Encode calls Encoder.Flush before returning.

func (*Encoder) EncodeElement

func (enc *Encoder) EncodeElement(v any, start StartElement) error

EncodeElement writes the XML encoding of v to the stream, using start as the outermost tag in the encoding.

See the documentation for Marshal for details about the conversion of Go values to XML.

EncodeElement calls Encoder.Flush before returning.

func (*Encoder) EncodeToken

func (enc *Encoder) EncodeToken(t Token) error

EncodeToken writes the given XML token to the stream. It returns an error if StartElement and EndElement tokens are not properly matched.

EncodeToken does not call Encoder.Flush, because usually it is part of a larger operation such as Encoder.Encode or Encoder.EncodeElement (or a custom Marshaler's MarshalXML invoked during those), and those will call Flush when finished. Callers that create an Encoder and then invoke EncodeToken directly, without using Encode or EncodeElement, need to call Flush when finished to ensure that the XML is written to the underlying writer.

EncodeToken allows writing a ProcInst with Target set to "xml" only as the first token in the stream.

func (*Encoder) Flush

func (enc *Encoder) Flush() error

Flush flushes any buffered XML to the underlying writer. See the Encoder.EncodeToken documentation for details about when it is necessary.

func (*Encoder) Indent

func (enc *Encoder) Indent(prefix, indent string)

Indent sets the encoder to generate XML in which each element begins on a new indented line that starts with prefix and is followed by one or more copies of indent according to the nesting depth.

type EndElement

type EndElement struct {
	Name Name
}

An EndElement represents an XML end element.

type Marshaler

type Marshaler interface {
	MarshalXML(e *Encoder, start StartElement) error
}

Marshaler is the interface implemented by objects that can marshal themselves into valid XML elements.

MarshalXML encodes the receiver as zero or more XML elements. By convention, arrays or slices are typically encoded as a sequence of elements, one per entry. Using start as the element tag is not required, but doing so will enable Unmarshal to match the XML elements to the correct struct field. One common implementation strategy is to construct a separate value with a layout corresponding to the desired XML and then to encode it using e.EncodeElement. Another common strategy is to use repeated calls to e.EncodeToken to generate the XML output one token at a time. The sequence of encoded tokens must make up zero or more valid XML elements.

type MarshalerAttr

type MarshalerAttr interface {
	MarshalXMLAttr(name Name) (Attr, error)
}

MarshalerAttr is the interface implemented by objects that can marshal themselves into valid XML attributes.

MarshalXMLAttr returns an XML attribute with the encoded value of the receiver. Using name as the attribute name is not required, but doing so will enable Unmarshal to match the attribute to the correct struct field. If MarshalXMLAttr returns the zero attribute Attr{}, no attribute will be generated in the output. MarshalXMLAttr is used only for struct fields with the "attr" option in the field tag.

type Name

type Name struct {
	Space, Local string
}

A Name represents an XML name (Local) annotated with a name space identifier (Space). In tokens returned by Decoder.Token, the Space identifier is given as a canonical URL, not the short prefix used in the document being parsed.

type ProcInst

type ProcInst struct {
	Target string
	Inst   []byte
}

A ProcInst represents an XML processing instruction of the form <?target inst?>

func (ProcInst) Copy

func (p ProcInst) Copy() ProcInst

Copy creates a new copy of ProcInst.

type StartElement

type StartElement struct {
	Name Name
	Attr []Attr
}

A StartElement represents an XML start element.

func (StartElement) Copy

func (e StartElement) Copy() StartElement

Copy creates a new copy of StartElement.

func (StartElement) End

func (e StartElement) End() EndElement

End returns the corresponding XML end element.

type SyntaxError

type SyntaxError struct {
	Msg  string
	Line int
}

A SyntaxError represents a syntax error in the XML input stream.

func (*SyntaxError) Error

func (e *SyntaxError) Error() string

type TagPathError

type TagPathError struct {
	Struct       reflect.Type
	Field1, Tag1 string
	Field2, Tag2 string
}

A TagPathError represents an error in the unmarshaling process caused by the use of field tags with conflicting paths.

func (*TagPathError) Error

func (e *TagPathError) Error() string

type Token

type Token any

A Token is an interface holding one of the token types: StartElement, EndElement, CharData, Comment, ProcInst, or Directive.

func CopyToken

func CopyToken(t Token) Token

CopyToken returns a copy of a Token.

type TokenReader

type TokenReader interface {
	Token() (Token, error)
}

A TokenReader is anything that can decode a stream of XML tokens, including a Decoder.

When Token encounters an error or end-of-file condition after successfully reading a token, it returns the token. It may return the (non-nil) error from the same call or return the error (and a nil token) from a subsequent call. An instance of this general case is that a TokenReader returning a non-nil token at the end of the token stream may return either io.EOF or a nil error. The next Read should return nil, io.EOF.

Implementations of Token are discouraged from returning a nil token with a nil error. Callers should treat a return of nil, nil as indicating that nothing happened; in particular it does not indicate EOF.

type UnmarshalError

type UnmarshalError string

An UnmarshalError represents an error in the unmarshaling process.

func (UnmarshalError) Error

func (e UnmarshalError) Error() string

type Unmarshaler

type Unmarshaler interface {
	UnmarshalXML(d *Decoder, start StartElement) error
}

Unmarshaler is the interface implemented by objects that can unmarshal an XML element description of themselves.

UnmarshalXML decodes a single XML element beginning with the given start element. If it returns an error, the outer call to Unmarshal stops and returns that error. UnmarshalXML must consume exactly one XML element. One common implementation strategy is to unmarshal into a separate value with a layout matching the expected XML using d.DecodeElement, and then to copy the data from that value into the receiver. Another common strategy is to use d.Token to process the XML object one token at a time. UnmarshalXML may not use d.RawToken.

type UnmarshalerAttr

type UnmarshalerAttr interface {
	UnmarshalXMLAttr(attr Attr) error
}

UnmarshalerAttr is the interface implemented by objects that can unmarshal an XML attribute description of themselves.

UnmarshalXMLAttr decodes a single XML attribute. If it returns an error, the outer call to Unmarshal stops and returns that error. UnmarshalXMLAttr is used only for struct fields with the "attr" option in the field tag.

type UnsupportedTypeError

type UnsupportedTypeError struct {
	Type reflect.Type
}

UnsupportedTypeError is returned when Marshal encounters a type that cannot be converted into XML.

func (*UnsupportedTypeError) Error

func (e *UnsupportedTypeError) Error() string