Phillip Trelford's Array

POKE 36879,255

F# XML Comparison (XElement vs XmlDocument vs XmlReader/XmlWriter vs Discriminated Unions)

    Find a pragmatic way to process smaller XML documents and fragments from the following code examples of:
  • string concatenation and string parsing
  • XML DOM with XmlDocument
  • Reading XML with XmlReader
  • Linq to XML with XElement
  • Element tree with F# Discriminated Unions

    RSS Test Fragment

     

    <item>
      <title>Space Exploration</title>
      <link>http://liftoff.msfc.nasa.gov/</link>
      <description>
        Sky watchers in Europe, Asia, and parts of Alaska and Canada
        will experience a partial eclipse of the Sun on Saturday, May 31.
      </description>
    </item>
    

     

    XML Writing Examples

String concatenation

// Builds the <item> fragment by joining literal tags with the title, link and
// description values. The values are inserted exactly as given: nothing here
// escapes XML special characters such as '<' or '&'.
"<item>\r\n" +
"\t<title>" + title + "</title>\r\n" +
"\t<link>" + link + "</link>\r\n" +
"\t<description>" + description + "</description>\r\n" + 
"</item>\r\n" 

 

XmlWriter

open System.Xml
// Build the <item> fragment with a forward-only XmlWriter backed by a StringBuilder.
let output = StringBuilder()
use writer = XmlWriter.Create(output)
writer.WriteStartElement("item")
// The child elements are written in this order: title, link, description.
[ "title", title
  "link", link
  "description", description ]
|> List.iter (fun (name, value) -> writer.WriteElementString(name, value))
writer.WriteEndElement()
// The writer is closed before the accumulated text is read back.
writer.Close()
output.ToString()

 

XmlDocument

// Build the <item> fragment in memory as an XmlDocument DOM tree.
let doc = new XmlDocument()
let item = doc.AppendChild(doc.CreateElement("item"))
/// Creates an element called name, sets its inner text to value and
/// appends it as the last child of item.
let Append name value =
    let child = doc.CreateElement name
    child.InnerText <- value
    item.AppendChild child |> ignore
// Children are appended in this order: title, link, description.
[ "title", title
  "link", link
  "description", description ]
|> List.iter (fun (name, value) -> Append name value)
doc.OuterXml

 

XElement

/// Subclass of System.Xml.Linq.XElement whose constructor takes the element
/// name as a plain string and the content as a ParamArray of objects.
/// The string name is converted to an XName by calling XName.op_Implicit
/// explicitly before being handed to the base constructor.
type XElement (name:string, [<ParamArray>] values:obj []) = 
    inherit System.Xml.Linq.XElement
        (System.Xml.Linq.XName.op_Implicit(name), values)     

 

// Build the <item> fragment with nested XElement constructor calls; the
// nesting of the calls mirrors the nesting of the resulting elements.
let item = 
    XElement("item", 
        XElement("title", title),
        XElement("link", link),
        XElement("description", description))
// Render the element tree as text.
item.ToString()
  

 

F# Tree and XmlWriter 

/// F# Element Tree
/// F# Element Tree
type Xml = 
    /// An element: its name, its text value and its child elements.
    | Element of string * string * Xml seq
    /// Writes this element to writer: start tag, text value, every child
    /// in sequence order, then the end tag.
    member this.WriteContentTo(writer:XmlWriter) =
        let rec emit (Element (name, value, children)) =
            writer.WriteStartElement(name)
            writer.WriteString(value)
            for child in children do
                emit child
            writer.WriteEndElement()
        emit this
    /// Renders the tree as indented XML text.
    override this.ToString() =
        let output = StringBuilder()
        // The writer is disposed by `using` before the text is read back.
        using (new XmlTextWriter(new StringWriter(output),
                Formatting=Formatting.Indented))
            (fun writer -> this.WriteContentTo writer)
        output.ToString()
// Build the RSS item as an Xml tree: an empty-valued parent with three leaves.
let item =
    // A leaf carries a text value and has no children.
    let leaf name value = Element(name, value, [])
    Element("item", "",
        [ leaf "title" title
          leaf "link" link
          leaf "description" description ])
// Render the tree as text.
item.ToString()

 

XML Writing Comparison Summary Table (lower times are better)

Technique Time Remarks
string concat 11 Concise, fastest, but strings not escaped
XmlWriter 31 Verbose
XmlDocument 39 Verbose
XElement 44 Concise
F# Tree 24 Concise

 

XML Reading Examples

Event based string parser

/// Example event based XML parser (a bit like SAX)
/// Example event based XML parser (a bit like SAX)
type XmlEvent =
    /// A start tag, with the trimmed text that follows it up to the next tag.
    | Element of string * string
    /// An end tag (also raised straight after an empty tag such as <br/>).
    | EndElement of string
    /// Scans xml for '<' and '>' characters, pairs them up into tags in
    /// document order and calls f once per event:
    ///   - "<name/>"   -> Element(name, "") followed by EndElement(name)
    ///   - "</name>"   -> EndElement(name)
    ///   - "<?...?>"   -> ignored
    ///   - anything else -> Element(tag, text up to the next tag, trimmed)
    /// The text between '<' and '>' is used verbatim as the tag name.
    /// If the input contains no '>' at all (for example an empty string),
    /// f is never called.
    static member Parse (xml:string) f =
        // Positions of every '<' and '>'; both lists come out in reverse order.
        let opens, closes =
            [0..(xml.Length-1)] |> Seq.fold (fun (xs,ys) i ->
                match xml.Chars(i) with
                | '<' -> (i::xs,ys)
                | '>' -> (xs,i::ys)
                | _ -> (xs,ys)
            ) ([],[])
        match closes with
        | [] ->
            // No tag was closed anywhere in the input: nothing to report.
            ()
        | lastClose :: _ ->
            // The text after the final tag runs to the end of the input.
            let lastValue = (lastClose, xml.Length)
            // (start, end) index pairs for each tag, in document order.
            let tags = Seq.zip (opens |> List.rev) (closes |> List.rev)
            // For each tag, the span between its '>' and the next tag's '<'.
            let values =
                Seq.append
                    (Seq.pairwise tags
                        |> Seq.map (fun ((_,end1),(start2,_)) -> (end1,start2)))
                    [lastValue]
            Seq.zip tags values
            |> Seq.iter (fun ((tagStart,tagEnd),(valStart,valEnd)) ->
                let (|EmptyTag|_|) (tag:string) =
                    if tag.EndsWith("/") then
                        Some(tag.Substring(0,tag.Length-1)) else None
                let (|EndTag|_|) (tag:string) =
                    if tag.StartsWith("/") then
                        Some(tag.Substring(1,tag.Length-1)) else None
                let (|ProcessingInstruction|_|) (tag:string) =
                    if tag.StartsWith("?") && tag.EndsWith("?") then
                        Some(tag.Substring(1, tag.Length-2)) else None
                // Both substrings exclude the delimiting '<' / '>' characters.
                let tag = xml.Substring(tagStart+1, tagEnd-(tagStart+1))
                let value = xml.Substring(valStart+1, valEnd-(valStart+1))
                match tag with
                | EmptyTag name -> f (Element(name,"")); f(EndElement(name))
                | EndTag name -> f (EndElement(name))
                | ProcessingInstruction _ -> ()
                | _ -> f (Element(tag,value.Trim()))
            )
/// The three text fields read from an RSS <item>. Every field is an option
/// and is mutable, so it can be assigned after the record has been created.
type RssItem =
    { mutable Title : string option
      mutable Link : string option
      mutable Description : string option }
    /// A record with all three fields set to None.
    static member Empty =
        { Title = None
          Link = None
          Description = None }
// Record that the parser callbacks below fill in field by field.
let item = RssItem.Empty
/// Stores value in the field of item that corresponds to the element name;
/// any other element name is ignored.
let MatchElement (name,value) =
    if name = "title" then item.Title <- Some(value)
    elif name = "link" then item.Link <- Some(value)
    elif name = "description" then item.Description <- Some(value)
    else ()
// Names of the elements currently open, innermost on top.
let tags = Stack<string>()
XmlEvent.Parse xml (function
    | Element (name,value) ->
        tags.Push(name)
        MatchElement(name,value)
    | EndElement name ->
        // Each end tag is expected to close the most recently opened element.
        let tag = tags.Pop()
        Debug.Assert((tag = name))
)
item

 

XmlReader

// Read the <item> fragment with a forward-only XmlReader.
use reader = XmlReader.Create(new StringReader(xml))
// Position the reader on the first content node (the <item> element).
// MoveToElement, used previously, only acts when the reader is sitting on
// an attribute node, so it did nothing at this point.
reader.MoveToContent() |> ignore
reader.ReadStartElement("item")
// The fields are read one after another from the same reader, in the order
// they are written here: title, link, description.
{ 
    Title = Some(reader.ReadElementString("title"))
    Link = Some(reader.ReadElementString("link"))
    Description = Some(reader.ReadElementString("description"))
}

 

XmlDocument

// Load the whole fragment into a DOM, then look each child up by name.
let doc = XmlDocument()
xml |> doc.LoadXml
let item = doc.DocumentElement
{ Title = item.["title"].InnerText |> Some
  Link = item.["link"].InnerText |> Some
  Description = item.["description"].InnerText |> Some }

 

XElement

open System.Xml.Linq
// Parse the fragment with LINQ to XML, then read each child's value by name.
// Each string name is converted to an XName through XName.op_Implicit.
let e = XElement.Parse(xml)
{ Title = e.Element(XName.op_Implicit("title")).Value |> Some
  Link = e.Element(XName.op_Implicit("link")).Value |> Some
  Description = e.Element(XName.op_Implicit("description")).Value |> Some }

F# Tree and XmlReader

/// Element tree type
/// Element tree type
type ElementTree =
    /// An element that contains child elements.
    | ParentElement of string * ElementTree seq
    /// An element that contains only text.
    | ValueElement of string * string
    /// An element with neither text nor children.
    | EmptyElement of string
    /// Builds a tree from reader, starting at its first content node.
    /// An element becomes a ParentElement if any child element was seen,
    /// otherwise a ValueElement if any text node was seen, otherwise an
    /// EmptyElement. Text found next to child elements is discarded.
    static member Parse (reader:XmlReader) =
        let rec parseElement depth =
            // The reader is still positioned on this element's start tag.
            let name = reader.Name
            // Both accumulators are created lazily, on first use.
            let mutable text : StringBuilder option = None
            let mutable kids : ResizeArray<ElementTree> option = None
            // Consume nodes until the reader rises above this element's content.
            while reader.Read() && reader.Depth >= depth do
                if reader.NodeType = XmlNodeType.Element then
                    if kids.IsNone then
                        kids <- Some (new ResizeArray<ElementTree>())
                    let child =
                        if reader.IsEmptyElement then EmptyElement reader.Name
                        else parseElement (reader.Depth+1)
                    kids.Value.Add child
                elif reader.NodeType = XmlNodeType.Text then
                    if text.IsNone then
                        text <- Some (StringBuilder())
                    text.Value.Append(reader.Value) |> ignore
            match kids, text with
            | Some kids, _ -> ParentElement(name, kids)
            | None, Some text -> ValueElement(name, text.ToString())
            | None, None -> EmptyElement(name)
        reader.MoveToContent () |> ignore
        parseElement (reader.Depth+1)
    /// Writes this element and its descendants to writer, emitting a newline
    /// after each element, and then closes writer.
    member element.WriteContentTo (writer:XmlWriter) =
        let rec writeElement el =
            match el with
            | EmptyElement name ->
                writer.WriteStartElement(name)
                writer.WriteEndElement()
            | ValueElement (name,value) ->
                writer.WriteElementString(name,value)
            | ParentElement (name,children) ->
                writer.WriteStartElement(name)
                for child in children do
                    writeElement child
                writer.WriteEndElement()
            writer.WriteWhitespace(Environment.NewLine)
        writeElement element
        writer.Close()
    /// Renders the tree as XML text using an indenting XmlWriter.
    member element.ToXml () =
        let output = StringBuilder()
        let settings = XmlWriterSettings(Indent=true)
        using (XmlWriter.Create(output, settings)) (fun writer ->
            element.WriteContentTo writer)
        output.ToString()
    /// The element's name.
    member element.Name =
        match element with
        | ParentElement (name,_)
        | ValueElement (name,_)
        | EmptyElement name -> name
    /// The element's text; the empty string unless this is a ValueElement.
    member element.Value =
        match element with
        | ValueElement (_,value) -> value
        | ParentElement _
        | EmptyElement _ -> String.Empty
    /// The element's children; empty unless this is a ParentElement.
    member element.Children =
        match element with
        | ParentElement (_, children) -> children
        | ValueElement _
        | EmptyElement _ -> Seq.empty
    /// The first direct child with the given name, located with Seq.find.
    member element.FindElement name =
        element.Children |> Seq.find (fun child -> child.Name = name)

 

// Parse the fragment into an ElementTree, then look each child up by name.
// The reader is bound with `use`, as in the XmlReader example, so that it is
// disposed instead of being left open.
use reader = XmlReader.Create(new StringReader(xml))
let root = ElementTree.Parse(reader)
{
    Title = Some(root.FindElement("title").Value)
    Link = Some(root.FindElement("link").Value)
    Description = Some(root.FindElement("description").Value)
}    

 

XML Reading Comparison Summary Table (lower times are better)

Technique Time Remarks
Event based 1383 Verbose, error prone and ridiculously slow
XmlReader 73 Concise
XmlDocument 65 Concise
XElement 64 Concise - shame about implicit conversions
F# Tree 68 Concise

Note

For larger XML documents, try other techniques such as XPath or reflection-based serialization.

Comments are closed