- scala> val address = <address>
- | <CI_Address>
- | <deliveryPoint>
- | <CharacterString>Viale delle Terme di Caracalla
- | </CharacterString>
- | </deliveryPoint>
- | <city>
- | <CharacterString>Rome</CharacterString>
- | </city>
- | <administrativeArea>
- | <CharacterString />
- | </administrativeArea>
- | <postalCode>
- | <CharacterString>00153</CharacterString>
- | </postalCode>
- | <country>
- | <CharacterString>Italy</CharacterString>
- | </country>
- | <electronicMailAddress>
- | <CharacterString>jippe.hoogeveen@fao.org
- | </CharacterString>
- | </electronicMailAddress>
- | </CI_Address>
- | </address>
- address: scala.xml.Elem =
- <address>
- <CI_Address>
- ...
- // create a pretty printer for writing out the document nicely
- scala> val pp = new scala.xml.PrettyPrinter(80, 5);
- pp: scala.xml.PrettyPrinter = scala.xml.PrettyPrinter@6d87c12a
- // select the city
- scala> println( pp.formatNodes( address \ "CI_Address" \ "city" ) )
- <city>
- <CharacterString>Rome</CharacterString>
- </city>
- // a second way to select city
- scala> println( pp.formatNodes( address \\ "city" ) )
- <city>
- <CharacterString>Rome</CharacterString>
- </city>
- // select all CharacterStrings and print them one per line (unless there is a \n in the text)
- scala> (address \\ "CharacterString").mkString( "\n" )
- res2: String =
- <CharacterString>Viale delle Terme di Caracalla
- </CharacterString>
- <CharacterString>Rome</CharacterString>
- <CharacterString></CharacterString>
- <CharacterString>00153</CharacterString>
- <CharacterString>Italy</CharacterString>
- <CharacterString>jippe.hoogeveen@fao.org
- </CharacterString>
- // iterate over the city node and all of its descendant nodes.
- scala> println( pp.formatNodes( address \\ "city" \\ "_"))
- <city>
- <CharacterString>Rome</CharacterString>
- </city><CharacterString>Rome</CharacterString>
- // similar to the above, but iterate over all CI_Address nodes and each of their descendants
- scala>println( pp.formatNodes( address \\ "CI_Address" \\ "_"))
- <CI_Address>
- <deliveryPoint>
- <CharacterString>Viale delle Terme di Caracalla </CharacterString>
- </deliveryPoint>
- <city>
- <CharacterString>Rome</CharacterString>
- </city>
- <administrativeArea>
- <CharacterString></CharacterString>
- </administrativeArea>
- <postalCode>
- <CharacterString>00153</CharacterString>
- </postalCode>
- <country>
- <CharacterString>Italy</CharacterString>
- </country>
- <electronicMailAddress>
- <CharacterString>jippe.hoogeveen@fao.org </CharacterString>
- </electronicMailAddress>
- </CI_Address><deliveryPoint>
- <CharacterString>Viale delle Terme di Caracalla </CharacterString>
- </deliveryPoint><CharacterString>Viale delle Terme di Caracalla </CharacterString><city>
- <CharacterString>Rome</CharacterString>
- </city><CharacterString>Rome</CharacterString><administrativeArea>
- <CharacterString></CharacterString>
- </administrativeArea><CharacterString></CharacterString><postalCode>
- <CharacterString>00153</CharacterString>
- </postalCode><CharacterString>00153</CharacterString><country>
- <CharacterString>Italy</CharacterString>
- </country><CharacterString>Italy</CharacterString><electronicMailAddress>
- <CharacterString>jippe.hoogeveen@fao.org </CharacterString>
- </electronicMailAddress><CharacterString>jippe.hoogeveen@fao.org </CharacterString>
- // print all text
- scala> address.text
- res4: String =
-
-
- Viale delle Terme di Caracalla
-
-
-
- Rome
-
-
-
-
-
- 00153
-
-
- Italy
-
-
- jippe.hoogeveen@fao.org
-
- // print all character string text
- scala> (address \\ "CharacterString").text
- res3: String =
- Viale delle Terme di Caracalla
- Rome00153Italyjippe.hoogeveen@fao.org
-
- // print all character string text one per line
- scala> (address \\ "CharacterString").map( _.text ).mkString("\n")
- res6: String =
- Viale delle Terme di Caracalla
-
- Rome
- 00153
- Italy
- jippe.hoogeveen@fao.org
- // find the longest character string
- scala> (address \\ "CharacterString").reduceRight(
- | (elem, longest) => {
- | if( elem.text.length > longest.text.length ) elem
- | else longest
- | })
- res8: scala.xml.Node =
- <CharacterString>Viale delle Terme di Caracalla
- </CharacterString>
- // find the alphabetically last character string (the comparison keeps the greater text)
- scala> (address \\ "CharacterString").reduceRight( (elem, longest) => {
- | if( elem.text > longest.text ) elem
- | else longest
- | })
- res9: scala.xml.Node =
- <CharacterString>jippe.hoogeveen@fao.org
- </CharacterString>
Thursday, August 27, 2009
XPath Style XML Selection
The XML API in Scala allows XPath-like (although not true XPath) queries. In combination with pattern matching, this makes it very easy to process XML documents. I am only going to discuss XPath-style selection now. The code section is very long, but primarily because the results are often quite lengthy.
No comments:
Post a Comment