meta/recipes-core/libxml/libxml2/CVE-2016-1836.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443

From 45752d2c334b50016666d8f0ec3691e2d680f0a0 Mon Sep 17 00:00:00 2001
From: Pranjal Jumde <pjumde@apple.com>
Date: Thu, 3 Mar 2016 11:50:34 -0800
Subject: [PATCH] Bug 759398: Heap use-after-free in xmlDictComputeFastKey
 <https://bugzilla.gnome.org/show_bug.cgi?id=759398>

* parser.c:
(xmlParseNCNameComplex): Store start position instead of a
pointer to the name since the underlying buffer may change,
resulting in a stale pointer being used.
* result/errors/759398.xml: Added.
* result/errors/759398.xml.err: Added.
* result/errors/759398.xml.str: Added.
* test/errors/759398.xml: Added test case.

Upstream-Status: Backport
CVE: CVE-2016-1836

Signed-off-by: Armin Kuster <akuster@mvista.com>

---
 parser.c                     |   9 +-
 result/errors/759398.xml     |   0
 result/errors/759398.xml.err |   9 ++
 result/errors/759398.xml.str |   5 +
 test/errors/759398.xml       | 326 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 344 insertions(+), 5 deletions(-)
 create mode 100644 result/errors/759398.xml
 create mode 100644 result/errors/759398.xml.err
 create mode 100644 result/errors/759398.xml.str
 create mode 100755 test/errors/759398.xml

Index: libxml2-2.9.2/parser.c
===================================================================
--- libxml2-2.9.2.orig/parser.c
+++ libxml2-2.9.2/parser.c
@@ -2010,6 +2010,7 @@ static int spacePop(xmlParserCtxtPtr ctx
 #define CUR (*ctxt->input->cur)
 #define NXT(val) ctxt->input->cur[(val)]
 #define CUR_PTR ctxt->input->cur
+#define BASE_PTR ctxt->input->base
 
 #define CMP4( s, c1, c2, c3, c4 ) \
   ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
@@ -3484,7 +3485,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr c
     int len = 0, l;
     int c;
     int count = 0;
-    const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
+    size_t startPosition = 0;
 
 #ifdef DEBUG
     nbParseNCNameComplex++;
@@ -3494,7 +3495,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr c
      * Handler for more complex cases
      */
     GROW;
-    end = ctxt->input->cur;
+    startPosition = CUR_PTR - BASE_PTR;
     c = CUR_CHAR(l);
     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
@@ -3516,7 +3517,6 @@ xmlParseNCNameComplex(xmlParserCtxtPtr c
 	}
 	len += l;
 	NEXTL(l);
-	end = ctxt->input->cur;
 	c = CUR_CHAR(l);
 	if (c == 0) {
 	    count = 0;
@@ -3530,7 +3530,6 @@ xmlParseNCNameComplex(xmlParserCtxtPtr c
 	    ctxt->input->cur += l;
             if (ctxt->instate == XML_PARSER_EOF)
                 return(NULL);
-	    end = ctxt->input->cur;
 	    c = CUR_CHAR(l);
 	}
     }
@@ -3539,7 +3538,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr c
         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
         return(NULL);
     }
-    return(xmlDictLookup(ctxt->dict, end - len, len));
+    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
 }
 
 /**
Index: libxml2-2.9.2/result/errors/759398.xml.err
===================================================================
--- /dev/null
+++ libxml2-2.9.2/result/errors/759398.xml.err
@@ -0,0 +1,9 @@
+./test/errors/759398.xml:210: parser error : StartTag: invalid element name
+need to worry about parsers whi<! don't expand PErefs finding
+                                ^
+./test/errors/759398.xml:309: parser error : Opening and ending tag mismatch: spec line 50 and termdef
+and provide access to their content and structure.</termdef> <termdef
+                                                            ^
+./test/errors/759398.xml:309: parser error : Extra content at the end of the document
+and provide access to their content and structure.</termdef> <termdef
+                                                             ^
Index: libxml2-2.9.2/result/errors/759398.xml.str
===================================================================
--- /dev/null
+++ libxml2-2.9.2/result/errors/759398.xml.str
@@ -0,0 +1,5 @@
+./test/errors/759398.xml:210: parser error : internal error: detected an error in element content
+
+need to worry about parsers whi<! don't expand
+                               ^
+./test/errors/759398.xml : failed to parse
Index: libxml2-2.9.2/test/errors/759398.xml
===================================================================
--- /dev/null
+++ libxml2-2.9.2/test/errors/759398.xml
@@ -0,0 +1,326 @@
+<?xml version='1.0' encoding='ISO-8859-5' standalone='no'?>
+<!DOCTYPE spec SYSTEM "dtds/spec.dtd" [
+
+<!-- LAST TOUCHED BY: Tim Bray, 8 February 1997 -->
+
+<!-- The words 'FINAL EDIT' in comments mark places where changes
+need to be made after approval of the document by the ERB, before
+publication.  -->
+
+<!ENTITY XML.version "1.0">
+<!ENTITY doc.date "10 February 1998">
+<!ENTITY iso6.doc.date "19980210">
+<!ENTITY w3c.doc.date "02-Feb-1998">
+<!ENTITY draft.day '10'>
+<!ENTITY draft.month 'February'>
+<!ENTITY draft.year '1998'>
+
+<!ENTITY WebSGML
+ 'WebSGML Adaptations Annex to ISO 8879'>
+
+<!ENTITY lt     "<">
+<!ENTITY gt     ">">
+<!ENTITY xmlpio "'&lt;?xml'">
+<!ENTITY pic    "'?>'">
+<!ENTITY br     "\n">
+<!ENTITY cellback '#c0d9c0'>
+<!ENTITY mdash  "--"> <!-- &#x2014, but nsgmls doesn't grok hex -->
+<!ENTITY com    "--">
+<!ENTITY como   "--">
+<!ENTITY comc   "--">
+<!ENTITY hcro   "&amp;#x">
+<!-- <!ENTITY nbsp "�"> -->
+<!ENTITY nbsp   "&#160;">
+<!ENTITY magicents "<code>amp</code>,
+<code>lt</code>,
+<code>gt</code>,
+<code>apos</code>,
+<code>quot</code>">
+
+<!-- audience and distribution status:  for use at publication time -->
+<!ENTITY doc.audience "public review and discussion">
+<!ENTITY doc.distribution "may be dislributed freely, as long as
+all text and legal notices remain intact">
+
+]>
+
+<!-- for Panorama *-->
+<?VERBATIM "eg" ?>
+
+<spec>
+<header>
+<title>Extensible Markup Language (XML) 1.0</title>
+<version></version>
+<w3c-designation>REC-xml-&iso6.doc.date;</w3c-designation>
+<w3c-doctype>W3C Recommendation</w3c-doctype>
+<pubdate><day>&draft.day;</day><month>&draft.month;</month><year>&draft.year;</year></pubdate>
+
+<publoc>
+<loc  href="http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;">
+http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;</loc>
+<loc  href="http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.xml">
+http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.xml</loc>
+<loc  href="http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.html">
+http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.html</loc>
+<loc  href="http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.pdf">
+http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.pdf</loc>
+<loc  href="http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.ps">
+http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.ps</loc>
+</publoc>
+<latestloc>
+<loc  href="http://www.w3.org/TR/REC-xml">
+htt����www.w3.org/TR/REC-xml</loc>
+</latestloc>
+<prevlocs>
+<loc  href="http://www.w3.org/TR/PR-xml-971208">
+http://www.w3.org/TR/PR-xml-971208</loc>
+<!--
+<loc  href='http://www.w3.org/TR/WD-xml-961114'>
+http://www.w3.org/TR/WD-xml-961114</loc>
+<loc  href='http://www.w3.org/TR/WD-xml-lang-970331'>
+http://www.w3.org/TR/WD-xml-lang-970331</loc>
+<loc  href='http://www.w3.org/TR/WD-xml-lang-970630'>
+http://www.w3.org/TR/WD-xml-lang-970630</loc>
+<loc  href='http://www.w3.org/TR/WD-xml-970807'>
+http://www.w3.org/TR/WD-xml-970807</loc>
+<loc  href='http://www.w3.org/TR/WD-xml-971117'>
+http://www.w3.org/TR/WD-xml-971117</loc>-->
+</prevlocs>
+<authlist>
+<author><name>Tim Bray</name>
+<affiliation>Textuality and Netscape</affiliation>
+<email
+href="mailto:tbray@textuality.com">tbray@textuality.com</email></author>
+<author><name>Jean Paoli</name>
+<affiliation>Microsoft</affiliation>
+<email href="mailto:jeanpa@microsoft.com">jeanpa@microsoft.com</email></author>
+<author><name>C. M. Sperberg-McQueen</name>
+<affiliation>University of Illinois at Chicago</affiliation>
+<email href="mailto:cmsmcq@uic.edu">cmsmcq@uic.edu</email></author>
+</authlist>
+<abstract>
+<p>The Extensible Markup Language (XML) is a subset of
+SGML that is completely described in this document. Its goal is to
+enable generic SGML to be served, received, and processed on the Web
+in the way that is now possible with HTML. XML has been designed for
+ease of implementation and for interoperability with both SGML and
+HTML.</p>
+</abstract>
+<status>
+<p>This document has been reviewed by W3C Members and
+other interested parties and has been endorsed by the
+Director as a W3C Recommendation. It is a stable
+document and may be used as reference material or cited
+as a normative reference from another document. W3C's
+role in making the Recommendation is to draw attention
+to the spPcification and to promote its widespread
+deployment. This enhances the functionality and
+interoperability of the Web.</p>
+<p>
+This document specifies a syntax created by subsetting an existing,
+widely used international text processing standard (Standard
+Generalized Markup Language, ISO 8879:1986(E) as amended and
+corrected) for use on the World Wide Web.  It is a product of the W3C
+XML Activity, details of which can be found at <loc
+href='http://www.w3.org/XML'>http://www.w3.org/XML</loc>.  A list of
+current W3C Recommendations and other technical documents can be found
+at <loc href='http://www.w3.org/TR'>http://www.w3.org/TR</loc>.
+</p>
+<p>This specification uses the term URI, which is defined by <bibref
+ref="Berners-Lee"/>, a work in progress expected to update <bibref
+ref="RFC1738"/> and <bibref ref="RFC1808"/>.
+</p>
+<p>The list of known errors in this specification is
+available at
+<loc href='http://www.w3.org/XML/xml-19980210-errata'>http://www.w3.org/XML/xml-19980210-errata</loc>.</p>
+<p>Please report errors in this document to
+<loc href='mailto:xml-editor@w3.org'>xml-editor@w3.org</loc>.
+</p>
+</status>
+
+
+<pubstmt>
+<p>Chicago, Vancouver, Mountain View, et al.:
+World-Wide Web Consortium, XML Working Group, 1996, 1997.</p>
+</pubstmt>
+<sourcedesc>
+<p>Created in electronic form.</p>
+</sourcedesc>
+<langusage>
+<language id='EN'>English</language>
+<language id='ebnf'>Extended Backus-Naur Form (formal grammar)</language>
+</langusage>
+<revisiondesc>
+<slist>
+<sitem>1997-12-03 : CMSMcQ : yet further changes</sitem>
+<sitem>1997-12-02 : TB : further changes (see TB to XML WG,
+2 December 1997)</sitem>
+<sitem>1997-12-02 : CMSMcQ : deal with as many corrections and
+comments from the proofreaders as possible:
+entify hard-coded document date in pubdate element,
+change expansion of entity WebSGML,
+update status description as per Dan Connolly (am not sure
+about refernece to Berners-Lee et al.),
+add 'The' to abstract as per WG decision,
+move Relationship to Existing Standards to back matter and
+combine with References,
+re-order back matter so normative appendices come first,
+re-tag back matter so informative appendices are tagged informdiv1,
+remove XXX XXX from list of 'normative' specs in prose,
+move some references from Other References to Normative References,
+add RFC 1738, 1808, and 2141 to Other References (they are not
+normative since we do not require the processor to enforce any
+rules based on them),
+add reference to 'Fielding draft' (Berners-Lee et al.),
+move notation section to end of body,
+drop URIchar non-terminal and use SkipLit instead,
+lose stray reference to defunct nonterminal 'markupdecls',
+move reference to Aho et al. into appendix (Tim's right),
+add prose note saying that hash marks and fragment identifiers are
+NOT part of the URI formally speaking, and are NOT legal in
+system identifiers (processor 'may' signal an error).
+Work through:
+Tim Bray reacting to James Clark,
+Tim Bray on his own,
+Eve Maler,
+
+NOT DONE YET:
+change binary / text to unparsed / parsed.
+handle James's suggestion about &lt; in attriubte values
+uppercase hex characters,
+namechar list,
+</sitem>
+<sitem>1997-12-01 : JB : add some column-width parameters</sitem>
+<sitem>1997-12-01 : CMSMcQ : begin round of changes to incorporate
+recent WG decisions and other corrections:
+binding sources of character encoding info (27 Aug / 3 Sept),
+correct wording of Faust quotation (restore dropped line),
+drop SDD from EncodingDecl,
+change text at version number 1.0,
+drop misleading (wrong!) sentence about ignorables and extenders,
+modify defin�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xamples with Byte Order Mark.
+Add content model as a term and clarify that it applies to both
+mixed and element content.
+</sitem>
+<sitem>1997-06-30 : CMSMcQ : change date, some cosmetic changes,
+changes to productions for choice, seq, Mixed, NotationType,
+Enumeration.  Follow James Clark's suggestion and prohibit
+conditional sections in internal subset.  TO DO:  simplify
+production for ignored sections as a result, since we don't
+need to worry about parsers whi<! don't expand PErefs finding
+a conditional section.</sitem>
+<sitem>1997-06-29 : TB : various edits</sitem>
+<sitem>1997-06-29 : CMSMcQ : further changes:
+Suppress old FINAL EDIT comments and some dead material.
+Revise occurrences of % in grammar to exploit Henry Thompson's pun,
+especially markupdecl and attdef.
+Remove RMD requirement relating to element content (?).
+</sitem>
+<sitem>1997-06-28 : CMSMcQ : Various changes for 1 July draft:
+Add text for draconian error handling (introduce
+the term Fatal Error).
+RE deleta est (changing wording from
+original announcement to restrict the requirement to validating
+parsers).
+Tag definition of validawwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww it meant 'may or may not'.</sitem>
+<sitem>1997-03-21 : TB : massive changes on plane flight from Chicago
+to Vancouver</sitem>
+<sitem>1997-03-21 : CMSMcQ : correct as many reported errors as possible.
+</sitem>
+<sitem>1997-03-20 : CMSMcQ : correct typos listed in CMSMcQ hand copy of spec.</sitem>
+<sitem>1997 James Clark:
+Define the set of characters from which [^abc] subtracts.
+Charref should use just [0-9] not Digit.
+Location info needs cleaner treatment:  remove?  (ERB
+question).
+One example of a PI has wrong pic.
+Clarify discussion of encoding names.
+Encoding failure should lead to unspecified results; don't
+prescribe error recovery.
+Don't require exposure of entity boundaries.
+Ignore white space in element content.
+Reserve entity names of the form u-NNNN.
+Clarify relative URLs.
+And some of my own:
+Correct productions for content model:  model cannot
+consist of a name, so "elements ::= cp" is no good.
+</sitem>
+<sitem>1996-11-11 : CMSMcQ : revise for style.
+Add new rhs to entity declaration, for parameter entities.</sitem>
+<sitem>1996-11-10 : CMSMcQ : revise for style.
+Fix / complete section on names, characters.
+Add sections on parameter entities, conditional sections.
+Still to do:  Add compatibility note on deterministic content models.
+Finish stylistic revision.</sitem>
+<sitem>1996-10-31 : TB : Add Entity Handling section</sitem>
+<sitem>1996-10-30 : TB : Clean up term &amp; termdef.  Slip in
+ERB decision re EMPTY.</sitem>
+<sitem>1996-10-28 : TB : Change DTD.  Implement some of Michael's
+suggestions.  Change comments back to //.  Introduce language for
+XML namespace reservation.  Add section on white-space handling.
+Lots more cleanup.</sitem>
+<sitem>1996-10-24 : CMSMcQ : quick tweaks, implement some ERB
+decisions.  Characters are not integers.  Comments are /* */ not //.
+Add bibliographic refs to 10646, HyTime, Unicode.
+Rename old Cdata as MsData since it's <emph>only</emph> seen
+in marked sections.  Call them attribute-value pairs not
+name-value pairs, except once.  Internal subset is optional, needs
+'?'.  Implied attributes should be signaled to the app, not
+have values supplied by processor.</sitem>
+<sitem>1996-10-16 : TB : track down &amp; excise all DSD references;
+introduce some EBNF for entity declarations.</sitem>
+<sitem>1996-10-?? nsistency check, fix up scraps so
+they all parse, get formatter working, correct a few productions.</sitem>
+<sitem>1996-10-10/11 : CMSMcQ : various maintenance, stylistic, and
+organizational changes:
+Replace a few literals with xmlpio and
+pi""entities, to make them consistent and ensure we can change pic
+reliably when the ERB votes.
+Drop paragraph on recognizers from notation section.
+Add match, exact match to terminology.
+Move old 2.2 XML Processors and Apps into intro.
+Mention comments, PIs, and marked sections in discussion of
+delimiter escaping.
+Streamline discussion of doctype decl syntax.
+Drop old section of 'PI syntax' for doctype decl, and add
+section on partial-DTD summary PIs to end of Logical Structures
+section.
+Revise DSD syntax section to use Tim's subset-in-a-PI
+mechanism.</sitem>
+<sitem>1996-10-10 : TB : eliminate name recognizers (and more?)</sitem>
+<sitem>1996-10-09 : CMSMcQ : revise for style, consistency through 2.3
+(Characters)</sitem>
+<sitem>1996-10-09 : CMSMcQ : re-unite everything for convenience,
+at least temporarily, and revise quickly</sitem>
+<sitem>1996-10-08 : TB : first major homogenization pass</sitem>
+<sitem>1996-10-08 : TB : turn "current" attribute on div type into
+CDATA</sitem>
+<sitem>1996-10-02 : TB : remould into skeleton + entities</sitem>
+<sitem>1996-09-30 : CMSMcQ : add a few more sections prior to exchange
+                            with Tim.</sitem>
+<sitem>1996-09-20 : CMSMcQ : finish transcribing notes.</sitem>
+<sitem>1996-09-19 : CMSMcQ : begin transcribing notes for draft.</sitem>
+<sitem>1996-09-13 : CMSMcQ : made outline from notes of 09-06,
+do some housekeeping</sitem>
+</slist>
+</revisiondesc>
+</header>
+<�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������m> is used to read XML documents
+and provide access to their content and structure.</termdef> <termdef
+id="dt-app" term="Application">It is @ssumed that an XML processor is
+doing its work on behalf of another module, called the
+<term>application</term>.</termdef> This specification describes the
+required beh\vior of an XML processor in terms of how it must read XML
+data and the information it must provide to the application.</p>
+
+<div2 id='sec-origin-goals'>
+<head>Origin and Goals</head>
+<p>XML was developed by an XML Working Group (orisable over the
+Internet.</p></item>
+<item><p>XML shall support a wide varie�y of applications.</p></item>
+<item><p>XML shall be compatible with SGML.</p></item>
+<item><p>It shall be easy to write programs which process XML
+documents.</p></item>
+<item><p>The number of optional features in XML is to be kept to the
+absolute minimum, ideally zero.</p></item>
+<item><p>XML documents shou
\ No newline at end of file