1		/* xml_prob.pl : Contains xml_parse/[2,3] a bi-directional XML parser written in
2		* Prolog.
3		*
4		* Copyright (C) 2001-2005 Binding Time Limited
5		* Copyright (C) 2005 John Fletcher
6		*
7		* Current Release: $Revision$
8		*
9		* TERMS AND CONDITIONS:
10		*
11		* This program is offered free of charge, as unsupported source code. You may
12		* use it, copy it, distribute it, modify it or sell it without restriction,
13		* but entirely at your own risk.
14		*
15		*/
16
17		% minimally adapted for SICStus Prolog
18		% Mats Carlsson, 2003-2006
19		% added linefeed counting for attributes (attributes_count_lf,spaces_count_linebreaks)
20		% Michael Leuschel, 2017-2022 (changes marked by PATCH LEUSCHEL)
21		% replaces library(xml) for SICStus
22
23		:- module( xml_prob, [
24		xml_parse/2,
25		xml_parse/3,
26		xml_subterm/2,
27		xml_pp/1,
28
29		xml_parse/4
30		]).
31
32		:- use_module(module_information,[module_info/2]).
33		:- module_info(group,external_functions).
34		:- module_info(description,'This module transforms XML into a Prolog representation, adaptation of SICStus library module.').
35
36		:- use_module(probsrc(error_manager)).
37		:- use_module(probsrc(debug),[debug_format/3]).
38		:- use_module(probsrc(tools_strings),[ajoin/2]).
39
40		% added by Leuschel:
41		xml_parse( Chars, Document, Controls, Span ) :-
42		bb_put(xml_prob_current_span,Span),
43		call_cleanup(xml_parse( Chars, Document, Controls),
44		bb_delete(xml_prob_current_span,_)).
45
46		add_xml_warning(Msg,Term) :-
47		bb_get(xml_prob_current_span,Span),!,
48		add_warning(xml_prob,Msg,Term,Span).
49		add_xml_warning(Msg,Term) :-
50		add_warning(xml_prob,Msg,Term).
51
52		add_xml_error(Msg,Term) :-
53		bb_get(xml_prob_current_span,Span),!,
54		add_error(xml_prob,Msg,Term,Span).
55		add_xml_error(Msg,Term) :-
56		add_error(xml_prob,Msg,Term).
57		:- use_module(library(lists),[append/2]).
58		expect_tokens(Tokens,_) --> Tokens,!.
59		expect_tokens(Tokens,Ctxt,Input,_) :-
60		append(["XML parse error, expected '",Tokens,"' in ",Ctxt,", but got:"],Codes),
61		atom_codes(Msg,Codes),
62		length(Tokens,Len),
63		get_next_tokens(Input,Len,Nxt),
64		atom_codes(NxtAtom,Nxt),
65		add_xml_error(Msg,NxtAtom), % TO DO: extract next tokens
66		fail.
67
68		get_next_tokens(_,N,R) :- N<1,!, R=[].
69		get_next_tokens(Var,_,Res) :- var(Var),!, Res="<EOF>".
70		get_next_tokens([],_,"<EOF>").
71		get_next_tokens([H\|T],Nr,[H\|R]) :- N1 is Nr-1, get_next_tokens(T,N1,R).
72
73		:- use_module(library(types), [
74		must_be/4,
75		illarg/3,
76		illarg/4
77		]).
78		:- use_module(library(lists), [
79		select/3,
80		is_list/1
81		]).
82
83		:- set_prolog_flag(double_quotes, codes).
84
85		%@ This is a package for parsing XML with Prolog, which
86		%@ provides Prolog applications with a simple ``Document Value Model''
87		%@ interface to XML documents. A description of the subset of XML that it
88		%@ supports can be found at:
89		%@ @c [PM] 4.1 link updated 2009-08-14
90		%@ @uref{http://www.binding-time.co.uk/xmlpl.html}
91		%@
92		%@ The package, originally written by Binding Time Ltd., is in the public
93		%@ domain and unsupported. To use the package, enter the query:
94		%@
95		%@ @example
96		%@ @group
97		%@ \| ?- use_module(library(xml)).
98		%@ @end group
99		%@ @end example
100		%@
101		%@ The package represents XML documents by the abstract data type
102		%@ @var{document}, which is defined by the following grammar:
103		%@
104		%@ @multitable @columnfractions .2 .3 .5
105		%@ @item @var{document} @tab ::= @code{xml(@var{attributes},@var{content})} @tab @r{@{ well-formed document @}}
106		%@ @item @tab \| @code{malformed(@var{attributes},@var{content})} @tab @r{@{ malformed document @}}
107		%@ @item
108		%@ @item @var{attributes} @tab ::= @code{[]}
109		%@ @item @tab \| @code{[@var{name}=@var{char-data}\|@var{attributes}]}
110		%@ @item
111		%@ @item @var{content} @tab ::= @code{[]}
112		%@ @item @tab \| @code{[@var{cterm}\|@var{content}]}
113		%@ @item
114		%@ @item @var{cterm} @tab ::= @code{pcdata(@var{char-data})} @tab @r{@{ text @}}
115		%@ @item @tab \| @code{comment(@var{char-data})} @tab @r{@{ an XML comment @}}
116		%@ @item @tab \| @code{namespace(@var{URI},@var{prefix},@var{element})} @tab @r{@{ a Namespace @}}
117		%@ @item @tab \| @code{element(@var{tag},@var{attributes},@var{content})} @tab @r{@{ <@var{tag}>..</@var{tag}> encloses @var{content} or <@var{tag} /> if empty @}}
118		%@ @item @tab \| @code{instructions(@var{name},@var{char-data})} @tab @r{@{ A PI <? @var{name} @var{char-data} ?> @}}
119		%@ @item @tab \| @code{cdata(@var{char-data})} @tab @r{@{ <![CDATA[@var{char-data}]]> @}}
120		%@ @item @tab \| @code{doctype(@var{tag},@var{doctype-id})} @tab @r{@{ DTD <!DOCTYPE .. > @}}
121		%@ @item @tab \| @code{unparsed(@var{char-data})} @tab @r{@{ text that hasn't been parsed @}}
122		%@ @item @tab \| @code{out_of_context(@var{tag})} @tab @r{@{ @var{tag} is not closed @}}
123		%@ @item
124		%@ @item @var{tag} @tab ::= @dfn{atom} @tab @r{@{ naming an element @}}
125		%@ @item
126		%@ @item @var{name} @tab ::= @dfn{atom} @tab @r{@{ not naming an element @}}
127		%@ @item
128		%@ @item @var{URI} @tab ::= @dfn{atom} @tab @r{@{ giving the URI of a namespace @}}
129		%@ @item
130		%@ @item @var{char-data} @tab ::= @dfn{code-list}
131		%@ @item
132		%@ @item @var{doctype-id} @tab ::= @code{public(@var{char-data},@var{char-data})}
133		%@ @item @tab \| @code{public(@var{char-data},@var{dtd-literals})}
134		%@ @item @tab \| @code{system(@var{char-data})}
135		%@ @item @tab \| @code{system(@var{char-data},@var{dtd-literals})}
136		%@ @item @tab \| @code{local}
137		%@ @item @tab \| @code{local,@var{dtd-literals}}
138		%@ @item
139		%@ @item @var{dtd-literals} @tab ::= @code{[]}
140		%@ @item @tab \| @code{[dtd_literal(@var{char-data})\|@var{dtd-literals}]}
141		%@ @end multitable
142		%@
143		%@ The following predicates are exported by the package:
144		%@
145		%@ @table @code
146		%@ @item xml_parse(@var{?Chars}, @var{?Document})
147		%@ @itemx xml_parse(@var{?Chars}, @var{?Document}, @var{+Options})
148		%@ @PLXindex {xml_parse/[2,3] (xml)}
149		%@ Either parses @var{Chars}, a @dfn{code-list}, to @var{Document}, a
150		%@ @var{document}. @var{Chars} is not required to represent strictly
151		%@ well-formed XML.
152		%@ Or generates @var{Chars}, a @dfn{code-list},
153		%@ from @var{Document}, a @var{document}.
154		%@ If @var{Document} is not a valid @var{document} term representing
155		%@ well-formed XML, an exception is raised.
156		%@ In the second usage of the predicate, the only option available is @code{format/1}.
157		%@
158		%@ @var{Options} is a list of zero or more of the following, where
159		%@ @var{Boolean} must be @code{true} or @code{false}:
160		%@
161		%@ @table @code
162		%@ @item format(@var{Boolean})
163		%@ @findex format/1 (xml_parse/3 option)
164		%@ Indent the element content (default @code{true}).
165		%@
166		%@ @item extended_characters(@var{Boolean})
167		%@ @findex extended_characters/1 (xml_parse/3 option)
168		%@ Use the extended character entities for XHTML (default @code{true}).
169		%@
170		%@ @item remove_attribute_prefixes(@var{Boolean})
171		%@ @findex remove_attribute_prefixes/1 (xml_parse/3 option)
172		%@ Remove namespace prefixes from attributes when it's the same as the
173		%@ prefix of the parent element (default @code{false}).
174		%@ @end table
175		%@
176		%@ @item xml_subterm(@var{+Term}, @var{?Subterm})
177		%@ @PLXindex {xml_subterm/2 (xml)}
178		%@ Unifies @var{Subterm} with a sub-term of @var{Term}, a
179		%@ @var{document}. This can be especially useful when trying to test or
180		%@ retrieve a deeply-nested subterm from a document.
181		%@
182		%@ @item xml_pp(@var{+Document})
183		%@ @PLXindex {xml_pp/1 (xml)}
184		%@ ``Pretty prints'' @var{Document}, a @var{document}, on the current
185		%@ output stream.
186		%@ @end table
187
188
189		/* xml_parse(+?Chars, ?+Document[, +Controls]) parses Chars to/from a data
190		* structure of the form xml(<atts>, <content>). <atts> is a list of
191		* <atom>=<string> attributes from the (possibly implicit) XML signature of the
192		* document. <content> is a (possibly empty) list comprising occurrences of :
193		*
194		* pcdata(<string>) : Text
195		* comment(<string>) : An xml comment;
196		* element(<tag>,<atts>,<content>) : <tag>..</tag> encloses <content>
197		* : <tag /> if empty
198		* instructions(<atom>, <string>) : Processing <? <atom> <params> ?>"
199		* cdata( <string> ) : <![CDATA[ <string> ]]>
200		* doctype(<atom>, <doctype id>) : DTD <!DOCTYPE .. >
201		*
202		* The conversions are not completely symmetrical, in that weaker XML is
203		* accepted than can be generated. Specifically, in-bound (Chars -> Document)
204		* does not require strictly well-formed XML. Document is instantiated to the
205		* term malformed(Attributes, Content) if Chars does not represent well-formed
206		* XML. The Content of a malformed/2 structure can contain:
207		*
208		* unparsed( <string> ) : Text which has not been parsed
209		* out_of_context( <tag> ) : <tag> is not closed
210		*
211		* in addition to the standard term types.
212		*
213		* Out-bound (Document -> Chars) parsing _does_ require that Document defines
214		* strictly well-formed XML. If an error is detected a 'domain' exception is
215		* raised.
216		*
217		* The domain exception will attempt to identify the particular sub-term in
218		* error and the message will show a list of its ancestor elements in the form
219		* <tag>{(id)}* where <id> is the value of any attribute _named_ id.
220		*
221		* At this release, the Controls applying to in-bound (Chars -> Document)
222		* parsing are:
223		*
224		* extended_characters(<bool>) : Use the extended character
225		* : entities for XHTML (default true)
226		*
227		* format(<bool>) : Strip layouts when no character data
228		* : appears between elements.
229		* : (default true)
230		*
231		* remove_attribute_prefixes( <bool>) : Remove namespace prefixes from
232		* : attributes when it's the same as the
233		* : prefix of the parent element
234		* : (default false).
235		*
236		* [<bool> is one of 'true' or 'false']
237		*
238		* For out-bound (Document -> Chars) parsing, the only available option is:
239		*
240		* format(<Bool>) : Indent the element content
241		* : (default true)
242		*
243		* Different DCGs for input and output are used because input parsing is
244		* more flexible than output parsing. Errors in input are recorded as part
245		* of the data structure. Output parsing throws an exception if the document
246		* is not well-formed, diagnosis tries to identify the specific culprit term.
247		*/
248		xml_parse( Chars, Document ) :-
249		xml_parse( Chars, Document, [] ).
250
251		xml_parse( Chars, Document, Controls ) :-
252		Goal = xml_parse(Chars,Document,Controls),
253		% format, extended_characters, remove_attribute_prefixes
254		must_be(Controls, proper_list, Goal, 3),
255		xml_options(Controls, opt(true,true,false), Options, Goal, 3),
256		( ground( Chars ) ->
257		xml_to_document( Options, Chars, Document )
258		; ground(Document) ->
259		document_to_xml( Options, Document, Chars )
260		; illarg(var, Goal, 0)
261		), !. /MC/
262
263		xml_options([], Opt, Opt, _, _) :- !.
264		xml_options([X\|L], Opt0, Opt, Goal, ArgNo) :- !,
265		( callable(X),
266		xml_option(X, Opt0, Opt1) -> true
267		; illarg(domain(term,xml_parse_option), Goal, ArgNo, X)
268		),
269		xml_options(L, Opt1, Opt, Goal, ArgNo).
270
271		xml_option(format(X), opt(_,B,C), opt(X,B,C)) :-
272		bool_option(X).
273		xml_option(extended_characters(X), opt(A,_,C), opt(A,X,C)) :-
274		bool_option(X).
275		xml_option(remove_attribute_prefixes(X), opt(A,B,_), opt(A,B,X)) :-
276		bool_option(X).
277
278		bool_option(X) :- var(X), !, fail.
279		bool_option(false).
280		bool_option(true).
281
282		document_to_xml( opt(Format,_,_), Document, Chars ) :-
283	?	( document_generation(Format, Document, Chars0, [] ) ->
284		Chars = Chars0
285		;
286		xml_fault( Document, [], Culprit, Path, Message ),
287		throw(xml_parse(Message,Document,Culprit,Path))
288		).
289
290		:- multifile user:generate_message_hook/3.
291		user:generate_message_hook(xml_parse(Message,Document,Culprit,Path)) --> !,
292		['XML Parse: ~a in ~q'-[Message,Document],nl,
293		'Culprit: ~q'-[Culprit],nl],
294		( {Path==[]} -> []
295		; ['Path: ~s'-[Path],nl]
296		).
297
298
299		/* xml_subterm( +XMLTerm, ?Subterm ) unifies Subterm with a sub-term of Term.
300		* Note that XMLTerm is a sub-term of itself.
301		*/
302		xml_subterm( Term, Term ).
303		xml_subterm( xml(_Attributes, Content), Term ) :-
304		xml_subterm( Content, Term ).
305		xml_subterm( [H\|T], Term ) :-
306		( xml_subterm( H, Term )
307		; xml_subterm( T, Term )
308		).
309		xml_subterm( element(_Name,_Attributes,Content), Term ) :-
310		xml_subterm( Content, Term ).
311		xml_subterm( namespace(_URI,_Prefix,Content), Term ) :-
312		xml_subterm( Content, Term ).
313
314		/* xml is intended to be a rather modular module: it should be easy to
315		* build a program that can output XML, but not read it, or vice versa.
316		* Similarly, you may be happy to dispense with diagnosis once you are
317		* sure that your code will only try to make valid calls to xml_parse/2.
318		*
319		* It is intended that the code should be very portable too. Clearly,
320		* some small changes will be needed between platforms, but these should
321		* be limited to xml_utilities. xml_utilities contains most of the shared
322		* code and most of the potentially non-portable code.
323		*/
324		/* xml_acquisition.pl : XML -> Document translation.
325		*
326		* $Revision$
327		*
328		*/
329
330		/* xml_to_document( +Options, +XML, ?Document ) translates the list of
331		* character codes XML into the Prolog term Document. Options is
332		* the parsed options list.
333		*/
334		xml_to_document( Options, XML, Document ) :-
335		initial_context( Options, Context ),
336		debug_format(19,'Parsing XML:~n~s~n',[XML]), %PATCH: LEUSCHEL
337		( xml_declaration( Attributes0, XML, XML1 ) ->
338		Attributes = Attributes0
339		;
340		format('No <xml> header declaration~n',[]), %PATCH LEUSCHEL
341		XML1 = XML,
342		Attributes = []
343		),
344		xml_to_document( XML1, Context, Terms, [], WellFormed ),
345		xml_to_document1( WellFormed, Attributes, Terms, Document ).
346
347		xml_to_document1( true, Attributes, Terms, xml(Attributes, Terms) ).
348		xml_to_document1( false, Attributes, Terms, malformed(Attributes, Terms) ).
349
350		% unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed )
351		unparsed( Unparsed, _Context, [unparsed(Unparsed)], [], false ).
352
353		xml_declaration( Attributes ) -->
354		spaces,
355		"<?",
356		nmtoken( xml ),
357		xml_declaration_attributes( Attributes ),
358		spaces,
359		"?>".
360
361		xml_to_document( [], Context, Terms, [], WF ) :-
362		close_context( Context, Terms, WF ).
363		xml_to_document( [Char\|Chars], Context, Terms, Residue, WF ) :-
364		( Char =:= "<" ->
365		xml_markup_structure( Chars, Context, Terms, Residue, WF )
366		; Char =:= "&" ->
367		entity_reference( Chars, Context, Terms, Residue, WF )
368		; Char =< " ",
369		\+ space_preserve( Context ) ->
370		layouts( Chars, Context, [Char\|T], T, Terms, Residue, WF )
371		; void_context( Context ) ->
372		unparsed( [Char\|Chars], Context, Terms, Residue, WF )
373		;
374		Terms = [pcdata([Char\|Chars1])\|Terms1],
375		acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF )
376		).
377
378		layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :-
379		close_context( Context, Terms, WF ).
380		layouts( [Char\|Chars], Context, Plus, Minus, Terms, Residue, WF ) :-
381		( Char =:= "<" ->
382		xml_markup_structure( Chars, Context, Terms, Residue, WF )
383		; Char =:= "&" ->
384		entity_reference( Chars, Context, Terms, Residue, WF )
385		; Char =< " " ->
386		Minus = [Char\|Minus1],
387		layouts( Chars, Context, Plus, Minus1, Terms, Residue, WF )
388		; void_context( Context ) ->
389		unparsed( [Char\|Chars], Context, Terms, Residue, WF )
390		;
391		Terms = [pcdata(Plus)\|Terms1],
392		Minus = [Char\|Chars1],
393		context_update( space_preserve, Context, true, Context1 ),
394		acquire_pcdata( Chars, Context1, Chars1, Terms1, Residue, WF )
395		).
396
397		acquire_pcdata( [], Context, [], Terms, [], WF ) :-
398		close_context( Context, Terms, WF ).
399		acquire_pcdata( [Char\|Chars], Context, Chars1, Terms, Residue, WF ) :-
400		( Char =:= "<" ->
401		Chars1 = [],
402		xml_markup_structure( Chars, Context, Terms, Residue, WF )
403		; Char =:= "&" ->
404		reference_in_pcdata( Chars, Context, Chars1, Terms, Residue, WF )
405		;
406		Chars1 = [Char\|Chars2],
407		acquire_pcdata( Chars, Context, Chars2, Terms, Residue, WF )
408		).
409
410		xml_markup_structure( [], Context, Terms, Residue, WF ) :-
411		unparsed( "<", Context, Terms, Residue, WF ).
412		xml_markup_structure( Chars, Context, Terms, Residue, WF ) :-
413		Chars = [Char\|Chars1],
414		( Char =:= "/" ->
415		closing_tag( Context, Chars1, Terms, Residue, WF )
416		; Char =:= "?" ->
417		pi_acquisition( Chars1, Context, Terms, Residue, WF )
418		; Char =:= "!" ->
419		declaration_acquisition( Chars1, Context, Terms, Residue, WF )
420		; open_tag(Tag,Context,Attributes,Type, Chars, Chars2 ) ->
421		push_tag( Tag, Chars2, Context, Attributes, Type, Terms, Residue, WF )
422		;
423		unparsed( [0'<\|Chars], Context, Terms, Residue, WF ) %'
424		).
425
426		push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :-
427		new_element(Tag, Chars, Context, Attributes, Type, Term, Rest, WF0),
428		push_tag1( WF0, Context, Term, Rest, Terms, Residue, WF ).
429
430		push_tag1( true, Context, Term, Chars, [Term\|Terms], Residue, WF ) :-
431		xml_to_document( Chars, Context, Terms, Residue, WF ).
432		push_tag1( false, _Context, Term, Chars, [Term], Chars, false ).
433
434		new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :-
435		namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
436		( append( NSChars, [0':\|TagChars1], TagChars ), %'
437		specific_namespace( NSChars, Context1, SpecificNamespace ) ->
438		Namespace0 = SpecificNamespace
439		;
440		NSChars = "",
441		TagChars1 = TagChars,
442		default_namespace( Context1, Namespace0 )
443		),
444		current_namespace( Context1, CurrentNamespace ),
445		( Namespace0 == CurrentNamespace ->
446		Term = element(Tag, Attributes, Contents),
447		Context2 = Context1
448		;
449		Term = namespace( Namespace0, NSChars,
450		element(Tag, Attributes, Contents)
451		),
452		context_update( current_namespace, Context1, Namespace0, Context2 )
453		),
454		input_attributes( Attributes1, Context2, Attributes ),
455		atom_codes( Tag, TagChars1 ),
456		close_tag( Type, Chars, Context2, Contents, Residue, WF ).
457
458		close_tag( empty, Residue, _Context, [], Residue, true ).
459		close_tag( push(Tag), Chars, Context0, Contents, Residue, WF ) :-
460		context_update( element, Context0, Tag, Context1 ),
461		xml_to_document( Chars, Context1, Contents, Residue, WF ).
462
463		pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
464		( inline_instruction(Target, Processing, Chars, Rest ),
465		Target \== xml ->
466		Terms = [instructions(Target, Processing)\|Terms1],
467		xml_to_document( Rest, Context, Terms1, Residue, WellFormed )
468		;
469		unparsed( [0'<,0'?\|Chars], Context, Terms, Residue, WellFormed )
470		).
471
472		declaration_acquisition( Chars, Context, Terms, Residue, WF ) :-
473		( declaration_type( Chars, Type, Chars1 ),
474		declaration_parse( Type, Context, Term, Context1, Chars1, Rest ) ->
475		Terms = [Term\|Terms1],
476		xml_to_document( Rest, Context1, Terms1, Residue, WF )
477		;
478		unparsed( [0'<,0'!\|Chars], Context, Terms, Residue, WF )
479		).
480
481		open_tag( Tag, Namespaces, Attributes, Termination ) -->
482		nmtoken_chars( Tag ),
483		attributes( Attributes, [], Namespaces ),
484		spaces,
485	?	(open_tag_terminator( Tag, Termination ) -> []
486		; {atom_codes(ATag,Tag), add_xml_warning('XML tag not properly terminated with >: ',ATag),fail}
487		).
488
489		open_tag_terminator( Tag, push(Tag) ) -->
490		">".
491		open_tag_terminator( _Tag, empty ) -->
492		"/>".
493
494		declaration_parse( comment, Namespaces, comment(Comment), Namespaces ) -->
495		comment(Comment).
496		declaration_parse( cdata, Namespaces, cdata(CData), Namespaces ) -->
497		cdata( CData ).
498		declaration_parse( doctype, Namespaces0, doctype(Name, Names), Namespaces ) -->
499		doctype( Name, Names, Namespaces0, Namespaces ),
500		spaces,
501		">".
502
503		inline_instruction( Target, Processing, Plus, Minus ) :-
504		nmtoken(Target, Plus, Mid0 ),
505		spaces( Mid0, Mid1 ),
506		append( Processing, [0'?,0'>\|Minus], Mid1 ),
507		!.
508
509		entity_reference_name( Reference ) -->
510		nmtoken_chars( Reference ),
511		";".
512
513		declaration_type( [Char1,Char2\|Chars1], Class, Rest ) :-
514		Chars = [Char1,Char2\|Chars1],
515		( declaration_type1( Char1, Char2, Chars1, Class0, Residue ) ->
516		Class = Class0,
517		Rest = Residue
518		;
519		Class = generic,
520		Rest = Chars
521		).
522
523		declaration_type1( 0'-, 0'-, Chars, comment, Chars ).
524		declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :-
525		append( "DATA[", Residue, Chars ).
526		declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :-
527		append( "CTYPE", Residue, Chars ).
528
529		closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
530		( closing_tag_name( Tag, Chars, Rest ),
531		current_tag( Context, Tag ) ->
532		Terms = [],
533		Residue = Rest,
534		WellFormed = true
535		;
536		unparsed( [0'<,0'/\|Chars], Context, Terms, Residue, WellFormed )
537		).
538
539		closing_tag_name( Tag ) -->
540		nmtoken_chars( Tag ),
541		spaces,
542		">".
543
544		entity_reference( Chars, Context, Terms, Residue, WF ) :-
545		( standard_character_entity( Char, Chars, Rest ) ->
546		Terms = [pcdata([Char\|Chars1])\|Terms1],
547		acquire_pcdata( Rest, Context, Chars1, Terms1, Residue, WF )
548		; entity_reference_name( Reference, Chars, Rest ),
549		defined_entity( Reference, Context, String ) ->
550		append( String, Rest, Full ),
551		xml_to_document( Full, Context, Terms, Residue, WF )
552		;
553		unparsed( [0'&\|Chars], Context, Terms, Residue, WF ) %'
554		).
555
556		reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :-
557	?	( standard_character_entity(Char, Chars0, Rest ) ->
558		Chars1 = [Char\|Chars2],
559		acquire_pcdata( Rest, Context, Chars2, Terms, Residue, WF )
560		; entity_reference_name(Reference, Chars0, Rest ),
561		defined_entity( Reference, Context, String ) ->
562		append( String, Rest, Full ),
563		acquire_pcdata( Full, Context, Chars1, Terms, Residue, WF )
564		;
565		Chars1 = [],
566		unparsed( [0'&\|Chars0], Context, Terms, Residue, WF ) %'
567		).
568
569		namespace_attributes( [], Context, Context, [] ).
570		namespace_attributes( Attributes0, Context0, Context, Attributes ) :-
571		Attributes0 = [_\|_],
572		append( "xmlns:", Unqualified, QualifiedNameChars ),
573	?	( select( "xmlns"=Value, Attributes0, Attributes1 ) ->
574		atom_codes( URI, Value ),
575		context_update( default_namespace, Context0, URI, Context1 ),
576		namespace_attributes( Attributes1, Context1, Context, Attributes )
577	?	; select( QualifiedNameChars=Value, Attributes0, Attributes1 ) ->
578		Attributes = [QualifiedNameChars=Value\|Attributes2],
579		atom_codes( URI, Value ),
580		context_update( ns_prefix(Unqualified), Context0, URI, Context1 ),
581		namespace_attributes( Attributes1, Context1, Context, Attributes2 )
582		; member( "xml:space"="preserve", Attributes0 ) ->
583		Attributes = Attributes0,
584		context_update( space_preserve, Context0, true, Context )
585		;
586		Context = Context0,
587		Attributes = Attributes0
588		).
589
590		input_attributes( [], _Context, [] ).
591		input_attributes( [NameChars=Value\|Attributes0], Context,
592		[Name=Value\|Attributes] ) :-
593		( remove_attribute_prefixes( Context ),
594		append( NSChars, [0':\|NameChars1], NameChars ), %'
595		NSChars \== "xmlns",
596		specific_namespace( NSChars, Context, Namespace ),
597		current_namespace( Context, Namespace ) ->
598		atom_codes( Name, NameChars1 )
599		;
600		atom_codes( Name, NameChars )
601		),
602		input_attributes( Attributes0, Context, Attributes ).
603
604		attributes( Attributes, Seen, Namespaces ) -->
605		attributes_count_lf(0, Attributes, Seen, Namespaces ). % PATCH LEUSCHEL: add count parameter
606
607		attributes_count_lf(Count0, [Name=Value\|Attributes], Seen, Namespaces ) -->
608		spaces_count_linebreaks(Count0,C1),
609		nmtoken_chars( Name ),
610		{\+ member(Name, Seen)},
611		spaces_count_linebreaks(C1,C2),
612		expect_tokens("=","attributes"),
613		spaces_count_linebreaks(C2,C3),
614		attribute_value( CountValue, Value, Namespaces, Name ), % Value does not contain the line breaks anymore, so we have to count them in attribute_layouts.
615		!,
616		{C4 is C3+CountValue},
617		attributes_count_lf( C4, Attributes, [Name\|Seen], Namespaces ).
618		attributes_count_lf( 0, [], _Seen, _Namespaces ) --> !, "". % ,{print(no_linefeeds),nl}.
619		attributes_count_lf( Count, ["$attribute_linefeeds"=Count], _Seen, _Namespaces ) --> "". %, {print(linefeeds(Count)),nl}.
620
621		spaces_count_linebreaks(C,C, [], [] ).
622		spaces_count_linebreaks(Count,ResCount, [Char\|Chars0], Chars1 ) :-
623		( Char=10 ->
624		Count1 is Count+1, spaces_count_linebreaks(Count1, ResCount, Chars0, Chars1 )
625		; Char =< 32 ->
626		spaces_count_linebreaks(Count,ResCount, Chars0, Chars1 )
627		;
628		Count=ResCount, Chars1 = [Char\|Chars0]
629		).
630		% END PATCH
631
632		xml_declaration_attributes( Res ) -->
633		spaces1,!,
634		xml_declaration_attributes( Res ).
635		xml_declaration_attributes( Res ) -->
636		nmtoken( Name ), !,
637		spaces,
638		expect_tokens("=","declaration attribute"),
639		spaces,
640		xml_string( Value ),
641	?	{xml_declaration_attribute_valid(Name, Value)
642		-> Res = [Name-Value\|Attributes] ; Res = [Attributes]}, % PATCH LEUSCHEL: also continue with invalid declarations
643		xml_declaration_attributes( Attributes ),
644		spaces.
645		xml_declaration_attributes( [] ) --> "".
646
647		doctype( Name, External, Namespaces0, Namespaces1 ) -->
648		spaces,
649		nmtoken( Name ),
650		spaces,
651		doctype_id( External0 ),
652		spaces,
653		doctype1( Namespaces0, Literals, Namespaces1 ),
654		{doctype_extension(Literals, External0, External)}.
655
656		doctype_extension( [], External, External ).
657		doctype_extension( [Literal\|Literals], External0, External ) :-
658		extended_doctype( External0, [Literal\|Literals], External ).
659
660		extended_doctype( system(URL), Literals, system(URL,Literals) ).
661		extended_doctype( public(URN,URL), Literals, public(URN,URL,Literals) ).
662		extended_doctype( local, Literals, local(Literals) ).
663
664		doctype1( Namespaces0, Literals, Namespaces1 ) -->
665		"[",
666		!,
667		dtd( Namespaces0, Literals, Namespaces1 ),
668		expect_tokens("]","doctype").
669		doctype1( Namespaces, [], Namespaces ) --> "".
670
671		doctype_id( system(URL) ) -->
672		"SYSTEM",
673		spaces,
674		uri( URL ).
675		doctype_id( public(URN,URL) ) -->
676		"PUBLIC",
677		spaces,
678		uri( URN ),
679		spaces,
680		uri( URL ).
681		doctype_id( local ) --> "".
682
683		dtd( Namespaces0, Literals, Namespaces1 ) -->
684		spaces,
685		"<!ENTITY",
686		!,
687		spaces,
688		nmtoken_chars( Name ),
689		spaces,
690		quote( Quote ),
691		entity_value( Quote, Namespaces0, String ),
692		spaces,
693		expect_tokens(">","ENTITY"),
694		{\+ character_entity( Name, _StandardChar ),
695		% Don't allow < "e; etc. to be updated
696		context_update( entity(Name), Namespaces0, String, Namespaces2 )
697		},
698		dtd( Namespaces2, Literals, Namespaces1 ).
699		dtd( Namespaces0, Literals, Namespaces1 ) -->
700		spaces,
701		"<!--",
702		!,
703		dtd_comment,
704		expect_tokens(">","comment"),
705		dtd( Namespaces0, Literals, Namespaces1 ).
706		dtd( Namespaces0, [dtd_literal(Literal)\|Literals], Namespaces1 ) -->
707		spaces,
708		"<!",
709		!,
710		dtd_literal( Literal ),
711		dtd( Namespaces0, Literals, Namespaces1 ).
712		dtd( Namespaces, [], Namespaces ) --> spaces.
713
714		dtd_literal( [] ) --> ">", !.
715		dtd_literal( Chars ) -->
716		"--",
717		!,
718		dtd_comment,
719		dtd_literal( Chars ).
720		dtd_literal( [Char\|Chars] ) -->
721		[Char],
722		dtd_literal( Chars ).
723
724		dtd_comment( Plus, Minus ) :-
725		append( _Chars, [0'-,0'-\|Minus], Plus ),
726		!.
727
728		entity_value( Quote, Namespaces, String, [Char\|Plus], Minus ) :-
729		( Char == Quote ->
730		String = [],
731		Minus = Plus
732		; Char =:= "&" ->
733		reference_in_entity( Namespaces, Quote, String, Plus, Minus )
734		;
735		String = [Char\|String1],
736		entity_value( Quote, Namespaces, String1, Plus, Minus )
737		).
738
739		attribute_value( NL, String, Namespaces, Name ) --> % PATCH : add counter NL for line breaks in attribute values
740	?	(quote( Quote ) -> []
741		; {atom_codes(AN,Name), add_xml_warning('Expecting starting quotes for value of XML attribute: ',AN),fail}),
742		attribute_leading_layouts( NL, Quote, Namespaces, String ).
743
744		attribute_leading_layouts( 0, _Quote, _Namespace, [], [], [] ).
745		attribute_leading_layouts( NL, Quote, Namespaces, String, [Char\|Plus], Minus ) :-
746		( Char == Quote ->
747		String = [],
748		Minus = Plus,
749		NL = 0
750		; Char =:= "&" ->
751		reference_in_layout( NL, Namespaces, Quote, String, Plus, Minus )
752		; Char =:= 10 -> % count linebreaks for correct XML line numbers, but don't add to attribute value (according to https://www.w3.org/TR/1998/REC-xml-19980210#AVNormalize)
753		attribute_layouts( OldL, Quote, Namespaces, false, String, Plus, Minus ),
754		NL is OldL+1
755		; Char > 32, Char \== 160 ->
756		String = [Char\|String1],
757		attribute_layouts( NL, Quote, Namespaces, false, String1, Plus, Minus )
758		;
759		attribute_leading_layouts( NL, Quote, Namespaces, String, Plus, Minus )
760		).
761
762		attribute_layouts( 0, _Quote, _Namespaces, _Layout, [], [], [] ).
763		attribute_layouts( NL, Quote, Namespaces, Layout, String, [Char\|Plus], Minus ) :-
764		( Char == Quote ->
765		String = [],
766		Minus = Plus,
767		NL = 0
768		; Char =:= "&" ->
769		reference_in_value( NL, Namespaces, Quote, Layout, String, Plus, Minus )
770		; Char =:= 10 -> % count linebreaks, same as above
771		attribute_layouts( OldL, Quote, Namespaces, false, String, Plus, Minus ),
772		NL is OldL+1
773		; Char > 32, Char \== 160 ->
774		( Layout == true ->
775		String = [0' ,Char\|String1] %'
776		;
777		String = [Char\|String1]
778		),
779		attribute_layouts( NL, Quote, Namespaces, false, String1, Plus, Minus )
780		;
781		attribute_layouts( NL, Quote, Namespaces, true, String, Plus, Minus )
782		). % PATCH end
783
784		reference_in_layout( NL, Namespaces, Quote, String, Plus, Minus ) :-
785		( standard_character_entity( Char, Plus, Mid ) ->
786		String = [Char\|String1],
787		attribute_layouts( NL, Quote, Namespaces, false, String1, Mid, Minus )
788		; entity_reference_name( Name, Plus, Suffix ),
789		defined_entity( Name, Namespaces, Text ) ->
790		append( Text, Suffix, Mid ),
791		attribute_leading_layouts( NL, Quote, Namespaces, String, Mid, Minus )
792		; % Just & is okay in a value
793		String = [0'&\|String1], %'
794		attribute_layouts( NL, Quote, Namespaces, false, String1, Plus, Minus )
795		).
796
797		reference_in_value( Newlines, Namespaces, Quote, Layout, String, Plus, Minus ) :-
798		( standard_character_entity( Char, Plus, Mid ) ->
799		( Layout == true ->
800		String = [0' ,Char\|String1] %'
801		;
802		String = [Char\|String1]
803		),
804		Layout1 = false
805		; entity_reference_name( Name, Plus, Suffix ),
806		defined_entity( Name, Namespaces, Text ) ->
807		String = String1,
808		append( Text, Suffix, Mid ),
809		Layout1 = Layout
810		; % Just & is okay in a value
811		Mid = Plus,
812		String = [0'&\|String1], %'
813		Layout1 = false
814		),
815		attribute_layouts( Newlines, Quote, Namespaces, Layout1, String1, Mid, Minus ).
816
817		/* References are resolved backwards in Entity defintions so that
818		* circularity is avoided.
819		*/
820		reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
821		( standard_character_entity( _SomeChar, Plus, _Rest ) ->
822		String = [0'&\|String1], % ' Character entities are unparsed
823		Mid = Plus
824		; entity_reference_name( Name, Plus, Suffix ),
825		defined_entity( Name, Namespaces, Text ) ->
826		String = String1,
827		append( Text, Suffix, Mid )
828		),
829		entity_value( Quote, Namespaces, String1, Mid, Minus ).
830
831		standard_character_entity( Char ) -->
832		"#x", !, hex_character_reference( Char ), ";".
833		standard_character_entity( Char ) -->
834		"#", !, digit( Digit ), digits( Digits ), ";",
835		{number_codes( Char, [Digit\|Digits])}.
836		standard_character_entity( C ) -->
837	?	chars( String ),
838		";",
839		!,
840	?	{character_entity(String, C)}. % quot, amp, lt, gt, apos
841
842		uri( URI ) -->
843		quote( Quote ),
844		uri1( Quote, URI ).
845
846		uri1( Quote, [] ) -->
847		quote( Quote ),
848		!.
849		uri1( Quote, [Char\|Chars] ) -->
850		[Char],
851		uri1( Quote, Chars ).
852
853		comment( Chars, Plus, Minus ) :-
854		append( Chars, [0'-,0'-,0'>\|Minus], Plus ), %'
855		!.
856
857		cdata( Chars, Plus, Minus ) :-
858		append( Chars, [0'],0'],0'>\|Minus], Plus ), %'
859		!.
860		% Syntax Components
861
862		hex_character_reference( Code ) -->
863		hex_character_reference1( 0, Code ).
864
865		hex_character_reference1( Current, Code ) -->
866		hex_digit_char( Value ),
867		!,
868		{New is (Current << 4) + Value},
869		hex_character_reference1( New, Code ).
870		hex_character_reference1( Code, Code ) --> "".
871
872		hex_digit_char( 0 ) --> "0".
873		hex_digit_char( 1 ) --> "1".
874		hex_digit_char( 2 ) --> "2".
875		hex_digit_char( 3 ) --> "3".
876		hex_digit_char( 4 ) --> "4".
877		hex_digit_char( 5 ) --> "5".
878		hex_digit_char( 6 ) --> "6".
879		hex_digit_char( 7 ) --> "7".
880		hex_digit_char( 8 ) --> "8".
881		hex_digit_char( 9 ) --> "9".
882		hex_digit_char( 10 ) --> "A".
883		hex_digit_char( 11 ) --> "B".
884		hex_digit_char( 12 ) --> "C".
885		hex_digit_char( 13 ) --> "D".
886		hex_digit_char( 14 ) --> "E".
887		hex_digit_char( 15 ) --> "F".
888		hex_digit_char( 10 ) --> "a".
889		hex_digit_char( 11 ) --> "b".
890		hex_digit_char( 12 ) --> "c".
891		hex_digit_char( 13 ) --> "d".
892		hex_digit_char( 14 ) --> "e".
893		hex_digit_char( 15 ) --> "f".
894
895		quote( 0'" ) --> %'
896		"""".
897		quote( 0'\' ) -->
898		"'".
899
900		spaces( [], [] ).
901		spaces( [Char\|Chars0], Chars1 ) :-
902		( Char =< 32 ->
903		spaces( Chars0, Chars1 )
904		;
905		Chars1 = [Char\|Chars0]
906		).
907
908		spaces1( [Char\|Chars0], Chars1 ) :-
909		( Char =< 32 ->
910		spaces( Chars0, Chars1 )
911		).
912
913		nmtoken( Name ) -->
914		nmtoken_chars( Chars ),
915		{atom_codes(Name, Chars)}.
916
917		nmtoken_chars( [Char\|Chars] ) -->
918		[Char],
919		{nmtoken_first( Char )},
920		nmtoken_chars_tail( Chars ).
921
922		nmtoken_chars_tail( [Char\|Chars] ) -->
923		[Char],
924		{nmtoken_char(Char)},
925		!,
926		nmtoken_chars_tail( Chars ).
927		nmtoken_chars_tail([]) --> "".
928
929		nmtoken_first( 0': ).
930		nmtoken_first( 0'_ ).
931		nmtoken_first( Char ) :-
932		alphabet( Char ).
933
934		nmtoken_char( 0'a ).
935		nmtoken_char( 0'b ).
936		nmtoken_char( 0'c ).
937		nmtoken_char( 0'd ).
938		nmtoken_char( 0'e ).
939		nmtoken_char( 0'f ).
940		nmtoken_char( 0'g ).
941		nmtoken_char( 0'h ).
942		nmtoken_char( 0'i ).
943		nmtoken_char( 0'j ).
944		nmtoken_char( 0'k ).
945		nmtoken_char( 0'l ).
946		nmtoken_char( 0'm ).
947		nmtoken_char( 0'n ).
948		nmtoken_char( 0'o ).
949		nmtoken_char( 0'p ).
950		nmtoken_char( 0'q ).
951		nmtoken_char( 0'r ).
952		nmtoken_char( 0's ).
953		nmtoken_char( 0't ).
954		nmtoken_char( 0'u ).
955		nmtoken_char( 0'v ).
956		nmtoken_char( 0'w ).
957		nmtoken_char( 0'x ).
958		nmtoken_char( 0'y ).
959		nmtoken_char( 0'z ).
960		nmtoken_char( 0'A ).
961		nmtoken_char( 0'B ).
962		nmtoken_char( 0'C ).
963		nmtoken_char( 0'D ).
964		nmtoken_char( 0'E ).
965		nmtoken_char( 0'F ).
966		nmtoken_char( 0'G ).
967		nmtoken_char( 0'H ).
968		nmtoken_char( 0'I ).
969		nmtoken_char( 0'J ).
970		nmtoken_char( 0'K ).
971		nmtoken_char( 0'L ).
972		nmtoken_char( 0'M ).
973		nmtoken_char( 0'N ).
974		nmtoken_char( 0'O ).
975		nmtoken_char( 0'P ).
976		nmtoken_char( 0'Q ).
977		nmtoken_char( 0'R ).
978		nmtoken_char( 0'S ).
979		nmtoken_char( 0'T ).
980		nmtoken_char( 0'U ).
981		nmtoken_char( 0'V ).
982		nmtoken_char( 0'W ).
983		nmtoken_char( 0'X ).
984		nmtoken_char( 0'Y ).
985		nmtoken_char( 0'Z ).
986		nmtoken_char( 0'0 ).
987		nmtoken_char( 0'1 ).
988		nmtoken_char( 0'2 ).
989		nmtoken_char( 0'3 ).
990		nmtoken_char( 0'4 ).
991		nmtoken_char( 0'5 ).
992		nmtoken_char( 0'6 ).
993		nmtoken_char( 0'7 ).
994		nmtoken_char( 0'8 ).
995		nmtoken_char( 0'9 ).
996		nmtoken_char( 0'. ).
997		nmtoken_char( 0'- ).
998		nmtoken_char( 0'_ ).
999		nmtoken_char( 0': ).
1000
1001		xml_string( String ) -->
1002	?	quote( Quote ),!,
1003		xml_string1( Quote, String ).
1004
1005		xml_string1( Quote, [] ) -->
1006		quote( Quote ),
1007		!.
1008		xml_string1( Quote, [Char\|Chars] ) -->
1009		[Char],
1010		xml_string1( Quote, Chars ).
1011
1012		alphabet( 0'a ).
1013		alphabet( 0'b ).
1014		alphabet( 0'c ).
1015		alphabet( 0'd ).
1016		alphabet( 0'e ).
1017		alphabet( 0'f ).
1018		alphabet( 0'g ).
1019		alphabet( 0'h ).
1020		alphabet( 0'i ).
1021		alphabet( 0'j ).
1022		alphabet( 0'k ).
1023		alphabet( 0'l ).
1024		alphabet( 0'm ).
1025		alphabet( 0'n ).
1026		alphabet( 0'o ).
1027		alphabet( 0'p ).
1028		alphabet( 0'q ).
1029		alphabet( 0'r ).
1030		alphabet( 0's ).
1031		alphabet( 0't ).
1032		alphabet( 0'u ).
1033		alphabet( 0'v ).
1034		alphabet( 0'w ).
1035		alphabet( 0'x ).
1036		alphabet( 0'y ).
1037		alphabet( 0'z ).
1038		alphabet( 0'A ).
1039		alphabet( 0'B ).
1040		alphabet( 0'C ).
1041		alphabet( 0'D ).
1042		alphabet( 0'E ).
1043		alphabet( 0'F ).
1044		alphabet( 0'G ).
1045		alphabet( 0'H ).
1046		alphabet( 0'I ).
1047		alphabet( 0'J ).
1048		alphabet( 0'K ).
1049		alphabet( 0'L ).
1050		alphabet( 0'M ).
1051		alphabet( 0'N ).
1052		alphabet( 0'O ).
1053		alphabet( 0'P ).
1054		alphabet( 0'Q ).
1055		alphabet( 0'R ).
1056		alphabet( 0'S ).
1057		alphabet( 0'T ).
1058		alphabet( 0'U ).
1059		alphabet( 0'V ).
1060		alphabet( 0'W ).
1061		alphabet( 0'X ).
1062		alphabet( 0'Y ).
1063		alphabet( 0'Z ).
1064
1065		digit( C ) --> [C], {digit_table( C )}.
1066
1067		digit_table( 0'0 ).
1068		digit_table( 0'1 ).
1069		digit_table( 0'2 ).
1070		digit_table( 0'3 ).
1071		digit_table( 0'4 ).
1072		digit_table( 0'5 ).
1073		digit_table( 0'6 ).
1074		digit_table( 0'7 ).
1075		digit_table( 0'8 ).
1076		digit_table( 0'9 ).
1077
1078		digits( [Digit\|Digits] ) -->
1079		digit( Digit ),
1080		digits( Digits ).
1081		digits( [] ) --> [].
1082
1083		character_entity( "quot", 0'" ). %'
1084		character_entity( "amp", 0'& ). %'
1085		character_entity( "lt", 0'< ). %'
1086		character_entity( "gt", 0'> ). %'
1087		character_entity( "apos", 0'\' ).
1088		/* xml_diagnosis.pl : XML exception diagnosis.
1089		*
1090		* $Revision$
1091		*/
1092
1093		/* xml_fault( +Term, +Indentation, ?SubTerm, ?Path, ?Message ) identifies SubTerm
1094		* as a sub-term of Term which cannot be serialized after Indentation.
1095		* Message is an atom naming the type of error; Path is a string encoding a
1096		* list of SubTerm's ancestor elements in the form <tag>{(id)}* where <tag> is the
1097		* element tag and <id> is the value of any attribute _named_ id.
1098		*/
1099		xml_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1100		var( Term ).
1101		xml_fault( xml(Attributes,_Content), _Indent, Term, [], Message ) :-
1102		member( Attribute, Attributes ),
1103		attribute_fault( Attribute, Term, Message ).
1104		xml_fault( xml(_Attributes,Content), Indent, Culprit, Path, Message ) :-
1105		xml_content_fault( Content, Indent, Culprit, Path, Message ).
1106		xml_fault( Term, _Indent, Term, [], 'Illegal Term' ).
1107
1108		xml_content_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1109		var( Term ).
1110		xml_content_fault( pcdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1111		\+ is_chars( Chars ).
1112		xml_content_fault( cdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1113		\+ is_chars( Chars ).
1114		xml_content_fault( [H\|_T], Indent, Culprit, Path, Message ) :-
1115		xml_content_fault( H, Indent, Culprit, Path, Message ).
1116		xml_content_fault( [_H\|T], Indent, Culprit, Path, Message ) :-
1117		xml_content_fault( T, Indent, Culprit, Path, Message ).
1118		xml_content_fault( namespace(_URI,_Prefix,Element), Indent, Culprit, Path, Message ) :-
1119		element_fault( Element, [0' \|Indent], Culprit, Path, Message ).
1120		xml_content_fault( Element, Indent, Culprit, Path, Message ) :-
1121		element_fault( Element, [0' \|Indent], Culprit, Path, Message ).
1122		xml_content_fault( Term, Indent, Term, [], 'Illegal Term' ) :-
1123		\+ generation(Term, "", false, Indent, _Format, _Plus, _Minus ).
1124
1125		element_fault( element(Tag, _Attributes, _Contents), _Indent, Tag, [], 'Tag must be an atom' ) :-
1126		\+ atom( Tag ).
1127		element_fault( element(Tag, Attributes, _Contents), _Indent, Tag, [], 'Attributes must be instantiated' ) :-
1128		var( Attributes ).
1129		element_fault( element(Tag, Attributes, _Contents), _Indent, Faulty, Path, Message ) :-
1130		fault_path( Tag, Attributes, Path, [] ),
1131		member( Attribute, Attributes ),
1132		attribute_fault( Attribute, Faulty, Message ).
1133		element_fault( element(Tag, Attributes, Contents), Indent, Culprit, Path, Message ) :-
1134		fault_path( Tag, Attributes, Path, Path1 ),
1135		xml_content_fault( Contents, Indent, Culprit, Path1, Message ).
1136
1137		attribute_fault( Attribute, Attribute, 'Illegal Variable' ) :-
1138		var( Attribute ).
1139		attribute_fault( Name=Value, Name=Value, 'Attribute Name must be atom' ) :-
1140		\+ atom(Name).
1141		attribute_fault( Name=Value, Name=Value, 'Attribute Value must be chars' ) :-
1142		\+ is_chars( Value ).
1143		attribute_fault( Attribute, Attribute, 'Malformed Attribute' ) :-
1144		Attribute \= (_Name=_Value).
1145
1146		is_chars( Chars ) :-
1147		is_list( Chars ),
1148		\+ (member( Char, Chars ), \+ (integer(Char), Char >=0, Char =< 255)).
1149
1150		fault_path( Tag, Attributes ) -->
1151		{atom_codes( Tag, Chars )},
1152		chars( Chars ),
1153		fault_id( Attributes ),
1154		" ".
1155
1156		fault_id( Attributes ) -->
1157		{member( id=Chars, Attributes ), is_chars( Chars )},
1158		!,
1159		"(", chars(Chars), ")".
1160		fault_id( _Attributes ) --> "".
1161		/* xml_generation.pl : Document -> XML translation
1162		*
1163		* $Revision$
1164		*/
1165
1166		/* document_generation( +Format, +Document ) is a DCG generating Document
1167		* as a list of character codes. Format is true\|false defining whether layouts,
1168		* to provide indentation, should be added between the element content of
1169		* the resultant "string". Note that formatting is disabled for elements that
1170		* are interspersed with pcdata/1 terms, such as XHTML's 'inline' elements.
1171		* Also, Format is over-ridden, for an individual element, by an explicit
1172		* 'xml:space'="preserve" attribute.
1173		*/
1174		document_generation( Format, xml(Attributes, Document) ) -->
1175	?	document_generation_body( Attributes, Format, Document ).
1176
1177		document_generation_body( [], Format, Document ) -->
1178		generation( Document, "", Format, [], _Format1 ).
1179		document_generation_body( Attributes, Format, Document ) -->
1180		{ Attributes = [_\|_],
1181	?	xml_declaration_attributes_valid( Attributes )
1182		},
1183		"<?xml",
1184		generated_attributes( Attributes, Format, Format0 ),
1185		"?>",
1186		indent( true, [] ),
1187		generation( Document, "", Format0, [], _Format1 ).
1188
1189		generation( [], _Prefix, Format, _Indent, Format ) --> [].
1190		generation( [Term\|Terms], Prefix, Format0, Indent, Format ) -->
1191		generation( Term, Prefix, Format0, Indent, Format1 ),
1192		generation( Terms, Prefix, Format1, Indent, Format ).
1193		generation( doctype(Name, External), _Prefix, Format, [], Format ) -->
1194		"<!DOCTYPE ",
1195		generated_name( Name ),
1196		generated_external_id( External ),
1197		">".
1198		generation( instructions(Target,Process), _Prefix, Format, Indent, Format ) -->
1199		indent( Format, Indent ),
1200		"<?", generated_name(Target), " ", chars( Process ) ,"?>".
1201		generation( pcdata(Chars), _Prefix, _Format, _Indent, false ) -->
1202		pcdata_generation( Chars ).
1203		generation( comment( Comment ), _Prefix, Format, Indent, Format ) -->
1204		indent( Format, Indent ),
1205		"<!--", chars( Comment ), "-->".
1206		generation( namespace(URI, Prefix, element(Name, Atts, Content)),
1207		_Prefix0, Format, Indent, Format ) -->
1208		indent( Format, Indent ),
1209		"<", generated_prefixed_name( Prefix, Name ),
1210		generated_prefixed_attributes( Prefix, URI, Atts, Format, Format1 ),
1211		generated_content( Content, Format1, Indent, Prefix, Name ).
1212		generation( element(Name, Atts, Content), Prefix, Format, Indent, Format ) -->
1213		indent( Format, Indent ),
1214		"<", generated_prefixed_name( Prefix, Name ),
1215		generated_attributes( Atts, Format, Format1 ),
1216		generated_content( Content, Format1, Indent, Prefix, Name ).
1217		generation( cdata(CData), _Prefix, Format, Indent, Format ) -->
1218		indent( Format, Indent ),
1219		"<![CDATA[", cdata_generation(CData), "]]>".
1220
1221		generated_attributes( [], Format, Format ) --> [].
1222		generated_attributes( [Name=Value\|Attributes], Format0, Format ) -->
1223		{( Name == 'xml:space',
1224		Value="preserve" ->
1225		Format1 = false
1226		;
1227		Format1 = Format0
1228		)},
1229		" ",
1230		generated_name( Name ),
1231		"=""",
1232		quoted_string( Value ),
1233		"""",
1234		generated_attributes( Attributes, Format1, Format ).
1235
1236		generated_prefixed_name( [], Name ) -->
1237		generated_name( Name ).
1238		generated_prefixed_name( Prefix, Name ) -->
1239		{Prefix = [_\|_]},
1240		chars( Prefix ), ":",
1241		generated_name( Name ).
1242
1243		generated_content( [], _Format, _Indent, _Prefix, _Namespace ) -->
1244		" />". % Leave an extra space for XHTML output.
1245		generated_content( [H\|T], Format, Indent, Prefix, Namespace ) -->
1246		">",
1247		generation( H, Prefix, Format, [0' \|Indent], Format1 ),
1248		generation( T, Prefix, Format1, [0' \|Indent], Format2 ),
1249		indent( Format2, Indent ),
1250		"</", generated_prefixed_name( Prefix, Namespace ), ">".
1251
1252		generated_prefixed_attributes( [_\|_Prefix], _URI, Atts, Format0, Format ) -->
1253		generated_attributes( Atts, Format0, Format ).
1254		generated_prefixed_attributes( [], URI, Atts, Format0, Format ) -->
1255		{atom_codes( URI, Namespace ),
1256		findall( Attr, (member(Attr, Atts), Attr \= (xmlns=_Val)), Atts1 )
1257		},
1258		generated_attributes( [xmlns=Namespace\|Atts1], Format0, Format ).
1259
1260		generated_name( Name, Plus, Minus ) :-
1261		atom_codes( Name, Chars ),
1262		append( Chars, Minus, Plus ).
1263
1264		generated_external_id( local ) --> "".
1265		generated_external_id( local(Literals) ) --> " [",
1266		generated_doctype_literals( Literals ), "\n\t]".
1267		generated_external_id( system(URL) ) -->
1268		" SYSTEM """,
1269		chars( URL ),
1270		"""".
1271		generated_external_id( system(URL,Literals) ) -->
1272		" SYSTEM """,
1273		chars( URL ),
1274		""" [",
1275		generated_doctype_literals( Literals ), "\n\t]".
1276		generated_external_id( public(URN,URL) ) -->
1277		" PUBLIC """,
1278		chars( URN ),
1279		""" """,
1280		chars( URL ),
1281		"""".
1282		generated_external_id( public(URN,URL,Literals) ) -->
1283		" PUBLIC """,
1284		chars( URN ),
1285		""" """,
1286		chars( URL ),
1287		""" [",
1288		generated_doctype_literals( Literals ), "\n\t]".
1289
1290		generated_doctype_literals( [] ) --> "".
1291		generated_doctype_literals( [dtd_literal(String)\|Literals] ) --> "\n\t",
1292		"<!", cdata_generation( String ), ">",
1293		generated_doctype_literals( Literals ).
1294
1295		/* quoted_string( +Chars ) is a DCG representing Chars, a list of character
1296		* codes, as a legal XML attribute string. Any leading or trailing layout
1297		* characters are removed. &, " and < characters are replaced by &, "
1298		* and < respectively.
1299		*/
1300		quoted_string( Raw, Plus, Minus ) :-
1301		quoted_string1( Raw, NoLeadingLayouts ),
1302		quoted_string2( NoLeadingLayouts, Layout, Layout, Plus, Minus ).
1303
1304		quoted_string1( [], [] ).
1305		quoted_string1( [Char\|Chars], NoLeadingLayouts ) :-
1306		( Char > 32 ->
1307		NoLeadingLayouts = [Char\|Chars]
1308		;
1309		quoted_string1( Chars, NoLeadingLayouts )
1310		).
1311
1312		quoted_string2( [], _LayoutPlus, _LayoutMinus, List, List ).
1313		quoted_string2( [Char\|Chars], LayoutPlus, LayoutMinus, Plus, Minus ) :-
1314		( Char =< " " ->
1315		Plus = Plus1,
1316		LayoutMinus = [Char\|LayoutMinus1],
1317		LayoutPlus = LayoutPlus1
1318		; Char =< 127 ->
1319		Plus = LayoutPlus,
1320		pcdata_7bit( Char, LayoutMinus, Plus1 ),
1321		LayoutPlus1 = LayoutMinus1
1322	?	; legal_xml_unicode( Char ) ->
1323		Plus = LayoutPlus,
1324		number_codes( Char, Codes ),
1325		pcdata_8bits_plus( Codes, LayoutMinus, Plus1 ),
1326		LayoutPlus1 = LayoutMinus1
1327		;
1328		LayoutPlus = LayoutPlus1,
1329		LayoutMinus = LayoutMinus1,
1330		Plus = Plus1
1331		),
1332		quoted_string2( Chars, LayoutPlus1, LayoutMinus1, Plus1, Minus ).
1333
1334		indent( false, _Indent ) --> [].
1335		indent( true, Indent ) --> "\n",
1336		chars( Indent ).
1337
1338		/* pcdata_generation( +Chars ) is a DCG representing Chars, a list of character
1339		* codes as legal XML "Parsed character data" (PCDATA) string. Any codes
1340		* which cannot be represented by a 7-bit character are replaced by their
1341		* decimal numeric character entity e.g. code 160 (non-breaking space) is
1342		* represented as . Any character codes disallowed by the XML
1343		* specification are not encoded.
1344		*/
1345		pcdata_generation( [], Plus, Plus ).
1346		pcdata_generation( [Char\|Chars], Plus, Minus ) :-
1347		( Char =< 127 ->
1348		pcdata_7bit( Char, Plus, Mid )
1349		; legal_xml_unicode( Char ) ->
1350		number_codes( Char, Codes ),
1351		pcdata_8bits_plus( Codes, Plus, Mid )
1352		;
1353		Plus = Mid
1354		),
1355		pcdata_generation( Chars, Mid, Minus ).
1356
1357		/* pcdata_7bit(+Char) represents the ascii character set in its
1358		* simplest format, using the character entities & " < and >
1359		* which are common to both XML and HTML. The numeric entity ' is used in
1360		* place of ', because browsers don't recognize it in HTML.
1361		*/
1362		pcdata_7bit( 0 ) --> "".
1363		pcdata_7bit( 1 ) --> "".
1364		pcdata_7bit( 2 ) --> "".
1365		pcdata_7bit( 3 ) --> "".
1366		pcdata_7bit( 4 ) --> "".
1367		pcdata_7bit( 5 ) --> "".
1368		pcdata_7bit( 6 ) --> "".
1369		pcdata_7bit( 7 ) --> "".
1370		pcdata_7bit( 8 ) --> "".
1371		pcdata_7bit( 9 ) --> [9].
1372		pcdata_7bit( 10 ) --> [10].
1373		pcdata_7bit( 11 ) --> "".
1374		pcdata_7bit( 12 ) --> "".
1375		pcdata_7bit( 13 ) --> [13].
1376		pcdata_7bit( 14 ) --> "".
1377		pcdata_7bit( 15 ) --> "".
1378		pcdata_7bit( 16 ) --> "".
1379		pcdata_7bit( 17 ) --> "".
1380		pcdata_7bit( 18 ) --> "".
1381		pcdata_7bit( 19 ) --> "".
1382		pcdata_7bit( 20 ) --> "".
1383		pcdata_7bit( 21 ) --> "".
1384		pcdata_7bit( 22 ) --> "".
1385		pcdata_7bit( 23 ) --> "".
1386		pcdata_7bit( 24 ) --> "".
1387		pcdata_7bit( 25 ) --> "".
1388		pcdata_7bit( 26 ) --> "".
1389		pcdata_7bit( 27 ) --> "".
1390		pcdata_7bit( 28 ) --> "".
1391		pcdata_7bit( 29 ) --> "".
1392		pcdata_7bit( 30 ) --> "".
1393		pcdata_7bit( 31 ) --> "".
1394		pcdata_7bit( 32 ) --> " ".
1395		pcdata_7bit( 33 ) --> "!".
1396		pcdata_7bit( 34 ) --> """.
1397		pcdata_7bit( 35 ) --> "#".
1398		pcdata_7bit( 36 ) --> "$".
1399		pcdata_7bit( 37 ) --> "%".
1400		pcdata_7bit( 38 ) --> "&".
1401		pcdata_7bit( 39 ) --> "'".
1402		pcdata_7bit( 40 ) --> "(".
1403		pcdata_7bit( 41 ) --> ")".
1404		pcdata_7bit( 42 ) --> "*".
1405		pcdata_7bit( 43 ) --> "+".
1406		pcdata_7bit( 44 ) --> ",".
1407		pcdata_7bit( 45 ) --> "-".
1408		pcdata_7bit( 46 ) --> ".".
1409		pcdata_7bit( 47 ) --> "/".
1410		pcdata_7bit( 48 ) --> "0".
1411		pcdata_7bit( 49 ) --> "1".
1412		pcdata_7bit( 50 ) --> "2".
1413		pcdata_7bit( 51 ) --> "3".
1414		pcdata_7bit( 52 ) --> "4".
1415		pcdata_7bit( 53 ) --> "5".
1416		pcdata_7bit( 54 ) --> "6".
1417		pcdata_7bit( 55 ) --> "7".
1418		pcdata_7bit( 56 ) --> "8".
1419		pcdata_7bit( 57 ) --> "9".
1420		pcdata_7bit( 58 ) --> ":".
1421		pcdata_7bit( 59 ) --> ";".
1422		pcdata_7bit( 60 ) --> "<".
1423		pcdata_7bit( 61 ) --> "=".
1424		pcdata_7bit( 62 ) --> ">".
1425		pcdata_7bit( 63 ) --> "?".
1426		pcdata_7bit( 64 ) --> "@".
1427		pcdata_7bit( 65 ) --> "A".
1428		pcdata_7bit( 66 ) --> "B".
1429		pcdata_7bit( 67 ) --> "C".
1430		pcdata_7bit( 68 ) --> "D".
1431		pcdata_7bit( 69 ) --> "E".
1432		pcdata_7bit( 70 ) --> "F".
1433		pcdata_7bit( 71 ) --> "G".
1434		pcdata_7bit( 72 ) --> "H".
1435		pcdata_7bit( 73 ) --> "I".
1436		pcdata_7bit( 74 ) --> "J".
1437		pcdata_7bit( 75 ) --> "K".
1438		pcdata_7bit( 76 ) --> "L".
1439		pcdata_7bit( 77 ) --> "M".
1440		pcdata_7bit( 78 ) --> "N".
1441		pcdata_7bit( 79 ) --> "O".
1442		pcdata_7bit( 80 ) --> "P".
1443		pcdata_7bit( 81 ) --> "Q".
1444		pcdata_7bit( 82 ) --> "R".
1445		pcdata_7bit( 83 ) --> "S".
1446		pcdata_7bit( 84 ) --> "T".
1447		pcdata_7bit( 85 ) --> "U".
1448		pcdata_7bit( 86 ) --> "V".
1449		pcdata_7bit( 87 ) --> "W".
1450		pcdata_7bit( 88 ) --> "X".
1451		pcdata_7bit( 89 ) --> "Y".
1452		pcdata_7bit( 90 ) --> "Z".
1453		pcdata_7bit( 91 ) --> "[".
1454		pcdata_7bit( 92 ) --> [92].
1455		pcdata_7bit( 93 ) --> "]".
1456		pcdata_7bit( 94 ) --> "^".
1457		pcdata_7bit( 95 ) --> "_".
1458		pcdata_7bit( 96 ) --> "`".
1459		pcdata_7bit( 97 ) --> "a".
1460		pcdata_7bit( 98 ) --> "b".
1461		pcdata_7bit( 99 ) --> "c".
1462		pcdata_7bit( 100 ) --> "d".
1463		pcdata_7bit( 101 ) --> "e".
1464		pcdata_7bit( 102 ) --> "f".
1465		pcdata_7bit( 103 ) --> "g".
1466		pcdata_7bit( 104 ) --> "h".
1467		pcdata_7bit( 105 ) --> "i".
1468		pcdata_7bit( 106 ) --> "j".
1469		pcdata_7bit( 107 ) --> "k".
1470		pcdata_7bit( 108 ) --> "l".
1471		pcdata_7bit( 109 ) --> "m".
1472		pcdata_7bit( 110 ) --> "n".
1473		pcdata_7bit( 111 ) --> "o".
1474		pcdata_7bit( 112 ) --> "p".
1475		pcdata_7bit( 113 ) --> "q".
1476		pcdata_7bit( 114 ) --> "r".
1477		pcdata_7bit( 115 ) --> "s".
1478		pcdata_7bit( 116 ) --> "t".
1479		pcdata_7bit( 117 ) --> "u".
1480		pcdata_7bit( 118 ) --> "v".
1481		pcdata_7bit( 119 ) --> "w".
1482		pcdata_7bit( 120 ) --> "x".
1483		pcdata_7bit( 121 ) --> "y".
1484		pcdata_7bit( 122 ) --> "z".
1485		pcdata_7bit( 123 ) --> "{".
1486		pcdata_7bit( 124 ) --> "\|".
1487		pcdata_7bit( 125 ) --> "}".
1488		pcdata_7bit( 126 ) --> "~".
1489		pcdata_7bit( 127 ) --> "".
1490
1491		pcdata_8bits_plus( Codes ) -->
1492		"&#", chars( Codes ), ";".
1493
1494		/* cdata_generation( +Chars ) is a DCG representing Chars, a list of character
1495		* codes as a legal XML CDATA string. Any character codes disallowed by the XML
1496		* specification are not encoded.
1497		*/
1498		cdata_generation( [] ) --> "".
1499		cdata_generation( [Char\|Chars] ) -->
1500		( {legal_xml_unicode( Char )}, !, [Char]
1501		\| ""
1502		),
1503		cdata_generation( Chars ).
1504
1505		legal_xml_unicode( 9 ).
1506		legal_xml_unicode( 10 ).
1507		legal_xml_unicode( 13 ).
1508		legal_xml_unicode( Code ) :-
1509		Code >= 32,
1510		Code =< 55295.
1511		legal_xml_unicode( Code ) :-
1512		Code >= 57344,
1513		Code =< 65533.
1514		legal_xml_unicode( Code ) :-
1515		Code >= 65536,
1516		Code =< 1114111.
1517
1518		/* xml_pp: "pretty print" an XML Document on the current output stream.
1519		*
1520		* Current Release: 1.9
1521		*/
1522		/* xml_pp( +XMLDocument ) "pretty prints" XMLDocument on the current
1523		* output stream.
1524		*/
1525		xml_pp( xml(Attributes, Document) ) :-
1526		write( 'xml( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1527		xml_pp_list( Document, "\t" ),
1528		format( ' ).~n', [] ).
1529		xml_pp( malformed(Attributes, Document) ) :-
1530		write( 'malformed( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1531		xml_pp_list( Document, "\t" ),
1532		format( ' ).~n', [] ).
1533
1534		xml_pp_indented( [], Indent ) :-
1535		format( '~s[]', [Indent] ).
1536		xml_pp_indented( List, Indent ) :-
1537		List = [_\|_],
1538		format( '~s', [Indent] ),
1539		xml_pp_list( List, Indent ).
1540		xml_pp_indented( comment(Text), Indent ) :-
1541		format( '~scomment(', [Indent] ), pp_string(Text), put_code( 0') ).
1542		xml_pp_indented( namespace(URI,Prefix,Element), Indent ) :-
1543		format( '~snamespace( ~q, "~s",~n', [Indent,URI,Prefix] ),
1544		xml_pp_indented( Element, [0'\t\|Indent] ),
1545		format( '~n~s)', [[0'\t\|Indent]] ).
1546		xml_pp_indented( element(Tag,Attributes,Contents), Indent ) :-
1547		format( '~selement( ~q,~n', [Indent,Tag] ),
1548		pp_attributes( Attributes, [0'\t\|Indent] ), put_code(0',), nl,
1549		xml_pp_list( Contents, [0'\t\|Indent] ), write( ' )' ).
1550		xml_pp_indented( instructions(Target, Processing), Indent ) :-
1551		format( '~sinstructions( ~q, ', [Indent,Target] ),
1552		pp_string(Processing), put_code( 0') ).
1553		xml_pp_indented( doctype(Name, DoctypeId), Indent ) :-
1554		format( '~sdoctype( ~q, ', [Indent,Name] ),
1555		xml_pp_indented( DoctypeId, [0'\t\|Indent] ),
1556		write( ' )' ).
1557		xml_pp_indented( cdata(CData), Indent ) :-
1558		format( '~scdata(', [Indent] ), pp_string(CData), put_code( 0') ).
1559		xml_pp_indented( pcdata(PCData), Indent ) :-
1560		format( '~spcdata(', [Indent] ), pp_string(PCData), put_code( 0') ).
1561		xml_pp_indented( public(URN,URL), _Indent ) :-
1562		format( 'public( "~s", "~s" )', [URN,URL] ).
1563		xml_pp_indented( public(URN,URL,Literals), Indent ) :-
1564		format( 'public( "~s", "~s",~n', [URN,URL] ),
1565		xml_pp_list( Literals, [0'\t\|Indent] ), write( ' )' ). %'
1566		xml_pp_indented( system(URL), _Indent ) :-
1567		format( 'system( "~s" )', [URL] ).
1568		xml_pp_indented( system(URL,Literals), Indent ) :-
1569		format( 'system( "~s",~n', [URL] ),
1570		xml_pp_list( Literals, [0'\t\|Indent] ), write( ' )' ). %'
1571		xml_pp_indented( local, _Indent ) :-
1572		write( local ).
1573		xml_pp_indented( local(Literals), Indent ) :-
1574		write( 'local(' ), nl,
1575		xml_pp_list( Literals, [0'\t\|Indent] ), write( ' )' ). %'
1576		xml_pp_indented( dtd_literal(String), Indent ) :-
1577		format( '~sdtd_literal(', [Indent] ), pp_string(String), put_code( 0') ). %'
1578		xml_pp_indented( out_of_context(Tag), Indent ) :-
1579		format( '~s/* SYNTAX ERROR */ out_of_context( ~q )', [Indent,Tag] ).
1580		xml_pp_indented( unparsed(String), Indent ) :-
1581		format( '~s/* SYNTAX ERROR */ unparsed( ', [Indent] ),
1582		pp_string(String), put_code( 0') ).
1583
1584		xml_pp_list( [], Indent ) :-
1585		format( '~s[]', [Indent] ).
1586		xml_pp_list( [H\|T], Indent ) :-
1587		format( '~s[~n', [Indent] ),
1588		xml_pp_indented( H, Indent ),
1589		xml_pp_list1( T, Indent ),
1590		format( '~s]', [Indent] ).
1591
1592		xml_pp_list1( [], _Indent ) :-
1593		nl.
1594		xml_pp_list1( [H\|T], Indent ) :-
1595		put_code( 0', ), nl,
1596		xml_pp_indented( H, Indent ),
1597		xml_pp_list1( T, Indent ).
1598
1599		% PATCH LEUSCHEL:
1600		pp_attributes( Attributes, Indent ) :-
1601		(select('$attribute_linefeeds'=_Count,Attributes,Rest)
1602		-> pp_attributes2(Rest,Indent) % TO DO: insert linefeeds
1603		; pp_attributes2(Attributes,Indent)).
1604
1605		pp_attributes2( [], Indent ) :-
1606		format( '~s[]', [Indent] ).
1607		pp_attributes2( [Attribute\|Attributes], Indent ) :-
1608		format( '~s[', [Indent] ),
1609		pp_attributes1( Attributes, Attribute ),
1610		put_code( 0'] ).
1611		% END PATCH
1612		pp_attributes1( [], Name=Value ) :-
1613		format( '~q=', [Name] ), pp_string( Value ).
1614		pp_attributes1( [H\|T], Name=Value ) :-
1615		format( '~q=', [Name] ), pp_string( Value ), write( ', ' ),
1616		pp_attributes1( T, H ).
1617		/* XML Utilities
1618		*
1619		* $Revision$
1620		*/
1621
1622		% Entity and Namespace map operations: these maps are usually quite small, so
1623		% a linear list lookup is okay. They could be substituted by a logarithmic
1624		% data structure - in extremis.
1625
1626		/* empty_map( ?Map ) is true if Map is a null map.
1627		*/
1628		empty_map( [] ).
1629
1630		/* map_member( +Key, +Map, ?Data ) is true if Map is a ordered map structure
1631		* which records the pair Key-Data. Key must be ground.
1632		*/
1633		map_member( Key0, [Key1-Data1\|Rest], Data0 ) :-
1634		( Key0 == Key1 ->
1635		Data0 = Data1
1636		; Key0 @> Key1 ->
1637		map_member( Key0, Rest, Data0 )
1638		).
1639
1640		/* map_store( +Map0, +Key, +Data, ?Map1 ) is true if Map0 is an ordered map
1641		* structure, Key must be ground, and Map1 is identical to Map0 except that
1642		* the pair Key-Data is recorded by Map1.
1643		*/
1644		map_store( [], Key, Data, [Key-Data] ).
1645		map_store( [Key0-Data0\|Map0], Key, Data, Map ) :-
1646		( Key == Key0 ->
1647		Map = [Key-Data\|Map0]
1648		; Key @< Key0 ->
1649		Map = [Key-Data,Key0-Data0\|Map0]
1650		; % >
1651		Map = [Key0-Data0\|Map1],
1652		map_store( Map0, Key, Data, Map1 )
1653		).
1654
1655		/* context(?Element, ?PreserveSpace, ?CurrentNS, ?DefaultNS, ?Entities, ?Namespaces )
1656		* is an ADT hiding the "state" arguments for XML Acquisition
1657		*/
1658		initial_context(
1659		opt(Fmt,Ext,RemoveAttributePrefixes),
1660		context(void,PreserveSpace,'','',Entities,Empty,
1661		RemoveAttributePrefixes)
1662		) :-
1663		empty_map( Empty ),
1664		( Ext==false ->
1665		Entities = Empty
1666		;
1667		extended_character_entities(Entities)
1668		),
1669		( Fmt==false ->
1670		PreserveSpace = true
1671		;
1672		PreserveSpace = false
1673		).
1674
1675		context_update( current_namespace, Context0, URI, Context1 ) :-
1676		Context0 = context(Element,Preserve,_Current,Default,Entities,
1677		Namespaces,RemoveAttributePrefixes),
1678		Context1 = context(Element,Preserve,URI,Default,Entities,
1679		Namespaces,RemoveAttributePrefixes).
1680		context_update( element, Context0, Tag, Context1 ) :-
1681		Context0 = context(_Element,Preserve,Current,Default,Entities,
1682		Namespaces,RemoveAttributePrefixes),
1683		Context1 = context(tag(Tag),Preserve,Current,Default,Entities,
1684		Namespaces,RemoveAttributePrefixes).
1685		context_update( default_namespace, Context0, URI, Context1 ):-
1686		Context0 = context(Element,Preserve,Current,_Default,Entities,
1687		Namespaces,RemoveAttributePrefixes),
1688		Context1 = context(Element,Preserve,Current,URI,Entities,
1689		Namespaces,RemoveAttributePrefixes).
1690		context_update( space_preserve, Context0, Boolean, Context1 ):-
1691		Context0 = context(Element,_Preserve,Current,Default,Entities,
1692		Namespaces,RemoveAttributePrefixes),
1693		Context1 = context(Element,Boolean,Current,Default,Entities,
1694		Namespaces,RemoveAttributePrefixes).
1695		context_update( ns_prefix(Prefix), Context0, URI, Context1 ) :-
1696		Context0 = context(Element,Preserve,Current,Default,Entities,
1697		Namespaces0,RemoveAttributePrefixes),
1698		Context1 = context(Element,Preserve,Current,Default,Entities,
1699		Namespaces1,RemoveAttributePrefixes),
1700		map_store( Namespaces0, Prefix, URI, Namespaces1 ).
1701		context_update( entity(Name), Context0, String, Context1 ) :-
1702		Context0 = context(Element,Preserve,Current,Default,Entities0,
1703		Namespaces,RemoveAttributePrefixes),
1704		Context1 = context(Element,Preserve,Current,Default,Entities1,
1705		Namespaces,RemoveAttributePrefixes),
1706		map_store( Entities0, Name, String, Entities1 ).
1707
1708		remove_attribute_prefixes( Context ) :-
1709		Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1710		_Namespaces,true).
1711
1712		current_tag( Context, Tag ) :-
1713		Context = context(tag(Tag),_Preserve,_Current,_Default,_Entities,
1714		_Namespaces,_RPFA).
1715
1716		current_namespace( Context, Current ) :-
1717		Context = context(_Element,_Preserve,Current,_Default,_Entities,
1718		_Namespaces,_RPFA).
1719
1720		default_namespace( Context, Default ) :-
1721		Context = context(_Element,_Preserve,_Current,Default,_Entities,
1722		_Namespaces,_RPFA).
1723
1724		space_preserve( Context ) :-
1725		Context = context(_Element,true,_Current,_Default,_Entities,
1726		_Namespaces,_RPFA).
1727
1728		specific_namespace( Prefix, Context, URI ) :-
1729		Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1730		Namespaces,_RPFA),
1731		map_member( Prefix, Namespaces, URI ).
1732
1733		defined_entity( Reference, Context, String ) :-
1734		Context = context(_Element,_Preserve,_Current,_Default,Entities,
1735		_Namespaces,_RPFA),
1736		map_member( Reference, Entities, String ).
1737
1738		close_context( Context, Terms, WellFormed ) :-
1739		Context = context(Element,_Preserve,_Current,_Default,_Entities,
1740		_Namespaces,_RPFA),
1741		close_context1( Element, Terms, WellFormed ).
1742
1743		close_context1( void, [], true ).
1744		close_context1( tag(TagChars), [out_of_context(Tag)], false ) :-
1745		atom_codes( Tag, TagChars ).
1746
1747		void_context(
1748		context(void,_Preserve,_Current,_Default,_Entities,_Namespaces)
1749		).
1750
1751		/* pp_string( +String ) prints String onto the current output stream.
1752		* If String contains only 7-bit chars it is printed in shorthand quoted
1753		* format, otherwise it is written as a list.
1754		* If your Prolog uses " to delimit a special string type, just use write/1.
1755		*/
1756		% [MC] rewritten
1757		pp_string(Chars) :-
1758		put_code(0'"),
1759		pp_string1(Chars),
1760		put_code(0'").
1761
1762		pp_string1([]).
1763		pp_string1([Char\|Chars]) :-
1764		pp_string2(Char),
1765		pp_string1(Chars).
1766
1767		pp_string2(0'\a) :- !,
1768		put_code(0'\\),
1769		put_code(0'a).
1770		pp_string2(0'\b) :- !,
1771		put_code(0'\\),
1772		put_code(0'b).
1773		pp_string2(0'\t) :- !,
1774		put_code(0'\\),
1775		put_code(0't).
1776		pp_string2(0'\n) :- !,
1777		put_code(0'\\),
1778		put_code(0'n).
1779		pp_string2(0'\v) :- !,
1780		put_code(0'\\),
1781		put_code(0'v).
1782		pp_string2(0'\f) :- !,
1783		put_code(0'\\),
1784		put_code(0'f).
1785		pp_string2(0'\r) :- !,
1786		put_code(0'\\),
1787		put_code(0'r).
1788		pp_string2(0'\e) :- !,
1789		put_code(0'\\),
1790		put_code(0'e).
1791		pp_string2(0'\") :- !,
1792		put_code(0'\\),
1793		put_code(0'").
1794		pp_string2(0'\\) :- !,
1795		put_code(0'\\),
1796		put_code(0'\\).
1797		pp_string2(Char) :-
1798		Char>=32, Char=<126, !,
1799		put_code(Char).
1800		pp_string2(Char) :-
1801		format('\\~8r\\', [Char]).
1802
1803		xml_declaration_attributes_valid( [] ).
1804		xml_declaration_attributes_valid( [Name=Value\|Attributes] ) :-
1805	?	xml_declaration_attribute_valid( Name, Value ),
1806	?	xml_declaration_attributes_valid( Attributes ).
1807
1808		xml_declaration_attribute_valid( Name, Value ) :-
1809		lowercase( Value, Lowercase ),
1810		debug_format(19,'Checking xml declaration attribute ~s=~s~n',[Name,Lowercase]),
1811	?	if(canonical_xml_declaration_attribute( Name, Lowercase ), true,
1812		(format(user_error,'Illegal <xml> declaration attribute: ~s = ~s~n',[Name,Value]),
1813		(valid_declaration_attribute_name(Name)
1814		-> ajoin(['Ignoring illegal value for attribute ',Name,': '],Msg),
1815		atom_codes(AValue,Value),
1816		add_xml_warning(Msg,AValue)
1817		; add_xml_warning('Ignoring illegal <xml> declaration attribute: ',Name)
1818		),
1819		fail
1820		)). % PATCH LEUSCHEL: add warning
1821
1822		% PATCH LEUSCHEL:
1823		valid_declaration_attribute_name(version).
1824		valid_declaration_attribute_name(standalone).
1825		valid_declaration_attribute_name(encoding).
1826
1827
1828		canonical_xml_declaration_attribute( version, "1.0" ).
1829		canonical_xml_declaration_attribute( standalone, "yes" ).
1830		canonical_xml_declaration_attribute( standalone, "no" ).
1831		canonical_xml_declaration_attribute( encoding, "utf-8" ).
1832		canonical_xml_declaration_attribute( encoding, "utf-16" ).
1833		canonical_xml_declaration_attribute( encoding, "ascii" ).
1834		canonical_xml_declaration_attribute( encoding, "iso-8859-1" ).
1835		canonical_xml_declaration_attribute( encoding, "iso-8859-2" ).
1836		canonical_xml_declaration_attribute( encoding, "iso-8859-15" ).
1837		canonical_xml_declaration_attribute( encoding, "windows-1252" ).
1838
1839		% PATCH LEUSCHEL: some more sets from https://www.iana.org/assignments/character-sets/character-sets.xhtml
1840		%canonical_xml_declaration_attribute( encoding, "iso-8859-3" ).
1841		%canonical_xml_declaration_attribute( encoding, "iso-8859-4" ).
1842		canonical_xml_declaration_attribute( encoding, Encoding ) :- atom_codes(Atom,Encoding),
1843		add_xml_warning('Unsupported xml encoding (which can lead to unexpected results): ',Atom).
1844
1845
1846
1847		/* lowercase( +MixedCase, ?Lowercase ) holds when Lowercase and MixedCase are
1848		* lists of character codes, and Lowercase is identical to MixedCase with
1849		* every uppercase character replaced by its lowercase equivalent.
1850		*/
1851		lowercase( [], [] ).
1852		lowercase( [Char\|Chars], [Lower\|LowerCase] ) :-
1853		( Char >= 0'A, Char =< 0'Z
1854		->
1855		Lower is Char + 0'a - 0'A
1856		;
1857		Lower = Char
1858		),
1859		lowercase( Chars, LowerCase ).
1860
1861		:- dynamic extended_character_entities/1. % no point compiling it
1862		extended_character_entities( [
1863		"Aacute"-[193], % latin capital letter A with acute,
1864		"aacute"-[225], % latin small letter a with acute,
1865		"Acirc"-[194], % latin capital letter A with circumflex,
1866		"acirc"-[226], % latin small letter a with circumflex,
1867		"acute"-[180], % acute accent = spacing acute,
1868		"AElig"-[198], % latin capital letter AE
1869		"aelig"-[230], % latin small letter ae
1870		"Agrave"-[192], % latin capital letter A with grave
1871		"agrave"-[224], % latin small letter a with grave
1872		"alefsym"-[8501], % alef symbol = first transfinite cardinal,
1873		"Alpha"-[913], % greek capital letter alpha, U+0391
1874		"alpha"-[945], % greek small letter alpha,
1875		"and"-[8743], % logical and = wedge, U+2227 ISOtech
1876		"ang"-[8736], % angle, U+2220 ISOamso
1877		"Aring"-[197], % latin capital letter A with ring above
1878		"aring"-[229], % latin small letter a with ring above
1879		"asymp"-[8776], % almost equal to = asymptotic to,
1880		"Atilde"-[195], % latin capital letter A with tilde,
1881		"atilde"-[227], % latin small letter a with tilde,
1882		"Auml"-[196], % latin capital letter A with diaeresis,
1883		"auml"-[228], % latin small letter a with diaeresis,
1884		"bdquo"-[8222], % double low-9 quotation mark, U+201E NEW
1885		"Beta"-[914], % greek capital letter beta, U+0392
1886		"beta"-[946], % greek small letter beta, U+03B2 ISOgrk3
1887		"brvbar"-[166], % broken bar = broken vertical bar,
1888		"bull"-[8226], % bullet = black small circle,
1889		"cap"-[8745], % intersection = cap, U+2229 ISOtech
1890		"Ccedil"-[199], % latin capital letter C with cedilla,
1891		"ccedil"-[231], % latin small letter c with cedilla,
1892		"cedil"-[184], % cedilla = spacing cedilla, U+00B8 ISOdia>
1893		"cent"-[162], % cent sign, U+00A2 ISOnum>
1894		"Chi"-[935], % greek capital letter chi, U+03A7
1895		"chi"-[967], % greek small letter chi, U+03C7 ISOgrk3
1896		"circ"-[710], % modifier letter circumflex accent,
1897		"clubs"-[9827], % black club suit = shamrock,
1898		"cong"-[8773], % approximately equal to, U+2245 ISOtech
1899		"copy"-[169], % copyright sign, U+00A9 ISOnum>
1900		"crarr"-[8629], % downwards arrow with corner leftwards
1901		"cup"-[8746], % union = cup, U+222A ISOtech
1902		"curren"-[164], % currency sign, U+00A4 ISOnum>
1903		"dagger"-[8224], % dagger, U+2020 ISOpub
1904		"Dagger"-[8225], % double dagger, U+2021 ISOpub
1905		"darr"-[8595], % downwards arrow, U+2193 ISOnum
1906		"dArr"-[8659], % downwards double arrow, U+21D3 ISOamsa
1907		"deg"-[176], % degree sign, U+00B0 ISOnum>
1908		"Delta"-[916], % greek capital letter delta,
1909		"delta"-[948], % greek small letter delta,
1910		"diams"-[9830], % black diamond suit, U+2666 ISOpub
1911		"divide"-[247], % division sign, U+00F7 ISOnum>
1912		"Eacute"-[201], % latin capital letter E with acute,
1913		"eacute"-[233], % latin small letter e with acute,
1914		"Ecirc"-[202], % latin capital letter E with circumflex,
1915		"ecirc"-[234], % latin small letter e with circumflex,
1916		"Egrave"-[200], % latin capital letter E with grave,
1917		"egrave"-[232], % latin small letter e with grave,
1918		"empty"-[8709], % empty set = null set = diameter,
1919		"emsp"-[8195], % em space, U+2003 ISOpub
1920		"ensp"-[8194], % en space, U+2002 ISOpub
1921		"Epsilon"-[917], % greek capital letter epsilon, U+0395
1922		"epsilon"-[949], % greek small letter epsilon,
1923		"equiv"-[8801], % identical to, U+2261 ISOtech
1924		"Eta"-[919], % greek capital letter eta, U+0397
1925		"eta"-[951], % greek small letter eta, U+03B7 ISOgrk3
1926		"ETH"-[208], % latin capital letter ETH, U+00D0 ISOlat1>
1927		"eth"-[240], % latin small letter eth, U+00F0 ISOlat1>
1928		"Euml"-[203], % latin capital letter E with diaeresis,
1929		"euml"-[235], % latin small letter e with diaeresis,
1930		"euro"-[8364], % euro sign, U+20AC NEW
1931		"exist"-[8707], % there exists, U+2203 ISOtech
1932		"fnof"-[402], % latin small f with hook = function
1933		"forall"-[8704], % for all, U+2200 ISOtech
1934		"frac12"-[189], % vulgar fraction one half
1935		"frac14"-[188], % vulgar fraction one quarter
1936		"frac34"-[190], % vulgar fraction three quarters
1937		"frasl"-[8260], % fraction slash, U+2044 NEW
1938		"Gamma"-[915], % greek capital letter gamma,
1939		"gamma"-[947], % greek small letter gamma,
1940		"ge"-[8805], % greater-than or equal to,
1941		"harr"-[8596], % left right arrow, U+2194 ISOamsa
1942		"hArr"-[8660], % left right double arrow,
1943		"hearts"-[9829], % black heart suit = valentine,
1944		"hellip"-[8230], % horizontal ellipsis = three dot leader,
1945		"Iacute"-[205], % latin capital letter I with acute,
1946		"iacute"-[237], % latin small letter i with acute,
1947		"Icirc"-[206], % latin capital letter I with circumflex,
1948		"icirc"-[238], % latin small letter i with circumflex,
1949		"iexcl"-[161], % inverted exclamation mark, U+00A1 ISOnum>
1950		"Igrave"-[204], % latin capital letter I with grave,
1951		"igrave"-[236], % latin small letter i with grave,
1952		"image"-[8465], % blackletter capital I = imaginary part,
1953		"infin"-[8734], % infinity, U+221E ISOtech
1954		"int"-[8747], % integral, U+222B ISOtech
1955		"Iota"-[921], % greek capital letter iota, U+0399
1956		"iota"-[953], % greek small letter iota, U+03B9 ISOgrk3
1957		"iquest"-[191], % inverted question mark
1958		"isin"-[8712], % element of, U+2208 ISOtech
1959		"Iuml"-[207], % latin capital letter I with diaeresis,
1960		"iuml"-[239], % latin small letter i with diaeresis,
1961		"Kappa"-[922], % greek capital letter kappa, U+039A
1962		"kappa"-[954], % greek small letter kappa,
1963		"Lambda"-[923], % greek capital letter lambda,
1964		"lambda"-[955], % greek small letter lambda,
1965		"lang"-[9001], % left-pointing angle bracket = bra,
1966		"laquo"-[171], % left-pointing double angle quotation mark
1967		"larr"-[8592], % leftwards arrow, U+2190 ISOnum
1968		"lArr"-[8656], % leftwards double arrow, U+21D0 ISOtech
1969		"lceil"-[8968], % left ceiling = apl upstile,
1970		"ldquo"-[8220], % left double quotation mark,
1971		"le"-[8804], % less-than or equal to, U+2264 ISOtech
1972		"lfloor"-[8970], % left floor = apl downstile,
1973		"lowast"-[8727], % asterisk operator, U+2217 ISOtech
1974		"loz"-[9674], % lozenge, U+25CA ISOpub
1975		"lrm"-[8206], % left-to-right mark, U+200E NEW RFC 2070
1976		"lsaquo"-[8249], % single left-pointing angle quotation mark,
1977		"lsquo"-[8216], % left single quotation mark,
1978		"macr"-[175], % macron = spacing macron = overline
1979		"mdash"-[8212], % em dash, U+2014 ISOpub
1980		"micro"-[181], % micro sign, U+00B5 ISOnum>
1981		"middot"-[183], % middle dot = Georgian comma
1982		"minus"-[8722], % minus sign, U+2212 ISOtech
1983		"Mu"-[924], % greek capital letter mu, U+039C
1984		"mu"-[956], % greek small letter mu, U+03BC ISOgrk3
1985		"nabla"-[8711], % nabla = backward difference,
1986		"nbsp"-[160], % no-break space = non-breaking space,
1987		"ndash"-[8211], % en dash, U+2013 ISOpub
1988		"ne"-[8800], % not equal to, U+2260 ISOtech
1989		"ni"-[8715], % contains as member, U+220B ISOtech
1990		"not"-[172], % not sign, U+00AC ISOnum>
1991		"notin"-[8713], % not an element of, U+2209 ISOtech
1992		"nsub"-[8836], % not a subset of, U+2284 ISOamsn
1993		"Ntilde"-[209], % latin capital letter N with tilde,
1994		"ntilde"-[241], % latin small letter n with tilde,
1995		"Nu"-[925], % greek capital letter nu, U+039D
1996		"nu"-[957], % greek small letter nu, U+03BD ISOgrk3
1997		"Oacute"-[211], % latin capital letter O with acute,
1998		"oacute"-[243], % latin small letter o with acute,
1999		"Ocirc"-[212], % latin capital letter O with circumflex,
2000		"ocirc"-[244], % latin small letter o with circumflex,
2001		"OElig"-[338], % latin capital ligature OE,
2002		"oelig"-[339], % latin small ligature oe, U+0153 ISOlat2
2003		"Ograve"-[210], % latin capital letter O with grave,
2004		"ograve"-[242], % latin small letter o with grave,
2005		"oline"-[8254], % overline = spacing overscore,
2006		"Omega"-[937], % greek capital letter omega,
2007		"omega"-[969], % greek small letter omega,
2008		"Omicron"-[927], % greek capital letter omicron, U+039F
2009		"omicron"-[959], % greek small letter omicron, U+03BF NEW
2010		"oplus"-[8853], % circled plus = direct sum,
2011		"or"-[8744], % logical or = vee, U+2228 ISOtech
2012		"ordf"-[170], % feminine ordinal indicator, U+00AA ISOnum>
2013		"ordm"-[186], % masculine ordinal indicator,
2014		"Oslash"-[216], % latin capital letter O with stroke
2015		"oslash"-[248], % latin small letter o with stroke,
2016		"Otilde"-[213], % latin capital letter O with tilde,
2017		"otilde"-[245], % latin small letter o with tilde,
2018		"otimes"-[8855], % circled times = vector product,
2019		"Ouml"-[214], % latin capital letter O with diaeresis,
2020		"ouml"-[246], % latin small letter o with diaeresis,
2021		"para"-[182], % pilcrow sign = paragraph sign,
2022		"part"-[8706], % partial differential, U+2202 ISOtech
2023		"permil"-[8240], % per mille sign, U+2030 ISOtech
2024		"perp"-[8869], % up tack = orthogonal to = perpendicular,
2025		"Phi"-[934], % greek capital letter phi,
2026		"phi"-[966], % greek small letter phi, U+03C6 ISOgrk3
2027		"Pi"-[928], % greek capital letter pi, U+03A0 ISOgrk3
2028		"pi"-[960], % greek small letter pi, U+03C0 ISOgrk3
2029		"piv"-[982], % greek pi symbol, U+03D6 ISOgrk3
2030		"plusmn"-[177], % plus-minus sign = plus-or-minus sign,
2031		"pound"-[163], % pound sign, U+00A3 ISOnum>
2032		"prime"-[8242], % prime = minutes = feet, U+2032 ISOtech
2033		"Prime"-[8243], % double prime = seconds = inches,
2034		"prod"-[8719], % n-ary product = product sign,
2035		"prop"-[8733], % proportional to, U+221D ISOtech
2036		"Psi"-[936], % greek capital letter psi,
2037		"psi"-[968], % greek small letter psi, U+03C8 ISOgrk3
2038		"radic"-[8730], % square root = radical sign,
2039		"rang"-[9002], % right-pointing angle bracket = ket,
2040		"raquo"-[187], % right-pointing double angle quotation mark
2041		"rarr"-[8594], % rightwards arrow, U+2192 ISOnum
2042		"rArr"-[8658], % rightwards double arrow,
2043		"rceil"-[8969], % right ceiling, U+2309 ISOamsc
2044		"rdquo"-[8221], % right double quotation mark,
2045		"real"-[8476], % blackletter capital R = real part symbol,
2046		"reg"-[174], % registered sign = registered trade mark sign,
2047		"rfloor"-[8971], % right floor, U+230B ISOamsc
2048		"Rho"-[929], % greek capital letter rho, U+03A1
2049		"rho"-[961], % greek small letter rho, U+03C1 ISOgrk3
2050		"rlm"-[8207], % right-to-left mark, U+200F NEW RFC 2070
2051		"rsaquo"-[8250], % single right-pointing angle quotation mark,
2052		"rsquo"-[8217], % right single quotation mark,
2053		"sbquo"-[8218], % single low-9 quotation mark, U+201A NEW
2054		"Scaron"-[352], % latin capital letter S with caron,
2055		"scaron"-[353], % latin small letter s with caron,
2056		"sdot"-[8901], % dot operator, U+22C5 ISOamsb
2057		"sect"-[167], % section sign, U+00A7 ISOnum>
2058		"shy"-[173], % soft hyphen = discretionary hyphen,
2059		"Sigma"-[931], % greek capital letter sigma,
2060		"sigma"-[963], % greek small letter sigma,
2061		"sigmaf"-[962], % greek small letter final sigma,
2062		"sim"-[8764], % tilde operator = varies with = similar to,
2063		"spades"-[9824], % black spade suit, U+2660 ISOpub
2064		"sub"-[8834], % subset of, U+2282 ISOtech
2065		"sube"-[8838], % subset of or equal to, U+2286 ISOtech
2066		"sum"-[8721], % n-ary sumation, U+2211 ISOamsb
2067		"sup"-[8835], % superset of, U+2283 ISOtech
2068		"sup1"-[185], % superscript one = superscript digit one,
2069		"sup2"-[178], % superscript two = superscript digit two
2070		"sup3"-[179], % superscript three = superscript digit three
2071		"supe"-[8839], % superset of or equal to,
2072		"szlig"-[223], % latin small letter sharp s = ess-zed,
2073		"Tau"-[932], % greek capital letter tau, U+03A4
2074		"tau"-[964], % greek small letter tau, U+03C4 ISOgrk3
2075		"there4"-[8756], % therefore, U+2234 ISOtech
2076		"Theta"-[920], % greek capital letter theta,
2077		"theta"-[952], % greek small letter theta,
2078		"thetasym"-[977], % greek small letter theta symbol,
2079		"thinsp"-[8201], % thin space, U+2009 ISOpub
2080		"THORN"-[222], % latin capital letter THORN,
2081		"thorn"-[254], % latin small letter thorn with,
2082		"tilde"-[732], % small tilde, U+02DC ISOdia
2083		"times"-[215], % multiplication sign, U+00D7 ISOnum>
2084		"trade"-[8482], % trade mark sign, U+2122 ISOnum
2085		"Uacute"-[218], % latin capital letter U with acute,
2086		"uacute"-[250], % latin small letter u with acute,
2087		"uarr"-[8593], % upwards arrow, U+2191 ISOnum
2088		"uArr"-[8657], % upwards double arrow, U+21D1 ISOamsa
2089		"Ucirc"-[219], % latin capital letter U with circumflex,
2090		"ucirc"-[251], % latin small letter u with circumflex,
2091		"Ugrave"-[217], % latin capital letter U with grave,
2092		"ugrave"-[249], % latin small letter u with grave,
2093		"uml"-[168], % diaeresis = spacing diaeresis,
2094		"upsih"-[978], % greek upsilon with hook symbol,
2095		"Upsilon"-[933], % greek capital letter upsilon,
2096		"upsilon"-[965], % greek small letter upsilon,
2097		"Uuml"-[220], % latin capital letter U with diaeresis,
2098		"uuml"-[252], % latin small letter u with diaeresis,
2099		"weierp"-[8472], % script capital P = power set
2100		"Xi"-[926], % greek capital letter xi, U+039E ISOgrk3
2101		"xi"-[958], % greek small letter xi, U+03BE ISOgrk3
2102		"Yacute"-[221], % latin capital letter Y with acute,
2103		"yacute"-[253], % latin small letter y with acute,
2104		"yen"-[165], % yen sign = yuan sign, U+00A5 ISOnum>
2105		"yuml"-[255], % latin small letter y with diaeresis,
2106		"Yuml"-[376], % latin capital letter Y with diaeresis,
2107		"Zeta"-[918], % greek capital letter zeta, U+0396
2108		"zeta"-[950], % greek small letter zeta, U+03B6 ISOgrk3
2109		"zwj"-[8205], % zero width joiner, U+200D NEW RFC 2070
2110		"zwnj"-[8204] % zero width non-joiner,
2111		] ).
2112
2113		/* chars( ?Chars, ?Plus, ?Minus ) used as chars( ?Chars ) in a DCG to
2114		* copy the list Chars inline.
2115		*
2116		* This is best expressed in terms of append/3 where append/3 is built-in.
2117		* For other Prologs, a straightforward specification can be used:
2118		*
2119		* chars( [] ) --> "".
2120		* chars( [Char\|Chars] ) -->
2121		* [Char],
2122		* chars( Chars ).
2123		*/
2124		chars( Chars, Plus, Minus ) :-
2125		append( Chars, Minus, Plus ).