1 /* xml_prob.pl : Contains xml_parse/[2,3] a bi-directional XML parser written in
2 * Prolog.
3 *
4 * Copyright (C) 2001-2005 Binding Time Limited
5 * Copyright (C) 2005 John Fletcher
6 *
7 * Current Release: $Revision$
8 *
9 * TERMS AND CONDITIONS:
10 *
11 * This program is offered free of charge, as unsupported source code. You may
12 * use it, copy it, distribute it, modify it or sell it without restriction,
13 * but entirely at your own risk.
14 *
15 */
16
17 % minimally adapted for SICStus Prolog
18 % Mats Carlsson, 2003-2006
19 % added linefeed counting for attributes (attributes_count_lf,spaces_count_linebreaks)
20 % Michael Leuschel, 2017-2022 (changes marked by PATCH LEUSCHEL)
21 % replaces library(xml) for SICStus
22
23 :- module( xml_prob, [
24 xml_parse/2,
25 xml_parse/3,
26 xml_subterm/2,
27 xml_pp/1,
28
29 xml_parse/4
30 ]).
31
32 :- use_module(module_information,[module_info/2]).
33 :- module_info(group,external_functions).
34 :- module_info(description,'This module transforms XML into a Prolog representation, adaptation of SICStus library module.').
35
36 :- use_module(probsrc(error_manager)).
37 :- use_module(probsrc(debug),[debug_format/3]).
38 :- use_module(probsrc(tools_strings),[ajoin/2]).
39
40 % added by Leuschel:
41 xml_parse( Chars, Document, Controls, Span ) :-
42 bb_put(xml_prob_current_span,Span),
43 call_cleanup(xml_parse( Chars, Document, Controls),
44 bb_delete(xml_prob_current_span,_)).
45
46 add_xml_warning(Msg,Term) :-
47 bb_get(xml_prob_current_span,Span),!,
48 add_warning(xml_prob,Msg,Term,Span).
49 add_xml_warning(Msg,Term) :-
50 add_warning(xml_prob,Msg,Term).
51
52 add_xml_error(Msg,Term) :-
53 bb_get(xml_prob_current_span,Span),!,
54 add_error(xml_prob,Msg,Term,Span).
55 add_xml_error(Msg,Term) :-
56 add_error(xml_prob,Msg,Term).
57 :- use_module(library(lists),[append/2]).
58 expect_tokens(Tokens,_) --> Tokens,!.
59 expect_tokens(Tokens,Ctxt,Input,_) :-
60 append(["XML parse error, expected '",Tokens,"' in ",Ctxt,", but got:"],Codes),
61 atom_codes(Msg,Codes),
62 length(Tokens,Len),
63 get_next_tokens(Input,Len,Nxt),
64 atom_codes(NxtAtom,Nxt),
65 add_xml_error(Msg,NxtAtom), % TO DO: extract next tokens
66 fail.
67
68 get_next_tokens(_,N,R) :- N<1,!, R=[].
69 get_next_tokens(Var,_,Res) :- var(Var),!, Res="<EOF>".
70 get_next_tokens([],_,"<EOF>").
71 get_next_tokens([H|T],Nr,[H|R]) :- N1 is Nr-1, get_next_tokens(T,N1,R).
72
73 :- use_module(library(types), [
74 must_be/4,
75 illarg/3,
76 illarg/4
77 ]).
78 :- use_module(library(lists), [
79 select/3,
80 is_list/1
81 ]).
82
83 :- set_prolog_flag(double_quotes, codes).
84
85 %@ This is a package for parsing XML with Prolog, which
86 %@ provides Prolog applications with a simple ``Document Value Model''
87 %@ interface to XML documents. A description of the subset of XML that it
88 %@ supports can be found at:
89 %@ @c [PM] 4.1 link updated 2009-08-14
90 %@ @uref{http://www.binding-time.co.uk/xmlpl.html}
91 %@
92 %@ The package, originally written by Binding Time Ltd., is in the public
93 %@ domain and unsupported. To use the package, enter the query:
94 %@
95 %@ @example
96 %@ @group
97 %@ | ?- use_module(library(xml)).
98 %@ @end group
99 %@ @end example
100 %@
101 %@ The package represents XML documents by the abstract data type
102 %@ @var{document}, which is defined by the following grammar:
103 %@
104 %@ @multitable @columnfractions .2 .3 .5
105 %@ @item @var{document} @tab ::= @code{xml(@var{attributes},@var{content})} @tab @r{@{ well-formed document @}}
106 %@ @item @tab | @code{malformed(@var{attributes},@var{content})} @tab @r{@{ malformed document @}}
107 %@ @item
108 %@ @item @var{attributes} @tab ::= @code{[]}
109 %@ @item @tab | @code{[@var{name}=@var{char-data}|@var{attributes}]}
110 %@ @item
111 %@ @item @var{content} @tab ::= @code{[]}
112 %@ @item @tab | @code{[@var{cterm}|@var{content}]}
113 %@ @item
114 %@ @item @var{cterm} @tab ::= @code{pcdata(@var{char-data})} @tab @r{@{ text @}}
115 %@ @item @tab | @code{comment(@var{char-data})} @tab @r{@{ an XML comment @}}
116 %@ @item @tab | @code{namespace(@var{URI},@var{prefix},@var{element})} @tab @r{@{ a Namespace @}}
117 %@ @item @tab | @code{element(@var{tag},@var{attributes},@var{content})} @tab @r{@{ <@var{tag}>..</@var{tag}> encloses @var{content} or <@var{tag} /> if empty @}}
118 %@ @item @tab | @code{instructions(@var{name},@var{char-data})} @tab @r{@{ A PI <? @var{name} @var{char-data} ?> @}}
119 %@ @item @tab | @code{cdata(@var{char-data})} @tab @r{@{ <![CDATA[@var{char-data}]]> @}}
120 %@ @item @tab | @code{doctype(@var{tag},@var{doctype-id})} @tab @r{@{ DTD <!DOCTYPE .. > @}}
121 %@ @item @tab | @code{unparsed(@var{char-data})} @tab @r{@{ text that hasn't been parsed @}}
122 %@ @item @tab | @code{out_of_context(@var{tag})} @tab @r{@{ @var{tag} is not closed @}}
123 %@ @item
124 %@ @item @var{tag} @tab ::= @dfn{atom} @tab @r{@{ naming an element @}}
125 %@ @item
126 %@ @item @var{name} @tab ::= @dfn{atom} @tab @r{@{ not naming an element @}}
127 %@ @item
128 %@ @item @var{URI} @tab ::= @dfn{atom} @tab @r{@{ giving the URI of a namespace @}}
129 %@ @item
130 %@ @item @var{char-data} @tab ::= @dfn{code-list}
131 %@ @item
132 %@ @item @var{doctype-id} @tab ::= @code{public(@var{char-data},@var{char-data})}
133 %@ @item @tab | @code{public(@var{char-data},@var{dtd-literals})}
134 %@ @item @tab | @code{system(@var{char-data})}
135 %@ @item @tab | @code{system(@var{char-data},@var{dtd-literals})}
136 %@ @item @tab | @code{local}
137 %@ @item @tab | @code{local,@var{dtd-literals}}
138 %@ @item
139 %@ @item @var{dtd-literals} @tab ::= @code{[]}
140 %@ @item @tab | @code{[dtd_literal(@var{char-data})|@var{dtd-literals}]}
141 %@ @end multitable
142 %@
143 %@ The following predicates are exported by the package:
144 %@
145 %@ @table @code
146 %@ @item xml_parse(@var{?Chars}, @var{?Document})
147 %@ @itemx xml_parse(@var{?Chars}, @var{?Document}, @var{+Options})
148 %@ @PLXindex {xml_parse/[2,3] (xml)}
149 %@ Either parses @var{Chars}, a @dfn{code-list}, to @var{Document}, a
150 %@ @var{document}. @var{Chars} is not required to represent strictly
151 %@ well-formed XML.
152 %@ Or generates @var{Chars}, a @dfn{code-list},
153 %@ from @var{Document}, a @var{document}.
154 %@ If @var{Document} is not a valid @var{document} term representing
155 %@ well-formed XML, an exception is raised.
156 %@ In the second usage of the predicate, the only option available is @code{format/1}.
157 %@
158 %@ @var{Options} is a list of zero or more of the following, where
159 %@ @var{Boolean} must be @code{true} or @code{false}:
160 %@
161 %@ @table @code
162 %@ @item format(@var{Boolean})
163 %@ @findex format/1 (xml_parse/3 option)
164 %@ Indent the element content (default @code{true}).
165 %@
166 %@ @item extended_characters(@var{Boolean})
167 %@ @findex extended_characters/1 (xml_parse/3 option)
168 %@ Use the extended character entities for XHTML (default @code{true}).
169 %@
170 %@ @item remove_attribute_prefixes(@var{Boolean})
171 %@ @findex remove_attribute_prefixes/1 (xml_parse/3 option)
172 %@ Remove namespace prefixes from attributes when it's the same as the
173 %@ prefix of the parent element (default @code{false}).
174 %@ @end table
175 %@
176 %@ @item xml_subterm(@var{+Term}, @var{?Subterm})
177 %@ @PLXindex {xml_subterm/2 (xml)}
178 %@ Unifies @var{Subterm} with a sub-term of @var{Term}, a
179 %@ @var{document}. This can be especially useful when trying to test or
180 %@ retrieve a deeply-nested subterm from a document.
181 %@
182 %@ @item xml_pp(@var{+Document})
183 %@ @PLXindex {xml_pp/1 (xml)}
184 %@ ``Pretty prints'' @var{Document}, a @var{document}, on the current
185 %@ output stream.
186 %@ @end table
187
188
189 /* xml_parse(+?Chars, ?+Document[, +Controls]) parses Chars to/from a data
190 * structure of the form xml(<atts>, <content>). <atts> is a list of
191 * <atom>=<string> attributes from the (possibly implicit) XML signature of the
192 * document. <content> is a (possibly empty) list comprising occurrences of :
193 *
194 * pcdata(<string>) : Text
195 * comment(<string>) : An xml comment;
196 * element(<tag>,<atts>,<content>) : <tag>..</tag> encloses <content>
197 * : <tag /> if empty
198 * instructions(<atom>, <string>) : Processing <? <atom> <params> ?>"
199 * cdata( <string> ) : <![CDATA[ <string> ]]>
200 * doctype(<atom>, <doctype id>) : DTD <!DOCTYPE .. >
201 *
202 * The conversions are not completely symmetrical, in that weaker XML is
203 * accepted than can be generated. Specifically, in-bound (Chars -> Document)
204 * does not require strictly well-formed XML. Document is instantiated to the
205 * term malformed(Attributes, Content) if Chars does not represent well-formed
206 * XML. The Content of a malformed/2 structure can contain:
207 *
208 * unparsed( <string> ) : Text which has not been parsed
209 * out_of_context( <tag> ) : <tag> is not closed
210 *
211 * in addition to the standard term types.
212 *
213 * Out-bound (Document -> Chars) parsing _does_ require that Document defines
214 * strictly well-formed XML. If an error is detected a 'domain' exception is
215 * raised.
216 *
217 * The domain exception will attempt to identify the particular sub-term in
218 * error and the message will show a list of its ancestor elements in the form
219 * <tag>{(id)}* where <id> is the value of any attribute _named_ id.
220 *
221 * At this release, the Controls applying to in-bound (Chars -> Document)
222 * parsing are:
223 *
224 * extended_characters(<bool>) : Use the extended character
225 * : entities for XHTML (default true)
226 *
227 * format(<bool>) : Strip layouts when no character data
228 * : appears between elements.
229 * : (default true)
230 *
231 * remove_attribute_prefixes( <bool>) : Remove namespace prefixes from
232 * : attributes when it's the same as the
233 * : prefix of the parent element
234 * : (default false).
235 *
236 * [<bool> is one of 'true' or 'false']
237 *
238 * For out-bound (Document -> Chars) parsing, the only available option is:
239 *
240 * format(<Bool>) : Indent the element content
241 * : (default true)
242 *
243 * Different DCGs for input and output are used because input parsing is
244 * more flexible than output parsing. Errors in input are recorded as part
245 * of the data structure. Output parsing throws an exception if the document
246 * is not well-formed, diagnosis tries to identify the specific culprit term.
247 */
248 xml_parse( Chars, Document ) :-
249 xml_parse( Chars, Document, [] ).
250
251 xml_parse( Chars, Document, Controls ) :-
252 Goal = xml_parse(Chars,Document,Controls),
253 % format, extended_characters, remove_attribute_prefixes
254 must_be(Controls, proper_list, Goal, 3),
255 xml_options(Controls, opt(true,true,false), Options, Goal, 3),
256 ( ground( Chars ) ->
257 xml_to_document( Options, Chars, Document )
258 ; ground(Document) ->
259 document_to_xml( Options, Document, Chars )
260 ; illarg(var, Goal, 0)
261 ), !. /*MC*/
262
263 xml_options([], Opt, Opt, _, _) :- !.
264 xml_options([X|L], Opt0, Opt, Goal, ArgNo) :- !,
265 ( callable(X),
266 xml_option(X, Opt0, Opt1) -> true
267 ; illarg(domain(term,xml_parse_option), Goal, ArgNo, X)
268 ),
269 xml_options(L, Opt1, Opt, Goal, ArgNo).
270
271 xml_option(format(X), opt(_,B,C), opt(X,B,C)) :-
272 bool_option(X).
273 xml_option(extended_characters(X), opt(A,_,C), opt(A,X,C)) :-
274 bool_option(X).
275 xml_option(remove_attribute_prefixes(X), opt(A,B,_), opt(A,B,X)) :-
276 bool_option(X).
277
278 bool_option(X) :- var(X), !, fail.
279 bool_option(false).
280 bool_option(true).
281
282 document_to_xml( opt(Format,_,_), Document, Chars ) :-
283 ? ( document_generation(Format, Document, Chars0, [] ) ->
284 Chars = Chars0
285 ;
286 xml_fault( Document, [], Culprit, Path, Message ),
287 throw(xml_parse(Message,Document,Culprit,Path))
288 ).
289
290 :- multifile user:generate_message_hook/3.
291 user:generate_message_hook(xml_parse(Message,Document,Culprit,Path)) --> !,
292 ['XML Parse: ~a in ~q'-[Message,Document],nl,
293 'Culprit: ~q'-[Culprit],nl],
294 ( {Path==[]} -> []
295 ; ['Path: ~s'-[Path],nl]
296 ).
297
298
299 /* xml_subterm( +XMLTerm, ?Subterm ) unifies Subterm with a sub-term of Term.
300 * Note that XMLTerm is a sub-term of itself.
301 */
302 xml_subterm( Term, Term ).
303 xml_subterm( xml(_Attributes, Content), Term ) :-
304 xml_subterm( Content, Term ).
305 xml_subterm( [H|T], Term ) :-
306 ( xml_subterm( H, Term )
307 ; xml_subterm( T, Term )
308 ).
309 xml_subterm( element(_Name,_Attributes,Content), Term ) :-
310 xml_subterm( Content, Term ).
311 xml_subterm( namespace(_URI,_Prefix,Content), Term ) :-
312 xml_subterm( Content, Term ).
313
314 /* xml is intended to be a rather modular module: it should be easy to
315 * build a program that can output XML, but not read it, or vice versa.
316 * Similarly, you may be happy to dispense with diagnosis once you are
317 * sure that your code will only try to make valid calls to xml_parse/2.
318 *
319 * It is intended that the code should be very portable too. Clearly,
320 * some small changes will be needed between platforms, but these should
321 * be limited to xml_utilities. xml_utilities contains most of the shared
322 * code and most of the potentially non-portable code.
323 */
324 /* xml_acquisition.pl : XML -> Document translation.
325 *
326 * $Revision$
327 *
328 */
329
330 /* xml_to_document( +Options, +XML, ?Document ) translates the list of
331 * character codes XML into the Prolog term Document. Options is
332 * the parsed options list.
333 */
334 xml_to_document( Options, XML, Document ) :-
335 initial_context( Options, Context ),
336 debug_format(19,'Parsing XML:~n~s~n',[XML]), %PATCH: LEUSCHEL
337 ( xml_declaration( Attributes0, XML, XML1 ) ->
338 Attributes = Attributes0
339 ;
340 format('No <xml> header declaration~n',[]), %PATCH LEUSCHEL
341 XML1 = XML,
342 Attributes = []
343 ),
344 xml_to_document( XML1, Context, Terms, [], WellFormed ),
345 xml_to_document1( WellFormed, Attributes, Terms, Document ).
346
347 xml_to_document1( true, Attributes, Terms, xml(Attributes, Terms) ).
348 xml_to_document1( false, Attributes, Terms, malformed(Attributes, Terms) ).
349
350 % unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed )
351 unparsed( Unparsed, _Context, [unparsed(Unparsed)], [], false ).
352
353 xml_declaration( Attributes ) -->
354 spaces,
355 "<?",
356 nmtoken( xml ),
357 xml_declaration_attributes( Attributes ),
358 spaces,
359 "?>".
360
361 xml_to_document( [], Context, Terms, [], WF ) :-
362 close_context( Context, Terms, WF ).
363 xml_to_document( [Char|Chars], Context, Terms, Residue, WF ) :-
364 ( Char =:= "<" ->
365 xml_markup_structure( Chars, Context, Terms, Residue, WF )
366 ; Char =:= "&" ->
367 entity_reference( Chars, Context, Terms, Residue, WF )
368 ; Char =< " ",
369 \+ space_preserve( Context ) ->
370 layouts( Chars, Context, [Char|T], T, Terms, Residue, WF )
371 ; void_context( Context ) ->
372 unparsed( [Char|Chars], Context, Terms, Residue, WF )
373 ;
374 Terms = [pcdata([Char|Chars1])|Terms1],
375 acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF )
376 ).
377
378 layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :-
379 close_context( Context, Terms, WF ).
380 layouts( [Char|Chars], Context, Plus, Minus, Terms, Residue, WF ) :-
381 ( Char =:= "<" ->
382 xml_markup_structure( Chars, Context, Terms, Residue, WF )
383 ; Char =:= "&" ->
384 entity_reference( Chars, Context, Terms, Residue, WF )
385 ; Char =< " " ->
386 Minus = [Char|Minus1],
387 layouts( Chars, Context, Plus, Minus1, Terms, Residue, WF )
388 ; void_context( Context ) ->
389 unparsed( [Char|Chars], Context, Terms, Residue, WF )
390 ;
391 Terms = [pcdata(Plus)|Terms1],
392 Minus = [Char|Chars1],
393 context_update( space_preserve, Context, true, Context1 ),
394 acquire_pcdata( Chars, Context1, Chars1, Terms1, Residue, WF )
395 ).
396
397 acquire_pcdata( [], Context, [], Terms, [], WF ) :-
398 close_context( Context, Terms, WF ).
399 acquire_pcdata( [Char|Chars], Context, Chars1, Terms, Residue, WF ) :-
400 ( Char =:= "<" ->
401 Chars1 = [],
402 xml_markup_structure( Chars, Context, Terms, Residue, WF )
403 ; Char =:= "&" ->
404 reference_in_pcdata( Chars, Context, Chars1, Terms, Residue, WF )
405 ;
406 Chars1 = [Char|Chars2],
407 acquire_pcdata( Chars, Context, Chars2, Terms, Residue, WF )
408 ).
409
410 xml_markup_structure( [], Context, Terms, Residue, WF ) :-
411 unparsed( "<", Context, Terms, Residue, WF ).
412 xml_markup_structure( Chars, Context, Terms, Residue, WF ) :-
413 Chars = [Char|Chars1],
414 ( Char =:= "/" ->
415 closing_tag( Context, Chars1, Terms, Residue, WF )
416 ; Char =:= "?" ->
417 pi_acquisition( Chars1, Context, Terms, Residue, WF )
418 ; Char =:= "!" ->
419 declaration_acquisition( Chars1, Context, Terms, Residue, WF )
420 ; open_tag(Tag,Context,Attributes,Type, Chars, Chars2 ) ->
421 push_tag( Tag, Chars2, Context, Attributes, Type, Terms, Residue, WF )
422 ;
423 unparsed( [0'<|Chars], Context, Terms, Residue, WF ) %'
424 ).
425
426 push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :-
427 new_element(Tag, Chars, Context, Attributes, Type, Term, Rest, WF0),
428 push_tag1( WF0, Context, Term, Rest, Terms, Residue, WF ).
429
430 push_tag1( true, Context, Term, Chars, [Term|Terms], Residue, WF ) :-
431 xml_to_document( Chars, Context, Terms, Residue, WF ).
432 push_tag1( false, _Context, Term, Chars, [Term], Chars, false ).
433
434 new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :-
435 namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
436 ( append( NSChars, [0':|TagChars1], TagChars ), %'
437 specific_namespace( NSChars, Context1, SpecificNamespace ) ->
438 Namespace0 = SpecificNamespace
439 ;
440 NSChars = "",
441 TagChars1 = TagChars,
442 default_namespace( Context1, Namespace0 )
443 ),
444 current_namespace( Context1, CurrentNamespace ),
445 ( Namespace0 == CurrentNamespace ->
446 Term = element(Tag, Attributes, Contents),
447 Context2 = Context1
448 ;
449 Term = namespace( Namespace0, NSChars,
450 element(Tag, Attributes, Contents)
451 ),
452 context_update( current_namespace, Context1, Namespace0, Context2 )
453 ),
454 input_attributes( Attributes1, Context2, Attributes ),
455 atom_codes( Tag, TagChars1 ),
456 close_tag( Type, Chars, Context2, Contents, Residue, WF ).
457
458 close_tag( empty, Residue, _Context, [], Residue, true ).
459 close_tag( push(Tag), Chars, Context0, Contents, Residue, WF ) :-
460 context_update( element, Context0, Tag, Context1 ),
461 xml_to_document( Chars, Context1, Contents, Residue, WF ).
462
463 pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
464 ( inline_instruction(Target, Processing, Chars, Rest ),
465 Target \== xml ->
466 Terms = [instructions(Target, Processing)|Terms1],
467 xml_to_document( Rest, Context, Terms1, Residue, WellFormed )
468 ;
469 unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed )
470 ).
471
472 declaration_acquisition( Chars, Context, Terms, Residue, WF ) :-
473 ( declaration_type( Chars, Type, Chars1 ),
474 declaration_parse( Type, Context, Term, Context1, Chars1, Rest ) ->
475 Terms = [Term|Terms1],
476 xml_to_document( Rest, Context1, Terms1, Residue, WF )
477 ;
478 unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WF )
479 ).
480
481 open_tag( Tag, Namespaces, Attributes, Termination ) -->
482 nmtoken_chars( Tag ),
483 attributes( Attributes, [], Namespaces ),
484 spaces,
485 (open_tag_terminator( Tag, Termination ) -> []
486 ; {atom_codes(ATag,Tag), add_xml_warning('XML tag not properly terminated with >: ',ATag),fail}
487 ).
488
489 open_tag_terminator( Tag, push(Tag) ) -->
490 ">".
491 open_tag_terminator( _Tag, empty ) -->
492 "/>".
493
494 declaration_parse( comment, Namespaces, comment(Comment), Namespaces ) -->
495 comment(Comment).
496 declaration_parse( cdata, Namespaces, cdata(CData), Namespaces ) -->
497 cdata( CData ).
498 declaration_parse( doctype, Namespaces0, doctype(Name, Names), Namespaces ) -->
499 doctype( Name, Names, Namespaces0, Namespaces ),
500 spaces,
501 ">".
502
503 inline_instruction( Target, Processing, Plus, Minus ) :-
504 nmtoken(Target, Plus, Mid0 ),
505 spaces( Mid0, Mid1 ),
506 append( Processing, [0'?,0'>|Minus], Mid1 ),
507 !.
508
509 entity_reference_name( Reference ) -->
510 nmtoken_chars( Reference ),
511 ";".
512
513 declaration_type( [Char1,Char2|Chars1], Class, Rest ) :-
514 Chars = [Char1,Char2|Chars1],
515 ( declaration_type1( Char1, Char2, Chars1, Class0, Residue ) ->
516 Class = Class0,
517 Rest = Residue
518 ;
519 Class = generic,
520 Rest = Chars
521 ).
522
523 declaration_type1( 0'-, 0'-, Chars, comment, Chars ).
524 declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :-
525 append( "DATA[", Residue, Chars ).
526 declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :-
527 append( "CTYPE", Residue, Chars ).
528
529 closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
530 ( closing_tag_name( Tag, Chars, Rest ),
531 current_tag( Context, Tag ) ->
532 Terms = [],
533 Residue = Rest,
534 WellFormed = true
535 ;
536 unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed )
537 ).
538
539 closing_tag_name( Tag ) -->
540 nmtoken_chars( Tag ),
541 spaces,
542 ">".
543
544 entity_reference( Chars, Context, Terms, Residue, WF ) :-
545 ( standard_character_entity( Char, Chars, Rest ) ->
546 Terms = [pcdata([Char|Chars1])|Terms1],
547 acquire_pcdata( Rest, Context, Chars1, Terms1, Residue, WF )
548 ; entity_reference_name( Reference, Chars, Rest ),
549 defined_entity( Reference, Context, String ) ->
550 append( String, Rest, Full ),
551 xml_to_document( Full, Context, Terms, Residue, WF )
552 ;
553 unparsed( [0'&|Chars], Context, Terms, Residue, WF ) %'
554 ).
555
556 reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :-
557 ( standard_character_entity(Char, Chars0, Rest ) ->
558 Chars1 = [Char|Chars2],
559 acquire_pcdata( Rest, Context, Chars2, Terms, Residue, WF )
560 ; entity_reference_name(Reference, Chars0, Rest ),
561 defined_entity( Reference, Context, String ) ->
562 append( String, Rest, Full ),
563 acquire_pcdata( Full, Context, Chars1, Terms, Residue, WF )
564 ;
565 Chars1 = [],
566 unparsed( [0'&|Chars0], Context, Terms, Residue, WF ) %'
567 ).
568
569 namespace_attributes( [], Context, Context, [] ).
570 namespace_attributes( Attributes0, Context0, Context, Attributes ) :-
571 Attributes0 = [_|_],
572 append( "xmlns:", Unqualified, QualifiedNameChars ),
573 ( select( "xmlns"=Value, Attributes0, Attributes1 ) ->
574 atom_codes( URI, Value ),
575 context_update( default_namespace, Context0, URI, Context1 ),
576 namespace_attributes( Attributes1, Context1, Context, Attributes )
577 ; select( QualifiedNameChars=Value, Attributes0, Attributes1 ) ->
578 Attributes = [QualifiedNameChars=Value|Attributes2],
579 atom_codes( URI, Value ),
580 context_update( ns_prefix(Unqualified), Context0, URI, Context1 ),
581 namespace_attributes( Attributes1, Context1, Context, Attributes2 )
582 ; member( "xml:space"="preserve", Attributes0 ) ->
583 Attributes = Attributes0,
584 context_update( space_preserve, Context0, true, Context )
585 ;
586 Context = Context0,
587 Attributes = Attributes0
588 ).
589
590 input_attributes( [], _Context, [] ).
591 input_attributes( [NameChars=Value|Attributes0], Context,
592 [Name=Value|Attributes] ) :-
593 ( remove_attribute_prefixes( Context ),
594 append( NSChars, [0':|NameChars1], NameChars ), %'
595 NSChars \== "xmlns",
596 specific_namespace( NSChars, Context, Namespace ),
597 current_namespace( Context, Namespace ) ->
598 atom_codes( Name, NameChars1 )
599 ;
600 atom_codes( Name, NameChars )
601 ),
602 input_attributes( Attributes0, Context, Attributes ).
603
604 attributes( Attributes, Seen, Namespaces ) -->
605 attributes_count_lf(0, Attributes, Seen, Namespaces ). % PATCH LEUSCHEL: add count parameter
606
607 attributes_count_lf(Count0, [Name=Value|Attributes], Seen, Namespaces ) -->
608 spaces_count_linebreaks(Count0,C1),
609 nmtoken_chars( Name ),
610 {\+ member(Name, Seen)},
611 spaces_count_linebreaks(C1,C2),
612 expect_tokens("=","attributes"),
613 spaces_count_linebreaks(C2,C3),
614 attribute_value( CountValue, Value, Namespaces, Name ), % Value does not contain the line breaks anymore, so we have to count them in attribute_layouts.
615 !,
616 {C4 is C3+CountValue},
617 attributes_count_lf( C4, Attributes, [Name|Seen], Namespaces ).
618 attributes_count_lf( 0, [], _Seen, _Namespaces ) --> !, "". % ,{print(no_linefeeds),nl}.
619 attributes_count_lf( Count, ["$attribute_linefeeds"=Count], _Seen, _Namespaces ) --> "". %, {print(linefeeds(Count)),nl}.
620
621 spaces_count_linebreaks(C,C, [], [] ).
622 spaces_count_linebreaks(Count,ResCount, [Char|Chars0], Chars1 ) :-
623 ( Char=10 ->
624 Count1 is Count+1, spaces_count_linebreaks(Count1, ResCount, Chars0, Chars1 )
625 ; Char =< 32 ->
626 spaces_count_linebreaks(Count,ResCount, Chars0, Chars1 )
627 ;
628 Count=ResCount, Chars1 = [Char|Chars0]
629 ).
630 % END PATCH
631
632 xml_declaration_attributes( Res ) -->
633 spaces1,!,
634 xml_declaration_attributes( Res ).
635 xml_declaration_attributes( Res ) -->
636 nmtoken( Name ), !,
637 spaces,
638 expect_tokens("=","declaration attribute"),
639 spaces,
640 xml_string( Value ),
641 {xml_declaration_attribute_valid(Name, Value)
642 -> Res = [Name-Value|Attributes] ; Res = [Attributes]}, % PATCH LEUSCHEL: also continue with invalid declarations
643 xml_declaration_attributes( Attributes ),
644 spaces.
645 xml_declaration_attributes( [] ) --> "".
646
647 doctype( Name, External, Namespaces0, Namespaces1 ) -->
648 spaces,
649 nmtoken( Name ),
650 spaces,
651 doctype_id( External0 ),
652 spaces,
653 doctype1( Namespaces0, Literals, Namespaces1 ),
654 {doctype_extension(Literals, External0, External)}.
655
656 doctype_extension( [], External, External ).
657 doctype_extension( [Literal|Literals], External0, External ) :-
658 extended_doctype( External0, [Literal|Literals], External ).
659
660 extended_doctype( system(URL), Literals, system(URL,Literals) ).
661 extended_doctype( public(URN,URL), Literals, public(URN,URL,Literals) ).
662 extended_doctype( local, Literals, local(Literals) ).
663
664 doctype1( Namespaces0, Literals, Namespaces1 ) -->
665 "[",
666 !,
667 dtd( Namespaces0, Literals, Namespaces1 ),
668 expect_tokens("]","doctype").
669 doctype1( Namespaces, [], Namespaces ) --> "".
670
671 doctype_id( system(URL) ) -->
672 "SYSTEM",
673 spaces,
674 uri( URL ).
675 doctype_id( public(URN,URL) ) -->
676 "PUBLIC",
677 spaces,
678 uri( URN ),
679 spaces,
680 uri( URL ).
681 doctype_id( local ) --> "".
682
683 dtd( Namespaces0, Literals, Namespaces1 ) -->
684 spaces,
685 "<!ENTITY",
686 !,
687 spaces,
688 nmtoken_chars( Name ),
689 spaces,
690 quote( Quote ),
691 entity_value( Quote, Namespaces0, String ),
692 spaces,
693 expect_tokens(">","ENTITY"),
694 {\+ character_entity( Name, _StandardChar ),
695 % Don't allow < "e; etc. to be updated
696 context_update( entity(Name), Namespaces0, String, Namespaces2 )
697 },
698 dtd( Namespaces2, Literals, Namespaces1 ).
699 dtd( Namespaces0, Literals, Namespaces1 ) -->
700 spaces,
701 "<!--",
702 !,
703 dtd_comment,
704 expect_tokens(">","comment"),
705 dtd( Namespaces0, Literals, Namespaces1 ).
706 dtd( Namespaces0, [dtd_literal(Literal)|Literals], Namespaces1 ) -->
707 spaces,
708 "<!",
709 !,
710 dtd_literal( Literal ),
711 dtd( Namespaces0, Literals, Namespaces1 ).
712 dtd( Namespaces, [], Namespaces ) --> spaces.
713
714 dtd_literal( [] ) --> ">", !.
715 dtd_literal( Chars ) -->
716 "--",
717 !,
718 dtd_comment,
719 dtd_literal( Chars ).
720 dtd_literal( [Char|Chars] ) -->
721 [Char],
722 dtd_literal( Chars ).
723
724 dtd_comment( Plus, Minus ) :-
725 append( _Chars, [0'-,0'-|Minus], Plus ),
726 !.
727
728 entity_value( Quote, Namespaces, String, [Char|Plus], Minus ) :-
729 ( Char == Quote ->
730 String = [],
731 Minus = Plus
732 ; Char =:= "&" ->
733 reference_in_entity( Namespaces, Quote, String, Plus, Minus )
734 ;
735 String = [Char|String1],
736 entity_value( Quote, Namespaces, String1, Plus, Minus )
737 ).
738
739 attribute_value( NL, String, Namespaces, Name ) --> % PATCH : add counter NL for line breaks in attribute values
740 (quote( Quote ) -> []
741 ; {atom_codes(AN,Name), add_xml_warning('Expecting starting quotes for value of XML attribute: ',AN),fail}),
742 attribute_leading_layouts( NL, Quote, Namespaces, String ).
743
744 attribute_leading_layouts( 0, _Quote, _Namespace, [], [], [] ).
745 attribute_leading_layouts( NL, Quote, Namespaces, String, [Char|Plus], Minus ) :-
746 ( Char == Quote ->
747 String = [],
748 Minus = Plus,
749 NL = 0
750 ; Char =:= "&" ->
751 reference_in_layout( NL, Namespaces, Quote, String, Plus, Minus )
752 ; Char =:= 10 -> % count linebreaks for correct XML line numbers, but don't add to attribute value (according to https://www.w3.org/TR/1998/REC-xml-19980210#AVNormalize)
753 attribute_layouts( OldL, Quote, Namespaces, false, String, Plus, Minus ),
754 NL is OldL+1
755 ; Char > 32, Char \== 160 ->
756 String = [Char|String1],
757 attribute_layouts( NL, Quote, Namespaces, false, String1, Plus, Minus )
758 ;
759 attribute_leading_layouts( NL, Quote, Namespaces, String, Plus, Minus )
760 ).
761
762 attribute_layouts( 0, _Quote, _Namespaces, _Layout, [], [], [] ).
763 attribute_layouts( NL, Quote, Namespaces, Layout, String, [Char|Plus], Minus ) :-
764 ( Char == Quote ->
765 String = [],
766 Minus = Plus,
767 NL = 0
768 ; Char =:= "&" ->
769 reference_in_value( NL, Namespaces, Quote, Layout, String, Plus, Minus )
770 ; Char =:= 10 -> % count linebreaks, same as above
771 attribute_layouts( OldL, Quote, Namespaces, false, String, Plus, Minus ),
772 NL is OldL+1
773 ; Char > 32, Char \== 160 ->
774 ( Layout == true ->
775 String = [0' ,Char|String1] %'
776 ;
777 String = [Char|String1]
778 ),
779 attribute_layouts( NL, Quote, Namespaces, false, String1, Plus, Minus )
780 ;
781 attribute_layouts( NL, Quote, Namespaces, true, String, Plus, Minus )
782 ). % PATCH end
783
784 reference_in_layout( NL, Namespaces, Quote, String, Plus, Minus ) :-
785 ( standard_character_entity( Char, Plus, Mid ) ->
786 String = [Char|String1],
787 attribute_layouts( NL, Quote, Namespaces, false, String1, Mid, Minus )
788 ; entity_reference_name( Name, Plus, Suffix ),
789 defined_entity( Name, Namespaces, Text ) ->
790 append( Text, Suffix, Mid ),
791 attribute_leading_layouts( NL, Quote, Namespaces, String, Mid, Minus )
792 ; % Just & is okay in a value
793 String = [0'&|String1], %'
794 attribute_layouts( NL, Quote, Namespaces, false, String1, Plus, Minus )
795 ).
796
797 reference_in_value( Newlines, Namespaces, Quote, Layout, String, Plus, Minus ) :-
798 ( standard_character_entity( Char, Plus, Mid ) ->
799 ( Layout == true ->
800 String = [0' ,Char|String1] %'
801 ;
802 String = [Char|String1]
803 ),
804 Layout1 = false
805 ; entity_reference_name( Name, Plus, Suffix ),
806 defined_entity( Name, Namespaces, Text ) ->
807 String = String1,
808 append( Text, Suffix, Mid ),
809 Layout1 = Layout
810 ; % Just & is okay in a value
811 Mid = Plus,
812 String = [0'&|String1], %'
813 Layout1 = false
814 ),
815 attribute_layouts( Newlines, Quote, Namespaces, Layout1, String1, Mid, Minus ).
816
817 /* References are resolved backwards in Entity defintions so that
818 * circularity is avoided.
819 */
820 reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
821 ( standard_character_entity( _SomeChar, Plus, _Rest ) ->
822 String = [0'&|String1], % ' Character entities are unparsed
823 Mid = Plus
824 ; entity_reference_name( Name, Plus, Suffix ),
825 defined_entity( Name, Namespaces, Text ) ->
826 String = String1,
827 append( Text, Suffix, Mid )
828 ),
829 entity_value( Quote, Namespaces, String1, Mid, Minus ).
830
831 standard_character_entity( Char ) -->
832 "#x", !, hex_character_reference( Char ), ";".
833 standard_character_entity( Char ) -->
834 "#", !, digit( Digit ), digits( Digits ), ";",
835 {number_codes( Char, [Digit|Digits])}.
836 standard_character_entity( C ) -->
837 chars( String ),
838 ";",
839 !,
840 {character_entity(String, C)}. % quot, amp, lt, gt, apos
841
842 uri( URI ) -->
843 quote( Quote ),
844 uri1( Quote, URI ).
845
846 uri1( Quote, [] ) -->
847 quote( Quote ),
848 !.
849 uri1( Quote, [Char|Chars] ) -->
850 [Char],
851 uri1( Quote, Chars ).
852
853 comment( Chars, Plus, Minus ) :-
854 append( Chars, [0'-,0'-,0'>|Minus], Plus ), %'
855 !.
856
857 cdata( Chars, Plus, Minus ) :-
858 append( Chars, [0'],0'],0'>|Minus], Plus ), %'
859 !.
860 % Syntax Components
861
862 hex_character_reference( Code ) -->
863 hex_character_reference1( 0, Code ).
864
865 hex_character_reference1( Current, Code ) -->
866 hex_digit_char( Value ),
867 !,
868 {New is (Current << 4) + Value},
869 hex_character_reference1( New, Code ).
870 hex_character_reference1( Code, Code ) --> "".
871
872 hex_digit_char( 0 ) --> "0".
873 hex_digit_char( 1 ) --> "1".
874 hex_digit_char( 2 ) --> "2".
875 hex_digit_char( 3 ) --> "3".
876 hex_digit_char( 4 ) --> "4".
877 hex_digit_char( 5 ) --> "5".
878 hex_digit_char( 6 ) --> "6".
879 hex_digit_char( 7 ) --> "7".
880 hex_digit_char( 8 ) --> "8".
881 hex_digit_char( 9 ) --> "9".
882 hex_digit_char( 10 ) --> "A".
883 hex_digit_char( 11 ) --> "B".
884 hex_digit_char( 12 ) --> "C".
885 hex_digit_char( 13 ) --> "D".
886 hex_digit_char( 14 ) --> "E".
887 hex_digit_char( 15 ) --> "F".
888 hex_digit_char( 10 ) --> "a".
889 hex_digit_char( 11 ) --> "b".
890 hex_digit_char( 12 ) --> "c".
891 hex_digit_char( 13 ) --> "d".
892 hex_digit_char( 14 ) --> "e".
893 hex_digit_char( 15 ) --> "f".
894
895 quote( 0'" ) --> %'
896 """".
897 quote( 0'\' ) -->
898 "'".
899
900 spaces( [], [] ).
901 spaces( [Char|Chars0], Chars1 ) :-
902 ( Char =< 32 ->
903 spaces( Chars0, Chars1 )
904 ;
905 Chars1 = [Char|Chars0]
906 ).
907
908 spaces1( [Char|Chars0], Chars1 ) :-
909 ( Char =< 32 ->
910 spaces( Chars0, Chars1 )
911 ).
912
913 nmtoken( Name ) -->
914 nmtoken_chars( Chars ),
915 {atom_codes(Name, Chars)}.
916
917 nmtoken_chars( [Char|Chars] ) -->
918 [Char],
919 {nmtoken_first( Char )},
920 nmtoken_chars_tail( Chars ).
921
922 nmtoken_chars_tail( [Char|Chars] ) -->
923 [Char],
924 {nmtoken_char(Char)},
925 !,
926 nmtoken_chars_tail( Chars ).
927 nmtoken_chars_tail([]) --> "".
928
929 nmtoken_first( 0': ).
930 nmtoken_first( 0'_ ).
931 nmtoken_first( Char ) :-
932 alphabet( Char ).
933
934 nmtoken_char( 0'a ).
935 nmtoken_char( 0'b ).
936 nmtoken_char( 0'c ).
937 nmtoken_char( 0'd ).
938 nmtoken_char( 0'e ).
939 nmtoken_char( 0'f ).
940 nmtoken_char( 0'g ).
941 nmtoken_char( 0'h ).
942 nmtoken_char( 0'i ).
943 nmtoken_char( 0'j ).
944 nmtoken_char( 0'k ).
945 nmtoken_char( 0'l ).
946 nmtoken_char( 0'm ).
947 nmtoken_char( 0'n ).
948 nmtoken_char( 0'o ).
949 nmtoken_char( 0'p ).
950 nmtoken_char( 0'q ).
951 nmtoken_char( 0'r ).
952 nmtoken_char( 0's ).
953 nmtoken_char( 0't ).
954 nmtoken_char( 0'u ).
955 nmtoken_char( 0'v ).
956 nmtoken_char( 0'w ).
957 nmtoken_char( 0'x ).
958 nmtoken_char( 0'y ).
959 nmtoken_char( 0'z ).
960 nmtoken_char( 0'A ).
961 nmtoken_char( 0'B ).
962 nmtoken_char( 0'C ).
963 nmtoken_char( 0'D ).
964 nmtoken_char( 0'E ).
965 nmtoken_char( 0'F ).
966 nmtoken_char( 0'G ).
967 nmtoken_char( 0'H ).
968 nmtoken_char( 0'I ).
969 nmtoken_char( 0'J ).
970 nmtoken_char( 0'K ).
971 nmtoken_char( 0'L ).
972 nmtoken_char( 0'M ).
973 nmtoken_char( 0'N ).
974 nmtoken_char( 0'O ).
975 nmtoken_char( 0'P ).
976 nmtoken_char( 0'Q ).
977 nmtoken_char( 0'R ).
978 nmtoken_char( 0'S ).
979 nmtoken_char( 0'T ).
980 nmtoken_char( 0'U ).
981 nmtoken_char( 0'V ).
982 nmtoken_char( 0'W ).
983 nmtoken_char( 0'X ).
984 nmtoken_char( 0'Y ).
985 nmtoken_char( 0'Z ).
986 nmtoken_char( 0'0 ).
987 nmtoken_char( 0'1 ).
988 nmtoken_char( 0'2 ).
989 nmtoken_char( 0'3 ).
990 nmtoken_char( 0'4 ).
991 nmtoken_char( 0'5 ).
992 nmtoken_char( 0'6 ).
993 nmtoken_char( 0'7 ).
994 nmtoken_char( 0'8 ).
995 nmtoken_char( 0'9 ).
996 nmtoken_char( 0'. ).
997 nmtoken_char( 0'- ).
998 nmtoken_char( 0'_ ).
999 nmtoken_char( 0': ).
1000
1001 xml_string( String ) -->
1002 quote( Quote ),!,
1003 xml_string1( Quote, String ).
1004
1005 xml_string1( Quote, [] ) -->
1006 quote( Quote ),
1007 !.
1008 xml_string1( Quote, [Char|Chars] ) -->
1009 [Char],
1010 xml_string1( Quote, Chars ).
1011
1012 alphabet( 0'a ).
1013 alphabet( 0'b ).
1014 alphabet( 0'c ).
1015 alphabet( 0'd ).
1016 alphabet( 0'e ).
1017 alphabet( 0'f ).
1018 alphabet( 0'g ).
1019 alphabet( 0'h ).
1020 alphabet( 0'i ).
1021 alphabet( 0'j ).
1022 alphabet( 0'k ).
1023 alphabet( 0'l ).
1024 alphabet( 0'm ).
1025 alphabet( 0'n ).
1026 alphabet( 0'o ).
1027 alphabet( 0'p ).
1028 alphabet( 0'q ).
1029 alphabet( 0'r ).
1030 alphabet( 0's ).
1031 alphabet( 0't ).
1032 alphabet( 0'u ).
1033 alphabet( 0'v ).
1034 alphabet( 0'w ).
1035 alphabet( 0'x ).
1036 alphabet( 0'y ).
1037 alphabet( 0'z ).
1038 alphabet( 0'A ).
1039 alphabet( 0'B ).
1040 alphabet( 0'C ).
1041 alphabet( 0'D ).
1042 alphabet( 0'E ).
1043 alphabet( 0'F ).
1044 alphabet( 0'G ).
1045 alphabet( 0'H ).
1046 alphabet( 0'I ).
1047 alphabet( 0'J ).
1048 alphabet( 0'K ).
1049 alphabet( 0'L ).
1050 alphabet( 0'M ).
1051 alphabet( 0'N ).
1052 alphabet( 0'O ).
1053 alphabet( 0'P ).
1054 alphabet( 0'Q ).
1055 alphabet( 0'R ).
1056 alphabet( 0'S ).
1057 alphabet( 0'T ).
1058 alphabet( 0'U ).
1059 alphabet( 0'V ).
1060 alphabet( 0'W ).
1061 alphabet( 0'X ).
1062 alphabet( 0'Y ).
1063 alphabet( 0'Z ).
1064
1065 digit( C ) --> [C], {digit_table( C )}.
1066
1067 digit_table( 0'0 ).
1068 digit_table( 0'1 ).
1069 digit_table( 0'2 ).
1070 digit_table( 0'3 ).
1071 digit_table( 0'4 ).
1072 digit_table( 0'5 ).
1073 digit_table( 0'6 ).
1074 digit_table( 0'7 ).
1075 digit_table( 0'8 ).
1076 digit_table( 0'9 ).
1077
1078 digits( [Digit|Digits] ) -->
1079 digit( Digit ),
1080 digits( Digits ).
1081 digits( [] ) --> [].
1082
1083 character_entity( "quot", 0'" ). %'
1084 character_entity( "amp", 0'& ). %'
1085 character_entity( "lt", 0'< ). %'
1086 character_entity( "gt", 0'> ). %'
1087 character_entity( "apos", 0'\' ).
1088 /* xml_diagnosis.pl : XML exception diagnosis.
1089 *
1090 * $Revision$
1091 */
1092
1093 /* xml_fault( +Term, +Indentation, ?SubTerm, ?Path, ?Message ) identifies SubTerm
1094 * as a sub-term of Term which cannot be serialized after Indentation.
1095 * Message is an atom naming the type of error; Path is a string encoding a
1096 * list of SubTerm's ancestor elements in the form <tag>{(id)}* where <tag> is the
1097 * element tag and <id> is the value of any attribute _named_ id.
1098 */
1099 xml_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1100 var( Term ).
1101 xml_fault( xml(Attributes,_Content), _Indent, Term, [], Message ) :-
1102 member( Attribute, Attributes ),
1103 attribute_fault( Attribute, Term, Message ).
1104 xml_fault( xml(_Attributes,Content), Indent, Culprit, Path, Message ) :-
1105 xml_content_fault( Content, Indent, Culprit, Path, Message ).
1106 xml_fault( Term, _Indent, Term, [], 'Illegal Term' ).
1107
1108 xml_content_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1109 var( Term ).
1110 xml_content_fault( pcdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1111 \+ is_chars( Chars ).
1112 xml_content_fault( cdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1113 \+ is_chars( Chars ).
1114 xml_content_fault( [H|_T], Indent, Culprit, Path, Message ) :-
1115 xml_content_fault( H, Indent, Culprit, Path, Message ).
1116 xml_content_fault( [_H|T], Indent, Culprit, Path, Message ) :-
1117 xml_content_fault( T, Indent, Culprit, Path, Message ).
1118 xml_content_fault( namespace(_URI,_Prefix,Element), Indent, Culprit, Path, Message ) :-
1119 element_fault( Element, [0' |Indent], Culprit, Path, Message ).
1120 xml_content_fault( Element, Indent, Culprit, Path, Message ) :-
1121 element_fault( Element, [0' |Indent], Culprit, Path, Message ).
1122 xml_content_fault( Term, Indent, Term, [], 'Illegal Term' ) :-
1123 \+ generation(Term, "", false, Indent, _Format, _Plus, _Minus ).
1124
1125 element_fault( element(Tag, _Attributes, _Contents), _Indent, Tag, [], 'Tag must be an atom' ) :-
1126 \+ atom( Tag ).
1127 element_fault( element(Tag, Attributes, _Contents), _Indent, Tag, [], 'Attributes must be instantiated' ) :-
1128 var( Attributes ).
1129 element_fault( element(Tag, Attributes, _Contents), _Indent, Faulty, Path, Message ) :-
1130 fault_path( Tag, Attributes, Path, [] ),
1131 member( Attribute, Attributes ),
1132 attribute_fault( Attribute, Faulty, Message ).
1133 element_fault( element(Tag, Attributes, Contents), Indent, Culprit, Path, Message ) :-
1134 fault_path( Tag, Attributes, Path, Path1 ),
1135 xml_content_fault( Contents, Indent, Culprit, Path1, Message ).
1136
1137 attribute_fault( Attribute, Attribute, 'Illegal Variable' ) :-
1138 var( Attribute ).
1139 attribute_fault( Name=Value, Name=Value, 'Attribute Name must be atom' ) :-
1140 \+ atom(Name).
1141 attribute_fault( Name=Value, Name=Value, 'Attribute Value must be chars' ) :-
1142 \+ is_chars( Value ).
1143 attribute_fault( Attribute, Attribute, 'Malformed Attribute' ) :-
1144 Attribute \= (_Name=_Value).
1145
1146 is_chars( Chars ) :-
1147 is_list( Chars ),
1148 \+ (member( Char, Chars ), \+ (integer(Char), Char >=0, Char =< 255)).
1149
1150 fault_path( Tag, Attributes ) -->
1151 {atom_codes( Tag, Chars )},
1152 chars( Chars ),
1153 fault_id( Attributes ),
1154 " ".
1155
1156 fault_id( Attributes ) -->
1157 {member( id=Chars, Attributes ), is_chars( Chars )},
1158 !,
1159 "(", chars(Chars), ")".
1160 fault_id( _Attributes ) --> "".
1161 /* xml_generation.pl : Document -> XML translation
1162 *
1163 * $Revision$
1164 */
1165
1166 /* document_generation( +Format, +Document ) is a DCG generating Document
1167 * as a list of character codes. Format is true|false defining whether layouts,
1168 * to provide indentation, should be added between the element content of
1169 * the resultant "string". Note that formatting is disabled for elements that
1170 * are interspersed with pcdata/1 terms, such as XHTML's 'inline' elements.
1171 * Also, Format is over-ridden, for an individual element, by an explicit
1172 * 'xml:space'="preserve" attribute.
1173 */
1174 document_generation( Format, xml(Attributes, Document) ) -->
1175 ? document_generation_body( Attributes, Format, Document ).
1176
1177 document_generation_body( [], Format, Document ) -->
1178 generation( Document, "", Format, [], _Format1 ).
1179 document_generation_body( Attributes, Format, Document ) -->
1180 { Attributes = [_|_],
1181 ? xml_declaration_attributes_valid( Attributes )
1182 },
1183 "<?xml",
1184 generated_attributes( Attributes, Format, Format0 ),
1185 "?>",
1186 indent( true, [] ),
1187 generation( Document, "", Format0, [], _Format1 ).
1188
1189 generation( [], _Prefix, Format, _Indent, Format ) --> [].
1190 generation( [Term|Terms], Prefix, Format0, Indent, Format ) -->
1191 generation( Term, Prefix, Format0, Indent, Format1 ),
1192 generation( Terms, Prefix, Format1, Indent, Format ).
1193 generation( doctype(Name, External), _Prefix, Format, [], Format ) -->
1194 "<!DOCTYPE ",
1195 generated_name( Name ),
1196 generated_external_id( External ),
1197 ">".
1198 generation( instructions(Target,Process), _Prefix, Format, Indent, Format ) -->
1199 indent( Format, Indent ),
1200 "<?", generated_name(Target), " ", chars( Process ) ,"?>".
1201 generation( pcdata(Chars), _Prefix, _Format, _Indent, false ) -->
1202 pcdata_generation( Chars ).
1203 generation( comment( Comment ), _Prefix, Format, Indent, Format ) -->
1204 indent( Format, Indent ),
1205 "<!--", chars( Comment ), "-->".
1206 generation( namespace(URI, Prefix, element(Name, Atts, Content)),
1207 _Prefix0, Format, Indent, Format ) -->
1208 indent( Format, Indent ),
1209 "<", generated_prefixed_name( Prefix, Name ),
1210 generated_prefixed_attributes( Prefix, URI, Atts, Format, Format1 ),
1211 generated_content( Content, Format1, Indent, Prefix, Name ).
1212 generation( element(Name, Atts, Content), Prefix, Format, Indent, Format ) -->
1213 indent( Format, Indent ),
1214 "<", generated_prefixed_name( Prefix, Name ),
1215 generated_attributes( Atts, Format, Format1 ),
1216 generated_content( Content, Format1, Indent, Prefix, Name ).
1217 generation( cdata(CData), _Prefix, Format, Indent, Format ) -->
1218 indent( Format, Indent ),
1219 "<![CDATA[", cdata_generation(CData), "]]>".
1220
1221 generated_attributes( [], Format, Format ) --> [].
1222 generated_attributes( [Name=Value|Attributes], Format0, Format ) -->
1223 {( Name == 'xml:space',
1224 Value="preserve" ->
1225 Format1 = false
1226 ;
1227 Format1 = Format0
1228 )},
1229 " ",
1230 generated_name( Name ),
1231 "=""",
1232 quoted_string( Value ),
1233 """",
1234 generated_attributes( Attributes, Format1, Format ).
1235
1236 generated_prefixed_name( [], Name ) -->
1237 generated_name( Name ).
1238 generated_prefixed_name( Prefix, Name ) -->
1239 {Prefix = [_|_]},
1240 chars( Prefix ), ":",
1241 generated_name( Name ).
1242
1243 generated_content( [], _Format, _Indent, _Prefix, _Namespace ) -->
1244 " />". % Leave an extra space for XHTML output.
1245 generated_content( [H|T], Format, Indent, Prefix, Namespace ) -->
1246 ">",
1247 generation( H, Prefix, Format, [0' |Indent], Format1 ),
1248 generation( T, Prefix, Format1, [0' |Indent], Format2 ),
1249 indent( Format2, Indent ),
1250 "</", generated_prefixed_name( Prefix, Namespace ), ">".
1251
1252 generated_prefixed_attributes( [_|_Prefix], _URI, Atts, Format0, Format ) -->
1253 generated_attributes( Atts, Format0, Format ).
1254 generated_prefixed_attributes( [], URI, Atts, Format0, Format ) -->
1255 {atom_codes( URI, Namespace ),
1256 findall( Attr, (member(Attr, Atts), Attr \= (xmlns=_Val)), Atts1 )
1257 },
1258 generated_attributes( [xmlns=Namespace|Atts1], Format0, Format ).
1259
1260 generated_name( Name, Plus, Minus ) :-
1261 atom_codes( Name, Chars ),
1262 append( Chars, Minus, Plus ).
1263
1264 generated_external_id( local ) --> "".
1265 generated_external_id( local(Literals) ) --> " [",
1266 generated_doctype_literals( Literals ), "\n\t]".
1267 generated_external_id( system(URL) ) -->
1268 " SYSTEM """,
1269 chars( URL ),
1270 """".
1271 generated_external_id( system(URL,Literals) ) -->
1272 " SYSTEM """,
1273 chars( URL ),
1274 """ [",
1275 generated_doctype_literals( Literals ), "\n\t]".
1276 generated_external_id( public(URN,URL) ) -->
1277 " PUBLIC """,
1278 chars( URN ),
1279 """ """,
1280 chars( URL ),
1281 """".
1282 generated_external_id( public(URN,URL,Literals) ) -->
1283 " PUBLIC """,
1284 chars( URN ),
1285 """ """,
1286 chars( URL ),
1287 """ [",
1288 generated_doctype_literals( Literals ), "\n\t]".
1289
1290 generated_doctype_literals( [] ) --> "".
1291 generated_doctype_literals( [dtd_literal(String)|Literals] ) --> "\n\t",
1292 "<!", cdata_generation( String ), ">",
1293 generated_doctype_literals( Literals ).
1294
1295 /* quoted_string( +Chars ) is a DCG representing Chars, a list of character
1296 * codes, as a legal XML attribute string. Any leading or trailing layout
1297 * characters are removed. &, " and < characters are replaced by &, "
1298 * and < respectively.
1299 */
1300 quoted_string( Raw, Plus, Minus ) :-
1301 quoted_string1( Raw, NoLeadingLayouts ),
1302 quoted_string2( NoLeadingLayouts, Layout, Layout, Plus, Minus ).
1303
1304 quoted_string1( [], [] ).
1305 quoted_string1( [Char|Chars], NoLeadingLayouts ) :-
1306 ( Char > 32 ->
1307 NoLeadingLayouts = [Char|Chars]
1308 ;
1309 quoted_string1( Chars, NoLeadingLayouts )
1310 ).
1311
1312 quoted_string2( [], _LayoutPlus, _LayoutMinus, List, List ).
1313 quoted_string2( [Char|Chars], LayoutPlus, LayoutMinus, Plus, Minus ) :-
1314 ( Char =< " " ->
1315 Plus = Plus1,
1316 LayoutMinus = [Char|LayoutMinus1],
1317 LayoutPlus = LayoutPlus1
1318 ; Char =< 127 ->
1319 Plus = LayoutPlus,
1320 pcdata_7bit( Char, LayoutMinus, Plus1 ),
1321 LayoutPlus1 = LayoutMinus1
1322 ? ; legal_xml_unicode( Char ) ->
1323 Plus = LayoutPlus,
1324 number_codes( Char, Codes ),
1325 pcdata_8bits_plus( Codes, LayoutMinus, Plus1 ),
1326 LayoutPlus1 = LayoutMinus1
1327 ;
1328 LayoutPlus = LayoutPlus1,
1329 LayoutMinus = LayoutMinus1,
1330 Plus = Plus1
1331 ),
1332 quoted_string2( Chars, LayoutPlus1, LayoutMinus1, Plus1, Minus ).
1333
1334 indent( false, _Indent ) --> [].
1335 indent( true, Indent ) --> "\n",
1336 chars( Indent ).
1337
1338 /* pcdata_generation( +Chars ) is a DCG representing Chars, a list of character
1339 * codes as legal XML "Parsed character data" (PCDATA) string. Any codes
1340 * which cannot be represented by a 7-bit character are replaced by their
1341 * decimal numeric character entity e.g. code 160 (non-breaking space) is
1342 * represented as  . Any character codes disallowed by the XML
1343 * specification are not encoded.
1344 */
1345 pcdata_generation( [], Plus, Plus ).
1346 pcdata_generation( [Char|Chars], Plus, Minus ) :-
1347 ( Char =< 127 ->
1348 pcdata_7bit( Char, Plus, Mid )
1349 ; legal_xml_unicode( Char ) ->
1350 number_codes( Char, Codes ),
1351 pcdata_8bits_plus( Codes, Plus, Mid )
1352 ;
1353 Plus = Mid
1354 ),
1355 pcdata_generation( Chars, Mid, Minus ).
1356
1357 /* pcdata_7bit(+Char) represents the ascii character set in its
1358 * simplest format, using the character entities & " < and >
1359 * which are common to both XML and HTML. The numeric entity ' is used in
1360 * place of ', because browsers don't recognize it in HTML.
1361 */
1362 pcdata_7bit( 0 ) --> "".
1363 pcdata_7bit( 1 ) --> "".
1364 pcdata_7bit( 2 ) --> "".
1365 pcdata_7bit( 3 ) --> "".
1366 pcdata_7bit( 4 ) --> "".
1367 pcdata_7bit( 5 ) --> "".
1368 pcdata_7bit( 6 ) --> "".
1369 pcdata_7bit( 7 ) --> "".
1370 pcdata_7bit( 8 ) --> "".
1371 pcdata_7bit( 9 ) --> [9].
1372 pcdata_7bit( 10 ) --> [10].
1373 pcdata_7bit( 11 ) --> "".
1374 pcdata_7bit( 12 ) --> "".
1375 pcdata_7bit( 13 ) --> [13].
1376 pcdata_7bit( 14 ) --> "".
1377 pcdata_7bit( 15 ) --> "".
1378 pcdata_7bit( 16 ) --> "".
1379 pcdata_7bit( 17 ) --> "".
1380 pcdata_7bit( 18 ) --> "".
1381 pcdata_7bit( 19 ) --> "".
1382 pcdata_7bit( 20 ) --> "".
1383 pcdata_7bit( 21 ) --> "".
1384 pcdata_7bit( 22 ) --> "".
1385 pcdata_7bit( 23 ) --> "".
1386 pcdata_7bit( 24 ) --> "".
1387 pcdata_7bit( 25 ) --> "".
1388 pcdata_7bit( 26 ) --> "".
1389 pcdata_7bit( 27 ) --> "".
1390 pcdata_7bit( 28 ) --> "".
1391 pcdata_7bit( 29 ) --> "".
1392 pcdata_7bit( 30 ) --> "".
1393 pcdata_7bit( 31 ) --> "".
1394 pcdata_7bit( 32 ) --> " ".
1395 pcdata_7bit( 33 ) --> "!".
1396 pcdata_7bit( 34 ) --> """.
1397 pcdata_7bit( 35 ) --> "#".
1398 pcdata_7bit( 36 ) --> "$".
1399 pcdata_7bit( 37 ) --> "%".
1400 pcdata_7bit( 38 ) --> "&".
1401 pcdata_7bit( 39 ) --> "'".
1402 pcdata_7bit( 40 ) --> "(".
1403 pcdata_7bit( 41 ) --> ")".
1404 pcdata_7bit( 42 ) --> "*".
1405 pcdata_7bit( 43 ) --> "+".
1406 pcdata_7bit( 44 ) --> ",".
1407 pcdata_7bit( 45 ) --> "-".
1408 pcdata_7bit( 46 ) --> ".".
1409 pcdata_7bit( 47 ) --> "/".
1410 pcdata_7bit( 48 ) --> "0".
1411 pcdata_7bit( 49 ) --> "1".
1412 pcdata_7bit( 50 ) --> "2".
1413 pcdata_7bit( 51 ) --> "3".
1414 pcdata_7bit( 52 ) --> "4".
1415 pcdata_7bit( 53 ) --> "5".
1416 pcdata_7bit( 54 ) --> "6".
1417 pcdata_7bit( 55 ) --> "7".
1418 pcdata_7bit( 56 ) --> "8".
1419 pcdata_7bit( 57 ) --> "9".
1420 pcdata_7bit( 58 ) --> ":".
1421 pcdata_7bit( 59 ) --> ";".
1422 pcdata_7bit( 60 ) --> "<".
1423 pcdata_7bit( 61 ) --> "=".
1424 pcdata_7bit( 62 ) --> ">".
1425 pcdata_7bit( 63 ) --> "?".
1426 pcdata_7bit( 64 ) --> "@".
1427 pcdata_7bit( 65 ) --> "A".
1428 pcdata_7bit( 66 ) --> "B".
1429 pcdata_7bit( 67 ) --> "C".
1430 pcdata_7bit( 68 ) --> "D".
1431 pcdata_7bit( 69 ) --> "E".
1432 pcdata_7bit( 70 ) --> "F".
1433 pcdata_7bit( 71 ) --> "G".
1434 pcdata_7bit( 72 ) --> "H".
1435 pcdata_7bit( 73 ) --> "I".
1436 pcdata_7bit( 74 ) --> "J".
1437 pcdata_7bit( 75 ) --> "K".
1438 pcdata_7bit( 76 ) --> "L".
1439 pcdata_7bit( 77 ) --> "M".
1440 pcdata_7bit( 78 ) --> "N".
1441 pcdata_7bit( 79 ) --> "O".
1442 pcdata_7bit( 80 ) --> "P".
1443 pcdata_7bit( 81 ) --> "Q".
1444 pcdata_7bit( 82 ) --> "R".
1445 pcdata_7bit( 83 ) --> "S".
1446 pcdata_7bit( 84 ) --> "T".
1447 pcdata_7bit( 85 ) --> "U".
1448 pcdata_7bit( 86 ) --> "V".
1449 pcdata_7bit( 87 ) --> "W".
1450 pcdata_7bit( 88 ) --> "X".
1451 pcdata_7bit( 89 ) --> "Y".
1452 pcdata_7bit( 90 ) --> "Z".
1453 pcdata_7bit( 91 ) --> "[".
1454 pcdata_7bit( 92 ) --> [92].
1455 pcdata_7bit( 93 ) --> "]".
1456 pcdata_7bit( 94 ) --> "^".
1457 pcdata_7bit( 95 ) --> "_".
1458 pcdata_7bit( 96 ) --> "`".
1459 pcdata_7bit( 97 ) --> "a".
1460 pcdata_7bit( 98 ) --> "b".
1461 pcdata_7bit( 99 ) --> "c".
1462 pcdata_7bit( 100 ) --> "d".
1463 pcdata_7bit( 101 ) --> "e".
1464 pcdata_7bit( 102 ) --> "f".
1465 pcdata_7bit( 103 ) --> "g".
1466 pcdata_7bit( 104 ) --> "h".
1467 pcdata_7bit( 105 ) --> "i".
1468 pcdata_7bit( 106 ) --> "j".
1469 pcdata_7bit( 107 ) --> "k".
1470 pcdata_7bit( 108 ) --> "l".
1471 pcdata_7bit( 109 ) --> "m".
1472 pcdata_7bit( 110 ) --> "n".
1473 pcdata_7bit( 111 ) --> "o".
1474 pcdata_7bit( 112 ) --> "p".
1475 pcdata_7bit( 113 ) --> "q".
1476 pcdata_7bit( 114 ) --> "r".
1477 pcdata_7bit( 115 ) --> "s".
1478 pcdata_7bit( 116 ) --> "t".
1479 pcdata_7bit( 117 ) --> "u".
1480 pcdata_7bit( 118 ) --> "v".
1481 pcdata_7bit( 119 ) --> "w".
1482 pcdata_7bit( 120 ) --> "x".
1483 pcdata_7bit( 121 ) --> "y".
1484 pcdata_7bit( 122 ) --> "z".
1485 pcdata_7bit( 123 ) --> "{".
1486 pcdata_7bit( 124 ) --> "|".
1487 pcdata_7bit( 125 ) --> "}".
1488 pcdata_7bit( 126 ) --> "~".
1489 pcdata_7bit( 127 ) --> "".
1490
1491 pcdata_8bits_plus( Codes ) -->
1492 "&#", chars( Codes ), ";".
1493
1494 /* cdata_generation( +Chars ) is a DCG representing Chars, a list of character
1495 * codes as a legal XML CDATA string. Any character codes disallowed by the XML
1496 * specification are not encoded.
1497 */
1498 cdata_generation( [] ) --> "".
1499 cdata_generation( [Char|Chars] ) -->
1500 ( {legal_xml_unicode( Char )}, !, [Char]
1501 | ""
1502 ),
1503 cdata_generation( Chars ).
1504
1505 legal_xml_unicode( 9 ).
1506 legal_xml_unicode( 10 ).
1507 legal_xml_unicode( 13 ).
1508 legal_xml_unicode( Code ) :-
1509 Code >= 32,
1510 Code =< 55295.
1511 legal_xml_unicode( Code ) :-
1512 Code >= 57344,
1513 Code =< 65533.
1514 legal_xml_unicode( Code ) :-
1515 Code >= 65536,
1516 Code =< 1114111.
1517
1518 /* xml_pp: "pretty print" an XML Document on the current output stream.
1519 *
1520 * Current Release: 1.9
1521 */
1522 /* xml_pp( +XMLDocument ) "pretty prints" XMLDocument on the current
1523 * output stream.
1524 */
1525 xml_pp( xml(Attributes, Document) ) :-
1526 write( 'xml( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1527 xml_pp_list( Document, "\t" ),
1528 format( ' ).~n', [] ).
1529 xml_pp( malformed(Attributes, Document) ) :-
1530 write( 'malformed( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1531 xml_pp_list( Document, "\t" ),
1532 format( ' ).~n', [] ).
1533
1534 xml_pp_indented( [], Indent ) :-
1535 format( '~s[]', [Indent] ).
1536 xml_pp_indented( List, Indent ) :-
1537 List = [_|_],
1538 format( '~s', [Indent] ),
1539 xml_pp_list( List, Indent ).
1540 xml_pp_indented( comment(Text), Indent ) :-
1541 format( '~scomment(', [Indent] ), pp_string(Text), put_code( 0') ).
1542 xml_pp_indented( namespace(URI,Prefix,Element), Indent ) :-
1543 format( '~snamespace( ~q, "~s",~n', [Indent,URI,Prefix] ),
1544 xml_pp_indented( Element, [0'\t|Indent] ),
1545 format( '~n~s)', [[0'\t|Indent]] ).
1546 xml_pp_indented( element(Tag,Attributes,Contents), Indent ) :-
1547 format( '~selement( ~q,~n', [Indent,Tag] ),
1548 pp_attributes( Attributes, [0'\t|Indent] ), put_code(0',), nl,
1549 xml_pp_list( Contents, [0'\t|Indent] ), write( ' )' ).
1550 xml_pp_indented( instructions(Target, Processing), Indent ) :-
1551 format( '~sinstructions( ~q, ', [Indent,Target] ),
1552 pp_string(Processing), put_code( 0') ).
1553 xml_pp_indented( doctype(Name, DoctypeId), Indent ) :-
1554 format( '~sdoctype( ~q, ', [Indent,Name] ),
1555 xml_pp_indented( DoctypeId, [0'\t|Indent] ),
1556 write( ' )' ).
1557 xml_pp_indented( cdata(CData), Indent ) :-
1558 format( '~scdata(', [Indent] ), pp_string(CData), put_code( 0') ).
1559 xml_pp_indented( pcdata(PCData), Indent ) :-
1560 format( '~spcdata(', [Indent] ), pp_string(PCData), put_code( 0') ).
1561 xml_pp_indented( public(URN,URL), _Indent ) :-
1562 format( 'public( "~s", "~s" )', [URN,URL] ).
1563 xml_pp_indented( public(URN,URL,Literals), Indent ) :-
1564 format( 'public( "~s", "~s",~n', [URN,URL] ),
1565 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1566 xml_pp_indented( system(URL), _Indent ) :-
1567 format( 'system( "~s" )', [URL] ).
1568 xml_pp_indented( system(URL,Literals), Indent ) :-
1569 format( 'system( "~s",~n', [URL] ),
1570 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1571 xml_pp_indented( local, _Indent ) :-
1572 write( local ).
1573 xml_pp_indented( local(Literals), Indent ) :-
1574 write( 'local(' ), nl,
1575 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1576 xml_pp_indented( dtd_literal(String), Indent ) :-
1577 format( '~sdtd_literal(', [Indent] ), pp_string(String), put_code( 0') ). %'
1578 xml_pp_indented( out_of_context(Tag), Indent ) :-
1579 format( '~s/* SYNTAX ERROR */ out_of_context( ~q )', [Indent,Tag] ).
1580 xml_pp_indented( unparsed(String), Indent ) :-
1581 format( '~s/* SYNTAX ERROR */ unparsed( ', [Indent] ),
1582 pp_string(String), put_code( 0') ).
1583
1584 xml_pp_list( [], Indent ) :-
1585 format( '~s[]', [Indent] ).
1586 xml_pp_list( [H|T], Indent ) :-
1587 format( '~s[~n', [Indent] ),
1588 xml_pp_indented( H, Indent ),
1589 xml_pp_list1( T, Indent ),
1590 format( '~s]', [Indent] ).
1591
1592 xml_pp_list1( [], _Indent ) :-
1593 nl.
1594 xml_pp_list1( [H|T], Indent ) :-
1595 put_code( 0', ), nl,
1596 xml_pp_indented( H, Indent ),
1597 xml_pp_list1( T, Indent ).
1598
1599 % PATCH LEUSCHEL:
1600 pp_attributes( Attributes, Indent ) :-
1601 (select('$attribute_linefeeds'=_Count,Attributes,Rest)
1602 -> pp_attributes2(Rest,Indent) % TO DO: insert linefeeds
1603 ; pp_attributes2(Attributes,Indent)).
1604
1605 pp_attributes2( [], Indent ) :-
1606 format( '~s[]', [Indent] ).
1607 pp_attributes2( [Attribute|Attributes], Indent ) :-
1608 format( '~s[', [Indent] ),
1609 pp_attributes1( Attributes, Attribute ),
1610 put_code( 0'] ).
1611 % END PATCH
1612 pp_attributes1( [], Name=Value ) :-
1613 format( '~q=', [Name] ), pp_string( Value ).
1614 pp_attributes1( [H|T], Name=Value ) :-
1615 format( '~q=', [Name] ), pp_string( Value ), write( ', ' ),
1616 pp_attributes1( T, H ).
1617 /* XML Utilities
1618 *
1619 * $Revision$
1620 */
1621
1622 % Entity and Namespace map operations: these maps are usually quite small, so
1623 % a linear list lookup is okay. They could be substituted by a logarithmic
1624 % data structure - in extremis.
1625
1626 /* empty_map( ?Map ) is true if Map is a null map.
1627 */
1628 empty_map( [] ).
1629
1630 /* map_member( +Key, +Map, ?Data ) is true if Map is a ordered map structure
1631 * which records the pair Key-Data. Key must be ground.
1632 */
1633 map_member( Key0, [Key1-Data1|Rest], Data0 ) :-
1634 ( Key0 == Key1 ->
1635 Data0 = Data1
1636 ; Key0 @> Key1 ->
1637 map_member( Key0, Rest, Data0 )
1638 ).
1639
1640 /* map_store( +Map0, +Key, +Data, ?Map1 ) is true if Map0 is an ordered map
1641 * structure, Key must be ground, and Map1 is identical to Map0 except that
1642 * the pair Key-Data is recorded by Map1.
1643 */
1644 map_store( [], Key, Data, [Key-Data] ).
1645 map_store( [Key0-Data0|Map0], Key, Data, Map ) :-
1646 ( Key == Key0 ->
1647 Map = [Key-Data|Map0]
1648 ; Key @< Key0 ->
1649 Map = [Key-Data,Key0-Data0|Map0]
1650 ; % >
1651 Map = [Key0-Data0|Map1],
1652 map_store( Map0, Key, Data, Map1 )
1653 ).
1654
1655 /* context(?Element, ?PreserveSpace, ?CurrentNS, ?DefaultNS, ?Entities, ?Namespaces )
1656 * is an ADT hiding the "state" arguments for XML Acquisition
1657 */
1658 initial_context(
1659 opt(Fmt,Ext,RemoveAttributePrefixes),
1660 context(void,PreserveSpace,'','',Entities,Empty,
1661 RemoveAttributePrefixes)
1662 ) :-
1663 empty_map( Empty ),
1664 ( Ext==false ->
1665 Entities = Empty
1666 ;
1667 extended_character_entities(Entities)
1668 ),
1669 ( Fmt==false ->
1670 PreserveSpace = true
1671 ;
1672 PreserveSpace = false
1673 ).
1674
1675 context_update( current_namespace, Context0, URI, Context1 ) :-
1676 Context0 = context(Element,Preserve,_Current,Default,Entities,
1677 Namespaces,RemoveAttributePrefixes),
1678 Context1 = context(Element,Preserve,URI,Default,Entities,
1679 Namespaces,RemoveAttributePrefixes).
1680 context_update( element, Context0, Tag, Context1 ) :-
1681 Context0 = context(_Element,Preserve,Current,Default,Entities,
1682 Namespaces,RemoveAttributePrefixes),
1683 Context1 = context(tag(Tag),Preserve,Current,Default,Entities,
1684 Namespaces,RemoveAttributePrefixes).
1685 context_update( default_namespace, Context0, URI, Context1 ):-
1686 Context0 = context(Element,Preserve,Current,_Default,Entities,
1687 Namespaces,RemoveAttributePrefixes),
1688 Context1 = context(Element,Preserve,Current,URI,Entities,
1689 Namespaces,RemoveAttributePrefixes).
1690 context_update( space_preserve, Context0, Boolean, Context1 ):-
1691 Context0 = context(Element,_Preserve,Current,Default,Entities,
1692 Namespaces,RemoveAttributePrefixes),
1693 Context1 = context(Element,Boolean,Current,Default,Entities,
1694 Namespaces,RemoveAttributePrefixes).
1695 context_update( ns_prefix(Prefix), Context0, URI, Context1 ) :-
1696 Context0 = context(Element,Preserve,Current,Default,Entities,
1697 Namespaces0,RemoveAttributePrefixes),
1698 Context1 = context(Element,Preserve,Current,Default,Entities,
1699 Namespaces1,RemoveAttributePrefixes),
1700 map_store( Namespaces0, Prefix, URI, Namespaces1 ).
1701 context_update( entity(Name), Context0, String, Context1 ) :-
1702 Context0 = context(Element,Preserve,Current,Default,Entities0,
1703 Namespaces,RemoveAttributePrefixes),
1704 Context1 = context(Element,Preserve,Current,Default,Entities1,
1705 Namespaces,RemoveAttributePrefixes),
1706 map_store( Entities0, Name, String, Entities1 ).
1707
1708 remove_attribute_prefixes( Context ) :-
1709 Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1710 _Namespaces,true).
1711
1712 current_tag( Context, Tag ) :-
1713 Context = context(tag(Tag),_Preserve,_Current,_Default,_Entities,
1714 _Namespaces,_RPFA).
1715
1716 current_namespace( Context, Current ) :-
1717 Context = context(_Element,_Preserve,Current,_Default,_Entities,
1718 _Namespaces,_RPFA).
1719
1720 default_namespace( Context, Default ) :-
1721 Context = context(_Element,_Preserve,_Current,Default,_Entities,
1722 _Namespaces,_RPFA).
1723
1724 space_preserve( Context ) :-
1725 Context = context(_Element,true,_Current,_Default,_Entities,
1726 _Namespaces,_RPFA).
1727
1728 specific_namespace( Prefix, Context, URI ) :-
1729 Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1730 Namespaces,_RPFA),
1731 map_member( Prefix, Namespaces, URI ).
1732
1733 defined_entity( Reference, Context, String ) :-
1734 Context = context(_Element,_Preserve,_Current,_Default,Entities,
1735 _Namespaces,_RPFA),
1736 map_member( Reference, Entities, String ).
1737
1738 close_context( Context, Terms, WellFormed ) :-
1739 Context = context(Element,_Preserve,_Current,_Default,_Entities,
1740 _Namespaces,_RPFA),
1741 close_context1( Element, Terms, WellFormed ).
1742
1743 close_context1( void, [], true ).
1744 close_context1( tag(TagChars), [out_of_context(Tag)], false ) :-
1745 atom_codes( Tag, TagChars ).
1746
1747 void_context(
1748 context(void,_Preserve,_Current,_Default,_Entities,_Namespaces)
1749 ).
1750
1751 /* pp_string( +String ) prints String onto the current output stream.
1752 * If String contains only 7-bit chars it is printed in shorthand quoted
1753 * format, otherwise it is written as a list.
1754 * If your Prolog uses " to delimit a special string type, just use write/1.
1755 */
1756 % [MC] rewritten
1757 pp_string(Chars) :-
1758 put_code(0'"),
1759 pp_string1(Chars),
1760 put_code(0'").
1761
1762 pp_string1([]).
1763 pp_string1([Char|Chars]) :-
1764 pp_string2(Char),
1765 pp_string1(Chars).
1766
1767 pp_string2(0'\a) :- !,
1768 put_code(0'\\),
1769 put_code(0'a).
1770 pp_string2(0'\b) :- !,
1771 put_code(0'\\),
1772 put_code(0'b).
1773 pp_string2(0'\t) :- !,
1774 put_code(0'\\),
1775 put_code(0't).
1776 pp_string2(0'\n) :- !,
1777 put_code(0'\\),
1778 put_code(0'n).
1779 pp_string2(0'\v) :- !,
1780 put_code(0'\\),
1781 put_code(0'v).
1782 pp_string2(0'\f) :- !,
1783 put_code(0'\\),
1784 put_code(0'f).
1785 pp_string2(0'\r) :- !,
1786 put_code(0'\\),
1787 put_code(0'r).
1788 pp_string2(0'\e) :- !,
1789 put_code(0'\\),
1790 put_code(0'e).
1791 pp_string2(0'\") :- !,
1792 put_code(0'\\),
1793 put_code(0'").
1794 pp_string2(0'\\) :- !,
1795 put_code(0'\\),
1796 put_code(0'\\).
1797 pp_string2(Char) :-
1798 Char>=32, Char=<126, !,
1799 put_code(Char).
1800 pp_string2(Char) :-
1801 format('\\~8r\\', [Char]).
1802
1803 xml_declaration_attributes_valid( [] ).
1804 xml_declaration_attributes_valid( [Name=Value|Attributes] ) :-
1805 ? xml_declaration_attribute_valid( Name, Value ),
1806 ? xml_declaration_attributes_valid( Attributes ).
1807
1808 xml_declaration_attribute_valid( Name, Value ) :-
1809 lowercase( Value, Lowercase ),
1810 debug_format(19,'Checking xml declaration attribute ~s=~s~n',[Name,Lowercase]),
1811 ? if(canonical_xml_declaration_attribute( Name, Lowercase ), true,
1812 (format(user_error,'Illegal <xml> declaration attribute: ~s = ~s~n',[Name,Value]),
1813 (valid_declaration_attribute_name(Name)
1814 -> ajoin(['Ignoring illegal value for attribute ',Name,': '],Msg),
1815 atom_codes(AValue,Value),
1816 add_xml_warning(Msg,AValue)
1817 ; add_xml_warning('Ignoring illegal <xml> declaration attribute: ',Name)
1818 ),
1819 fail
1820 )). % PATCH LEUSCHEL: add warning
1821
1822 % PATCH LEUSCHEL:
1823 valid_declaration_attribute_name(version).
1824 valid_declaration_attribute_name(standalone).
1825 valid_declaration_attribute_name(encoding).
1826
1827
1828 canonical_xml_declaration_attribute( version, "1.0" ).
1829 canonical_xml_declaration_attribute( standalone, "yes" ).
1830 canonical_xml_declaration_attribute( standalone, "no" ).
1831 canonical_xml_declaration_attribute( encoding, "utf-8" ).
1832 canonical_xml_declaration_attribute( encoding, "utf-16" ).
1833 canonical_xml_declaration_attribute( encoding, "ascii" ).
1834 canonical_xml_declaration_attribute( encoding, "iso-8859-1" ).
1835 canonical_xml_declaration_attribute( encoding, "iso-8859-2" ).
1836 canonical_xml_declaration_attribute( encoding, "iso-8859-15" ).
1837 canonical_xml_declaration_attribute( encoding, "windows-1252" ).
1838
1839 % PATCH LEUSCHEL: some more sets from https://www.iana.org/assignments/character-sets/character-sets.xhtml
1840 %canonical_xml_declaration_attribute( encoding, "iso-8859-3" ).
1841 %canonical_xml_declaration_attribute( encoding, "iso-8859-4" ).
1842 canonical_xml_declaration_attribute( encoding, Encoding ) :- atom_codes(Atom,Encoding),
1843 add_xml_warning('Unsupported xml encoding (which can lead to unexpected results): ',Atom).
1844
1845
1846
1847 /* lowercase( +MixedCase, ?Lowercase ) holds when Lowercase and MixedCase are
1848 * lists of character codes, and Lowercase is identical to MixedCase with
1849 * every uppercase character replaced by its lowercase equivalent.
1850 */
1851 lowercase( [], [] ).
1852 lowercase( [Char|Chars], [Lower|LowerCase] ) :-
1853 ( Char >= 0'A, Char =< 0'Z
1854 ->
1855 Lower is Char + 0'a - 0'A
1856 ;
1857 Lower = Char
1858 ),
1859 lowercase( Chars, LowerCase ).
1860
1861 :- dynamic extended_character_entities/1. % no point compiling it
1862 extended_character_entities( [
1863 "Aacute"-[193], % latin capital letter A with acute,
1864 "aacute"-[225], % latin small letter a with acute,
1865 "Acirc"-[194], % latin capital letter A with circumflex,
1866 "acirc"-[226], % latin small letter a with circumflex,
1867 "acute"-[180], % acute accent = spacing acute,
1868 "AElig"-[198], % latin capital letter AE
1869 "aelig"-[230], % latin small letter ae
1870 "Agrave"-[192], % latin capital letter A with grave
1871 "agrave"-[224], % latin small letter a with grave
1872 "alefsym"-[8501], % alef symbol = first transfinite cardinal,
1873 "Alpha"-[913], % greek capital letter alpha, U+0391
1874 "alpha"-[945], % greek small letter alpha,
1875 "and"-[8743], % logical and = wedge, U+2227 ISOtech
1876 "ang"-[8736], % angle, U+2220 ISOamso
1877 "Aring"-[197], % latin capital letter A with ring above
1878 "aring"-[229], % latin small letter a with ring above
1879 "asymp"-[8776], % almost equal to = asymptotic to,
1880 "Atilde"-[195], % latin capital letter A with tilde,
1881 "atilde"-[227], % latin small letter a with tilde,
1882 "Auml"-[196], % latin capital letter A with diaeresis,
1883 "auml"-[228], % latin small letter a with diaeresis,
1884 "bdquo"-[8222], % double low-9 quotation mark, U+201E NEW
1885 "Beta"-[914], % greek capital letter beta, U+0392
1886 "beta"-[946], % greek small letter beta, U+03B2 ISOgrk3
1887 "brvbar"-[166], % broken bar = broken vertical bar,
1888 "bull"-[8226], % bullet = black small circle,
1889 "cap"-[8745], % intersection = cap, U+2229 ISOtech
1890 "Ccedil"-[199], % latin capital letter C with cedilla,
1891 "ccedil"-[231], % latin small letter c with cedilla,
1892 "cedil"-[184], % cedilla = spacing cedilla, U+00B8 ISOdia>
1893 "cent"-[162], % cent sign, U+00A2 ISOnum>
1894 "Chi"-[935], % greek capital letter chi, U+03A7
1895 "chi"-[967], % greek small letter chi, U+03C7 ISOgrk3
1896 "circ"-[710], % modifier letter circumflex accent,
1897 "clubs"-[9827], % black club suit = shamrock,
1898 "cong"-[8773], % approximately equal to, U+2245 ISOtech
1899 "copy"-[169], % copyright sign, U+00A9 ISOnum>
1900 "crarr"-[8629], % downwards arrow with corner leftwards
1901 "cup"-[8746], % union = cup, U+222A ISOtech
1902 "curren"-[164], % currency sign, U+00A4 ISOnum>
1903 "dagger"-[8224], % dagger, U+2020 ISOpub
1904 "Dagger"-[8225], % double dagger, U+2021 ISOpub
1905 "darr"-[8595], % downwards arrow, U+2193 ISOnum
1906 "dArr"-[8659], % downwards double arrow, U+21D3 ISOamsa
1907 "deg"-[176], % degree sign, U+00B0 ISOnum>
1908 "Delta"-[916], % greek capital letter delta,
1909 "delta"-[948], % greek small letter delta,
1910 "diams"-[9830], % black diamond suit, U+2666 ISOpub
1911 "divide"-[247], % division sign, U+00F7 ISOnum>
1912 "Eacute"-[201], % latin capital letter E with acute,
1913 "eacute"-[233], % latin small letter e with acute,
1914 "Ecirc"-[202], % latin capital letter E with circumflex,
1915 "ecirc"-[234], % latin small letter e with circumflex,
1916 "Egrave"-[200], % latin capital letter E with grave,
1917 "egrave"-[232], % latin small letter e with grave,
1918 "empty"-[8709], % empty set = null set = diameter,
1919 "emsp"-[8195], % em space, U+2003 ISOpub
1920 "ensp"-[8194], % en space, U+2002 ISOpub
1921 "Epsilon"-[917], % greek capital letter epsilon, U+0395
1922 "epsilon"-[949], % greek small letter epsilon,
1923 "equiv"-[8801], % identical to, U+2261 ISOtech
1924 "Eta"-[919], % greek capital letter eta, U+0397
1925 "eta"-[951], % greek small letter eta, U+03B7 ISOgrk3
1926 "ETH"-[208], % latin capital letter ETH, U+00D0 ISOlat1>
1927 "eth"-[240], % latin small letter eth, U+00F0 ISOlat1>
1928 "Euml"-[203], % latin capital letter E with diaeresis,
1929 "euml"-[235], % latin small letter e with diaeresis,
1930 "euro"-[8364], % euro sign, U+20AC NEW
1931 "exist"-[8707], % there exists, U+2203 ISOtech
1932 "fnof"-[402], % latin small f with hook = function
1933 "forall"-[8704], % for all, U+2200 ISOtech
1934 "frac12"-[189], % vulgar fraction one half
1935 "frac14"-[188], % vulgar fraction one quarter
1936 "frac34"-[190], % vulgar fraction three quarters
1937 "frasl"-[8260], % fraction slash, U+2044 NEW
1938 "Gamma"-[915], % greek capital letter gamma,
1939 "gamma"-[947], % greek small letter gamma,
1940 "ge"-[8805], % greater-than or equal to,
1941 "harr"-[8596], % left right arrow, U+2194 ISOamsa
1942 "hArr"-[8660], % left right double arrow,
1943 "hearts"-[9829], % black heart suit = valentine,
1944 "hellip"-[8230], % horizontal ellipsis = three dot leader,
1945 "Iacute"-[205], % latin capital letter I with acute,
1946 "iacute"-[237], % latin small letter i with acute,
1947 "Icirc"-[206], % latin capital letter I with circumflex,
1948 "icirc"-[238], % latin small letter i with circumflex,
1949 "iexcl"-[161], % inverted exclamation mark, U+00A1 ISOnum>
1950 "Igrave"-[204], % latin capital letter I with grave,
1951 "igrave"-[236], % latin small letter i with grave,
1952 "image"-[8465], % blackletter capital I = imaginary part,
1953 "infin"-[8734], % infinity, U+221E ISOtech
1954 "int"-[8747], % integral, U+222B ISOtech
1955 "Iota"-[921], % greek capital letter iota, U+0399
1956 "iota"-[953], % greek small letter iota, U+03B9 ISOgrk3
1957 "iquest"-[191], % inverted question mark
1958 "isin"-[8712], % element of, U+2208 ISOtech
1959 "Iuml"-[207], % latin capital letter I with diaeresis,
1960 "iuml"-[239], % latin small letter i with diaeresis,
1961 "Kappa"-[922], % greek capital letter kappa, U+039A
1962 "kappa"-[954], % greek small letter kappa,
1963 "Lambda"-[923], % greek capital letter lambda,
1964 "lambda"-[955], % greek small letter lambda,
1965 "lang"-[9001], % left-pointing angle bracket = bra,
1966 "laquo"-[171], % left-pointing double angle quotation mark
1967 "larr"-[8592], % leftwards arrow, U+2190 ISOnum
1968 "lArr"-[8656], % leftwards double arrow, U+21D0 ISOtech
1969 "lceil"-[8968], % left ceiling = apl upstile,
1970 "ldquo"-[8220], % left double quotation mark,
1971 "le"-[8804], % less-than or equal to, U+2264 ISOtech
1972 "lfloor"-[8970], % left floor = apl downstile,
1973 "lowast"-[8727], % asterisk operator, U+2217 ISOtech
1974 "loz"-[9674], % lozenge, U+25CA ISOpub
1975 "lrm"-[8206], % left-to-right mark, U+200E NEW RFC 2070
1976 "lsaquo"-[8249], % single left-pointing angle quotation mark,
1977 "lsquo"-[8216], % left single quotation mark,
1978 "macr"-[175], % macron = spacing macron = overline
1979 "mdash"-[8212], % em dash, U+2014 ISOpub
1980 "micro"-[181], % micro sign, U+00B5 ISOnum>
1981 "middot"-[183], % middle dot = Georgian comma
1982 "minus"-[8722], % minus sign, U+2212 ISOtech
1983 "Mu"-[924], % greek capital letter mu, U+039C
1984 "mu"-[956], % greek small letter mu, U+03BC ISOgrk3
1985 "nabla"-[8711], % nabla = backward difference,
1986 "nbsp"-[160], % no-break space = non-breaking space,
1987 "ndash"-[8211], % en dash, U+2013 ISOpub
1988 "ne"-[8800], % not equal to, U+2260 ISOtech
1989 "ni"-[8715], % contains as member, U+220B ISOtech
1990 "not"-[172], % not sign, U+00AC ISOnum>
1991 "notin"-[8713], % not an element of, U+2209 ISOtech
1992 "nsub"-[8836], % not a subset of, U+2284 ISOamsn
1993 "Ntilde"-[209], % latin capital letter N with tilde,
1994 "ntilde"-[241], % latin small letter n with tilde,
1995 "Nu"-[925], % greek capital letter nu, U+039D
1996 "nu"-[957], % greek small letter nu, U+03BD ISOgrk3
1997 "Oacute"-[211], % latin capital letter O with acute,
1998 "oacute"-[243], % latin small letter o with acute,
1999 "Ocirc"-[212], % latin capital letter O with circumflex,
2000 "ocirc"-[244], % latin small letter o with circumflex,
2001 "OElig"-[338], % latin capital ligature OE,
2002 "oelig"-[339], % latin small ligature oe, U+0153 ISOlat2
2003 "Ograve"-[210], % latin capital letter O with grave,
2004 "ograve"-[242], % latin small letter o with grave,
2005 "oline"-[8254], % overline = spacing overscore,
2006 "Omega"-[937], % greek capital letter omega,
2007 "omega"-[969], % greek small letter omega,
2008 "Omicron"-[927], % greek capital letter omicron, U+039F
2009 "omicron"-[959], % greek small letter omicron, U+03BF NEW
2010 "oplus"-[8853], % circled plus = direct sum,
2011 "or"-[8744], % logical or = vee, U+2228 ISOtech
2012 "ordf"-[170], % feminine ordinal indicator, U+00AA ISOnum>
2013 "ordm"-[186], % masculine ordinal indicator,
2014 "Oslash"-[216], % latin capital letter O with stroke
2015 "oslash"-[248], % latin small letter o with stroke,
2016 "Otilde"-[213], % latin capital letter O with tilde,
2017 "otilde"-[245], % latin small letter o with tilde,
2018 "otimes"-[8855], % circled times = vector product,
2019 "Ouml"-[214], % latin capital letter O with diaeresis,
2020 "ouml"-[246], % latin small letter o with diaeresis,
2021 "para"-[182], % pilcrow sign = paragraph sign,
2022 "part"-[8706], % partial differential, U+2202 ISOtech
2023 "permil"-[8240], % per mille sign, U+2030 ISOtech
2024 "perp"-[8869], % up tack = orthogonal to = perpendicular,
2025 "Phi"-[934], % greek capital letter phi,
2026 "phi"-[966], % greek small letter phi, U+03C6 ISOgrk3
2027 "Pi"-[928], % greek capital letter pi, U+03A0 ISOgrk3
2028 "pi"-[960], % greek small letter pi, U+03C0 ISOgrk3
2029 "piv"-[982], % greek pi symbol, U+03D6 ISOgrk3
2030 "plusmn"-[177], % plus-minus sign = plus-or-minus sign,
2031 "pound"-[163], % pound sign, U+00A3 ISOnum>
2032 "prime"-[8242], % prime = minutes = feet, U+2032 ISOtech
2033 "Prime"-[8243], % double prime = seconds = inches,
2034 "prod"-[8719], % n-ary product = product sign,
2035 "prop"-[8733], % proportional to, U+221D ISOtech
2036 "Psi"-[936], % greek capital letter psi,
2037 "psi"-[968], % greek small letter psi, U+03C8 ISOgrk3
2038 "radic"-[8730], % square root = radical sign,
2039 "rang"-[9002], % right-pointing angle bracket = ket,
2040 "raquo"-[187], % right-pointing double angle quotation mark
2041 "rarr"-[8594], % rightwards arrow, U+2192 ISOnum
2042 "rArr"-[8658], % rightwards double arrow,
2043 "rceil"-[8969], % right ceiling, U+2309 ISOamsc
2044 "rdquo"-[8221], % right double quotation mark,
2045 "real"-[8476], % blackletter capital R = real part symbol,
2046 "reg"-[174], % registered sign = registered trade mark sign,
2047 "rfloor"-[8971], % right floor, U+230B ISOamsc
2048 "Rho"-[929], % greek capital letter rho, U+03A1
2049 "rho"-[961], % greek small letter rho, U+03C1 ISOgrk3
2050 "rlm"-[8207], % right-to-left mark, U+200F NEW RFC 2070
2051 "rsaquo"-[8250], % single right-pointing angle quotation mark,
2052 "rsquo"-[8217], % right single quotation mark,
2053 "sbquo"-[8218], % single low-9 quotation mark, U+201A NEW
2054 "Scaron"-[352], % latin capital letter S with caron,
2055 "scaron"-[353], % latin small letter s with caron,
2056 "sdot"-[8901], % dot operator, U+22C5 ISOamsb
2057 "sect"-[167], % section sign, U+00A7 ISOnum>
2058 "shy"-[173], % soft hyphen = discretionary hyphen,
2059 "Sigma"-[931], % greek capital letter sigma,
2060 "sigma"-[963], % greek small letter sigma,
2061 "sigmaf"-[962], % greek small letter final sigma,
2062 "sim"-[8764], % tilde operator = varies with = similar to,
2063 "spades"-[9824], % black spade suit, U+2660 ISOpub
2064 "sub"-[8834], % subset of, U+2282 ISOtech
2065 "sube"-[8838], % subset of or equal to, U+2286 ISOtech
2066 "sum"-[8721], % n-ary sumation, U+2211 ISOamsb
2067 "sup"-[8835], % superset of, U+2283 ISOtech
2068 "sup1"-[185], % superscript one = superscript digit one,
2069 "sup2"-[178], % superscript two = superscript digit two
2070 "sup3"-[179], % superscript three = superscript digit three
2071 "supe"-[8839], % superset of or equal to,
2072 "szlig"-[223], % latin small letter sharp s = ess-zed,
2073 "Tau"-[932], % greek capital letter tau, U+03A4
2074 "tau"-[964], % greek small letter tau, U+03C4 ISOgrk3
2075 "there4"-[8756], % therefore, U+2234 ISOtech
2076 "Theta"-[920], % greek capital letter theta,
2077 "theta"-[952], % greek small letter theta,
2078 "thetasym"-[977], % greek small letter theta symbol,
2079 "thinsp"-[8201], % thin space, U+2009 ISOpub
2080 "THORN"-[222], % latin capital letter THORN,
2081 "thorn"-[254], % latin small letter thorn with,
2082 "tilde"-[732], % small tilde, U+02DC ISOdia
2083 "times"-[215], % multiplication sign, U+00D7 ISOnum>
2084 "trade"-[8482], % trade mark sign, U+2122 ISOnum
2085 "Uacute"-[218], % latin capital letter U with acute,
2086 "uacute"-[250], % latin small letter u with acute,
2087 "uarr"-[8593], % upwards arrow, U+2191 ISOnum
2088 "uArr"-[8657], % upwards double arrow, U+21D1 ISOamsa
2089 "Ucirc"-[219], % latin capital letter U with circumflex,
2090 "ucirc"-[251], % latin small letter u with circumflex,
2091 "Ugrave"-[217], % latin capital letter U with grave,
2092 "ugrave"-[249], % latin small letter u with grave,
2093 "uml"-[168], % diaeresis = spacing diaeresis,
2094 "upsih"-[978], % greek upsilon with hook symbol,
2095 "Upsilon"-[933], % greek capital letter upsilon,
2096 "upsilon"-[965], % greek small letter upsilon,
2097 "Uuml"-[220], % latin capital letter U with diaeresis,
2098 "uuml"-[252], % latin small letter u with diaeresis,
2099 "weierp"-[8472], % script capital P = power set
2100 "Xi"-[926], % greek capital letter xi, U+039E ISOgrk3
2101 "xi"-[958], % greek small letter xi, U+03BE ISOgrk3
2102 "Yacute"-[221], % latin capital letter Y with acute,
2103 "yacute"-[253], % latin small letter y with acute,
2104 "yen"-[165], % yen sign = yuan sign, U+00A5 ISOnum>
2105 "yuml"-[255], % latin small letter y with diaeresis,
2106 "Yuml"-[376], % latin capital letter Y with diaeresis,
2107 "Zeta"-[918], % greek capital letter zeta, U+0396
2108 "zeta"-[950], % greek small letter zeta, U+03B6 ISOgrk3
2109 "zwj"-[8205], % zero width joiner, U+200D NEW RFC 2070
2110 "zwnj"-[8204] % zero width non-joiner,
2111 ] ).
2112
2113 /* chars( ?Chars, ?Plus, ?Minus ) used as chars( ?Chars ) in a DCG to
2114 * copy the list Chars inline.
2115 *
2116 * This is best expressed in terms of append/3 where append/3 is built-in.
2117 * For other Prologs, a straightforward specification can be used:
2118 *
2119 * chars( [] ) --> "".
2120 * chars( [Char|Chars] ) -->
2121 * [Char],
2122 * chars( Chars ).
2123 */
2124 chars( Chars, Plus, Minus ) :-
2125 append( Chars, Minus, Plus ).