1 /* xml_prob.pl : Contains xml_parse/[2,3] a bi-directional XML parser written in
2 * Prolog.
3 *
4 * Copyright (C) 2001-2005 Binding Time Limited
5 * Copyright (C) 2005 John Fletcher
6 *
7 * Current Release: $Revision$
8 *
9 * TERMS AND CONDITIONS:
10 *
11 * This program is offered free of charge, as unsupported source code. You may
12 * use it, copy it, distribute it, modify it or sell it without restriction,
13 * but entirely at your own risk.
14 *
15 */
16
17 % minimally adapted for SICStus Prolog
18 % Mats Carlsson, 2003-2006
19 % added linefeed counting for attributes (attributes_count_lf,spaces_count_linebreaks)
20 % Michael Leuschel, 2017-2022 (changes marked by PATCH LEUSCHEL)
21 % replaces library(xml) for SICStus
22
23 :- module( xml_prob, [
24 xml_parse/2,
25 xml_parse/3,
26 xml_subterm/2,
27 xml_pp/1,
28
29 xml_parse/4
30 ]).
31
32 :- use_module(module_information,[module_info/2]).
33 :- module_info(group,external_functions).
34 :- module_info(description,'This module transforms XML into a Prolog representation, adaptation of SICStus library module.').
35
36 :- use_module(probsrc(error_manager)).
37 :- use_module(probsrc(debug),[debug_format/3]).
38 :- use_module(probsrc(tools_strings),[ajoin/2]).
39
40 % added by Leuschel:
41 xml_parse( Chars, Document, Controls, Span ) :-
42 bb_put(xml_prob_current_span,Span),
43 call_cleanup(xml_parse( Chars, Document, Controls),
44 bb_delete(xml_prob_current_span,_)).
45
46 add_xml_warning(Msg,Term) :-
47 bb_get(xml_prob_current_span,Span),!,
48 add_warning(xml_prob,Msg,Term,Span).
49 add_xml_warning(Msg,Term) :-
50 add_warning(xml_prob,Msg,Term).
51
52 add_xml_error(Msg,Term) :-
53 bb_get(xml_prob_current_span,Span),!,
54 add_error(xml_prob,Msg,Term,Span).
55 add_xml_error(Msg,Term) :-
56 add_error(xml_prob,Msg,Term).
57 :- use_module(library(lists),[append/2]).
58 expect_tokens(Tokens,_) --> Tokens,!.
59 expect_tokens(Tokens,Ctxt,Input,_) :-
60 append(["XML parse error, expected '",Tokens,"' in ",Ctxt,", but got:"],Codes),
61 atom_codes(Msg,Codes),
62 length(Tokens,Len),
63 get_next_tokens(Input,Len,Nxt),
64 atom_codes(NxtAtom,Nxt),
65 add_xml_error(Msg,NxtAtom), % TO DO: extract next tokens
66 fail.
67
68 get_next_tokens(_,N,R) :- N<1,!, R=[].
69 get_next_tokens(Var,_,Res) :- var(Var),!, Res="<EOF>".
70 get_next_tokens([],_,"<EOF>").
71 get_next_tokens([H|T],Nr,[H|R]) :- N1 is Nr-1, get_next_tokens(T,N1,R).
72
73 :- use_module(library(types), [
74 must_be/4,
75 illarg/3,
76 illarg/4
77 ]).
78 :- use_module(library(lists), [
79 select/3,
80 is_list/1
81 ]).
82
83 :- set_prolog_flag(double_quotes, codes).
84
85 %@ This is a package for parsing XML with Prolog, which
86 %@ provides Prolog applications with a simple ``Document Value Model''
87 %@ interface to XML documents. A description of the subset of XML that it
88 %@ supports can be found at:
89 %@ @c [PM] 4.1 link updated 2009-08-14
90 %@ @uref{http://www.binding-time.co.uk/xmlpl.html}
91 %@
92 %@ The package, originally written by Binding Time Ltd., is in the public
93 %@ domain and unsupported. To use the package, enter the query:
94 %@
95 %@ @example
96 %@ @group
97 %@ | ?- use_module(library(xml)).
98 %@ @end group
99 %@ @end example
100 %@
101 %@ The package represents XML documents by the abstract data type
102 %@ @var{document}, which is defined by the following grammar:
103 %@
104 %@ @multitable @columnfractions .2 .3 .5
105 %@ @item @var{document} @tab ::= @code{xml(@var{attributes},@var{content})} @tab @r{@{ well-formed document @}}
106 %@ @item @tab | @code{malformed(@var{attributes},@var{content})} @tab @r{@{ malformed document @}}
107 %@ @item
108 %@ @item @var{attributes} @tab ::= @code{[]}
109 %@ @item @tab | @code{[@var{name}=@var{char-data}|@var{attributes}]}
110 %@ @item
111 %@ @item @var{content} @tab ::= @code{[]}
112 %@ @item @tab | @code{[@var{cterm}|@var{content}]}
113 %@ @item
114 %@ @item @var{cterm} @tab ::= @code{pcdata(@var{char-data})} @tab @r{@{ text @}}
115 %@ @item @tab | @code{comment(@var{char-data})} @tab @r{@{ an XML comment @}}
116 %@ @item @tab | @code{namespace(@var{URI},@var{prefix},@var{element})} @tab @r{@{ a Namespace @}}
117 %@ @item @tab | @code{element(@var{tag},@var{attributes},@var{content})} @tab @r{@{ <@var{tag}>..</@var{tag}> encloses @var{content} or <@var{tag} /> if empty @}}
118 %@ @item @tab | @code{instructions(@var{name},@var{char-data})} @tab @r{@{ A PI <? @var{name} @var{char-data} ?> @}}
119 %@ @item @tab | @code{cdata(@var{char-data})} @tab @r{@{ <![CDATA[@var{char-data}]]> @}}
120 %@ @item @tab | @code{doctype(@var{tag},@var{doctype-id})} @tab @r{@{ DTD <!DOCTYPE .. > @}}
121 %@ @item @tab | @code{unparsed(@var{char-data})} @tab @r{@{ text that hasn't been parsed @}}
122 %@ @item @tab | @code{out_of_context(@var{tag})} @tab @r{@{ @var{tag} is not closed @}}
123 %@ @item
124 %@ @item @var{tag} @tab ::= @dfn{atom} @tab @r{@{ naming an element @}}
125 %@ @item
126 %@ @item @var{name} @tab ::= @dfn{atom} @tab @r{@{ not naming an element @}}
127 %@ @item
128 %@ @item @var{URI} @tab ::= @dfn{atom} @tab @r{@{ giving the URI of a namespace @}}
129 %@ @item
130 %@ @item @var{char-data} @tab ::= @dfn{code-list}
131 %@ @item
132 %@ @item @var{doctype-id} @tab ::= @code{public(@var{char-data},@var{char-data})}
133 %@ @item @tab | @code{public(@var{char-data},@var{dtd-literals})}
134 %@ @item @tab | @code{system(@var{char-data})}
135 %@ @item @tab | @code{system(@var{char-data},@var{dtd-literals})}
136 %@ @item @tab | @code{local}
137 %@ @item @tab | @code{local,@var{dtd-literals}}
138 %@ @item
139 %@ @item @var{dtd-literals} @tab ::= @code{[]}
140 %@ @item @tab | @code{[dtd_literal(@var{char-data})|@var{dtd-literals}]}
141 %@ @end multitable
142 %@
143 %@ The following predicates are exported by the package:
144 %@
145 %@ @table @code
146 %@ @item xml_parse(@var{?Chars}, @var{?Document})
147 %@ @itemx xml_parse(@var{?Chars}, @var{?Document}, @var{+Options})
148 %@ @PLXindex {xml_parse/[2,3] (xml)}
149 %@ Either parses @var{Chars}, a @dfn{code-list}, to @var{Document}, a
150 %@ @var{document}. @var{Chars} is not required to represent strictly
151 %@ well-formed XML.
152 %@ Or generates @var{Chars}, a @dfn{code-list},
153 %@ from @var{Document}, a @var{document}.
154 %@ If @var{Document} is not a valid @var{document} term representing
155 %@ well-formed XML, an exception is raised.
156 %@ In the second usage of the predicate, the only option available is @code{format/1}.
157 %@
158 %@ @var{Options} is a list of zero or more of the following, where
159 %@ @var{Boolean} must be @code{true} or @code{false}:
160 %@
161 %@ @table @code
162 %@ @item format(@var{Boolean})
163 %@ @findex format/1 (xml_parse/3 option)
164 %@ Indent the element content (default @code{true}).
165 %@
166 %@ @item extended_characters(@var{Boolean})
167 %@ @findex extended_characters/1 (xml_parse/3 option)
168 %@ Use the extended character entities for XHTML (default @code{true}).
169 %@
170 %@ @item remove_attribute_prefixes(@var{Boolean})
171 %@ @findex remove_attribute_prefixes/1 (xml_parse/3 option)
172 %@ Remove namespace prefixes from attributes when it's the same as the
173 %@ prefix of the parent element (default @code{false}).
174 %@ @end table
175 %@
176 %@ @item xml_subterm(@var{+Term}, @var{?Subterm})
177 %@ @PLXindex {xml_subterm/2 (xml)}
178 %@ Unifies @var{Subterm} with a sub-term of @var{Term}, a
179 %@ @var{document}. This can be especially useful when trying to test or
180 %@ retrieve a deeply-nested subterm from a document.
181 %@
182 %@ @item xml_pp(@var{+Document})
183 %@ @PLXindex {xml_pp/1 (xml)}
184 %@ ``Pretty prints'' @var{Document}, a @var{document}, on the current
185 %@ output stream.
186 %@ @end table
187
188
189 /* xml_parse(+?Chars, ?+Document[, +Controls]) parses Chars to/from a data
190 * structure of the form xml(<atts>, <content>). <atts> is a list of
191 * <atom>=<string> attributes from the (possibly implicit) XML signature of the
192 * document. <content> is a (possibly empty) list comprising occurrences of :
193 *
194 * pcdata(<string>) : Text
195 * comment(<string>) : An xml comment;
196 * element(<tag>,<atts>,<content>) : <tag>..</tag> encloses <content>
197 * : <tag /> if empty
198 * instructions(<atom>, <string>) : Processing <? <atom> <params> ?>"
199 * cdata( <string> ) : <![CDATA[ <string> ]]>
200 * doctype(<atom>, <doctype id>) : DTD <!DOCTYPE .. >
201 *
202 * The conversions are not completely symmetrical, in that weaker XML is
203 * accepted than can be generated. Specifically, in-bound (Chars -> Document)
204 * does not require strictly well-formed XML. Document is instantiated to the
205 * term malformed(Attributes, Content) if Chars does not represent well-formed
206 * XML. The Content of a malformed/2 structure can contain:
207 *
208 * unparsed( <string> ) : Text which has not been parsed
209 * out_of_context( <tag> ) : <tag> is not closed
210 *
211 * in addition to the standard term types.
212 *
213 * Out-bound (Document -> Chars) parsing _does_ require that Document defines
214 * strictly well-formed XML. If an error is detected a 'domain' exception is
215 * raised.
216 *
217 * The domain exception will attempt to identify the particular sub-term in
218 * error and the message will show a list of its ancestor elements in the form
219 * <tag>{(id)}* where <id> is the value of any attribute _named_ id.
220 *
221 * At this release, the Controls applying to in-bound (Chars -> Document)
222 * parsing are:
223 *
224 * extended_characters(<bool>) : Use the extended character
225 * : entities for XHTML (default true)
226 *
227 * format(<bool>) : Strip layouts when no character data
228 * : appears between elements.
229 * : (default true)
230 *
231 * remove_attribute_prefixes( <bool>) : Remove namespace prefixes from
232 * : attributes when it's the same as the
233 * : prefix of the parent element
234 * : (default false).
235 *
236 * [<bool> is one of 'true' or 'false']
237 *
238 * For out-bound (Document -> Chars) parsing, the only available option is:
239 *
240 * format(<Bool>) : Indent the element content
241 * : (default true)
242 *
243 * Different DCGs for input and output are used because input parsing is
244 * more flexible than output parsing. Errors in input are recorded as part
245 * of the data structure. Output parsing throws an exception if the document
246 * is not well-formed, diagnosis tries to identify the specific culprit term.
247 */
248 xml_parse( Chars, Document ) :-
249 xml_parse( Chars, Document, [] ).
250
251 xml_parse( Chars, Document, Controls ) :-
252 Goal = xml_parse(Chars,Document,Controls),
253 % format, extended_characters, remove_attribute_prefixes
254 must_be(Controls, proper_list, Goal, 3),
255 xml_options(Controls, opt(true,true,false), Options, Goal, 3),
256 ( ground( Chars ) ->
257 xml_to_document( Options, Chars, Document )
258 ; ground(Document) ->
259 document_to_xml( Options, Document, Chars )
260 ; illarg(var, Goal, 0)
261 ), !. /*MC*/
262
263 xml_options([], Opt, Opt, _, _) :- !.
264 xml_options([X|L], Opt0, Opt, Goal, ArgNo) :- !,
265 ( callable(X),
266 xml_option(X, Opt0, Opt1) -> true
267 ; illarg(domain(term,xml_parse_option), Goal, ArgNo, X)
268 ),
269 xml_options(L, Opt1, Opt, Goal, ArgNo).
270
271 xml_option(format(X), opt(_,B,C), opt(X,B,C)) :-
272 bool_option(X).
273 xml_option(extended_characters(X), opt(A,_,C), opt(A,X,C)) :-
274 bool_option(X).
275 xml_option(remove_attribute_prefixes(X), opt(A,B,_), opt(A,B,X)) :-
276 bool_option(X).
277
278 bool_option(X) :- var(X), !, fail.
279 bool_option(false).
280 bool_option(true).
281
282 document_to_xml( opt(Format,_,_), Document, Chars ) :-
283 ? ( document_generation(Format, Document, Chars0, [] ) ->
284 Chars = Chars0
285 ;
286 xml_fault( Document, [], Culprit, Path, Message ),
287 throw(xml_parse(Message,Document,Culprit,Path))
288 ).
289
290 :- multifile user:generate_message_hook/3.
291 user:generate_message_hook(xml_parse(Message,Document,Culprit,Path)) --> !,
292 ['XML Parse: ~a in ~q'-[Message,Document],nl,
293 'Culprit: ~q'-[Culprit],nl],
294 ( {Path==[]} -> []
295 ; ['Path: ~s'-[Path],nl]
296 ).
297
298
299 /* xml_subterm( +XMLTerm, ?Subterm ) unifies Subterm with a sub-term of Term.
300 * Note that XMLTerm is a sub-term of itself.
301 */
302 xml_subterm( Term, Term ).
303 xml_subterm( xml(_Attributes, Content), Term ) :-
304 xml_subterm( Content, Term ).
305 xml_subterm( [H|T], Term ) :-
306 ( xml_subterm( H, Term )
307 ; xml_subterm( T, Term )
308 ).
309 xml_subterm( element(_Name,_Attributes,Content), Term ) :-
310 xml_subterm( Content, Term ).
311 xml_subterm( namespace(_URI,_Prefix,Content), Term ) :-
312 xml_subterm( Content, Term ).
313
314 /* xml is intended to be a rather modular module: it should be easy to
315 * build a program that can output XML, but not read it, or vice versa.
316 * Similarly, you may be happy to dispense with diagnosis once you are
317 * sure that your code will only try to make valid calls to xml_parse/2.
318 *
319 * It is intended that the code should be very portable too. Clearly,
320 * some small changes will be needed between platforms, but these should
321 * be limited to xml_utilities. xml_utilities contains most of the shared
322 * code and most of the potentially non-portable code.
323 */
324 /* xml_acquisition.pl : XML -> Document translation.
325 *
326 * $Revision$
327 *
328 */
329
330 /* xml_to_document( +Options, +XML, ?Document ) translates the list of
331 * character codes XML into the Prolog term Document. Options is
332 * the parsed options list.
333 */
334 xml_to_document( Options, XML, Document ) :-
335 initial_context( Options, Context ),
336 debug_format(19,'Parsing XML:~n~s~n',[XML]), %PATCH: LEUSCHEL
337 ( xml_declaration( Attributes0, XML, XML1 ) ->
338 Attributes = Attributes0
339 ;
340 format('No <xml> header declaration~n',[]), %PATCH LEUSCHEL
341 XML1 = XML,
342 Attributes = []
343 ),
344 xml_to_document( XML1, Context, Terms, [], WellFormed ),
345 xml_to_document1( WellFormed, Attributes, Terms, Document ).
346
347 xml_to_document1( true, Attributes, Terms, xml(Attributes, Terms) ).
348 xml_to_document1( false, Attributes, Terms, malformed(Attributes, Terms) ).
349
350 % unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed )
351 unparsed( Unparsed, _Context, [unparsed(Unparsed)], [], false ).
352
353 xml_declaration( Attributes ) -->
354 spaces,
355 "<?",
356 nmtoken( xml ),
357 xml_declaration_attributes( Attributes ),
358 spaces,
359 "?>".
360
361 xml_to_document( [], Context, Terms, [], WF ) :-
362 close_context( Context, Terms, WF ).
363 xml_to_document( [Char|Chars], Context, Terms, Residue, WF ) :-
364 ( Char =:= "<" ->
365 xml_markup_structure( Chars, Context, Terms, Residue, WF )
366 ; Char =:= "&" ->
367 entity_reference( Chars, Context, Terms, Residue, WF )
368 ; Char =< " ",
369 \+ space_preserve( Context ) ->
370 layouts( Chars, Context, [Char|T], T, Terms, Residue, WF )
371 ; void_context( Context ) ->
372 unparsed( [Char|Chars], Context, Terms, Residue, WF )
373 ;
374 Terms = [pcdata([Char|Chars1])|Terms1],
375 acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF )
376 ).
377
378 layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :-
379 close_context( Context, Terms, WF ).
380 layouts( [Char|Chars], Context, Plus, Minus, Terms, Residue, WF ) :-
381 ( Char =:= "<" ->
382 xml_markup_structure( Chars, Context, Terms, Residue, WF )
383 ; Char =:= "&" ->
384 entity_reference( Chars, Context, Terms, Residue, WF )
385 ; Char =< " " ->
386 Minus = [Char|Minus1],
387 layouts( Chars, Context, Plus, Minus1, Terms, Residue, WF )
388 ; void_context( Context ) ->
389 unparsed( [Char|Chars], Context, Terms, Residue, WF )
390 ;
391 Terms = [pcdata(Plus)|Terms1],
392 Minus = [Char|Chars1],
393 context_update( space_preserve, Context, true, Context1 ),
394 acquire_pcdata( Chars, Context1, Chars1, Terms1, Residue, WF )
395 ).
396
397 acquire_pcdata( [], Context, [], Terms, [], WF ) :-
398 close_context( Context, Terms, WF ).
399 acquire_pcdata( [Char|Chars], Context, Chars1, Terms, Residue, WF ) :-
400 ( Char =:= "<" ->
401 Chars1 = [],
402 xml_markup_structure( Chars, Context, Terms, Residue, WF )
403 ; Char =:= "&" ->
404 reference_in_pcdata( Chars, Context, Chars1, Terms, Residue, WF )
405 ;
406 Chars1 = [Char|Chars2],
407 acquire_pcdata( Chars, Context, Chars2, Terms, Residue, WF )
408 ).
409
410 xml_markup_structure( [], Context, Terms, Residue, WF ) :-
411 unparsed( "<", Context, Terms, Residue, WF ).
412 xml_markup_structure( Chars, Context, Terms, Residue, WF ) :-
413 Chars = [Char|Chars1],
414 ( Char =:= "/" ->
415 closing_tag( Context, Chars1, Terms, Residue, WF )
416 ; Char =:= "?" ->
417 pi_acquisition( Chars1, Context, Terms, Residue, WF )
418 ; Char =:= "!" ->
419 declaration_acquisition( Chars1, Context, Terms, Residue, WF )
420 ; open_tag(Tag,Context,Attributes,Type, Chars, Chars2 ) ->
421 push_tag( Tag, Chars2, Context, Attributes, Type, Terms, Residue, WF )
422 ;
423 unparsed( [0'<|Chars], Context, Terms, Residue, WF ) %'
424 ).
425
426 push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :-
427 new_element(Tag, Chars, Context, Attributes, Type, Term, Rest, WF0),
428 push_tag1( WF0, Context, Term, Rest, Terms, Residue, WF ).
429
430 push_tag1( true, Context, Term, Chars, [Term|Terms], Residue, WF ) :-
431 xml_to_document( Chars, Context, Terms, Residue, WF ).
432 push_tag1( false, _Context, Term, Chars, [Term], Chars, false ).
433
434 new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :-
435 namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
436 ( append( NSChars, [0':|TagChars1], TagChars ), %'
437 specific_namespace( NSChars, Context1, SpecificNamespace ) ->
438 Namespace0 = SpecificNamespace
439 ;
440 NSChars = "",
441 TagChars1 = TagChars,
442 default_namespace( Context1, Namespace0 )
443 ),
444 current_namespace( Context1, CurrentNamespace ),
445 ( Namespace0 == CurrentNamespace ->
446 Term = element(Tag, Attributes, Contents),
447 Context2 = Context1
448 ;
449 Term = namespace( Namespace0, NSChars,
450 element(Tag, Attributes, Contents)
451 ),
452 context_update( current_namespace, Context1, Namespace0, Context2 )
453 ),
454 input_attributes( Attributes1, Context2, Attributes ),
455 atom_codes( Tag, TagChars1 ),
456 close_tag( Type, Chars, Context2, Contents, Residue, WF ).
457
458 close_tag( empty, Residue, _Context, [], Residue, true ).
459 close_tag( push(Tag), Chars, Context0, Contents, Residue, WF ) :-
460 context_update( element, Context0, Tag, Context1 ),
461 xml_to_document( Chars, Context1, Contents, Residue, WF ).
462
463 pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
464 ( inline_instruction(Target, Processing, Chars, Rest ),
465 Target \== xml ->
466 Terms = [instructions(Target, Processing)|Terms1],
467 xml_to_document( Rest, Context, Terms1, Residue, WellFormed )
468 ;
469 unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed )
470 ).
471
472 declaration_acquisition( Chars, Context, Terms, Residue, WF ) :-
473 ( declaration_type( Chars, Type, Chars1 ),
474 declaration_parse( Type, Context, Term, Context1, Chars1, Rest ) ->
475 Terms = [Term|Terms1],
476 xml_to_document( Rest, Context1, Terms1, Residue, WF )
477 ;
478 unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WF )
479 ).
480
481 open_tag( Tag, Namespaces, Attributes, Termination ) -->
482 nmtoken_chars( Tag ),
483 attributes( Attributes, [], Namespaces ),
484 spaces,
485 ? (open_tag_terminator( Tag, Termination ) -> []
486 ; {atom_codes(ATag,Tag), add_xml_warning('XML tag not properly terminated with >: ',ATag),fail}
487 ).
488
489 open_tag_terminator( Tag, push(Tag) ) -->
490 ">".
491 open_tag_terminator( _Tag, empty ) -->
492 "/>".
493
494 declaration_parse( comment, Namespaces, comment(Comment), Namespaces ) -->
495 comment(Comment).
496 declaration_parse( cdata, Namespaces, cdata(CData), Namespaces ) -->
497 cdata( CData ).
498 declaration_parse( doctype, Namespaces0, doctype(Name, Names), Namespaces ) -->
499 doctype( Name, Names, Namespaces0, Namespaces ),
500 spaces,
501 ">".
502
503 inline_instruction( Target, Processing, Plus, Minus ) :-
504 nmtoken(Target, Plus, Mid0 ),
505 spaces( Mid0, Mid1 ),
506 append( Processing, [0'?,0'>|Minus], Mid1 ),
507 !.
508
509 entity_reference_name( Reference ) -->
510 nmtoken_chars( Reference ),
511 ";".
512
513 declaration_type( [Char1,Char2|Chars1], Class, Rest ) :-
514 Chars = [Char1,Char2|Chars1],
515 ( declaration_type1( Char1, Char2, Chars1, Class0, Residue ) ->
516 Class = Class0,
517 Rest = Residue
518 ;
519 Class = generic,
520 Rest = Chars
521 ).
522
523 declaration_type1( 0'-, 0'-, Chars, comment, Chars ).
524 declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :-
525 append( "DATA[", Residue, Chars ).
526 declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :-
527 append( "CTYPE", Residue, Chars ).
528
529 closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
530 ( closing_tag_name( Tag, Chars, Rest ),
531 current_tag( Context, Tag ) ->
532 Terms = [],
533 Residue = Rest,
534 WellFormed = true
535 ;
536 unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed )
537 ).
538
539 closing_tag_name( Tag ) -->
540 nmtoken_chars( Tag ),
541 spaces,
542 ">".
543
544 entity_reference( Chars, Context, Terms, Residue, WF ) :-
545 ( standard_character_entity( Char, Chars, Rest ) ->
546 Terms = [pcdata([Char|Chars1])|Terms1],
547 acquire_pcdata( Rest, Context, Chars1, Terms1, Residue, WF )
548 ; entity_reference_name( Reference, Chars, Rest ),
549 defined_entity( Reference, Context, String ) ->
550 append( String, Rest, Full ),
551 xml_to_document( Full, Context, Terms, Residue, WF )
552 ;
553 unparsed( [0'&|Chars], Context, Terms, Residue, WF ) %'
554 ).
555
556 reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :-
557 ( standard_character_entity(Char, Chars0, Rest ) ->
558 Chars1 = [Char|Chars2],
559 acquire_pcdata( Rest, Context, Chars2, Terms, Residue, WF )
560 ; entity_reference_name(Reference, Chars0, Rest ),
561 defined_entity( Reference, Context, String ) ->
562 append( String, Rest, Full ),
563 acquire_pcdata( Full, Context, Chars1, Terms, Residue, WF )
564 ;
565 Chars1 = [],
566 unparsed( [0'&|Chars0], Context, Terms, Residue, WF ) %'
567 ).
568
569 namespace_attributes( [], Context, Context, [] ).
570 namespace_attributes( Attributes0, Context0, Context, Attributes ) :-
571 Attributes0 = [_|_],
572 append( "xmlns:", Unqualified, QualifiedNameChars ),
573 ? ( select( "xmlns"=Value, Attributes0, Attributes1 ) ->
574 atom_codes( URI, Value ),
575 context_update( default_namespace, Context0, URI, Context1 ),
576 namespace_attributes( Attributes1, Context1, Context, Attributes )
577 ? ; select( QualifiedNameChars=Value, Attributes0, Attributes1 ) ->
578 Attributes = [QualifiedNameChars=Value|Attributes2],
579 atom_codes( URI, Value ),
580 context_update( ns_prefix(Unqualified), Context0, URI, Context1 ),
581 namespace_attributes( Attributes1, Context1, Context, Attributes2 )
582 ; member( "xml:space"="preserve", Attributes0 ) ->
583 Attributes = Attributes0,
584 context_update( space_preserve, Context0, true, Context )
585 ;
586 Context = Context0,
587 Attributes = Attributes0
588 ).
589
590 input_attributes( [], _Context, [] ).
591 input_attributes( [NameChars=Value|Attributes0], Context,
592 [Name=Value|Attributes] ) :-
593 ( remove_attribute_prefixes( Context ),
594 append( NSChars, [0':|NameChars1], NameChars ), %'
595 NSChars \== "xmlns",
596 specific_namespace( NSChars, Context, Namespace ),
597 current_namespace( Context, Namespace ) ->
598 atom_codes( Name, NameChars1 )
599 ;
600 atom_codes( Name, NameChars )
601 ),
602 input_attributes( Attributes0, Context, Attributes ).
603
604 attributes( Attributes, Seen, Namespaces ) -->
605 attributes_count_lf(0, Attributes, Seen, Namespaces ). % PATCH LEUSCHEL: add count parameter
606
607 attributes_count_lf(Count0, [Name=Value|Attributes], Seen, Namespaces ) -->
608 spaces_count_linebreaks(Count0,C1),
609 nmtoken_chars( Name ),
610 {\+ member(Name, Seen)},
611 spaces_count_linebreaks(C1,C2),
612 expect_tokens("=","attributes"),
613 spaces_count_linebreaks(C2,C3),
614 attribute_value( CountValue, Value, Namespaces, Name ), % Value does not contain the line breaks anymore, so we have to count them in attribute_layouts.
615 !,
616 {C4 is C3+CountValue},
617 attributes_count_lf( C4, Attributes, [Name|Seen], Namespaces ).
618 attributes_count_lf( 0, [], _Seen, _Namespaces ) --> !, "". % ,{print(no_linefeeds),nl}.
619 attributes_count_lf( Count, ["$attribute_linefeeds"=Count], _Seen, _Namespaces ) --> "". %, {print(linefeeds(Count)),nl}.
620
621 spaces_count_linebreaks(C,C, [], [] ).
622 spaces_count_linebreaks(Count,ResCount, [Char|Chars0], Chars1 ) :-
623 ( Char=10 ->
624 Count1 is Count+1, spaces_count_linebreaks(Count1, ResCount, Chars0, Chars1 )
625 ; Char =< 32 ->
626 spaces_count_linebreaks(Count,ResCount, Chars0, Chars1 )
627 ;
628 Count=ResCount, Chars1 = [Char|Chars0]
629 ).
630 % END PATCH
631
632 xml_declaration_attributes( Res ) -->
633 spaces1,!,
634 xml_declaration_attributes( Res ).
635 xml_declaration_attributes( Res ) -->
636 nmtoken( Name ), !,
637 spaces,
638 expect_tokens("=","declaration attribute"),
639 spaces,
640 xml_string( Value ),
641 ? {xml_declaration_attribute_valid(Name, Value)
642 -> Res = [Name-Value|Attributes] ; Res = [Attributes]}, % PATCH LEUSCHEL: also continue with invalid declarations
643 xml_declaration_attributes( Attributes ),
644 spaces.
645 xml_declaration_attributes( [] ) --> "".
646
647 doctype( Name, External, Namespaces0, Namespaces1 ) -->
648 spaces,
649 nmtoken( Name ),
650 spaces,
651 doctype_id( External0 ),
652 spaces,
653 doctype1( Namespaces0, Literals, Namespaces1 ),
654 {doctype_extension(Literals, External0, External)}.
655
656 doctype_extension( [], External, External ).
657 doctype_extension( [Literal|Literals], External0, External ) :-
658 extended_doctype( External0, [Literal|Literals], External ).
659
660 extended_doctype( system(URL), Literals, system(URL,Literals) ).
661 extended_doctype( public(URN,URL), Literals, public(URN,URL,Literals) ).
662 extended_doctype( local, Literals, local(Literals) ).
663
664 doctype1( Namespaces0, Literals, Namespaces1 ) -->
665 "[",
666 !,
667 dtd( Namespaces0, Literals, Namespaces1 ),
668 expect_tokens("]","doctype").
669 doctype1( Namespaces, [], Namespaces ) --> "".
670
671 doctype_id( system(URL) ) -->
672 "SYSTEM",
673 spaces,
674 uri( URL ).
675 doctype_id( public(URN,URL) ) -->
676 "PUBLIC",
677 spaces,
678 uri( URN ),
679 spaces,
680 uri( URL ).
681 doctype_id( local ) --> "".
682
683 dtd( Namespaces0, Literals, Namespaces1 ) -->
684 spaces,
685 "<!ENTITY",
686 !,
687 spaces,
688 nmtoken_chars( Name ),
689 spaces,
690 quote( Quote ),
691 entity_value( Quote, Namespaces0, String ),
692 spaces,
693 expect_tokens(">","ENTITY"),
694 {\+ character_entity( Name, _StandardChar ),
695 % Don't allow < "e; etc. to be updated
696 context_update( entity(Name), Namespaces0, String, Namespaces2 )
697 },
698 dtd( Namespaces2, Literals, Namespaces1 ).
699 dtd( Namespaces0, Literals, Namespaces1 ) -->
700 spaces,
701 "<!--",
702 !,
703 dtd_comment,
704 expect_tokens(">","comment"),
705 dtd( Namespaces0, Literals, Namespaces1 ).
706 dtd( Namespaces0, [dtd_literal(Literal)|Literals], Namespaces1 ) -->
707 spaces,
708 "<!",
709 !,
710 dtd_literal( Literal ),
711 dtd( Namespaces0, Literals, Namespaces1 ).
712 dtd( Namespaces, [], Namespaces ) --> spaces.
713
714 dtd_literal( [] ) --> ">", !.
715 dtd_literal( Chars ) -->
716 "--",
717 !,
718 dtd_comment,
719 dtd_literal( Chars ).
720 dtd_literal( [Char|Chars] ) -->
721 [Char],
722 dtd_literal( Chars ).
723
724 dtd_comment( Plus, Minus ) :-
725 append( _Chars, [0'-,0'-|Minus], Plus ),
726 !.
727
728 entity_value( Quote, Namespaces, String, [Char|Plus], Minus ) :-
729 ( Char == Quote ->
730 String = [],
731 Minus = Plus
732 ; Char =:= "&" ->
733 reference_in_entity( Namespaces, Quote, String, Plus, Minus )
734 ;
735 String = [Char|String1],
736 entity_value( Quote, Namespaces, String1, Plus, Minus )
737 ).
738
739 attribute_value( NL, String, Namespaces, Name ) --> % PATCH : add counter NL for line breaks in attribute values
740 ? (quote( Quote ) -> []
741 ; {atom_codes(AN,Name), add_xml_warning('Expecting starting quotes for value of XML attribute: ',AN),fail}),
742 attribute_leading_layouts( NL, Quote, Namespaces, String ).
743
744 attribute_leading_layouts( 0, _Quote, _Namespace, [], [], [] ).
745 attribute_leading_layouts( NL, Quote, Namespaces, String, [Char|Plus], Minus ) :-
746 ( Char == Quote ->
747 String = [],
748 Minus = Plus,
749 NL = 0
750 ; Char =:= "&" ->
751 reference_in_layout( NL, Namespaces, Quote, String, Plus, Minus ),
752 NL = 0
753 ; Char =:= 10 -> % count linebreaks for correct XML line numbers, but don't add to attribute value (according to https://www.w3.org/TR/1998/REC-xml-19980210#AVNormalize)
754 attribute_layouts( OldL, Quote, Namespaces, false, String, Plus, Minus ),
755 NL is OldL+1
756 ; Char > 32, Char \== 160 ->
757 String = [Char|String1],
758 attribute_layouts( NL, Quote, Namespaces, false, String1, Plus, Minus )
759 ;
760 attribute_leading_layouts( NL, Quote, Namespaces, String, Plus, Minus )
761 ).
762
763 attribute_layouts( 0, _Quote, _Namespaces, _Layout, [], [], [] ).
764 attribute_layouts( NL, Quote, Namespaces, Layout, String, [Char|Plus], Minus ) :-
765 ( Char == Quote ->
766 String = [],
767 Minus = Plus,
768 NL = 0
769 ; Char =:= "&" ->
770 reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ),
771 NL = 0
772 ; Char =:= 10 -> % count linebreaks, same as above
773 attribute_layouts( OldL, Quote, Namespaces, false, String, Plus, Minus ),
774 NL is OldL+1
775 ; Char > 32, Char \== 160 ->
776 ( Layout == true ->
777 String = [0' ,Char|String1] %'
778 ;
779 String = [Char|String1]
780 ),
781 attribute_layouts( NL, Quote, Namespaces, false, String1, Plus, Minus )
782 ;
783 attribute_layouts( NL, Quote, Namespaces, true, String, Plus, Minus )
784 ). % PATCH end
785
786 reference_in_layout( NS, Quote, String, Plus, Minus ) :-
787 ( standard_character_entity( Char, Plus, Mid ) ->
788 String = [Char|String1],
789 attribute_layouts( Quote, NS, false, String1, Mid, Minus )
790 ; entity_reference_name( Name, Plus, Suffix ),
791 defined_entity( Name, NS, Text ) ->
792 append( Text, Suffix, Mid ),
793 attribute_leading_layouts( Quote, NS, String, Mid, Minus )
794 ; % Just & is okay in a value
795 String = [0'&|String1], %'
796 attribute_layouts( Quote, NS, false, String1, Plus, Minus )
797 ).
798
799 reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :-
800 ( standard_character_entity( Char, Plus, Mid ) ->
801 ( Layout == true ->
802 String = [0' ,Char|String1] %'
803 ;
804 String = [Char|String1]
805 ),
806 Layout1 = false
807 ; entity_reference_name( Name, Plus, Suffix ),
808 defined_entity( Name, Namespaces, Text ) ->
809 String = String1,
810 append( Text, Suffix, Mid ),
811 Layout1 = Layout
812 ; % Just & is okay in a value
813 Mid = Plus,
814 String = [0'&|String1], %'
815 Layout1 = false
816 ),
817 attribute_layouts( Quote, Namespaces, Layout1, String1, Mid, Minus ).
818
819 /* References are resolved backwards in Entity defintions so that
820 * circularity is avoided.
821 */
822 reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
823 ( standard_character_entity( _SomeChar, Plus, _Rest ) ->
824 String = [0'&|String1], % ' Character entities are unparsed
825 Mid = Plus
826 ; entity_reference_name( Name, Plus, Suffix ),
827 defined_entity( Name, Namespaces, Text ) ->
828 String = String1,
829 append( Text, Suffix, Mid )
830 ),
831 entity_value( Quote, Namespaces, String1, Mid, Minus ).
832
833 standard_character_entity( Char ) -->
834 "#x", !, hex_character_reference( Char ), ";".
835 standard_character_entity( Char ) -->
836 "#", !, digit( Digit ), digits( Digits ), ";",
837 {number_codes( Char, [Digit|Digits])}.
838 standard_character_entity( C ) -->
839 chars( String ),
840 ";",
841 !,
842 {character_entity(String, C)}. % quot, amp, lt, gt, apos
843
844 uri( URI ) -->
845 quote( Quote ),
846 uri1( Quote, URI ).
847
848 uri1( Quote, [] ) -->
849 quote( Quote ),
850 !.
851 uri1( Quote, [Char|Chars] ) -->
852 [Char],
853 uri1( Quote, Chars ).
854
855 comment( Chars, Plus, Minus ) :-
856 append( Chars, [0'-,0'-,0'>|Minus], Plus ), %'
857 !.
858
859 cdata( Chars, Plus, Minus ) :-
860 append( Chars, [0'],0'],0'>|Minus], Plus ), %'
861 !.
862 % Syntax Components
863
864 hex_character_reference( Code ) -->
865 hex_character_reference1( 0, Code ).
866
867 hex_character_reference1( Current, Code ) -->
868 hex_digit_char( Value ),
869 !,
870 {New is (Current << 4) + Value},
871 hex_character_reference1( New, Code ).
872 hex_character_reference1( Code, Code ) --> "".
873
874 hex_digit_char( 0 ) --> "0".
875 hex_digit_char( 1 ) --> "1".
876 hex_digit_char( 2 ) --> "2".
877 hex_digit_char( 3 ) --> "3".
878 hex_digit_char( 4 ) --> "4".
879 hex_digit_char( 5 ) --> "5".
880 hex_digit_char( 6 ) --> "6".
881 hex_digit_char( 7 ) --> "7".
882 hex_digit_char( 8 ) --> "8".
883 hex_digit_char( 9 ) --> "9".
884 hex_digit_char( 10 ) --> "A".
885 hex_digit_char( 11 ) --> "B".
886 hex_digit_char( 12 ) --> "C".
887 hex_digit_char( 13 ) --> "D".
888 hex_digit_char( 14 ) --> "E".
889 hex_digit_char( 15 ) --> "F".
890 hex_digit_char( 10 ) --> "a".
891 hex_digit_char( 11 ) --> "b".
892 hex_digit_char( 12 ) --> "c".
893 hex_digit_char( 13 ) --> "d".
894 hex_digit_char( 14 ) --> "e".
895 hex_digit_char( 15 ) --> "f".
896
897 quote( 0'" ) --> %'
898 """".
899 quote( 0'\' ) -->
900 "'".
901
902 spaces( [], [] ).
903 spaces( [Char|Chars0], Chars1 ) :-
904 ( Char =< 32 ->
905 spaces( Chars0, Chars1 )
906 ;
907 Chars1 = [Char|Chars0]
908 ).
909
910 spaces1( [Char|Chars0], Chars1 ) :-
911 ( Char =< 32 ->
912 spaces( Chars0, Chars1 )
913 ).
914
915 nmtoken( Name ) -->
916 nmtoken_chars( Chars ),
917 {atom_codes(Name, Chars)}.
918
919 nmtoken_chars( [Char|Chars] ) -->
920 [Char],
921 {nmtoken_first( Char )},
922 nmtoken_chars_tail( Chars ).
923
924 nmtoken_chars_tail( [Char|Chars] ) -->
925 [Char],
926 {nmtoken_char(Char)},
927 !,
928 nmtoken_chars_tail( Chars ).
929 nmtoken_chars_tail([]) --> "".
930
931 nmtoken_first( 0': ).
932 nmtoken_first( 0'_ ).
933 nmtoken_first( Char ) :-
934 alphabet( Char ).
935
936 nmtoken_char( 0'a ).
937 nmtoken_char( 0'b ).
938 nmtoken_char( 0'c ).
939 nmtoken_char( 0'd ).
940 nmtoken_char( 0'e ).
941 nmtoken_char( 0'f ).
942 nmtoken_char( 0'g ).
943 nmtoken_char( 0'h ).
944 nmtoken_char( 0'i ).
945 nmtoken_char( 0'j ).
946 nmtoken_char( 0'k ).
947 nmtoken_char( 0'l ).
948 nmtoken_char( 0'm ).
949 nmtoken_char( 0'n ).
950 nmtoken_char( 0'o ).
951 nmtoken_char( 0'p ).
952 nmtoken_char( 0'q ).
953 nmtoken_char( 0'r ).
954 nmtoken_char( 0's ).
955 nmtoken_char( 0't ).
956 nmtoken_char( 0'u ).
957 nmtoken_char( 0'v ).
958 nmtoken_char( 0'w ).
959 nmtoken_char( 0'x ).
960 nmtoken_char( 0'y ).
961 nmtoken_char( 0'z ).
962 nmtoken_char( 0'A ).
963 nmtoken_char( 0'B ).
964 nmtoken_char( 0'C ).
965 nmtoken_char( 0'D ).
966 nmtoken_char( 0'E ).
967 nmtoken_char( 0'F ).
968 nmtoken_char( 0'G ).
969 nmtoken_char( 0'H ).
970 nmtoken_char( 0'I ).
971 nmtoken_char( 0'J ).
972 nmtoken_char( 0'K ).
973 nmtoken_char( 0'L ).
974 nmtoken_char( 0'M ).
975 nmtoken_char( 0'N ).
976 nmtoken_char( 0'O ).
977 nmtoken_char( 0'P ).
978 nmtoken_char( 0'Q ).
979 nmtoken_char( 0'R ).
980 nmtoken_char( 0'S ).
981 nmtoken_char( 0'T ).
982 nmtoken_char( 0'U ).
983 nmtoken_char( 0'V ).
984 nmtoken_char( 0'W ).
985 nmtoken_char( 0'X ).
986 nmtoken_char( 0'Y ).
987 nmtoken_char( 0'Z ).
988 nmtoken_char( 0'0 ).
989 nmtoken_char( 0'1 ).
990 nmtoken_char( 0'2 ).
991 nmtoken_char( 0'3 ).
992 nmtoken_char( 0'4 ).
993 nmtoken_char( 0'5 ).
994 nmtoken_char( 0'6 ).
995 nmtoken_char( 0'7 ).
996 nmtoken_char( 0'8 ).
997 nmtoken_char( 0'9 ).
998 nmtoken_char( 0'. ).
999 nmtoken_char( 0'- ).
1000 nmtoken_char( 0'_ ).
1001 nmtoken_char( 0': ).
1002
1003 xml_string( String ) -->
1004 ? quote( Quote ),!,
1005 xml_string1( Quote, String ).
1006
1007 xml_string1( Quote, [] ) -->
1008 quote( Quote ),
1009 !.
1010 xml_string1( Quote, [Char|Chars] ) -->
1011 [Char],
1012 xml_string1( Quote, Chars ).
1013
1014 alphabet( 0'a ).
1015 alphabet( 0'b ).
1016 alphabet( 0'c ).
1017 alphabet( 0'd ).
1018 alphabet( 0'e ).
1019 alphabet( 0'f ).
1020 alphabet( 0'g ).
1021 alphabet( 0'h ).
1022 alphabet( 0'i ).
1023 alphabet( 0'j ).
1024 alphabet( 0'k ).
1025 alphabet( 0'l ).
1026 alphabet( 0'm ).
1027 alphabet( 0'n ).
1028 alphabet( 0'o ).
1029 alphabet( 0'p ).
1030 alphabet( 0'q ).
1031 alphabet( 0'r ).
1032 alphabet( 0's ).
1033 alphabet( 0't ).
1034 alphabet( 0'u ).
1035 alphabet( 0'v ).
1036 alphabet( 0'w ).
1037 alphabet( 0'x ).
1038 alphabet( 0'y ).
1039 alphabet( 0'z ).
1040 alphabet( 0'A ).
1041 alphabet( 0'B ).
1042 alphabet( 0'C ).
1043 alphabet( 0'D ).
1044 alphabet( 0'E ).
1045 alphabet( 0'F ).
1046 alphabet( 0'G ).
1047 alphabet( 0'H ).
1048 alphabet( 0'I ).
1049 alphabet( 0'J ).
1050 alphabet( 0'K ).
1051 alphabet( 0'L ).
1052 alphabet( 0'M ).
1053 alphabet( 0'N ).
1054 alphabet( 0'O ).
1055 alphabet( 0'P ).
1056 alphabet( 0'Q ).
1057 alphabet( 0'R ).
1058 alphabet( 0'S ).
1059 alphabet( 0'T ).
1060 alphabet( 0'U ).
1061 alphabet( 0'V ).
1062 alphabet( 0'W ).
1063 alphabet( 0'X ).
1064 alphabet( 0'Y ).
1065 alphabet( 0'Z ).
1066
1067 digit( C ) --> [C], {digit_table( C )}.
1068
1069 digit_table( 0'0 ).
1070 digit_table( 0'1 ).
1071 digit_table( 0'2 ).
1072 digit_table( 0'3 ).
1073 digit_table( 0'4 ).
1074 digit_table( 0'5 ).
1075 digit_table( 0'6 ).
1076 digit_table( 0'7 ).
1077 digit_table( 0'8 ).
1078 digit_table( 0'9 ).
1079
1080 digits( [Digit|Digits] ) -->
1081 digit( Digit ),
1082 digits( Digits ).
1083 digits( [] ) --> [].
1084
1085 character_entity( "quot", 0'" ). %'
1086 character_entity( "amp", 0'& ). %'
1087 character_entity( "lt", 0'< ). %'
1088 character_entity( "gt", 0'> ). %'
1089 character_entity( "apos", 0'\' ).
1090 /* xml_diagnosis.pl : XML exception diagnosis.
1091 *
1092 * $Revision$
1093 */
1094
1095 /* xml_fault( +Term, +Indentation, ?SubTerm, ?Path, ?Message ) identifies SubTerm
1096 * as a sub-term of Term which cannot be serialized after Indentation.
1097 * Message is an atom naming the type of error; Path is a string encoding a
1098 * list of SubTerm's ancestor elements in the form <tag>{(id)}* where <tag> is the
1099 * element tag and <id> is the value of any attribute _named_ id.
1100 */
1101 xml_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1102 var( Term ).
1103 xml_fault( xml(Attributes,_Content), _Indent, Term, [], Message ) :-
1104 member( Attribute, Attributes ),
1105 attribute_fault( Attribute, Term, Message ).
1106 xml_fault( xml(_Attributes,Content), Indent, Culprit, Path, Message ) :-
1107 xml_content_fault( Content, Indent, Culprit, Path, Message ).
1108 xml_fault( Term, _Indent, Term, [], 'Illegal Term' ).
1109
1110 xml_content_fault( Term, _Indent, Term, [], 'Illegal Variable' ) :-
1111 var( Term ).
1112 xml_content_fault( pcdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1113 \+ is_chars( Chars ).
1114 xml_content_fault( cdata(Chars), _Indent, Chars, [], 'Invalid Character Data' ) :-
1115 \+ is_chars( Chars ).
1116 xml_content_fault( [H|_T], Indent, Culprit, Path, Message ) :-
1117 xml_content_fault( H, Indent, Culprit, Path, Message ).
1118 xml_content_fault( [_H|T], Indent, Culprit, Path, Message ) :-
1119 xml_content_fault( T, Indent, Culprit, Path, Message ).
1120 xml_content_fault( namespace(_URI,_Prefix,Element), Indent, Culprit, Path, Message ) :-
1121 element_fault( Element, [0' |Indent], Culprit, Path, Message ).
1122 xml_content_fault( Element, Indent, Culprit, Path, Message ) :-
1123 element_fault( Element, [0' |Indent], Culprit, Path, Message ).
1124 xml_content_fault( Term, Indent, Term, [], 'Illegal Term' ) :-
1125 \+ generation(Term, "", false, Indent, _Format, _Plus, _Minus ).
1126
1127 element_fault( element(Tag, _Attributes, _Contents), _Indent, Tag, [], 'Tag must be an atom' ) :-
1128 \+ atom( Tag ).
1129 element_fault( element(Tag, Attributes, _Contents), _Indent, Tag, [], 'Attributes must be instantiated' ) :-
1130 var( Attributes ).
1131 element_fault( element(Tag, Attributes, _Contents), _Indent, Faulty, Path, Message ) :-
1132 fault_path( Tag, Attributes, Path, [] ),
1133 member( Attribute, Attributes ),
1134 attribute_fault( Attribute, Faulty, Message ).
1135 element_fault( element(Tag, Attributes, Contents), Indent, Culprit, Path, Message ) :-
1136 fault_path( Tag, Attributes, Path, Path1 ),
1137 xml_content_fault( Contents, Indent, Culprit, Path1, Message ).
1138
1139 attribute_fault( Attribute, Attribute, 'Illegal Variable' ) :-
1140 var( Attribute ).
1141 attribute_fault( Name=Value, Name=Value, 'Attribute Name must be atom' ) :-
1142 \+ atom(Name).
1143 attribute_fault( Name=Value, Name=Value, 'Attribute Value must be chars' ) :-
1144 \+ is_chars( Value ).
1145 attribute_fault( Attribute, Attribute, 'Malformed Attribute' ) :-
1146 Attribute \= (_Name=_Value).
1147
1148 is_chars( Chars ) :-
1149 is_list( Chars ),
1150 \+ (member( Char, Chars ), \+ (integer(Char), Char >=0, Char =< 255)).
1151
1152 fault_path( Tag, Attributes ) -->
1153 {atom_codes( Tag, Chars )},
1154 chars( Chars ),
1155 fault_id( Attributes ),
1156 " ".
1157
1158 fault_id( Attributes ) -->
1159 {member( id=Chars, Attributes ), is_chars( Chars )},
1160 !,
1161 "(", chars(Chars), ")".
1162 fault_id( _Attributes ) --> "".
1163 /* xml_generation.pl : Document -> XML translation
1164 *
1165 * $Revision$
1166 */
1167
1168 /* document_generation( +Format, +Document ) is a DCG generating Document
1169 * as a list of character codes. Format is true|false defining whether layouts,
1170 * to provide indentation, should be added between the element content of
1171 * the resultant "string". Note that formatting is disabled for elements that
1172 * are interspersed with pcdata/1 terms, such as XHTML's 'inline' elements.
1173 * Also, Format is over-ridden, for an individual element, by an explicit
1174 * 'xml:space'="preserve" attribute.
1175 */
1176 document_generation( Format, xml(Attributes, Document) ) -->
1177 ? document_generation_body( Attributes, Format, Document ).
1178
1179 document_generation_body( [], Format, Document ) -->
1180 generation( Document, "", Format, [], _Format1 ).
1181 document_generation_body( Attributes, Format, Document ) -->
1182 { Attributes = [_|_],
1183 ? xml_declaration_attributes_valid( Attributes )
1184 },
1185 "<?xml",
1186 generated_attributes( Attributes, Format, Format0 ),
1187 "?>",
1188 indent( true, [] ),
1189 generation( Document, "", Format0, [], _Format1 ).
1190
1191 generation( [], _Prefix, Format, _Indent, Format ) --> [].
1192 generation( [Term|Terms], Prefix, Format0, Indent, Format ) -->
1193 generation( Term, Prefix, Format0, Indent, Format1 ),
1194 generation( Terms, Prefix, Format1, Indent, Format ).
1195 generation( doctype(Name, External), _Prefix, Format, [], Format ) -->
1196 "<!DOCTYPE ",
1197 generated_name( Name ),
1198 generated_external_id( External ),
1199 ">".
1200 generation( instructions(Target,Process), _Prefix, Format, Indent, Format ) -->
1201 indent( Format, Indent ),
1202 "<?", generated_name(Target), " ", chars( Process ) ,"?>".
1203 generation( pcdata(Chars), _Prefix, _Format, _Indent, false ) -->
1204 pcdata_generation( Chars ).
1205 generation( comment( Comment ), _Prefix, Format, Indent, Format ) -->
1206 indent( Format, Indent ),
1207 "<!--", chars( Comment ), "-->".
1208 generation( namespace(URI, Prefix, element(Name, Atts, Content)),
1209 _Prefix0, Format, Indent, Format ) -->
1210 indent( Format, Indent ),
1211 "<", generated_prefixed_name( Prefix, Name ),
1212 generated_prefixed_attributes( Prefix, URI, Atts, Format, Format1 ),
1213 generated_content( Content, Format1, Indent, Prefix, Name ).
1214 generation( element(Name, Atts, Content), Prefix, Format, Indent, Format ) -->
1215 indent( Format, Indent ),
1216 "<", generated_prefixed_name( Prefix, Name ),
1217 generated_attributes( Atts, Format, Format1 ),
1218 generated_content( Content, Format1, Indent, Prefix, Name ).
1219 generation( cdata(CData), _Prefix, Format, Indent, Format ) -->
1220 indent( Format, Indent ),
1221 "<![CDATA[", cdata_generation(CData), "]]>".
1222
1223 generated_attributes( [], Format, Format ) --> [].
1224 generated_attributes( [Name=Value|Attributes], Format0, Format ) -->
1225 {( Name == 'xml:space',
1226 Value="preserve" ->
1227 Format1 = false
1228 ;
1229 Format1 = Format0
1230 )},
1231 " ",
1232 generated_name( Name ),
1233 "=""",
1234 quoted_string( Value ),
1235 """",
1236 generated_attributes( Attributes, Format1, Format ).
1237
1238 generated_prefixed_name( [], Name ) -->
1239 generated_name( Name ).
1240 generated_prefixed_name( Prefix, Name ) -->
1241 {Prefix = [_|_]},
1242 chars( Prefix ), ":",
1243 generated_name( Name ).
1244
1245 generated_content( [], _Format, _Indent, _Prefix, _Namespace ) -->
1246 " />". % Leave an extra space for XHTML output.
1247 generated_content( [H|T], Format, Indent, Prefix, Namespace ) -->
1248 ">",
1249 generation( H, Prefix, Format, [0' |Indent], Format1 ),
1250 generation( T, Prefix, Format1, [0' |Indent], Format2 ),
1251 indent( Format2, Indent ),
1252 "</", generated_prefixed_name( Prefix, Namespace ), ">".
1253
1254 generated_prefixed_attributes( [_|_Prefix], _URI, Atts, Format0, Format ) -->
1255 generated_attributes( Atts, Format0, Format ).
1256 generated_prefixed_attributes( [], URI, Atts, Format0, Format ) -->
1257 {atom_codes( URI, Namespace ),
1258 findall( Attr, (member(Attr, Atts), Attr \= (xmlns=_Val)), Atts1 )
1259 },
1260 generated_attributes( [xmlns=Namespace|Atts1], Format0, Format ).
1261
1262 generated_name( Name, Plus, Minus ) :-
1263 atom_codes( Name, Chars ),
1264 append( Chars, Minus, Plus ).
1265
1266 generated_external_id( local ) --> "".
1267 generated_external_id( local(Literals) ) --> " [",
1268 generated_doctype_literals( Literals ), "\n\t]".
1269 generated_external_id( system(URL) ) -->
1270 " SYSTEM """,
1271 chars( URL ),
1272 """".
1273 generated_external_id( system(URL,Literals) ) -->
1274 " SYSTEM """,
1275 chars( URL ),
1276 """ [",
1277 generated_doctype_literals( Literals ), "\n\t]".
1278 generated_external_id( public(URN,URL) ) -->
1279 " PUBLIC """,
1280 chars( URN ),
1281 """ """,
1282 chars( URL ),
1283 """".
1284 generated_external_id( public(URN,URL,Literals) ) -->
1285 " PUBLIC """,
1286 chars( URN ),
1287 """ """,
1288 chars( URL ),
1289 """ [",
1290 generated_doctype_literals( Literals ), "\n\t]".
1291
1292 generated_doctype_literals( [] ) --> "".
1293 generated_doctype_literals( [dtd_literal(String)|Literals] ) --> "\n\t",
1294 "<!", cdata_generation( String ), ">",
1295 generated_doctype_literals( Literals ).
1296
1297 /* quoted_string( +Chars ) is a DCG representing Chars, a list of character
1298 * codes, as a legal XML attribute string. Any leading or trailing layout
1299 * characters are removed. &, " and < characters are replaced by &, "
1300 * and < respectively.
1301 */
1302 quoted_string( Raw, Plus, Minus ) :-
1303 quoted_string1( Raw, NoLeadingLayouts ),
1304 quoted_string2( NoLeadingLayouts, Layout, Layout, Plus, Minus ).
1305
1306 quoted_string1( [], [] ).
1307 quoted_string1( [Char|Chars], NoLeadingLayouts ) :-
1308 ( Char > 32 ->
1309 NoLeadingLayouts = [Char|Chars]
1310 ;
1311 quoted_string1( Chars, NoLeadingLayouts )
1312 ).
1313
1314 quoted_string2( [], _LayoutPlus, _LayoutMinus, List, List ).
1315 quoted_string2( [Char|Chars], LayoutPlus, LayoutMinus, Plus, Minus ) :-
1316 ( Char =< " " ->
1317 Plus = Plus1,
1318 LayoutMinus = [Char|LayoutMinus1],
1319 LayoutPlus = LayoutPlus1
1320 ; Char =< 127 ->
1321 Plus = LayoutPlus,
1322 pcdata_7bit( Char, LayoutMinus, Plus1 ),
1323 LayoutPlus1 = LayoutMinus1
1324 ; legal_xml_unicode( Char ) ->
1325 Plus = LayoutPlus,
1326 number_codes( Char, Codes ),
1327 pcdata_8bits_plus( Codes, LayoutMinus, Plus1 ),
1328 LayoutPlus1 = LayoutMinus1
1329 ;
1330 LayoutPlus = LayoutPlus1,
1331 LayoutMinus = LayoutMinus1,
1332 Plus = Plus1
1333 ),
1334 quoted_string2( Chars, LayoutPlus1, LayoutMinus1, Plus1, Minus ).
1335
1336 indent( false, _Indent ) --> [].
1337 indent( true, Indent ) --> "\n",
1338 chars( Indent ).
1339
1340 /* pcdata_generation( +Chars ) is a DCG representing Chars, a list of character
1341 * codes as legal XML "Parsed character data" (PCDATA) string. Any codes
1342 * which cannot be represented by a 7-bit character are replaced by their
1343 * decimal numeric character entity e.g. code 160 (non-breaking space) is
1344 * represented as  . Any character codes disallowed by the XML
1345 * specification are not encoded.
1346 */
1347 pcdata_generation( [], Plus, Plus ).
1348 pcdata_generation( [Char|Chars], Plus, Minus ) :-
1349 ( Char =< 127 ->
1350 pcdata_7bit( Char, Plus, Mid )
1351 ; legal_xml_unicode( Char ) ->
1352 number_codes( Char, Codes ),
1353 pcdata_8bits_plus( Codes, Plus, Mid )
1354 ;
1355 Plus = Mid
1356 ),
1357 pcdata_generation( Chars, Mid, Minus ).
1358
1359 /* pcdata_7bit(+Char) represents the ascii character set in its
1360 * simplest format, using the character entities & " < and >
1361 * which are common to both XML and HTML. The numeric entity ' is used in
1362 * place of ', because browsers don't recognize it in HTML.
1363 */
1364 pcdata_7bit( 0 ) --> "".
1365 pcdata_7bit( 1 ) --> "".
1366 pcdata_7bit( 2 ) --> "".
1367 pcdata_7bit( 3 ) --> "".
1368 pcdata_7bit( 4 ) --> "".
1369 pcdata_7bit( 5 ) --> "".
1370 pcdata_7bit( 6 ) --> "".
1371 pcdata_7bit( 7 ) --> "".
1372 pcdata_7bit( 8 ) --> "".
1373 pcdata_7bit( 9 ) --> [9].
1374 pcdata_7bit( 10 ) --> [10].
1375 pcdata_7bit( 11 ) --> "".
1376 pcdata_7bit( 12 ) --> "".
1377 pcdata_7bit( 13 ) --> [13].
1378 pcdata_7bit( 14 ) --> "".
1379 pcdata_7bit( 15 ) --> "".
1380 pcdata_7bit( 16 ) --> "".
1381 pcdata_7bit( 17 ) --> "".
1382 pcdata_7bit( 18 ) --> "".
1383 pcdata_7bit( 19 ) --> "".
1384 pcdata_7bit( 20 ) --> "".
1385 pcdata_7bit( 21 ) --> "".
1386 pcdata_7bit( 22 ) --> "".
1387 pcdata_7bit( 23 ) --> "".
1388 pcdata_7bit( 24 ) --> "".
1389 pcdata_7bit( 25 ) --> "".
1390 pcdata_7bit( 26 ) --> "".
1391 pcdata_7bit( 27 ) --> "".
1392 pcdata_7bit( 28 ) --> "".
1393 pcdata_7bit( 29 ) --> "".
1394 pcdata_7bit( 30 ) --> "".
1395 pcdata_7bit( 31 ) --> "".
1396 pcdata_7bit( 32 ) --> " ".
1397 pcdata_7bit( 33 ) --> "!".
1398 pcdata_7bit( 34 ) --> """.
1399 pcdata_7bit( 35 ) --> "#".
1400 pcdata_7bit( 36 ) --> "$".
1401 pcdata_7bit( 37 ) --> "%".
1402 pcdata_7bit( 38 ) --> "&".
1403 pcdata_7bit( 39 ) --> "'".
1404 pcdata_7bit( 40 ) --> "(".
1405 pcdata_7bit( 41 ) --> ")".
1406 pcdata_7bit( 42 ) --> "*".
1407 pcdata_7bit( 43 ) --> "+".
1408 pcdata_7bit( 44 ) --> ",".
1409 pcdata_7bit( 45 ) --> "-".
1410 pcdata_7bit( 46 ) --> ".".
1411 pcdata_7bit( 47 ) --> "/".
1412 pcdata_7bit( 48 ) --> "0".
1413 pcdata_7bit( 49 ) --> "1".
1414 pcdata_7bit( 50 ) --> "2".
1415 pcdata_7bit( 51 ) --> "3".
1416 pcdata_7bit( 52 ) --> "4".
1417 pcdata_7bit( 53 ) --> "5".
1418 pcdata_7bit( 54 ) --> "6".
1419 pcdata_7bit( 55 ) --> "7".
1420 pcdata_7bit( 56 ) --> "8".
1421 pcdata_7bit( 57 ) --> "9".
1422 pcdata_7bit( 58 ) --> ":".
1423 pcdata_7bit( 59 ) --> ";".
1424 pcdata_7bit( 60 ) --> "<".
1425 pcdata_7bit( 61 ) --> "=".
1426 pcdata_7bit( 62 ) --> ">".
1427 pcdata_7bit( 63 ) --> "?".
1428 pcdata_7bit( 64 ) --> "@".
1429 pcdata_7bit( 65 ) --> "A".
1430 pcdata_7bit( 66 ) --> "B".
1431 pcdata_7bit( 67 ) --> "C".
1432 pcdata_7bit( 68 ) --> "D".
1433 pcdata_7bit( 69 ) --> "E".
1434 pcdata_7bit( 70 ) --> "F".
1435 pcdata_7bit( 71 ) --> "G".
1436 pcdata_7bit( 72 ) --> "H".
1437 pcdata_7bit( 73 ) --> "I".
1438 pcdata_7bit( 74 ) --> "J".
1439 pcdata_7bit( 75 ) --> "K".
1440 pcdata_7bit( 76 ) --> "L".
1441 pcdata_7bit( 77 ) --> "M".
1442 pcdata_7bit( 78 ) --> "N".
1443 pcdata_7bit( 79 ) --> "O".
1444 pcdata_7bit( 80 ) --> "P".
1445 pcdata_7bit( 81 ) --> "Q".
1446 pcdata_7bit( 82 ) --> "R".
1447 pcdata_7bit( 83 ) --> "S".
1448 pcdata_7bit( 84 ) --> "T".
1449 pcdata_7bit( 85 ) --> "U".
1450 pcdata_7bit( 86 ) --> "V".
1451 pcdata_7bit( 87 ) --> "W".
1452 pcdata_7bit( 88 ) --> "X".
1453 pcdata_7bit( 89 ) --> "Y".
1454 pcdata_7bit( 90 ) --> "Z".
1455 pcdata_7bit( 91 ) --> "[".
1456 pcdata_7bit( 92 ) --> [92].
1457 pcdata_7bit( 93 ) --> "]".
1458 pcdata_7bit( 94 ) --> "^".
1459 pcdata_7bit( 95 ) --> "_".
1460 pcdata_7bit( 96 ) --> "`".
1461 pcdata_7bit( 97 ) --> "a".
1462 pcdata_7bit( 98 ) --> "b".
1463 pcdata_7bit( 99 ) --> "c".
1464 pcdata_7bit( 100 ) --> "d".
1465 pcdata_7bit( 101 ) --> "e".
1466 pcdata_7bit( 102 ) --> "f".
1467 pcdata_7bit( 103 ) --> "g".
1468 pcdata_7bit( 104 ) --> "h".
1469 pcdata_7bit( 105 ) --> "i".
1470 pcdata_7bit( 106 ) --> "j".
1471 pcdata_7bit( 107 ) --> "k".
1472 pcdata_7bit( 108 ) --> "l".
1473 pcdata_7bit( 109 ) --> "m".
1474 pcdata_7bit( 110 ) --> "n".
1475 pcdata_7bit( 111 ) --> "o".
1476 pcdata_7bit( 112 ) --> "p".
1477 pcdata_7bit( 113 ) --> "q".
1478 pcdata_7bit( 114 ) --> "r".
1479 pcdata_7bit( 115 ) --> "s".
1480 pcdata_7bit( 116 ) --> "t".
1481 pcdata_7bit( 117 ) --> "u".
1482 pcdata_7bit( 118 ) --> "v".
1483 pcdata_7bit( 119 ) --> "w".
1484 pcdata_7bit( 120 ) --> "x".
1485 pcdata_7bit( 121 ) --> "y".
1486 pcdata_7bit( 122 ) --> "z".
1487 pcdata_7bit( 123 ) --> "{".
1488 pcdata_7bit( 124 ) --> "|".
1489 pcdata_7bit( 125 ) --> "}".
1490 pcdata_7bit( 126 ) --> "~".
1491 pcdata_7bit( 127 ) --> "".
1492
1493 pcdata_8bits_plus( Codes ) -->
1494 "&#", chars( Codes ), ";".
1495
1496 /* cdata_generation( +Chars ) is a DCG representing Chars, a list of character
1497 * codes as a legal XML CDATA string. Any character codes disallowed by the XML
1498 * specification are not encoded.
1499 */
1500 cdata_generation( [] ) --> "".
1501 cdata_generation( [Char|Chars] ) -->
1502 ( {legal_xml_unicode( Char )}, !, [Char]
1503 | ""
1504 ),
1505 cdata_generation( Chars ).
1506
1507 legal_xml_unicode( 9 ).
1508 legal_xml_unicode( 10 ).
1509 legal_xml_unicode( 13 ).
1510 legal_xml_unicode( Code ) :-
1511 Code >= 32,
1512 Code =< 55295.
1513 legal_xml_unicode( Code ) :-
1514 Code >= 57344,
1515 Code =< 65533.
1516 legal_xml_unicode( Code ) :-
1517 Code >= 65536,
1518 Code =< 1114111.
1519
1520 /* xml_pp: "pretty print" an XML Document on the current output stream.
1521 *
1522 * Current Release: 1.9
1523 */
1524 /* xml_pp( +XMLDocument ) "pretty prints" XMLDocument on the current
1525 * output stream.
1526 */
1527 xml_pp( xml(Attributes, Document) ) :-
1528 write( 'xml( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1529 xml_pp_list( Document, "\t" ),
1530 format( ' ).~n', [] ).
1531 xml_pp( malformed(Attributes, Document) ) :-
1532 write( 'malformed( ' ), pp_attributes( Attributes, "" ), put_code( 0', ), nl,
1533 xml_pp_list( Document, "\t" ),
1534 format( ' ).~n', [] ).
1535
1536 xml_pp_indented( [], Indent ) :-
1537 format( '~s[]', [Indent] ).
1538 xml_pp_indented( List, Indent ) :-
1539 List = [_|_],
1540 format( '~s', [Indent] ),
1541 xml_pp_list( List, Indent ).
1542 xml_pp_indented( comment(Text), Indent ) :-
1543 format( '~scomment(', [Indent] ), pp_string(Text), put_code( 0') ).
1544 xml_pp_indented( namespace(URI,Prefix,Element), Indent ) :-
1545 format( '~snamespace( ~q, "~s",~n', [Indent,URI,Prefix] ),
1546 xml_pp_indented( Element, [0'\t|Indent] ),
1547 format( '~n~s)', [[0'\t|Indent]] ).
1548 xml_pp_indented( element(Tag,Attributes,Contents), Indent ) :-
1549 format( '~selement( ~q,~n', [Indent,Tag] ),
1550 pp_attributes( Attributes, [0'\t|Indent] ), put_code(0',), nl,
1551 xml_pp_list( Contents, [0'\t|Indent] ), write( ' )' ).
1552 xml_pp_indented( instructions(Target, Processing), Indent ) :-
1553 format( '~sinstructions( ~q, ', [Indent,Target] ),
1554 pp_string(Processing), put_code( 0') ).
1555 xml_pp_indented( doctype(Name, DoctypeId), Indent ) :-
1556 format( '~sdoctype( ~q, ', [Indent,Name] ),
1557 xml_pp_indented( DoctypeId, [0'\t|Indent] ),
1558 write( ' )' ).
1559 xml_pp_indented( cdata(CData), Indent ) :-
1560 format( '~scdata(', [Indent] ), pp_string(CData), put_code( 0') ).
1561 xml_pp_indented( pcdata(PCData), Indent ) :-
1562 format( '~spcdata(', [Indent] ), pp_string(PCData), put_code( 0') ).
1563 xml_pp_indented( public(URN,URL), _Indent ) :-
1564 format( 'public( "~s", "~s" )', [URN,URL] ).
1565 xml_pp_indented( public(URN,URL,Literals), Indent ) :-
1566 format( 'public( "~s", "~s",~n', [URN,URL] ),
1567 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1568 xml_pp_indented( system(URL), _Indent ) :-
1569 format( 'system( "~s" )', [URL] ).
1570 xml_pp_indented( system(URL,Literals), Indent ) :-
1571 format( 'system( "~s",~n', [URL] ),
1572 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1573 xml_pp_indented( local, _Indent ) :-
1574 write( local ).
1575 xml_pp_indented( local(Literals), Indent ) :-
1576 write( 'local(' ), nl,
1577 xml_pp_list( Literals, [0'\t|Indent] ), write( ' )' ). %'
1578 xml_pp_indented( dtd_literal(String), Indent ) :-
1579 format( '~sdtd_literal(', [Indent] ), pp_string(String), put_code( 0') ). %'
1580 xml_pp_indented( out_of_context(Tag), Indent ) :-
1581 format( '~s/* SYNTAX ERROR */ out_of_context( ~q )', [Indent,Tag] ).
1582 xml_pp_indented( unparsed(String), Indent ) :-
1583 format( '~s/* SYNTAX ERROR */ unparsed( ', [Indent] ),
1584 pp_string(String), put_code( 0') ).
1585
1586 xml_pp_list( [], Indent ) :-
1587 format( '~s[]', [Indent] ).
1588 xml_pp_list( [H|T], Indent ) :-
1589 format( '~s[~n', [Indent] ),
1590 xml_pp_indented( H, Indent ),
1591 xml_pp_list1( T, Indent ),
1592 format( '~s]', [Indent] ).
1593
1594 xml_pp_list1( [], _Indent ) :-
1595 nl.
1596 xml_pp_list1( [H|T], Indent ) :-
1597 put_code( 0', ), nl,
1598 xml_pp_indented( H, Indent ),
1599 xml_pp_list1( T, Indent ).
1600
1601 % PATCH LEUSCHEL:
1602 pp_attributes( Attributes, Indent ) :-
1603 (select('$attribute_linefeeds'=_Count,Attributes,Rest)
1604 -> pp_attributes2(Rest,Indent) % TO DO: insert linefeeds
1605 ; pp_attributes2(Attributes,Indent)).
1606
1607 pp_attributes2( [], Indent ) :-
1608 format( '~s[]', [Indent] ).
1609 pp_attributes2( [Attribute|Attributes], Indent ) :-
1610 format( '~s[', [Indent] ),
1611 pp_attributes1( Attributes, Attribute ),
1612 put_code( 0'] ).
1613 % END PATCH
1614 pp_attributes1( [], Name=Value ) :-
1615 format( '~q=', [Name] ), pp_string( Value ).
1616 pp_attributes1( [H|T], Name=Value ) :-
1617 format( '~q=', [Name] ), pp_string( Value ), write( ', ' ),
1618 pp_attributes1( T, H ).
1619 /* XML Utilities
1620 *
1621 * $Revision$
1622 */
1623
1624 % Entity and Namespace map operations: these maps are usually quite small, so
1625 % a linear list lookup is okay. They could be substituted by a logarithmic
1626 % data structure - in extremis.
1627
1628 /* empty_map( ?Map ) is true if Map is a null map.
1629 */
1630 empty_map( [] ).
1631
1632 /* map_member( +Key, +Map, ?Data ) is true if Map is a ordered map structure
1633 * which records the pair Key-Data. Key must be ground.
1634 */
1635 map_member( Key0, [Key1-Data1|Rest], Data0 ) :-
1636 ( Key0 == Key1 ->
1637 Data0 = Data1
1638 ; Key0 @> Key1 ->
1639 map_member( Key0, Rest, Data0 )
1640 ).
1641
1642 /* map_store( +Map0, +Key, +Data, ?Map1 ) is true if Map0 is an ordered map
1643 * structure, Key must be ground, and Map1 is identical to Map0 except that
1644 * the pair Key-Data is recorded by Map1.
1645 */
1646 map_store( [], Key, Data, [Key-Data] ).
1647 map_store( [Key0-Data0|Map0], Key, Data, Map ) :-
1648 ( Key == Key0 ->
1649 Map = [Key-Data|Map0]
1650 ; Key @< Key0 ->
1651 Map = [Key-Data,Key0-Data0|Map0]
1652 ; % >
1653 Map = [Key0-Data0|Map1],
1654 map_store( Map0, Key, Data, Map1 )
1655 ).
1656
1657 /* context(?Element, ?PreserveSpace, ?CurrentNS, ?DefaultNS, ?Entities, ?Namespaces )
1658 * is an ADT hiding the "state" arguments for XML Acquisition
1659 */
1660 initial_context(
1661 opt(Fmt,Ext,RemoveAttributePrefixes),
1662 context(void,PreserveSpace,'','',Entities,Empty,
1663 RemoveAttributePrefixes)
1664 ) :-
1665 empty_map( Empty ),
1666 ( Ext==false ->
1667 Entities = Empty
1668 ;
1669 extended_character_entities(Entities)
1670 ),
1671 ( Fmt==false ->
1672 PreserveSpace = true
1673 ;
1674 PreserveSpace = false
1675 ).
1676
1677 context_update( current_namespace, Context0, URI, Context1 ) :-
1678 Context0 = context(Element,Preserve,_Current,Default,Entities,
1679 Namespaces,RemoveAttributePrefixes),
1680 Context1 = context(Element,Preserve,URI,Default,Entities,
1681 Namespaces,RemoveAttributePrefixes).
1682 context_update( element, Context0, Tag, Context1 ) :-
1683 Context0 = context(_Element,Preserve,Current,Default,Entities,
1684 Namespaces,RemoveAttributePrefixes),
1685 Context1 = context(tag(Tag),Preserve,Current,Default,Entities,
1686 Namespaces,RemoveAttributePrefixes).
1687 context_update( default_namespace, Context0, URI, Context1 ):-
1688 Context0 = context(Element,Preserve,Current,_Default,Entities,
1689 Namespaces,RemoveAttributePrefixes),
1690 Context1 = context(Element,Preserve,Current,URI,Entities,
1691 Namespaces,RemoveAttributePrefixes).
1692 context_update( space_preserve, Context0, Boolean, Context1 ):-
1693 Context0 = context(Element,_Preserve,Current,Default,Entities,
1694 Namespaces,RemoveAttributePrefixes),
1695 Context1 = context(Element,Boolean,Current,Default,Entities,
1696 Namespaces,RemoveAttributePrefixes).
1697 context_update( ns_prefix(Prefix), Context0, URI, Context1 ) :-
1698 Context0 = context(Element,Preserve,Current,Default,Entities,
1699 Namespaces0,RemoveAttributePrefixes),
1700 Context1 = context(Element,Preserve,Current,Default,Entities,
1701 Namespaces1,RemoveAttributePrefixes),
1702 map_store( Namespaces0, Prefix, URI, Namespaces1 ).
1703 context_update( entity(Name), Context0, String, Context1 ) :-
1704 Context0 = context(Element,Preserve,Current,Default,Entities0,
1705 Namespaces,RemoveAttributePrefixes),
1706 Context1 = context(Element,Preserve,Current,Default,Entities1,
1707 Namespaces,RemoveAttributePrefixes),
1708 map_store( Entities0, Name, String, Entities1 ).
1709
1710 remove_attribute_prefixes( Context ) :-
1711 Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1712 _Namespaces,true).
1713
1714 current_tag( Context, Tag ) :-
1715 Context = context(tag(Tag),_Preserve,_Current,_Default,_Entities,
1716 _Namespaces,_RPFA).
1717
1718 current_namespace( Context, Current ) :-
1719 Context = context(_Element,_Preserve,Current,_Default,_Entities,
1720 _Namespaces,_RPFA).
1721
1722 default_namespace( Context, Default ) :-
1723 Context = context(_Element,_Preserve,_Current,Default,_Entities,
1724 _Namespaces,_RPFA).
1725
1726 space_preserve( Context ) :-
1727 Context = context(_Element,true,_Current,_Default,_Entities,
1728 _Namespaces,_RPFA).
1729
1730 specific_namespace( Prefix, Context, URI ) :-
1731 Context = context(_Element,_Preserve,_Current,_Default,_Entities,
1732 Namespaces,_RPFA),
1733 map_member( Prefix, Namespaces, URI ).
1734
1735 defined_entity( Reference, Context, String ) :-
1736 Context = context(_Element,_Preserve,_Current,_Default,Entities,
1737 _Namespaces,_RPFA),
1738 map_member( Reference, Entities, String ).
1739
1740 close_context( Context, Terms, WellFormed ) :-
1741 Context = context(Element,_Preserve,_Current,_Default,_Entities,
1742 _Namespaces,_RPFA),
1743 close_context1( Element, Terms, WellFormed ).
1744
1745 close_context1( void, [], true ).
1746 close_context1( tag(TagChars), [out_of_context(Tag)], false ) :-
1747 atom_codes( Tag, TagChars ).
1748
1749 void_context(
1750 context(void,_Preserve,_Current,_Default,_Entities,_Namespaces)
1751 ).
1752
1753 /* pp_string( +String ) prints String onto the current output stream.
1754 * If String contains only 7-bit chars it is printed in shorthand quoted
1755 * format, otherwise it is written as a list.
1756 * If your Prolog uses " to delimit a special string type, just use write/1.
1757 */
1758 % [MC] rewritten
1759 pp_string(Chars) :-
1760 put_code(0'"),
1761 pp_string1(Chars),
1762 put_code(0'").
1763
1764 pp_string1([]).
1765 pp_string1([Char|Chars]) :-
1766 pp_string2(Char),
1767 pp_string1(Chars).
1768
1769 pp_string2(0'\a) :- !,
1770 put_code(0'\\),
1771 put_code(0'a).
1772 pp_string2(0'\b) :- !,
1773 put_code(0'\\),
1774 put_code(0'b).
1775 pp_string2(0'\t) :- !,
1776 put_code(0'\\),
1777 put_code(0't).
1778 pp_string2(0'\n) :- !,
1779 put_code(0'\\),
1780 put_code(0'n).
1781 pp_string2(0'\v) :- !,
1782 put_code(0'\\),
1783 put_code(0'v).
1784 pp_string2(0'\f) :- !,
1785 put_code(0'\\),
1786 put_code(0'f).
1787 pp_string2(0'\r) :- !,
1788 put_code(0'\\),
1789 put_code(0'r).
1790 pp_string2(0'\e) :- !,
1791 put_code(0'\\),
1792 put_code(0'e).
1793 pp_string2(0'\") :- !,
1794 put_code(0'\\),
1795 put_code(0'").
1796 pp_string2(0'\\) :- !,
1797 put_code(0'\\),
1798 put_code(0'\\).
1799 pp_string2(Char) :-
1800 Char>=32, Char=<126, !,
1801 put_code(Char).
1802 pp_string2(Char) :-
1803 format('\\~8r\\', [Char]).
1804
1805 xml_declaration_attributes_valid( [] ).
1806 xml_declaration_attributes_valid( [Name=Value|Attributes] ) :-
1807 ? xml_declaration_attribute_valid( Name, Value ),
1808 ? xml_declaration_attributes_valid( Attributes ).
1809
1810 xml_declaration_attribute_valid( Name, Value ) :-
1811 lowercase( Value, Lowercase ),
1812 debug_format(19,'Checking xml declaration attribute ~s=~s~n',[Name,Lowercase]),
1813 ? if(canonical_xml_declaration_attribute( Name, Lowercase ), true,
1814 (format(user_error,'Illegal <xml> declaration attribute: ~s = ~s~n',[Name,Value]),
1815 (valid_declaration_attribute_name(Name)
1816 -> ajoin(['Ignoring illegal value for attribute ',Name,': '],Msg),
1817 atom_codes(AValue,Value),
1818 add_xml_warning(Msg,AValue)
1819 ; add_xml_warning('Ignoring illegal <xml> declaration attribute: ',Name)
1820 ),
1821 fail
1822 )). % PATCH LEUSCHEL: add warning
1823
1824 % PATCH LEUSCHEL:
1825 valid_declaration_attribute_name(version).
1826 valid_declaration_attribute_name(standalone).
1827 valid_declaration_attribute_name(encoding).
1828
1829
1830 canonical_xml_declaration_attribute( version, "1.0" ).
1831 canonical_xml_declaration_attribute( standalone, "yes" ).
1832 canonical_xml_declaration_attribute( standalone, "no" ).
1833 canonical_xml_declaration_attribute( encoding, "utf-8" ).
1834 canonical_xml_declaration_attribute( encoding, "utf-16" ).
1835 canonical_xml_declaration_attribute( encoding, "ascii" ).
1836 canonical_xml_declaration_attribute( encoding, "iso-8859-1" ).
1837 canonical_xml_declaration_attribute( encoding, "iso-8859-2" ).
1838 canonical_xml_declaration_attribute( encoding, "iso-8859-15" ).
1839 canonical_xml_declaration_attribute( encoding, "windows-1252" ).
1840
1841 % PATCH LEUSCHEL: some more sets from https://www.iana.org/assignments/character-sets/character-sets.xhtml
1842 %canonical_xml_declaration_attribute( encoding, "iso-8859-3" ).
1843 %canonical_xml_declaration_attribute( encoding, "iso-8859-4" ).
1844 canonical_xml_declaration_attribute( encoding, Encoding ) :- atom_codes(Atom,Encoding),
1845 add_xml_warning('Unsupported xml encoding (which can lead to unexpected results): ',Atom).
1846
1847
1848
1849 /* lowercase( +MixedCase, ?Lowercase ) holds when Lowercase and MixedCase are
1850 * lists of character codes, and Lowercase is identical to MixedCase with
1851 * every uppercase character replaced by its lowercase equivalent.
1852 */
1853 lowercase( [], [] ).
1854 lowercase( [Char|Chars], [Lower|LowerCase] ) :-
1855 ( Char >= 0'A, Char =< 0'Z
1856 ->
1857 Lower is Char + 0'a - 0'A
1858 ;
1859 Lower = Char
1860 ),
1861 lowercase( Chars, LowerCase ).
1862
1863 :- dynamic extended_character_entities/1. % no point compiling it
1864 extended_character_entities( [
1865 "Aacute"-[193], % latin capital letter A with acute,
1866 "aacute"-[225], % latin small letter a with acute,
1867 "Acirc"-[194], % latin capital letter A with circumflex,
1868 "acirc"-[226], % latin small letter a with circumflex,
1869 "acute"-[180], % acute accent = spacing acute,
1870 "AElig"-[198], % latin capital letter AE
1871 "aelig"-[230], % latin small letter ae
1872 "Agrave"-[192], % latin capital letter A with grave
1873 "agrave"-[224], % latin small letter a with grave
1874 "alefsym"-[8501], % alef symbol = first transfinite cardinal,
1875 "Alpha"-[913], % greek capital letter alpha, U+0391
1876 "alpha"-[945], % greek small letter alpha,
1877 "and"-[8743], % logical and = wedge, U+2227 ISOtech
1878 "ang"-[8736], % angle, U+2220 ISOamso
1879 "Aring"-[197], % latin capital letter A with ring above
1880 "aring"-[229], % latin small letter a with ring above
1881 "asymp"-[8776], % almost equal to = asymptotic to,
1882 "Atilde"-[195], % latin capital letter A with tilde,
1883 "atilde"-[227], % latin small letter a with tilde,
1884 "Auml"-[196], % latin capital letter A with diaeresis,
1885 "auml"-[228], % latin small letter a with diaeresis,
1886 "bdquo"-[8222], % double low-9 quotation mark, U+201E NEW
1887 "Beta"-[914], % greek capital letter beta, U+0392
1888 "beta"-[946], % greek small letter beta, U+03B2 ISOgrk3
1889 "brvbar"-[166], % broken bar = broken vertical bar,
1890 "bull"-[8226], % bullet = black small circle,
1891 "cap"-[8745], % intersection = cap, U+2229 ISOtech
1892 "Ccedil"-[199], % latin capital letter C with cedilla,
1893 "ccedil"-[231], % latin small letter c with cedilla,
1894 "cedil"-[184], % cedilla = spacing cedilla, U+00B8 ISOdia>
1895 "cent"-[162], % cent sign, U+00A2 ISOnum>
1896 "Chi"-[935], % greek capital letter chi, U+03A7
1897 "chi"-[967], % greek small letter chi, U+03C7 ISOgrk3
1898 "circ"-[710], % modifier letter circumflex accent,
1899 "clubs"-[9827], % black club suit = shamrock,
1900 "cong"-[8773], % approximately equal to, U+2245 ISOtech
1901 "copy"-[169], % copyright sign, U+00A9 ISOnum>
1902 "crarr"-[8629], % downwards arrow with corner leftwards
1903 "cup"-[8746], % union = cup, U+222A ISOtech
1904 "curren"-[164], % currency sign, U+00A4 ISOnum>
1905 "dagger"-[8224], % dagger, U+2020 ISOpub
1906 "Dagger"-[8225], % double dagger, U+2021 ISOpub
1907 "darr"-[8595], % downwards arrow, U+2193 ISOnum
1908 "dArr"-[8659], % downwards double arrow, U+21D3 ISOamsa
1909 "deg"-[176], % degree sign, U+00B0 ISOnum>
1910 "Delta"-[916], % greek capital letter delta,
1911 "delta"-[948], % greek small letter delta,
1912 "diams"-[9830], % black diamond suit, U+2666 ISOpub
1913 "divide"-[247], % division sign, U+00F7 ISOnum>
1914 "Eacute"-[201], % latin capital letter E with acute,
1915 "eacute"-[233], % latin small letter e with acute,
1916 "Ecirc"-[202], % latin capital letter E with circumflex,
1917 "ecirc"-[234], % latin small letter e with circumflex,
1918 "Egrave"-[200], % latin capital letter E with grave,
1919 "egrave"-[232], % latin small letter e with grave,
1920 "empty"-[8709], % empty set = null set = diameter,
1921 "emsp"-[8195], % em space, U+2003 ISOpub
1922 "ensp"-[8194], % en space, U+2002 ISOpub
1923 "Epsilon"-[917], % greek capital letter epsilon, U+0395
1924 "epsilon"-[949], % greek small letter epsilon,
1925 "equiv"-[8801], % identical to, U+2261 ISOtech
1926 "Eta"-[919], % greek capital letter eta, U+0397
1927 "eta"-[951], % greek small letter eta, U+03B7 ISOgrk3
1928 "ETH"-[208], % latin capital letter ETH, U+00D0 ISOlat1>
1929 "eth"-[240], % latin small letter eth, U+00F0 ISOlat1>
1930 "Euml"-[203], % latin capital letter E with diaeresis,
1931 "euml"-[235], % latin small letter e with diaeresis,
1932 "euro"-[8364], % euro sign, U+20AC NEW
1933 "exist"-[8707], % there exists, U+2203 ISOtech
1934 "fnof"-[402], % latin small f with hook = function
1935 "forall"-[8704], % for all, U+2200 ISOtech
1936 "frac12"-[189], % vulgar fraction one half
1937 "frac14"-[188], % vulgar fraction one quarter
1938 "frac34"-[190], % vulgar fraction three quarters
1939 "frasl"-[8260], % fraction slash, U+2044 NEW
1940 "Gamma"-[915], % greek capital letter gamma,
1941 "gamma"-[947], % greek small letter gamma,
1942 "ge"-[8805], % greater-than or equal to,
1943 "harr"-[8596], % left right arrow, U+2194 ISOamsa
1944 "hArr"-[8660], % left right double arrow,
1945 "hearts"-[9829], % black heart suit = valentine,
1946 "hellip"-[8230], % horizontal ellipsis = three dot leader,
1947 "Iacute"-[205], % latin capital letter I with acute,
1948 "iacute"-[237], % latin small letter i with acute,
1949 "Icirc"-[206], % latin capital letter I with circumflex,
1950 "icirc"-[238], % latin small letter i with circumflex,
1951 "iexcl"-[161], % inverted exclamation mark, U+00A1 ISOnum>
1952 "Igrave"-[204], % latin capital letter I with grave,
1953 "igrave"-[236], % latin small letter i with grave,
1954 "image"-[8465], % blackletter capital I = imaginary part,
1955 "infin"-[8734], % infinity, U+221E ISOtech
1956 "int"-[8747], % integral, U+222B ISOtech
1957 "Iota"-[921], % greek capital letter iota, U+0399
1958 "iota"-[953], % greek small letter iota, U+03B9 ISOgrk3
1959 "iquest"-[191], % inverted question mark
1960 "isin"-[8712], % element of, U+2208 ISOtech
1961 "Iuml"-[207], % latin capital letter I with diaeresis,
1962 "iuml"-[239], % latin small letter i with diaeresis,
1963 "Kappa"-[922], % greek capital letter kappa, U+039A
1964 "kappa"-[954], % greek small letter kappa,
1965 "Lambda"-[923], % greek capital letter lambda,
1966 "lambda"-[955], % greek small letter lambda,
1967 "lang"-[9001], % left-pointing angle bracket = bra,
1968 "laquo"-[171], % left-pointing double angle quotation mark
1969 "larr"-[8592], % leftwards arrow, U+2190 ISOnum
1970 "lArr"-[8656], % leftwards double arrow, U+21D0 ISOtech
1971 "lceil"-[8968], % left ceiling = apl upstile,
1972 "ldquo"-[8220], % left double quotation mark,
1973 "le"-[8804], % less-than or equal to, U+2264 ISOtech
1974 "lfloor"-[8970], % left floor = apl downstile,
1975 "lowast"-[8727], % asterisk operator, U+2217 ISOtech
1976 "loz"-[9674], % lozenge, U+25CA ISOpub
1977 "lrm"-[8206], % left-to-right mark, U+200E NEW RFC 2070
1978 "lsaquo"-[8249], % single left-pointing angle quotation mark,
1979 "lsquo"-[8216], % left single quotation mark,
1980 "macr"-[175], % macron = spacing macron = overline
1981 "mdash"-[8212], % em dash, U+2014 ISOpub
1982 "micro"-[181], % micro sign, U+00B5 ISOnum>
1983 "middot"-[183], % middle dot = Georgian comma
1984 "minus"-[8722], % minus sign, U+2212 ISOtech
1985 "Mu"-[924], % greek capital letter mu, U+039C
1986 "mu"-[956], % greek small letter mu, U+03BC ISOgrk3
1987 "nabla"-[8711], % nabla = backward difference,
1988 "nbsp"-[160], % no-break space = non-breaking space,
1989 "ndash"-[8211], % en dash, U+2013 ISOpub
1990 "ne"-[8800], % not equal to, U+2260 ISOtech
1991 "ni"-[8715], % contains as member, U+220B ISOtech
1992 "not"-[172], % not sign, U+00AC ISOnum>
1993 "notin"-[8713], % not an element of, U+2209 ISOtech
1994 "nsub"-[8836], % not a subset of, U+2284 ISOamsn
1995 "Ntilde"-[209], % latin capital letter N with tilde,
1996 "ntilde"-[241], % latin small letter n with tilde,
1997 "Nu"-[925], % greek capital letter nu, U+039D
1998 "nu"-[957], % greek small letter nu, U+03BD ISOgrk3
1999 "Oacute"-[211], % latin capital letter O with acute,
2000 "oacute"-[243], % latin small letter o with acute,
2001 "Ocirc"-[212], % latin capital letter O with circumflex,
2002 "ocirc"-[244], % latin small letter o with circumflex,
2003 "OElig"-[338], % latin capital ligature OE,
2004 "oelig"-[339], % latin small ligature oe, U+0153 ISOlat2
2005 "Ograve"-[210], % latin capital letter O with grave,
2006 "ograve"-[242], % latin small letter o with grave,
2007 "oline"-[8254], % overline = spacing overscore,
2008 "Omega"-[937], % greek capital letter omega,
2009 "omega"-[969], % greek small letter omega,
2010 "Omicron"-[927], % greek capital letter omicron, U+039F
2011 "omicron"-[959], % greek small letter omicron, U+03BF NEW
2012 "oplus"-[8853], % circled plus = direct sum,
2013 "or"-[8744], % logical or = vee, U+2228 ISOtech
2014 "ordf"-[170], % feminine ordinal indicator, U+00AA ISOnum>
2015 "ordm"-[186], % masculine ordinal indicator,
2016 "Oslash"-[216], % latin capital letter O with stroke
2017 "oslash"-[248], % latin small letter o with stroke,
2018 "Otilde"-[213], % latin capital letter O with tilde,
2019 "otilde"-[245], % latin small letter o with tilde,
2020 "otimes"-[8855], % circled times = vector product,
2021 "Ouml"-[214], % latin capital letter O with diaeresis,
2022 "ouml"-[246], % latin small letter o with diaeresis,
2023 "para"-[182], % pilcrow sign = paragraph sign,
2024 "part"-[8706], % partial differential, U+2202 ISOtech
2025 "permil"-[8240], % per mille sign, U+2030 ISOtech
2026 "perp"-[8869], % up tack = orthogonal to = perpendicular,
2027 "Phi"-[934], % greek capital letter phi,
2028 "phi"-[966], % greek small letter phi, U+03C6 ISOgrk3
2029 "Pi"-[928], % greek capital letter pi, U+03A0 ISOgrk3
2030 "pi"-[960], % greek small letter pi, U+03C0 ISOgrk3
2031 "piv"-[982], % greek pi symbol, U+03D6 ISOgrk3
2032 "plusmn"-[177], % plus-minus sign = plus-or-minus sign,
2033 "pound"-[163], % pound sign, U+00A3 ISOnum>
2034 "prime"-[8242], % prime = minutes = feet, U+2032 ISOtech
2035 "Prime"-[8243], % double prime = seconds = inches,
2036 "prod"-[8719], % n-ary product = product sign,
2037 "prop"-[8733], % proportional to, U+221D ISOtech
2038 "Psi"-[936], % greek capital letter psi,
2039 "psi"-[968], % greek small letter psi, U+03C8 ISOgrk3
2040 "radic"-[8730], % square root = radical sign,
2041 "rang"-[9002], % right-pointing angle bracket = ket,
2042 "raquo"-[187], % right-pointing double angle quotation mark
2043 "rarr"-[8594], % rightwards arrow, U+2192 ISOnum
2044 "rArr"-[8658], % rightwards double arrow,
2045 "rceil"-[8969], % right ceiling, U+2309 ISOamsc
2046 "rdquo"-[8221], % right double quotation mark,
2047 "real"-[8476], % blackletter capital R = real part symbol,
2048 "reg"-[174], % registered sign = registered trade mark sign,
2049 "rfloor"-[8971], % right floor, U+230B ISOamsc
2050 "Rho"-[929], % greek capital letter rho, U+03A1
2051 "rho"-[961], % greek small letter rho, U+03C1 ISOgrk3
2052 "rlm"-[8207], % right-to-left mark, U+200F NEW RFC 2070
2053 "rsaquo"-[8250], % single right-pointing angle quotation mark,
2054 "rsquo"-[8217], % right single quotation mark,
2055 "sbquo"-[8218], % single low-9 quotation mark, U+201A NEW
2056 "Scaron"-[352], % latin capital letter S with caron,
2057 "scaron"-[353], % latin small letter s with caron,
2058 "sdot"-[8901], % dot operator, U+22C5 ISOamsb
2059 "sect"-[167], % section sign, U+00A7 ISOnum>
2060 "shy"-[173], % soft hyphen = discretionary hyphen,
2061 "Sigma"-[931], % greek capital letter sigma,
2062 "sigma"-[963], % greek small letter sigma,
2063 "sigmaf"-[962], % greek small letter final sigma,
2064 "sim"-[8764], % tilde operator = varies with = similar to,
2065 "spades"-[9824], % black spade suit, U+2660 ISOpub
2066 "sub"-[8834], % subset of, U+2282 ISOtech
2067 "sube"-[8838], % subset of or equal to, U+2286 ISOtech
2068 "sum"-[8721], % n-ary sumation, U+2211 ISOamsb
2069 "sup"-[8835], % superset of, U+2283 ISOtech
2070 "sup1"-[185], % superscript one = superscript digit one,
2071 "sup2"-[178], % superscript two = superscript digit two
2072 "sup3"-[179], % superscript three = superscript digit three
2073 "supe"-[8839], % superset of or equal to,
2074 "szlig"-[223], % latin small letter sharp s = ess-zed,
2075 "Tau"-[932], % greek capital letter tau, U+03A4
2076 "tau"-[964], % greek small letter tau, U+03C4 ISOgrk3
2077 "there4"-[8756], % therefore, U+2234 ISOtech
2078 "Theta"-[920], % greek capital letter theta,
2079 "theta"-[952], % greek small letter theta,
2080 "thetasym"-[977], % greek small letter theta symbol,
2081 "thinsp"-[8201], % thin space, U+2009 ISOpub
2082 "THORN"-[222], % latin capital letter THORN,
2083 "thorn"-[254], % latin small letter thorn with,
2084 "tilde"-[732], % small tilde, U+02DC ISOdia
2085 "times"-[215], % multiplication sign, U+00D7 ISOnum>
2086 "trade"-[8482], % trade mark sign, U+2122 ISOnum
2087 "Uacute"-[218], % latin capital letter U with acute,
2088 "uacute"-[250], % latin small letter u with acute,
2089 "uarr"-[8593], % upwards arrow, U+2191 ISOnum
2090 "uArr"-[8657], % upwards double arrow, U+21D1 ISOamsa
2091 "Ucirc"-[219], % latin capital letter U with circumflex,
2092 "ucirc"-[251], % latin small letter u with circumflex,
2093 "Ugrave"-[217], % latin capital letter U with grave,
2094 "ugrave"-[249], % latin small letter u with grave,
2095 "uml"-[168], % diaeresis = spacing diaeresis,
2096 "upsih"-[978], % greek upsilon with hook symbol,
2097 "Upsilon"-[933], % greek capital letter upsilon,
2098 "upsilon"-[965], % greek small letter upsilon,
2099 "Uuml"-[220], % latin capital letter U with diaeresis,
2100 "uuml"-[252], % latin small letter u with diaeresis,
2101 "weierp"-[8472], % script capital P = power set
2102 "Xi"-[926], % greek capital letter xi, U+039E ISOgrk3
2103 "xi"-[958], % greek small letter xi, U+03BE ISOgrk3
2104 "Yacute"-[221], % latin capital letter Y with acute,
2105 "yacute"-[253], % latin small letter y with acute,
2106 "yen"-[165], % yen sign = yuan sign, U+00A5 ISOnum>
2107 "yuml"-[255], % latin small letter y with diaeresis,
2108 "Yuml"-[376], % latin capital letter Y with diaeresis,
2109 "Zeta"-[918], % greek capital letter zeta, U+0396
2110 "zeta"-[950], % greek small letter zeta, U+03B6 ISOgrk3
2111 "zwj"-[8205], % zero width joiner, U+200D NEW RFC 2070
2112 "zwnj"-[8204] % zero width non-joiner,
2113 ] ).
2114
2115 /* chars( ?Chars, ?Plus, ?Minus ) used as chars( ?Chars ) in a DCG to
2116 * copy the list Chars inline.
2117 *
2118 * This is best expressed in terms of append/3 where append/3 is built-in.
2119 * For other Prologs, a straightforward specification can be used:
2120 *
2121 * chars( [] ) --> "".
2122 * chars( [Char|Chars] ) -->
2123 * [Char],
2124 * chars( Chars ).
2125 */
2126 chars( Chars, Plus, Minus ) :-
2127 append( Chars, Minus, Plus ).