1 % (c) 2016-2023 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2 % Heinrich Heine Universitaet Duesseldorf
3 % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html
4
5 :- module(xml2b,[convert_xml_to_b/4, convert_json_to_b/3]).
6
7 :- use_module(probsrc(module_information),[module_info/2]).
8 :- module_info(group,external_functions).
9 :- module_info(description,'This module transforms XML into a B representation.').
10
11 :- use_module(probsrc(xml_prob),[xml_parse/4]).
12 %:- use_module(library(xml),[xml_parse/3]).
13 :- use_module(library(lists)).
14 :- use_module(probsrc(custom_explicit_sets),[convert_to_avl/2, construct_singleton_avl_set/2]).
15 :- use_module(probsrc(error_manager)).
16 :- use_module(probsrc(debug)).
17
18 % type of generated XML elements:
19 /* struct(
20 recId: NATURAL1,
21 pId:NATURAL,
22 element:STRING,
23 attributes: STRING +-> STRING,
24 meta: STRING +-> STRING
25 )
26 */
27
28 % convert a code list to a B sequence/set of records
29 % Context is file name or string and is used for error messages only
30 convert_xml_to_b(Codes,BContent,Context,Span) :-
31 reset_counter(xml2b_current_line),
32 set_counter(xml2b_current_line,1),
33 peel_and_count_newlines(Codes,PeeledCodes),
34 (debug_mode(off) -> true
35 ; get_counter(xml2b_current_line,InitialLineNr),
36 format('XML starting at line: ~w of ~w~n',[InitialLineNr,Context])),
37 %statistics(walltime,[W1,_]),
38 xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)],Span), % format(false) means we also see all newlines
39 %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]),
40 % xml_prob:xml_pp(xml(_Atts,Content)),nl,
41 % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]),
42 %print(Content),nl,nl,
43 !,
44 (convert_xml_content_to_b(Content,0,1,_,Conversion,[]) -> BContent = Conversion
45 ; add_error(convert_xml_to_b,'Converting XML content to B failed:',Content,Span), % probably an internal error
46 fail
47 ).
48 convert_xml_to_b(_,_,Context,Span) :-
49 add_error(convert_xml_to_b,'XML Parsing failed, ensure that XML is valid in:',Context,Span),fail.
50
51
52 convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}.
53 convert_xml_content_to_b([H|T],ParentId,NextId,OutId) -->
54 convert_xml_element(H,ParentId,NextId,Id2),
55 convert_xml_content_to_b(T,ParentId,Id2,OutId).
56
57 % TO DO: do we need to parse namespaces...
58 % TO DO: store meta-information
59 % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?>
60 % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines !
61
62 convert_xml_element(Element, ParentId,NextId,OutId) -->
63 % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl},
64 {xml_element(Element,Tag,Attributes,Content,LineNr)},
65 !,
66 [(int(NextId),
67 rec([ % note: fields have to be in order
68 field(attributes,BAttributes), % partial function of Attributes
69 field(element,string(Tag)),
70 field(meta,MetaAttributes), % such as xmlLineNumber
71 field(pId,int(ParentId)), % id of the parent XML record
72 field(recId,int(NextId)) % id of the XML element/record
73 ])
74 )],
75 {Id2 is NextId+1,
76 number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC),
77 construct_singleton_avl_set((string(xmlLineNumber),string(LineNrAtom)),MetaAttributes),
78 l_convert_xml_attributes(Attributes,BAttributesList),
79 convert_to_avl(BAttributesList,BAttributes)
80 },
81 convert_xml_content_to_b(Content,NextId,Id2,OutId).
82 convert_xml_element(namespace( URL, NS, Element), ParentId,NextId,OutId) --> !, % what should we do with the namespace?
83 {(URL='', NS=[] -> true
84 ; get_counter(xml2b_current_line,LineNr),
85 format('*** Ignoring XML namespace "~s" annotation on line ~w with URL: ~w~n',[NS,LineNr,URL]))},
86 convert_xml_element(Element, ParentId,NextId,OutId).
87 convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines
88 % no need to count_newlines; already done below in xml_element before C1 \= []
89 convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}.
90 convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !,
91 {get_counter(xml2b_current_line,LineNr),
92 format('Ignoring XML instructions annotation on line ~w: ~w : ~w~n',[LineNr,Name,Chars])}.
93 convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !,
94 {get_counter(xml2b_current_line,LineNr),
95 format('Ignoring XML doctype annotation on line ~w: ~w : ~w~n',[LineNr,Tag,DoctypeId])}.
96 convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !,
97 % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog
98 convert_xml_element(pcdata(Codes),ParentId,Id,OutId).
99 convert_xml_element(El,_,Id,Id) -->
100 {get_counter(xml2b_current_line,LineNr),
101 format('*** Unknown XML element on line ~w: ~w~n',[LineNr,El])}.
102
103
104 xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :-
105 get_counter(xml2b_current_line,CurLineNr),
106 (select('='('$attribute_linefeeds',LF),XAttributes,Rest)
107 -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl,
108 set_counter(xml2b_current_line,LineNr)
109 ; LineNr = CurLineNr, Attributes=XAttributes).
110 xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :-
111 peel_and_count_newlines(Codes,C1),
112 C1 \= [],
113 get_counter(xml2b_current_line,LineNr),
114 count_newlines_and_trim(C1,NewCodes).
115
116
117 l_convert_xml_attributes([],[]).
118 l_convert_xml_attributes([H|T],[CH|CT]) :- convert_xml_attributes(H,CH),
119 l_convert_xml_attributes(T,CT).
120
121 convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes).
122 convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]).
123
124
125 :- use_module(extension('counter/counter'),
126 [counter_init/0, new_counter/1, get_counter/2, inc_counter/1, reset_counter/1, set_counter/2]).
127
128 count_newlines([]) :- !.
129 count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T).
130 count_newlines([_|T]) :- count_newlines(T).
131
132 % peel leading newlines
133 peel_and_count_newlines([],Res) :- !, Res=[].
134 peel_and_count_newlines([H|T],Res) :- (whitespace(H) -> peel_and_count_newlines(T,Res)
135 ; H=10 -> inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res)
136 ; H=13 -> inc_counter(xml2b_current_line),
137 (T=[10|TX]
138 -> peel_and_count_newlines(TX,Res)
139 ; peel_and_count_newlines(T,Res))
140 ; Res = [H|T]).
141 %peel_and_count_newlines(R,R).
142
143 % count new lines and trim trailing newlines
144 count_newlines_and_trim([],Res) :- !, Res=[].
145 count_newlines_and_trim(HT,Res) :- newline(HT,R),!, inc_counter(xml2b_current_line),
146 count_newlines_and_trim3(R,[10],Res).
147 count_newlines_and_trim([H|T],[H|TRes]) :-
148 count_newlines_and_trim(T,TRes).
149
150 count_newlines_and_trim3([],_,Res) :- !, Res=[]. % ignore accumulator
151 count_newlines_and_trim3(HT,Acc,Res) :- newline(HT,R),!, inc_counter(xml2b_current_line),
152 count_newlines_and_trim3(R,[10|Acc],Res).
153 count_newlines_and_trim3([H|T],Acc,Res) :-
154 revacc(Acc,[H|TRes],Res), count_newlines_and_trim(T,TRes).
155
156 revacc([],Acc,Acc).
157 revacc([H|T],Acc,Res) :- revacc(T,[H|Acc],Res).
158
159 newline([H|T],R) :- (H=10 -> R=T ; H=13 -> (T=[10|TX] -> R=TX ; R=T)).
160 % TO DO: should we check if we are on Windows ?
161
162 whitespace(9). % tab
163 whitespace(32).
164
165 xml2b_startup :- % call once at startup to ensure all counters exist
166 counter_init,
167 new_counter(xml2b_current_line).
168
169 :- use_module(probsrc(eventhandling),[register_event_listener/3]).
170 :- register_event_listener(startup_prob,xml2b_startup,
171 'Initialise xml2b Counters.').
172
173 % ------------------------------
174 % convert a format received by json_parser:json_parse_file into the same format as READ_XML
175
176 convert_json_to_b(Json,BContent,Span) :- !,
177 (convert_json_element(Json,0,1,_,1,Span,Conversion,[])
178 -> BContent=Conversion
179 ; add_error(convert_json_to_b,'Converting JSON content to B failed:',Json,Span), % probably an internal error
180 fail
181 ).
182
183 convert_json_content_to_b([],_ParentId,NextId,OutId,_,_Span) --> {OutId=NextId}.
184 convert_json_content_to_b([H|T],ParentId,NextId,OutId,LineNr,Span) -->
185 convert_json_element(H,ParentId,NextId,Id2,LineNr,Span),
186 convert_json_content_to_b(T,ParentId,Id2,OutId,LineNr,Span).
187
188 :- use_module(probsrc(tools_strings),[ajoin/2]).
189 convert_json_element(Element,ParentId,NextId,OutId,_LineNr,Span) -->
190 {json_element(Element,Tag,BAttributesList,Content,LineNr)},
191 !,
192 {var(LineNr) -> LineNr=0 ; true},
193 [(int(NextId),
194 rec([ % note: fields have to be in order
195 field(attributes,BAttributes), % partial function of Attributes
196 field(element,string(Tag)),
197 field(meta,MetaAttributes), % such as xmlLineNumber
198 field(pId,int(ParentId)), % id of the parent XML record
199 field(recId,int(NextId)) % id of the XML element/record
200 ])
201 )],
202 {Id2 is NextId+1,
203 number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC),
204 convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes),
205 convert_to_avl(BAttributesList,BAttributes)
206 },
207 convert_json_content_to_b(Content,NextId,Id2,OutId,LineNr,Span).
208 convert_json_element(El,_,Id,Id,LineNr,Span) -->
209 {ajoin(['Cannot translate JSON element at line ',LineNr,' to B:'],Msg),
210 add_error(convert_json_to_b,Msg,El,Span)}.
211
212 json_element(@(null),null,[],[],_) :- !. % unknown line number
213 json_element(@(true),boolean,[(string(value),string(true))],[],_) :- !. % unknown line number
214 json_element(@(false),boolean,[(string(value),string(false))],[],_) :- !. % unknown line number
215 json_element(number(Number),number,[(string(value),string(Atom))],[],_) :- !, % unknown line number
216 number_codes(Number,Codes), atom_codes(Atom,Codes).
217 json_element(string(String),string,[(string(value),string(String))],[],_) :- !. % unknown line number
218 json_element(json(Object),object,[],Object,_) :- !. % unknown line number
219 json_element(array(Array),array,[],Array,_) :- !. % unknown line number
220 json_element('='(Key,Value,Pos),attribute,[(string(key),string(Key))],[Value],LineNr) :- !, get_line_nr(Pos,LineNr).
221
222 get_line_nr(L-_,L) :- !.
223 get_line_nr(E,0) :- writeq(E), format('*** No position info: ~w~n',[E]).