1 | % (c) 2016-2024 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
2 | % Heinrich Heine Universitaet Duesseldorf | |
3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html | |
4 | ||
5 | :- module(xml2b,[convert_xml_to_b/4, convert_json_to_b/3]). | |
6 | ||
7 | :- use_module(probsrc(module_information),[module_info/2]). | |
8 | :- module_info(group,external_functions). | |
9 | :- module_info(description,'This module transforms XML into a B representation.'). | |
10 | ||
11 | :- use_module(probsrc(xml_prob),[xml_parse/4]). | |
12 | %:- use_module(library(xml),[xml_parse/3]). | |
13 | :- use_module(library(lists)). | |
14 | :- use_module(probsrc(custom_explicit_sets),[convert_to_avl/2, construct_singleton_avl_set/2]). | |
15 | :- use_module(probsrc(error_manager)). | |
16 | :- use_module(probsrc(debug)). | |
17 | ||
18 | % type of generated XML elements: | |
19 | /* struct( | |
20 | recId: NATURAL1, | |
21 | pId:NATURAL, | |
22 | element:STRING, | |
23 | attributes: STRING +-> STRING, | |
24 | meta: STRING +-> STRING | |
25 | ) | |
26 | */ | |
27 | ||
28 | % convert a code list to a B sequence/set of records | |
29 | % Context is file name or string and is used for error messages only | |
30 | convert_xml_to_b(Codes,BContent,Context,Span) :- | |
31 | reset_counter(xml2b_current_line), | |
32 | set_counter(xml2b_current_line,1), | |
33 | peel_and_count_newlines(Codes,PeeledCodes), | |
34 | (debug_mode(off) -> true | |
35 | ; get_counter(xml2b_current_line,InitialLineNr), | |
36 | format('XML starting at line: ~w of ~w~n',[InitialLineNr,Context])), | |
37 | %statistics(walltime,[W1,_]), | |
38 | xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)],Span), % format(false) means we also see all newlines | |
39 | %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]), | |
40 | % xml_prob:xml_pp(xml(_Atts,Content)),nl, | |
41 | % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]), | |
42 | %print(Content),nl,nl, | |
43 | !, | |
44 | (convert_xml_content_to_b(Content,0,1,_,Conversion,[]) -> BContent = Conversion | |
45 | ; add_error(convert_xml_to_b,'Converting XML content to B failed:',Content,Span), % probably an internal error | |
46 | fail | |
47 | ). | |
48 | convert_xml_to_b(_,_,Context,Span) :- | |
49 | add_error(convert_xml_to_b,'XML Parsing failed, ensure that XML is valid in:',Context,Span),fail. | |
50 | ||
51 | ||
52 | convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}. | |
53 | convert_xml_content_to_b([H|T],ParentId,NextId,OutId) --> | |
54 | convert_xml_element(H,ParentId,NextId,Id2), | |
55 | convert_xml_content_to_b(T,ParentId,Id2,OutId). | |
56 | ||
57 | % TO DO: do we need to parse namespaces... | |
58 | % TO DO: store meta-information | |
59 | % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?> | |
60 | % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines ! | |
61 | ||
62 | convert_xml_element(Element, ParentId,NextId,OutId) --> | |
63 | % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl}, | |
64 | {xml_element(Element,Tag,Attributes,Content,LineNr)}, | |
65 | !, | |
66 | [(int(NextId), | |
67 | rec([ % note: fields have to be in order | |
68 | field(attributes,BAttributes), % partial function of Attributes | |
69 | field(element,string(Tag)), | |
70 | field(meta,MetaAttributes), % such as xmlLineNumber | |
71 | field(pId,int(ParentId)), % id of the parent XML record | |
72 | field(recId,int(NextId)) % id of the XML element/record | |
73 | ]) | |
74 | )], | |
75 | {Id2 is NextId+1, | |
76 | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), | |
77 | construct_singleton_avl_set((string(xmlLineNumber),string(LineNrAtom)),MetaAttributes), | |
78 | l_convert_xml_attributes(Attributes,BAttributesList), | |
79 | convert_to_avl(BAttributesList,BAttributes) | |
80 | }, | |
81 | convert_xml_content_to_b(Content,NextId,Id2,OutId). | |
82 | convert_xml_element(namespace( URL, NS, Element), ParentId,NextId,OutId) --> !, % what should we do with the namespace? | |
83 | {(URL='', NS=[] -> true | |
84 | ; get_counter(xml2b_current_line,LineNr), | |
85 | format('*** Ignoring XML namespace "~s" annotation on line ~w with URL: ~w~n',[NS,LineNr,URL]))}, | |
86 | convert_xml_element(Element, ParentId,NextId,OutId). | |
87 | convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines | |
88 | % no need to count_newlines; already done below in xml_element before C1 \= [] | |
89 | convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}. | |
90 | convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !, | |
91 | {get_counter(xml2b_current_line,LineNr), | |
92 | format('Ignoring XML instructions annotation on line ~w: ~w : ~w~n',[LineNr,Name,Chars])}. | |
93 | convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !, | |
94 | {get_counter(xml2b_current_line,LineNr), | |
95 | format('Ignoring XML doctype annotation on line ~w: ~w : ~w~n',[LineNr,Tag,DoctypeId])}. | |
96 | convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !, | |
97 | % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog | |
98 | convert_xml_element(pcdata(Codes),ParentId,Id,OutId). | |
99 | convert_xml_element(El,_,Id,Id) --> | |
100 | {get_counter(xml2b_current_line,LineNr), | |
101 | format('*** Unknown XML element on line ~w: ~w~n',[LineNr,El])}. | |
102 | ||
103 | ||
104 | xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :- | |
105 | get_counter(xml2b_current_line,CurLineNr), | |
106 | (select('='('$attribute_linefeeds',LF),XAttributes,Rest) | |
107 | -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl, | |
108 | set_counter(xml2b_current_line,LineNr) | |
109 | ; LineNr = CurLineNr, Attributes=XAttributes). | |
110 | xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :- | |
111 | peel_and_count_newlines(Codes,C1), | |
112 | C1 \= [], | |
113 | get_counter(xml2b_current_line,LineNr), | |
114 | count_newlines_and_trim(C1,NewCodes). | |
115 | ||
116 | ||
117 | l_convert_xml_attributes([],[]). | |
118 | l_convert_xml_attributes([H|T],[CH|CT]) :- convert_xml_attributes(H,CH), | |
119 | l_convert_xml_attributes(T,CT). | |
120 | ||
121 | convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes). | |
122 | convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]). | |
123 | ||
124 | ||
125 | :- use_module(extension('counter/counter'), | |
126 | [counter_init/0, new_counter/1, get_counter/2, inc_counter/1, reset_counter/1, set_counter/2]). | |
127 | ||
128 | count_newlines([]) :- !. | |
129 | count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T). | |
130 | count_newlines([_|T]) :- count_newlines(T). | |
131 | ||
132 | % peel leading newlines | |
133 | peel_and_count_newlines([],Res) :- !, Res=[]. | |
134 | peel_and_count_newlines([H|T],Res) :- (whitespace(H) -> peel_and_count_newlines(T,Res) | |
135 | ; H=10 -> inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res) | |
136 | ; H=13 -> inc_counter(xml2b_current_line), | |
137 | (T=[10|TX] | |
138 | -> peel_and_count_newlines(TX,Res) | |
139 | ; peel_and_count_newlines(T,Res)) | |
140 | ; Res = [H|T]). | |
141 | %peel_and_count_newlines(R,R). | |
142 | ||
143 | % count new lines and trim trailing newlines | |
144 | count_newlines_and_trim([],Res) :- !, Res=[]. | |
145 | count_newlines_and_trim(HT,Res) :- newline(HT,R),!, inc_counter(xml2b_current_line), | |
146 | count_newlines_and_trim3(R,[10],Res). | |
147 | count_newlines_and_trim([H|T],[H|TRes]) :- | |
148 | count_newlines_and_trim(T,TRes). | |
149 | ||
150 | count_newlines_and_trim3([],_,Res) :- !, Res=[]. % ignore accumulator | |
151 | count_newlines_and_trim3(HT,Acc,Res) :- newline(HT,R),!, inc_counter(xml2b_current_line), | |
152 | count_newlines_and_trim3(R,[10|Acc],Res). | |
153 | count_newlines_and_trim3([H|T],Acc,Res) :- | |
154 | revacc(Acc,[H|TRes],Res), count_newlines_and_trim(T,TRes). | |
155 | ||
156 | revacc([],Acc,Acc). | |
157 | revacc([H|T],Acc,Res) :- revacc(T,[H|Acc],Res). | |
158 | ||
159 | newline([H|T],R) :- (H=10 -> R=T ; H=13 -> (T=[10|TX] -> R=TX ; R=T)). | |
160 | % TO DO: should we check if we are on Windows ? | |
161 | ||
162 | whitespace(9). % tab | |
163 | whitespace(32). | |
164 | ||
165 | xml2b_startup :- % call once at startup to ensure all counters exist | |
166 | counter_init, | |
167 | new_counter(xml2b_current_line). | |
168 | ||
169 | :- use_module(probsrc(eventhandling),[register_event_listener/3]). | |
170 | :- register_event_listener(startup_prob,xml2b_startup, | |
171 | 'Initialise xml2b Counters.'). | |
172 | ||
173 | % ------------------------------ | |
174 | % convert a format received by json_parser:json_parse_file into the same format as READ_XML | |
175 | ||
176 | convert_json_to_b(Json,BContent,Span) :- !, | |
177 | (convert_json_element(Json,0,1,_,1,Span,Conversion,[]) | |
178 | -> BContent=Conversion | |
179 | ; add_error(convert_json_to_b,'Converting JSON content to B failed:',Json,Span), % probably an internal error | |
180 | fail | |
181 | ). | |
182 | ||
183 | convert_json_content_to_b([],_ParentId,NextId,OutId,_,_Span) --> {OutId=NextId}. | |
184 | convert_json_content_to_b([H|T],ParentId,NextId,OutId,LineNr,Span) --> | |
185 | convert_json_element(H,ParentId,NextId,Id2,LineNr,Span), | |
186 | convert_json_content_to_b(T,ParentId,Id2,OutId,LineNr,Span). | |
187 | ||
188 | :- use_module(probsrc(tools_strings),[ajoin/2]). | |
189 | convert_json_element(Element,ParentId,NextId,OutId,_LineNr,Span) --> | |
190 | {json_element(Element,Tag,BAttributesList,Content,LineNr)}, | |
191 | !, | |
192 | {var(LineNr) -> LineNr=0 ; true}, | |
193 | [(int(NextId), | |
194 | rec([ % note: fields have to be in order | |
195 | field(attributes,BAttributes), % partial function of Attributes | |
196 | field(element,string(Tag)), | |
197 | field(meta,MetaAttributes), % such as xmlLineNumber | |
198 | field(pId,int(ParentId)), % id of the parent XML record | |
199 | field(recId,int(NextId)) % id of the XML element/record | |
200 | ]) | |
201 | )], | |
202 | {Id2 is NextId+1, | |
203 | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), | |
204 | convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes), | |
205 | convert_to_avl(BAttributesList,BAttributes) | |
206 | }, | |
207 | convert_json_content_to_b(Content,NextId,Id2,OutId,LineNr,Span). | |
208 | convert_json_element(El,_,Id,Id,LineNr,Span) --> | |
209 | {ajoin(['Cannot translate JSON element at line ',LineNr,' to B:'],Msg), | |
210 | add_error(convert_json_to_b,Msg,El,Span)}. | |
211 | ||
212 | json_element(@(null),null,[],[],_) :- !. % unknown line number | |
213 | json_element(@(true),boolean,[(string(value),string(true))],[],_) :- !. % unknown line number | |
214 | json_element(@(false),boolean,[(string(value),string(false))],[],_) :- !. % unknown line number | |
215 | json_element(number(Number),number,[(string(value),string(Atom))],[],_) :- !, % unknown line number | |
216 | number_codes(Number,Codes), atom_codes(Atom,Codes). | |
217 | json_element(string(String),string,[(string(value),string(String))],[],_) :- !. % unknown line number | |
218 | json_element(json(Object),object,[],Object,_) :- !. % unknown line number | |
219 | json_element(array(Array),array,[],Array,_) :- !. % unknown line number | |
220 | json_element('='(Key,Value,Pos),attribute,[(string(key),string(Key))],[Value],LineNr) :- !, get_line_nr(Pos,LineNr). | |
221 | ||
222 | get_line_nr(L-_,L) :- !. | |
223 | get_line_nr(E,0) :- writeq(E), format('*** No position info: ~w~n',[E]). |