| 1 | % (c) 2016-2024 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
| 2 | % Heinrich Heine Universitaet Duesseldorf | |
| 3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html | |
| 4 | ||
| 5 | :- module(xml2b,[convert_xml_to_b/4, convert_json_to_b/3]). | |
| 6 | ||
| 7 | :- use_module(probsrc(module_information),[module_info/2]). | |
| 8 | :- module_info(group,external_functions). | |
| 9 | :- module_info(description,'This module transforms XML into a B representation.'). | |
| 10 | ||
| 11 | :- use_module(probsrc(xml_prob),[xml_parse/4]). | |
| 12 | %:- use_module(library(xml),[xml_parse/3]). | |
| 13 | :- use_module(library(lists)). | |
| 14 | :- use_module(probsrc(custom_explicit_sets),[convert_to_avl/2, construct_singleton_avl_set/2]). | |
| 15 | :- use_module(probsrc(error_manager)). | |
| 16 | :- use_module(probsrc(debug)). | |
| 17 | ||
| 18 | % type of generated XML elements: | |
| 19 | /* struct( | |
| 20 | recId: NATURAL1, | |
| 21 | pId:NATURAL, | |
| 22 | element:STRING, | |
| 23 | attributes: STRING +-> STRING, | |
| 24 | meta: STRING +-> STRING | |
| 25 | ) | |
| 26 | */ | |
| 27 | ||
| 28 | % convert a code list to a B sequence/set of records | |
| 29 | % Context is file name or string and is used for error messages only | |
| 30 | convert_xml_to_b(Codes,BContent,Context,Span) :- | |
| 31 | reset_counter(xml2b_current_line), | |
| 32 | set_counter(xml2b_current_line,1), | |
| 33 | peel_and_count_newlines(Codes,PeeledCodes), | |
| 34 | (debug_mode(off) -> true | |
| 35 | ; get_counter(xml2b_current_line,InitialLineNr), | |
| 36 | format('XML starting at line: ~w of ~w~n',[InitialLineNr,Context])), | |
| 37 | %statistics(walltime,[W1,_]), | |
| 38 | xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)],Span), % format(false) means we also see all newlines | |
| 39 | %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]), | |
| 40 | % xml_prob:xml_pp(xml(_Atts,Content)),nl, | |
| 41 | % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]), | |
| 42 | %print(Content),nl,nl, | |
| 43 | !, | |
| 44 | (convert_xml_content_to_b(Content,0,1,_,Conversion,[]) -> BContent = Conversion | |
| 45 | ; add_error(convert_xml_to_b,'Converting XML content to B failed:',Content,Span), % probably an internal error | |
| 46 | fail | |
| 47 | ). | |
| 48 | convert_xml_to_b(_,_,Context,Span) :- | |
| 49 | add_error(convert_xml_to_b,'XML Parsing failed, ensure that XML is valid in:',Context,Span),fail. | |
| 50 | ||
| 51 | ||
| 52 | convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}. | |
| 53 | convert_xml_content_to_b([H|T],ParentId,NextId,OutId) --> | |
| 54 | convert_xml_element(H,ParentId,NextId,Id2), | |
| 55 | convert_xml_content_to_b(T,ParentId,Id2,OutId). | |
| 56 | ||
| 57 | % TO DO: do we need to parse namespaces... | |
| 58 | % TO DO: store meta-information | |
| 59 | % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?> | |
| 60 | % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines ! | |
| 61 | ||
| 62 | convert_xml_element(Element, ParentId,NextId,OutId) --> | |
| 63 | % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl}, | |
| 64 | {xml_element(Element,Tag,Attributes,Content,LineNr)}, | |
| 65 | !, | |
| 66 | [(int(NextId), | |
| 67 | rec([ % note: fields have to be in order | |
| 68 | field(attributes,BAttributes), % partial function of Attributes | |
| 69 | field(element,string(Tag)), | |
| 70 | field(meta,MetaAttributes), % such as xmlLineNumber | |
| 71 | field(pId,int(ParentId)), % id of the parent XML record | |
| 72 | field(recId,int(NextId)) % id of the XML element/record | |
| 73 | ]) | |
| 74 | )], | |
| 75 | {Id2 is NextId+1, | |
| 76 | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), | |
| 77 | construct_singleton_avl_set((string(xmlLineNumber),string(LineNrAtom)),MetaAttributes), | |
| 78 | l_convert_xml_attributes(Attributes,BAttributesList), | |
| 79 | convert_to_avl(BAttributesList,BAttributes) | |
| 80 | }, | |
| 81 | convert_xml_content_to_b(Content,NextId,Id2,OutId). | |
| 82 | convert_xml_element(namespace( URL, NS, Element), ParentId,NextId,OutId) --> !, % what should we do with the namespace? | |
| 83 | {(URL='', NS=[] -> true | |
| 84 | ; get_counter(xml2b_current_line,LineNr), | |
| 85 | format('*** Ignoring XML namespace "~s" annotation on line ~w with URL: ~w~n',[NS,LineNr,URL]))}, | |
| 86 | convert_xml_element(Element, ParentId,NextId,OutId). | |
| 87 | convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines | |
| 88 | % no need to count_newlines; already done below in xml_element before C1 \= [] | |
| 89 | convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}. | |
| 90 | convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !, | |
| 91 | {get_counter(xml2b_current_line,LineNr), | |
| 92 | format('Ignoring XML instructions annotation on line ~w: ~w : ~w~n',[LineNr,Name,Chars])}. | |
| 93 | convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !, | |
| 94 | {get_counter(xml2b_current_line,LineNr), | |
| 95 | format('Ignoring XML doctype annotation on line ~w: ~w : ~w~n',[LineNr,Tag,DoctypeId])}. | |
| 96 | convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !, | |
| 97 | % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog | |
| 98 | convert_xml_element(pcdata(Codes),ParentId,Id,OutId). | |
| 99 | convert_xml_element(El,_,Id,Id) --> | |
| 100 | {get_counter(xml2b_current_line,LineNr), | |
| 101 | format('*** Unknown XML element on line ~w: ~w~n',[LineNr,El])}. | |
| 102 | ||
| 103 | ||
| 104 | xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :- | |
| 105 | get_counter(xml2b_current_line,CurLineNr), | |
| 106 | (select('='('$attribute_linefeeds',LF),XAttributes,Rest) | |
| 107 | -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl, | |
| 108 | set_counter(xml2b_current_line,LineNr) | |
| 109 | ; LineNr = CurLineNr, Attributes=XAttributes). | |
| 110 | xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :- | |
| 111 | peel_and_count_newlines(Codes,C1), | |
| 112 | C1 \= [], | |
| 113 | get_counter(xml2b_current_line,LineNr), | |
| 114 | count_newlines_and_trim(C1,NewCodes). | |
| 115 | ||
| 116 | ||
| 117 | l_convert_xml_attributes([],[]). | |
| 118 | l_convert_xml_attributes([H|T],[CH|CT]) :- convert_xml_attributes(H,CH), | |
| 119 | l_convert_xml_attributes(T,CT). | |
| 120 | ||
| 121 | convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes). | |
| 122 | convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]). | |
| 123 | ||
| 124 | ||
| 125 | :- use_module(extension('counter/counter'), | |
| 126 | [counter_init/0, new_counter/1, get_counter/2, inc_counter/1, reset_counter/1, set_counter/2]). | |
| 127 | ||
| 128 | count_newlines([]) :- !. | |
| 129 | count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T). | |
| 130 | count_newlines([_|T]) :- count_newlines(T). | |
| 131 | ||
| 132 | % peel leading newlines | |
| 133 | peel_and_count_newlines([],Res) :- !, Res=[]. | |
| 134 | peel_and_count_newlines([H|T],Res) :- (whitespace(H) -> peel_and_count_newlines(T,Res) | |
| 135 | ; H=10 -> inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res) | |
| 136 | ; H=13 -> inc_counter(xml2b_current_line), | |
| 137 | (T=[10|TX] | |
| 138 | -> peel_and_count_newlines(TX,Res) | |
| 139 | ; peel_and_count_newlines(T,Res)) | |
| 140 | ; Res = [H|T]). | |
| 141 | %peel_and_count_newlines(R,R). | |
| 142 | ||
| 143 | % count new lines and trim trailing newlines | |
| 144 | count_newlines_and_trim([],Res) :- !, Res=[]. | |
| 145 | count_newlines_and_trim(HT,Res) :- newline(HT,R),!, inc_counter(xml2b_current_line), | |
| 146 | count_newlines_and_trim3(R,[10],Res). | |
| 147 | count_newlines_and_trim([H|T],[H|TRes]) :- | |
| 148 | count_newlines_and_trim(T,TRes). | |
| 149 | ||
| 150 | count_newlines_and_trim3([],_,Res) :- !, Res=[]. % ignore accumulator | |
| 151 | count_newlines_and_trim3(HT,Acc,Res) :- newline(HT,R),!, inc_counter(xml2b_current_line), | |
| 152 | count_newlines_and_trim3(R,[10|Acc],Res). | |
| 153 | count_newlines_and_trim3([H|T],Acc,Res) :- | |
| 154 | revacc(Acc,[H|TRes],Res), count_newlines_and_trim(T,TRes). | |
| 155 | ||
| 156 | revacc([],Acc,Acc). | |
| 157 | revacc([H|T],Acc,Res) :- revacc(T,[H|Acc],Res). | |
| 158 | ||
| 159 | newline([H|T],R) :- (H=10 -> R=T ; H=13 -> (T=[10|TX] -> R=TX ; R=T)). | |
| 160 | % TO DO: should we check if we are on Windows ? | |
| 161 | ||
| 162 | whitespace(9). % tab | |
| 163 | whitespace(32). | |
| 164 | ||
| 165 | xml2b_startup :- % call once at startup to ensure all counters exist | |
| 166 | counter_init, | |
| 167 | new_counter(xml2b_current_line). | |
| 168 | ||
| 169 | :- use_module(probsrc(eventhandling),[register_event_listener/3]). | |
| 170 | :- register_event_listener(startup_prob,xml2b_startup, | |
| 171 | 'Initialise xml2b Counters.'). | |
| 172 | ||
| 173 | % ------------------------------ | |
| 174 | % convert a format received by json_parser:json_parse_file into the same format as READ_XML | |
| 175 | ||
| 176 | convert_json_to_b(Json,BContent,Span) :- !, | |
| 177 | (convert_json_element(Json,0,1,_,1,Span,Conversion,[]) | |
| 178 | -> BContent=Conversion | |
| 179 | ; add_error(convert_json_to_b,'Converting JSON content to B failed:',Json,Span), % probably an internal error | |
| 180 | fail | |
| 181 | ). | |
| 182 | ||
| 183 | convert_json_content_to_b([],_ParentId,NextId,OutId,_,_Span) --> {OutId=NextId}. | |
| 184 | convert_json_content_to_b([H|T],ParentId,NextId,OutId,LineNr,Span) --> | |
| 185 | convert_json_element(H,ParentId,NextId,Id2,LineNr,Span), | |
| 186 | convert_json_content_to_b(T,ParentId,Id2,OutId,LineNr,Span). | |
| 187 | ||
| 188 | :- use_module(probsrc(tools_strings),[ajoin/2]). | |
| 189 | convert_json_element(Element,ParentId,NextId,OutId,_LineNr,Span) --> | |
| 190 | {json_element(Element,Tag,BAttributesList,Content,LineNr)}, | |
| 191 | !, | |
| 192 | {var(LineNr) -> LineNr=0 ; true}, | |
| 193 | [(int(NextId), | |
| 194 | rec([ % note: fields have to be in order | |
| 195 | field(attributes,BAttributes), % partial function of Attributes | |
| 196 | field(element,string(Tag)), | |
| 197 | field(meta,MetaAttributes), % such as xmlLineNumber | |
| 198 | field(pId,int(ParentId)), % id of the parent XML record | |
| 199 | field(recId,int(NextId)) % id of the XML element/record | |
| 200 | ]) | |
| 201 | )], | |
| 202 | {Id2 is NextId+1, | |
| 203 | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), | |
| 204 | convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes), | |
| 205 | convert_to_avl(BAttributesList,BAttributes) | |
| 206 | }, | |
| 207 | convert_json_content_to_b(Content,NextId,Id2,OutId,LineNr,Span). | |
| 208 | convert_json_element(El,_,Id,Id,LineNr,Span) --> | |
| 209 | {ajoin(['Cannot translate JSON element at line ',LineNr,' to B:'],Msg), | |
| 210 | add_error(convert_json_to_b,Msg,El,Span)}. | |
| 211 | ||
| 212 | json_element(@(null),null,[],[],_) :- !. % unknown line number | |
| 213 | json_element(@(true),boolean,[(string(value),string(true))],[],_) :- !. % unknown line number | |
| 214 | json_element(@(false),boolean,[(string(value),string(false))],[],_) :- !. % unknown line number | |
| 215 | json_element(number(Number),number,[(string(value),string(Atom))],[],_) :- !, % unknown line number | |
| 216 | number_codes(Number,Codes), atom_codes(Atom,Codes). | |
| 217 | json_element(string(String),string,[(string(value),string(String))],[],_) :- !. % unknown line number | |
| 218 | json_element(json(Object),object,[],Object,_) :- !. % unknown line number | |
| 219 | json_element(array(Array),array,[],Array,_) :- !. % unknown line number | |
| 220 | json_element('='(Key,Value,Pos),attribute,[(string(key),string(Key))],[Value],LineNr) :- !, get_line_nr(Pos,LineNr). | |
| 221 | ||
| 222 | get_line_nr(L-_,L) :- !. | |
| 223 | get_line_nr(E,0) :- writeq(E), format('*** No position info: ~w~n',[E]). |