| 1 | % (c) 2016-2019 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
| 2 | % Heinrich Heine Universitaet Duesseldorf | |
| 3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html | |
| 4 | ||
| 5 | :- module(xml2b,[convert_xml_to_b/2]). | |
| 6 | ||
| 7 | :- use_module(module_information,[module_info/2]). | |
| 8 | :- module_info(group,external_functions). | |
| 9 | :- module_info(description,'This module transforms XML into a B representation.'). | |
| 10 | ||
| 11 | %:- use_module(library(xml)). | |
| 12 | :- use_module(xml_prob). | |
| 13 | :- use_module(library(lists)). | |
| 14 | :- use_module(custom_explicit_sets,[convert_to_avl/2]). | |
| 15 | :- use_module(error_manager). | |
| 16 | :- use_module(debug). | |
| 17 | ||
| 18 | % type of generated XML elements: | |
| 19 | /* struct( | |
| 20 | recId: NATURAL1, | |
| 21 | pId:NATURAL, | |
| 22 | element:STRING, | |
| 23 | attributes: STRING +-> STRING, | |
| 24 | meta: STRING +-> STRING | |
| 25 | ) | |
| 26 | */ | |
| 27 | ||
| 28 | % convert a code list to a B sequence/set of records | |
| 29 | convert_xml_to_b(Codes,BContent) :- | |
| 30 | reset_counter(xml2b_current_line), | |
| 31 | set_counter(xml2b_current_line,1), | |
| 32 | peel_and_count_newlines(Codes,PeeledCodes), | |
| 33 | (debug_mode(off) -> true | |
| 34 | ; get_counter(xml2b_current_line,InitialLineNr), format('XML starting at line: ~w~n',[InitialLineNr])), | |
| 35 | %statistics(walltime,[W1,_]), | |
| 36 | xml_parse(PeeledCodes,xml(_Atts,Content),[format(false)]), % format(false) means we also see all newlines | |
| 37 | %statistics(walltime,[W2,_]), W is W2-W1, formatsilent('% Walltime ~w ms to parse XML using Prolog library~n',[W]), | |
| 38 | % xml_pp(xml(_Atts,Content)),nl, | |
| 39 | % xml_parse(Codes2,xml(_Atts,Content),[]), format('XML:~n~s~n',[Codes2]), | |
| 40 | %print(Content),nl,nl, | |
| 41 | !, | |
| 42 | convert_xml_content_to_b(Content,0,1,_,BContent,[]). | |
| 43 | convert_xml_to_b(_,_) :- add_error(convert_xml_to_b,'XML Parsing failed'),fail. | |
| 44 | ||
| 45 | ||
| 46 | convert_xml_content_to_b([],_ParentId,NextId,OutId) --> {OutId=NextId}. | |
| 47 | convert_xml_content_to_b([H|T],ParentId,NextId,OutId) --> | |
| 48 | convert_xml_element(H,ParentId,NextId,Id2), | |
| 49 | convert_xml_content_to_b(T,ParentId,Id2,OutId). | |
| 50 | ||
| 51 | % TO DO: do we need to parse namespaces... | |
| 52 | % TO DO: store meta-information | |
| 53 | % TO DO: adapt line numbers when newlines inside initial tag: <?xml version="1.0" encoding="ASCII"?> | |
| 54 | % TO DO: deal with new lines inside tags <Data version= "03.04"> : Prolog library hides those newlines ! | |
| 55 | ||
| 56 | convert_xml_element(Element, ParentId,NextId,OutId) --> | |
| 57 | % {get_counter(xml2b_current_line,CurNr),nl,print(el(CurNr,Element)),nl}, | |
| 58 | {xml_element(Element,Tag,Attributes,Content,LineNr)}, | |
| 59 | !, | |
| 60 | [(int(NextId), | |
| 61 | rec([ % note: fields have to be in order | |
| 62 | field(attributes,BAttributes), % partial function of Attributes | |
| 63 | field(element,string(Tag)), | |
| 64 | field(meta,MetaAttributes), % such as xmlLineNumber | |
| 65 | field(pId,int(ParentId)), % id of the parent XML record | |
| 66 | field(recId,int(NextId)) % id of the XML element/record | |
| 67 | ]) | |
| 68 | )], | |
| 69 | {Id2 is NextId+1, | |
| 70 | number_codes(LineNr,LCC), atom_codes(LineNrAtom,LCC), | |
| 71 | convert_to_avl([(string(xmlLineNumber),string(LineNrAtom))],MetaAttributes), | |
| 72 | maplist(convert_xml_attributes,Attributes,BAttributesList), | |
| 73 | convert_to_avl(BAttributesList,BAttributes) | |
| 74 | }, | |
| 75 | convert_xml_content_to_b(Content,NextId,Id2,OutId). | |
| 76 | convert_xml_element(namespace( URL, _, Element), ParentId,NextId,OutId) --> % what should we do with the name space ? | |
| 77 | {format('*** Ignoring XML namespace annotation: ~w~n',[URL])}, | |
| 78 | convert_xml_element(Element, ParentId,NextId,OutId). | |
| 79 | convert_xml_element(pcdata(_Codes),_,Id,Id) --> !. % should only happen when Codes consists solely of newlines | |
| 80 | % no need to count_newlines; already done below in xml_element before C1 \= []. | |
| 81 | convert_xml_element(comment(Codes),_,Id,Id) --> !,{count_newlines(Codes)}. | |
| 82 | convert_xml_element(instructions(Name, Chars),_,Id,Id) --> !, | |
| 83 | {format('Ignoring XML instructions annotation: ~w : ~w~n',[Name,Chars])}. | |
| 84 | convert_xml_element(doctype(Tag, DoctypeId),_,Id,Id) --> !, | |
| 85 | {format('Ignoring XML doctype annotation: ~w : ~w~n',[Tag,DoctypeId])}. | |
| 86 | convert_xml_element(cdata(Codes),ParentId,Id,OutId) --> !, | |
| 87 | % TO DO: tab expansion? see http://binding-time.co.uk/wiki/index.php/Parsing_XML_with_Prolog | |
| 88 | convert_xml_element(pcdata(Codes),ParentId,Id,OutId). | |
| 89 | convert_xml_element(El,_,Id,Id) --> | |
| 90 | {format('*** Unknown XML element: ~w~n',[El])}. | |
| 91 | ||
| 92 | ||
| 93 | xml_element(element(Tag,XAttributes,Content),Tag,Attributes,Content,LineNr) :- | |
| 94 | get_counter(xml2b_current_line,CurLineNr), | |
| 95 | (select('='('$attribute_linefeeds',LF),XAttributes,Rest) | |
| 96 | -> LineNr is CurLineNr+LF, Attributes = Rest, %print(inc_line_nr(CurLineNr,LF)),nl, | |
| 97 | set_counter(xml2b_current_line,LineNr) | |
| 98 | ; LineNr = CurLineNr, Attributes=XAttributes). | |
| 99 | xml_element(pcdata(Codes),'CText',['='(text,NewCodes)],[],LineNr) :- | |
| 100 | peel_and_count_newlines(Codes,C1), | |
| 101 | C1 \= [], | |
| 102 | get_counter(xml2b_current_line,LineNr), | |
| 103 | reverse(C1,RC), | |
| 104 | peel_and_count_newlines(RC,RC2), % remove newlines at end; try avoid B strings with newlines in them | |
| 105 | reverse(RC2,NewCodes). | |
| 106 | ||
| 107 | convert_xml_attributes('='(Attr,Codes),(string(Attr),string(Atom))) :- !,atom_codes(Atom,Codes), | |
| 108 | count_newlines(Codes). | |
| 109 | convert_xml_attributes(A,(string(error),string(A))) :- format('**** UNKNOWN ATTRIBUTE: ~w~n',[A]). | |
| 110 | ||
| 111 | ||
| 112 | :- use_module(extension('counter/counter')). | |
| 113 | ||
| 114 | count_newlines([]). | |
| 115 | count_newlines(HT) :- newline(HT,T),!, inc_counter(xml2b_current_line), count_newlines(T). | |
| 116 | count_newlines([_|T]) :- count_newlines(T). | |
| 117 | ||
| 118 | % peel leading newlines | |
| 119 | peel_and_count_newlines([],[]). | |
| 120 | peel_and_count_newlines(HT,Res) :- newline(HT,T),!, inc_counter(xml2b_current_line), peel_and_count_newlines(T,Res). | |
| 121 | peel_and_count_newlines([H|T],Res) :- whitespace(H),!, peel_and_count_newlines(T,Res). | |
| 122 | peel_and_count_newlines(R,R). | |
| 123 | ||
| 124 | ||
| 125 | newline([10|T],T). | |
| 126 | newline([13|X],T) :- (X=[10|TX] -> T=TX ; T=X). % TO DO: should we check if we are on Windows ? | |
| 127 | ||
| 128 | whitespace(9). % tab | |
| 129 | whitespace(32). | |
| 130 | ||
| 131 | xml2b_startup :- % call once at startup to ensure all counters exist | |
| 132 | counter_init, | |
| 133 | new_counter(xml2b_current_line). | |
| 134 | ||
| 135 | :- use_module(eventhandling,[register_event_listener/3]). | |
| 136 | :- register_event_listener(startup_prob,xml2b_startup, | |
| 137 | 'Initialise xml2b Counters.'). |