1 | | :- module( json_parser, [ json_parse/2, json_parse/3, |
2 | | json_parse_file/2, json_parse_file/3 |
3 | | ] ). |
4 | | |
5 | | :- set_prolog_flag(double_quotes, codes). |
6 | | |
7 | | % based on https://github.com/yoroto/pl-json |
8 | | % have added line counting and error reporting, reading directly from file |
9 | | % fixed a few issues with whitespace and avoid backtracking |
10 | | % return strings using string(.) wrapper to avoid ambiguity with arrays of integers |
11 | | |
12 | | :- use_module(probsrc(error_manager),[add_error/3, add_error/2, add_warning/3]). |
13 | | :- use_module(library(lists)). |
14 | | |
15 | | json_parse_file(File, Json) :- |
16 | | json_parse_file(File, [strings_as_atoms(true),position_infos(false)], Json). |
17 | | |
18 | | % parse a JSON file |
19 | | json_parse_file(File, Options, Json) :- |
20 | | maplist(process_option,Options), |
21 | | catch(read_file(File,Codes), error(existence_error(_,_),_E), |
22 | | (add_error(json_parse_file,'JSON file does not exist:',File),fail)), |
23 | | %format('Read:~n~s~n',[Codes]), |
24 | | (json_parse(Codes,Json) -> true |
25 | | ; Codes = [] -> add_error(json_parse_file,'JSON file is empty:',File),fail |
26 | | ; add_error(json_parse_file,'Could not parse JSON file:',File),fail |
27 | | ). |
28 | | |
29 | | process_option(strings_as_atoms(X)) :- !,retractall(convert_strings_to_atoms(_)), assertz(convert_strings_to_atoms(X)). |
30 | | process_option(position_infos(X)) :- !,retractall(add_position_infos(_)), assertz(add_position_infos(X)). |
31 | | process_option(O) :- add_warning(json_parser,'Unrecognized option:',O). |
32 | | |
33 | | |
34 | | read_file(Filename,Codes) :- |
35 | | open(Filename,read,S,[encoding(utf8)]), |
36 | | read_codes(S,Codes), |
37 | | close(S). |
38 | | read_codes(S,Codes) :- |
39 | | get_code(S,Code), |
40 | | ( Code < 0 -> |
41 | | Codes = [] |
42 | | ; |
43 | | Codes = [Code|Rest], |
44 | | read_codes(S,Rest)). |
45 | | |
46 | | % -------------- |
47 | | :- dynamic convert_strings_to_atoms/1, add_position_infos/1. |
48 | | convert_strings_to_atoms(true). % if true compatible with SICStus library; otherwise string(Codes) used |
49 | | add_position_infos(false). % if true we add FromLineNr-ToLineNr as extra argument to pairs (=/3 instead of =/2) |
50 | | |
51 | | json_parse( Chars, Json ) :- |
52 | | json_parse( Chars, Json, [] ). |
53 | | |
54 | | json_parse( Chars, Json, _Options ) :- |
55 | | reset_line_nr, |
56 | | json(Json, Chars, _). |
57 | | |
58 | ? | json(Json) --> ws,!,json(Json). |
59 | | json(Json) --> |
60 | ? | json1(Json), |
61 | | !, |
62 | | spaces. |
63 | | json(Json) --> {var(Json)},check_string("[{"""), {fail}. % generate error message |
64 | | %json(Json) --> {ground(Json)}, json(JSon2), {format('### Mismatch:~n ~w~n~w~n',[Json,Json2]),fail}. |
65 | | |
66 | | % a version that does not print an error message |
67 | | try_json(Json) --> ws,!,try_json(Json). |
68 | | try_json(Json) --> |
69 | | json1(Json), |
70 | | !, |
71 | | spaces. |
72 | | |
73 | | json1(null) --> "null",!. |
74 | | json1(true) --> "true",!. |
75 | | json1(false) --> "false",!. |
76 | | |
77 | | json1(Number) --> number(Number),!. |
78 | | |
79 | | json1(String) --> |
80 | | json_start_string_quote(EndQuote),!, |
81 | | %{print_info(start_string)}, |
82 | ? | string2(Codes,EndQuote), |
83 | | {convert_strings_to_atoms(true) -> atom_codes(String,Codes) ; String=string(Codes)}. |
84 | | json1(Array) --> |
85 | | "[",!, |
86 | | %{print_info(start_array)}, |
87 | | array(Array), |
88 | | check_string("]"). |
89 | | json1(json(Pairs)) --> |
90 | | "{",!, |
91 | | %{print_info(start_object)}, |
92 | | spaces, |
93 | | pairs(Pairs), |
94 | | spaces, |
95 | | check_string("}"). |
96 | | |
97 | | number(Number) --> |
98 | | nm_token(NmCodes), |
99 | | { number_codes(Number, NmCodes) }. |
100 | | |
101 | | nm_token([H|T]) --> |
102 | | [H], |
103 | | { minus(H);digit_table(H) }, |
104 | | nm_token1(T). |
105 | | |
106 | | nm_token1([0'\x2E\|T]) --> |
107 | | ".",!, |
108 | | nm_frac(T). |
109 | | |
110 | | nm_token1([H|T]) --> |
111 | | [H], |
112 | | { digit_table(H) }, !, |
113 | | nm_token1(T). |
114 | | |
115 | | nm_token1([]) --> []. |
116 | | |
117 | | |
118 | | nm_frac([0'\x45\,H|T]) --> |
119 | | ("e";"E"),!, |
120 | | [H], {minus(H);plus(H)}, |
121 | | nm_exp(T). |
122 | | |
123 | | nm_frac([H|T]) --> |
124 | | [H], |
125 | | { digit_table(H) }, !, |
126 | | nm_frac(T). |
127 | | |
128 | | nm_frac([]) --> []. |
129 | | |
130 | | nm_exp([H|T]) --> |
131 | | [H], |
132 | | { digit_table(H) }, !, |
133 | | nm_exp1(T). |
134 | | |
135 | | nm_exp1([H|T]) --> |
136 | | [H], |
137 | | { digit_table(H) }, !, |
138 | | nm_exp1(T). |
139 | | |
140 | | nm_exp1([]) --> []. |
141 | | |
142 | | % detect valid start string quote |
143 | | json_start_string_quote(0'\x22\) --> """",!. % regular JSON string syntax |
144 | | %json_start_string_quote(39) --> "'". % possible ProB JSON extension, so that we do not have to quote B strings in formulas |
145 | | |
146 | | % regular JSON string between double quotes |
147 | | string(X) --> string2(X,0'\x22\). |
148 | | |
149 | | string2([],EndQuote) --> [EndQuote], |
150 | | !. |
151 | | string2([EscapedChar|T],EndQuote) --> |
152 | | [0'\x5C\],!, % \ |
153 | ? | escape_char(EscapedChar), |
154 | ? | string2(T,EndQuote). |
155 | | string2([10|T],EndQuote) --> [10], |
156 | | !, |
157 | | %{generate_json_error("string content",10),fail}, % comment in to disallow newlines |
158 | | {inc_line_nr}, |
159 | ? | string2(T,EndQuote). |
160 | | % TODO: check for other illegal characters like tab? ... |
161 | | string2([H|T],EndQuote) --> |
162 | | [H], |
163 | ? | string2(T,EndQuote). |
164 | | |
165 | | escape_char( 0'\x22\ ) --> [0'\x22\]. %" 34 decimal |
166 | | escape_char( 0'\x5C\ ) --> [0'\x5C\]. %\ |
167 | | escape_char( 0'\x2F\ ) --> [0'\x2F\]. %/ |
168 | | escape_char( 0'\x08\ ) --> [0'\x62\]. %b |
169 | | escape_char( 0'\x0C\ ) --> [0'\x66\]. %f |
170 | | escape_char( 0'\x0A\ ) --> [0'\x6E\]. %n |
171 | | escape_char( 0'\x0D\ ) --> [0'\x72\]. %r |
172 | | escape_char( 0'\x09\ ) --> [0'\x74\]. %t |
173 | | |
174 | | escape_char( Code ) --> |
175 | | "u", |
176 | | hex_digit_char( H1 ), |
177 | | hex_digit_char( H2 ), |
178 | | hex_digit_char( H3 ), |
179 | | hex_digit_char( H4 ), |
180 | | { Code is (((H1 << 4 + H2) << 4 + H3) << 4 + H4) }. |
181 | | |
182 | | hex_digit_char( 0 ) --> "0". |
183 | | hex_digit_char( 1 ) --> "1". |
184 | | hex_digit_char( 2 ) --> "2". |
185 | | hex_digit_char( 3 ) --> "3". |
186 | | hex_digit_char( 4 ) --> "4". |
187 | | hex_digit_char( 5 ) --> "5". |
188 | | hex_digit_char( 6 ) --> "6". |
189 | | hex_digit_char( 7 ) --> "7". |
190 | | hex_digit_char( 8 ) --> "8". |
191 | | hex_digit_char( 9 ) --> "9". |
192 | | hex_digit_char( 10 ) --> "A". |
193 | | hex_digit_char( 11 ) --> "B". |
194 | | hex_digit_char( 12 ) --> "C". |
195 | | hex_digit_char( 13 ) --> "D". |
196 | | hex_digit_char( 14 ) --> "E". |
197 | | hex_digit_char( 15 ) --> "F". |
198 | | hex_digit_char( 10 ) --> "a". |
199 | | hex_digit_char( 11 ) --> "b". |
200 | | hex_digit_char( 12 ) --> "c". |
201 | | hex_digit_char( 13 ) --> "d". |
202 | | hex_digit_char( 14 ) --> "e". |
203 | | hex_digit_char( 15 ) --> "f". |
204 | | |
205 | ? | array(Array) --> ws,!,array(Array). |
206 | | array([H|T]) --> |
207 | | try_json(H), !,{print_info(first_array)}, |
208 | | array1(T). |
209 | | array([]) --> [], {print_info(end_array)}. |
210 | | |
211 | | array1([H|T]) --> |
212 | | ",", !, |
213 | | json(H),!, {print_info(next_array)}, |
214 | | array1(T). |
215 | | array1([]) --> [], {print_info(empty_array)}. |
216 | | |
217 | | pair(ResPair,Optional) --> |
218 | | spaces, |
219 | | ({Optional=optional} -> opt_pair_name(Codes) ; pair_name(Codes)), |
220 | | {add_position_infos(false) -> ResPair = '='(Name, Value) |
221 | | ; cur_line(FromLineNr), ResPair = '='(Name, Value,FromLineNr-ToLineNr)}, |
222 | | % TODO: usually we want the position info of the value, ideally with start column info |
223 | | check_string(":"), |
224 | | { atom_codes(Name, Codes) }, |
225 | | {print_info(pair_value_for(Name))}, |
226 | | json(Value), |
227 | | {add_position_infos(false) -> true ; cur_line(ToLineNr)}, |
228 | | {print_info(found_value_vor(Name,Value,FromLineNr-ToLineNr))}. |
229 | | |
230 | | opt_pair_name(Name) --> """", string(Name), spaces. |
231 | | pair_name(Name) --> check_string(""""), string(Name), spaces. |
232 | | |
233 | | |
234 | | pairs(List) --> ws,!,pairs(List). |
235 | | pairs([H|T]) --> |
236 | | pair(H,optional), !, |
237 | | pairs1(T). |
238 | | pairs([]) --> [], {print_info(empty_pairs)}. |
239 | | |
240 | | pairs1(List) --> ws,!,pairs1(List). |
241 | | pairs1([H|T]) --> |
242 | | ",", !, |
243 | | pair(H,required),!, |
244 | | pairs1(T). |
245 | | pairs1([]) --> [], {print_info(end_pairs)}. |
246 | | |
247 | | :- use_module(library(lists),[append/2]). |
248 | | % check if the next char matches a given character or one of the given chars |
249 | | check_string(List) --> ws,!,check_string(List). |
250 | | check_string(List) --> [Char], {member(Char,List)},!. |
251 | | check_string(ExpectedString, [Char|_],_) :- |
252 | | generate_json_error(ExpectedString,Char), |
253 | | fail. |
254 | | |
255 | | generate_json_error(ExpectedString,Char) :- |
256 | | cur_line(LineNr), |
257 | | number_codes(LineNr,LC), |
258 | | % TO DO: if String has more than one char: write one of: |
259 | | convert_char(Char,CStr), |
260 | | append(["JSON error on line ",LC,": expecting ",ExpectedString," obtained ",CStr],Codes), |
261 | | atom_codes(Msg,Codes), |
262 | | %format(user_error,'! Error on line ~w: expecting ~s obtained ~s~n',[LineNr,String,[Char]]), |
263 | | add_error(json_parser,Msg). |
264 | | |
265 | | convert_char(8,Res) :- !, Res="tabulation character". |
266 | | convert_char(10,Res) :- !, Res="newline". |
267 | | convert_char(13,Res) :- !, Res="return character". |
268 | | convert_char(Char,[Char]). |
269 | | |
270 | | |
271 | | minus( 0'- ). |
272 | | plus( 0'+ ). |
273 | | digit_table( 0'0 ). |
274 | | digit_table( 0'1 ). |
275 | | digit_table( 0'2 ). |
276 | | digit_table( 0'3 ). |
277 | | digit_table( 0'4 ). |
278 | | digit_table( 0'5 ). |
279 | | digit_table( 0'6 ). |
280 | | digit_table( 0'7 ). |
281 | | digit_table( 0'8 ). |
282 | | digit_table( 0'9 ). |
283 | | |
284 | | |
285 | ? | spaces --> ws,!,spaces. |
286 | | spaces --> []. |
287 | | |
288 | | % whitespace |
289 | | ws --> new_line,!. |
290 | | ws --> " "; "\t" ; [10] ; [13]. |
291 | | |
292 | | new_line --> "\n",{inc_line_nr}. |
293 | | |
294 | | % use a fact to keep track of line numbers |
295 | | :- dynamic cur_line/1. |
296 | | cur_line(1). |
297 | | inc_line_nr :- retract(cur_line(N)), N1 is N+1, assertz(cur_line(N1)). |
298 | | reset_line_nr :- retract(cur_line(_)), assertz(cur_line(1)). |
299 | | |
300 | | print_info(_) :- !. % comment out to view debug info |
301 | | print_info(Error) :- print_error(Error). |
302 | | print_error(Error) :- |
303 | | cur_line(LineNr), |
304 | | nl(user_error), |
305 | | write(user_error,'! Line: '),write_term(user_error,LineNr,[]),nl(user_error), |
306 | | (var(Error) -> print_message(error,'_') |
307 | | ; write(user_error,'! '),write_term(user_error,Error,[max_depth(20),numbervars(true)]),nl(user_error)). |
308 | | %% flush_output(user_error), %%. |