1 :- module( json_parser, [
2 json_parse/2, json_parse/3,
3 json_parse_file/2, json_parse_file/3,
4 json_write/2, json_write/3,
5 json_write_stream/1, json_write_stream/2, json_write_stream/3,
6 json_write_file/2, json_write_file/3,
7 json_escape_string_atom/2, json_escape_string_codes/2, json_escape_string_codes/3
8 ]).
9
10 :- set_prolog_flag(double_quotes, codes).
11
12 :- use_module(library(lists)).
13 :- use_module('../tools', []). % to setup paths
14 :- use_module(probsrc(error_manager),[add_error/3, add_error/2, add_warning/3]).
15 :- use_module(probsrc(self_check)).
16
17 % based on https://github.com/yoroto/pl-json
18 % have added line counting and error reporting, reading directly from file
19 % fixed a few issues with whitespace and avoid backtracking
20 % return strings using string(.) wrapper to avoid ambiguity with arrays of integers
21 % fix issue with unparsed suffix being ignored (toggle-able via option rest(_))
22 % add options to change the representation of all json values
23 % add compact and pretty printing of a given json term
24
25 % JSON prolog term format:
26 % literals (null/true/false) are tagged with '@'
27 % numbers are tagged with 'number'
28 % strings are tagged with 'string'
29 % arrays are tagged with 'array'
30 % objects are tagged with 'json' and contain terms with the functor '='
31
32
33 % parse a JSON file
34 json_parse_file(File, Json) :-
35 json_parse_file(File, Json, []).
36
37 % parse a JSON file with custom options
38 json_parse_file(File, Json, Options) :-
39 catch(read_file(File,Codes), error(existence_error(_,_),_E),
40 (add_error(json_parse_file,'JSON file does not exist:',File),fail)),
41 (json_parse(Codes,Json,Options) -> true
42 ; add_error(json_parse_file,'Could not parse JSON file:',File),fail
43 ).
44
45 read_file(Filename,Codes) :-
46 open(Filename,read,S,[encoding(utf8)]),
47 read_codes(S,Codes),
48 close(S).
49 read_codes(S,Codes) :-
50 get_code(S,Code),
51 ( Code < 0 ->
52 Codes = []
53 ;
54 Codes = [Code|Rest],
55 read_codes(S,Rest)).
56
57 % --------------
58
59 % write a JSON file
60 json_write_file(File, Json) :-
61 json_write_file(File, Json, []).
62
63 % write a JSON file with custom options
64 json_write_file(File, Json, Options) :-
65 open(File,write,Stream,[encoding(utf8)]),
66 json_write_stream(Stream,Json,Options),
67 close(Stream).
68
69 % --------------
70
71 :- dynamic
72 position_infos/1,
73 strings_as_atoms/1, multiline_strings/1,
74 pretty/1, ascii/1.
75 reset_options :-
76 % by default we expect no rest (unparsed suffix), but callers can use rest(Var) to capture the rest to do their own handling
77 % (rest/1 handled separately)
78
79 % if true we add FromLineNr-ToLineNr as extra argument to attribute key-value terms in objects (=/3 instead of =/2, default false)
80 retractall(position_infos(_)), assertz(position_infos(false)),
81
82 % if true compatible with SICStus library (converted to atom); otherwise use codes directly (might cause problems with number arrays without tags, default true)
83 retractall(strings_as_atoms(_)), assertz(strings_as_atoms(true)),
84
85 % if true allow newlines in strings when parsing (default false)
86 retractall(multiline_strings(_)), assertz(multiline_strings(false)),
87
88 % write json strings using pretty print (include new lines and indent, default false)
89 retractall(pretty(_)), assertz(pretty(false)),
90
91 % write json strings as ascii only (default false)
92 retractall(ascii(_)), assertz(ascii(false)).
93
94 process_option(rest(_)) :- !. % handled separately
95 process_option(position_infos(X)) :- atom(X), !, retractall(position_infos(_)), assertz(position_infos(X)).
96 process_option(strings_as_atoms(X)) :- atom(X), !, retractall(strings_as_atoms(_)), assertz(strings_as_atoms(X)).
97 process_option(multiline_strings(X)) :- atom(X), !, retractall(multiline_strings(_)), assertz(multiline_strings(X)).
98 process_option(pretty(X)) :- atom(X), !, retractall(pretty(_)), assertz(pretty(X)).
99 process_option(ascii(X)) :- atom(X), !, retractall(ascii(_)), assertz(ascii(X)).
100 process_option(O) :- add_warning(json_parser, 'Unrecognized option:', O).
101
102 % --------------
103
104 json_write_stream(Json) :-
105 json_write_stream(current_output, Json).
106
107 json_write_stream(Stream, Json) :-
108 json_write_stream(Stream, Json, []).
109
110 json_write_stream(Stream, Json, Options) :-
111 json_write(Json, Chars, Options),
112 format(Stream, '~s', [Chars]).
113
114 json_write(Json, Chars) :-
115 json_write(Json, Chars, []).
116
117 json_write(Json, Chars, Options) :-
118 reset_options,
119 reset_line_nr,
120 maplist(process_option, Options),
121 json_write0(Json, 0, Chars, []).
122
123 % --------------
124
125 :- assert_must_succeed(json_parser:json_escape_string_atom('{"a"}','{\\"a\\"}')).
126 :- assert_must_succeed(json_parser:json_escape_string_atom('{"a \\/ b"}','{\\"a \\\\/ b\\"}')). % escape \ for JSON
127
128 json_escape_string_atom(Atom, EscapedAtom) :-
129 \+ atom(Atom),
130 !,
131 add_error('Cannot escape: ', json_escape_string_atom(Atom, EscapedAtom)),
132 Atom = EscapedAtom.
133
134 json_escape_string_atom(Atom, EscapedAtom) :-
135 atom_codes(Atom, Codes),
136 json_escape_string_codes(Codes, EscapedCodes),
137 atom_codes(EscapedAtom, EscapedCodes).
138
139 json_escape_string_codes(Codes, EscapedCodes) :-
140 json_escape_string_codes(Codes, EscapedCodes, []).
141
142 json_escape_string_codes(Codes, EscapedCodes, Options) :-
143 reset_options,
144 reset_line_nr,
145 maplist(process_option, Options),
146 json_escape_string_codes0(Codes, EscapedCodes).
147
148 % --------------
149
150 indent(" ").
151 json_write_indent(_) --> {pretty(false)}, !, "".
152 json_write_indent(0) --> !, "".
153 json_write_indent(N) --> !, {indent(I)}, I, {N1 is N-1}, json_write_indent(N1).
154
155 json_write_newline --> {pretty(false)}, !, "".
156 json_write_newline --> !, "\n".
157
158 json_write_pretty_whitespace --> {pretty(false)}, !, "".
159 json_write_pretty_whitespace --> !, " ".
160
161 json_write0(Json, Indent) -->
162 json_write_indent(Indent),
163 json_write1(Json, Indent),
164 json_write_newline.
165
166 json_write1(@(Literal), _Indent) -->
167 {valid_literal(Literal), atom_codes(Literal, LiteralCodes)},
168 !,
169 LiteralCodes.
170 json_write1(number(Number), _Indent) -->
171 {lenient_number_codes(Number, NumberCodes)},
172 !,
173 NumberCodes.
174 json_write1(string(String), _Indent) -->
175 {lenient_atom_codes(String, StringCodes)},
176 !,
177 json_write_string(StringCodes).
178 json_write1(json(Object), Indent) -->
179 !,
180 "{",
181 {Indent1 is Indent+1},
182 json_write_object(Object, Indent1),
183 "}".
184 json_write1(array(Array), Indent) -->
185 !,
186 "[",
187 {Indent1 is Indent+1},
188 json_write_array(Array, Indent1),
189 "]".
190 json_write1(Json, _Indent) -->
191 {add_error(json_write, 'Unknown json term: ', Json), fail}.
192
193 json_write_string(StringCodes) -->
194 {json_escape_string_codes0(StringCodes, EscCodes)},
195 """",
196 EscCodes,
197 """".
198
199 valid_literal(null).
200 valid_literal(true).
201 valid_literal(false).
202
203 safe_number_codes(N, C) :-
204 catch(number_codes(N, C), _, fail),
205 number(C).
206
207 lenient_number_codes(Value, Codes) :- number(Value), !, number_codes(Value, Codes).
208 lenient_number_codes(Value, Codes) :-
209 atom(Value),
210 atom_codes(Value, VCodes),
211 safe_number_codes(_, VCodes),
212 !,
213 Codes = VCodes.
214 lenient_number_codes(Value, Codes) :-
215 Value = [C|_],
216 number(C),
217 safe_number_codes(_, Value),
218 !,
219 Codes = Value.
220
221 lenient_atom_codes(Value, Codes) :- atom(Value), !, atom_codes(Value, Codes).
222 lenient_atom_codes(Value, Codes) :- number(Value), !, number_codes(Value, Codes).
223 lenient_atom_codes(Value, Codes) :- Value=[], !, Codes=[].
224 lenient_atom_codes(Value, Codes) :- Value=[C|_], number(C), !, Codes = Value.
225
226 json_escape_string_codes0([], []) :- !.
227 json_escape_string_codes0([34|T], [92,34|ET]) :- !, json_escape_string_codes0(T, ET). % double quote
228 json_escape_string_codes0([92|T], [92,92|ET]) :- !, json_escape_string_codes0(T, ET). % backslash
229 % it is unnecessary to escape the forward slash here
230 json_escape_string_codes0([8|T], [92,98|ET]) :- !, json_escape_string_codes0(T, ET). % b(ackspace)
231 json_escape_string_codes0([12|T], [92,102|ET]) :- !, json_escape_string_codes0(T, ET). % f(orm feed)
232 json_escape_string_codes0([10|T], [92,110|ET]) :- !, json_escape_string_codes0(T, ET). % n(ew line)
233 json_escape_string_codes0([13|T], [92,114|ET]) :- !, json_escape_string_codes0(T, ET). % (carriage) r(eturn)
234 json_escape_string_codes0([9|T], [92,116|ET]) :- !, json_escape_string_codes0(T, ET). % (horizontal) t(ab)
235 json_escape_string_codes0([N|T], [92,117,H4,H3,H2,H1|ET]) :-
236 (N >= 0, N =< 31 ; ascii(true), N >= 0x7f, N =< 0x10000), % non-ascii/non-printable (u)nicode codepoints
237 !,
238 to_4_hex_codes(N, H4, H3, H2, H1),
239 json_escape_string_codes0(T, ET).
240 json_escape_string_codes0([N|T], [92,117,H4,H3,H2,H1,92,117,H8,H7,H6,H5|ET]) :- % escape as surrogate pair
241 ascii(true), N >= 0x10000, N =< 0x10FFFF, % maximum unicode codepoint
242 !,
243 LowSurrogate is ((N-65536) mod 0x400) + 0xDC00,
244 HighSurrogate is ((N-65536) div 0x400) + 0xD800,
245 to_4_hex_codes(HighSurrogate, H4, H3, H2, H1),
246 to_4_hex_codes(LowSurrogate, H8, H7, H6, H5),
247 json_escape_string_codes0(T, ET).
248 json_escape_string_codes0([N|T], [N|ET]) :- !, json_escape_string_codes0(T, ET). % the rest
249
250 to_4_hex_codes(N1, H4, H3, H2, H1) :-
251 D1 is (N1 mod 16), N2 is N1 div 16,
252 D2 is (N2 mod 16), N3 is N2 div 16,
253 D3 is (N3 mod 16), N4 is N3 div 16,
254 D4 is (N4 mod 16),
255 to_hex_code(D1, H1),
256 to_hex_code(D2, H2),
257 to_hex_code(D3, H3),
258 to_hex_code(D4, H4).
259
260 to_hex_code(N, C) :- N >= 0, N < 10, !, C is 48+N. % 48 is '0'
261 to_hex_code(N, C) :- N >= 10, N < 16, !, C is 97+(N-10). % 97 is 'a'
262 from_hex_code(C, N) :- C >= 48, C =< 57, !, N is C-48.
263 from_hex_code(C, N) :- C >= 97, C =< 102, !, N is (C-97)+10.
264 from_hex_code(C, N) :- C >= 65, C =< 70, !, N is (C-65)+10.
265
266 json_write_array([], _Indent) --> !, [].
267 json_write_array([H|T], Indent) -->
268 !,
269 json_write_newline,
270 json_write_indent(Indent),
271 json_write1(H, Indent),
272 json_write_array1(T, Indent).
273 json_write_array1([], Indent) -->
274 !,
275 json_write_newline,
276 {Indent1 is Indent-1},
277 json_write_indent(Indent1).
278 json_write_array1([H|T], Indent) -->
279 !,
280 ",",
281 json_write_newline,
282 json_write_indent(Indent),
283 json_write1(H, Indent),
284 json_write_array1(T, Indent).
285
286 json_write_pair(Pair, Indent) -->
287 json_write_newline,
288 json_write_indent(Indent),
289 {Pair =.. [_, K, V | _], atom_codes(K, KCodes)}, % leniency: accept every term with at least two arguments
290 json_write_string(KCodes),
291 ":",
292 json_write_pretty_whitespace,
293 json_write1(V, Indent).
294
295 json_write_object([], _Indent) --> !, [].
296 json_write_object([H|T], Indent) -->
297 !,
298 json_write_pair(H, Indent),
299 json_write_object1(T, Indent).
300 json_write_object1([], Indent) -->
301 !,
302 json_write_newline,
303 {Indent1 is Indent-1},
304 json_write_indent(Indent1).
305 json_write_object1([H|T], Indent) -->
306 !,
307 ",",
308 json_write_pair(H, Indent),
309 json_write_object1(T, Indent).
310
311 % --------------
312
313 json_parse(Chars, Json) :-
314 json_parse(Chars, Json, []).
315
316 json_parse(Chars, Json, Options) :-
317 reset_options,
318 reset_line_nr,
319 maplist(process_option, Options),
320 (member(rest(ExpectedRest), Options) -> true ; ExpectedRest = ""),
321 json(Json, Chars, Rest), !,
322 (ExpectedRest = Rest
323 -> true
324 ; (ExpectedRest = "" -> ExpectedString = "<EOF>" ; ExpectedString = ExpectedRest),
325 generate_json_error(ExpectedString, Rest),
326 fail
327 ).
328
329 json(Json) --> try_json(Json), !.
330 json(_) --> !, fail_with_error("json value").
331
332 % a version that does not print an error message
333 try_json(Json) --> ws, !, try_json(Json).
334 try_json(Json) --> json1(Json), !, spaces.
335
336 json1(@(Literal)) -->
337 {valid_literal(Literal), atom_codes(Literal, LiteralCodes)},
338 LiteralCodes,
339 !.
340 json1(number(Number)) --> parse_number(Number), !.
341 json1(string(String)) -->
342 json_start_string_quote(Quote), !,
343 string2(Codes, Quote),
344 {strings_as_atoms(true) -> atom_codes(String, Codes) ; String = Codes}.
345 json1(array(Array)) -->
346 "[", !,
347 array(Array).
348 json1(json(Object)) -->
349 "{", !,
350 pairs(Object).
351
352 parse_number(Number) -->
353 nm_token(NmCodes), !,
354 {number_codes(Number, NmCodes)}.
355
356 nm_token([H|T]) -->
357 [H], {minus(H)}, !,
358 nm_int(T).
359 nm_token(T) --> !, nm_int(T).
360
361 nm_int([H|T]) -->
362 [H], {zero(H)}, !,
363 nm_int0(T).
364 nm_int([H|T]) -->
365 [H], {digit_table_1to9(H)}, !,
366 nm_int1(T).
367
368 nm_int0([H|T]) -->
369 [H], {dot(H)}, !,
370 nm_frac(T).
371 nm_int0([]) --> !, "".
372
373 nm_int1([H|T]) -->
374 [H], {digit_table(H)}, !,
375 nm_int1(T).
376 nm_int1([H|T]) -->
377 [H], {dot(H)}, !,
378 nm_frac(T).
379 nm_int1([]) --> !, "".
380
381 nm_frac([H|T]) -->
382 [H], {digit_table(H)}, !,
383 nm_frac1(T).
384
385 nm_frac1([H|T]) -->
386 [H], {exp(H)}, !,
387 nm_exp(T).
388 nm_frac1([H|T]) -->
389 [H], {digit_table(H)}, !,
390 nm_frac1(T).
391 nm_frac1([]) --> !, "".
392
393 nm_exp([H|T]) -->
394 [H], {sign(H)}, !,
395 nm_exp0(T).
396 nm_exp(T) --> !, nm_exp0(T).
397
398 nm_exp0([H|T]) -->
399 [H], {digit_table(H)}, !,
400 nm_exp1(T).
401
402 nm_exp1([H|T]) -->
403 [H], {digit_table(H)}, !,
404 nm_exp1(T).
405 nm_exp1([]) --> !, "".
406
407 % detect valid start string quote
408 json_start_string_quote(0'\x22\) --> """", !. % regular JSON string syntax
409 %json_start_string_quote(39) --> "'", !. % possible ProB JSON extension, so that we do not have to quote B strings in formulas
410
411 % regular JSON string between double quotes
412 %string(X) --> string2(X,0'\x22\).
413
414 string2([],EndQuote) -->
415 [EndQuote],
416 !.
417 string2([EscapedChar|T],EndQuote) -->
418 [0'\x5C\],!, % \
419 escape_char(EscapedChar),
420 string2(T,EndQuote).
421 string2([C|T],EndQuote) -->
422 [C],
423 {C=10;C=13},
424 !,
425 ({multiline_strings(true)} -> {true} ; fail_with_error("string content")),
426 {C=10 -> inc_line_nr ; true},
427 string2(T,EndQuote).
428 string2([H|T],EndQuote) -->
429 [H],
430 string2(T,EndQuote).
431
432 escape_char( 0'\x22\ ) --> [0'\x22\], !. %" 34 decimal
433 escape_char( 0'\x5C\ ) --> [0'\x5C\], !. %\
434 escape_char( 0'\x2F\ ) --> [0'\x2F\], !. %/
435 escape_char( 0'\x08\ ) --> [0'\x62\], !. %b
436 escape_char( 0'\x0C\ ) --> [0'\x66\], !. %f
437 escape_char( 0'\x0A\ ) --> [0'\x6E\], !. %n
438 escape_char( 0'\x0D\ ) --> [0'\x72\], !. %r
439 escape_char( 0'\x09\ ) --> [0'\x74\], !. %t
440
441 % combine surrogates (same behaviour as SICStus library)
442 escape_char( Code ) -->
443 "u",
444 hex_digit_char( H1 ),
445 hex_digit_char( H2 ),
446 hex_digit_char( H3 ),
447 hex_digit_char( H4 ),
448 {HighSurrogate is (((H1 << 4 + H2) << 4 + H3) << 4 + H4)},
449 {HighSurrogate >= 0xD800, HighSurrogate =< 0xDBFF},
450 "\\u",
451 hex_digit_char( H5 ),
452 hex_digit_char( H6 ),
453 hex_digit_char( H7 ),
454 hex_digit_char( H8 ),
455 {LowSurrogate is (((H5 << 4 + H6) << 4 + H7) << 4 + H8)},
456 {LowSurrogate >= 0xDC00, LowSurrogate =< 0xDFFF},
457 !,
458 {Code is 0x10000 + (0x400 * (HighSurrogate - 0xD800)) + (LowSurrogate - 0xDC00)}.
459
460 escape_char( Code ) -->
461 "u",
462 !,
463 hex_digit_char( H1 ),
464 hex_digit_char( H2 ),
465 hex_digit_char( H3 ),
466 hex_digit_char( H4 ),
467 { Code is (((H1 << 4 + H2) << 4 + H3) << 4 + H4) }.
468
469 hex_digit_char( N ) --> [C], {from_hex_code(C, N)}, !.
470
471 array(Array) --> ws, !, array(Array).
472 array([]) --> "]", !.
473 array([H|T]) -->
474 json1(H), !,
475 spaces,
476 array1(T).
477 array(_) --> !, fail_with_error("json value or \"]\" in array").
478
479 array1([]) --> "]", !.
480 array1([H|T]) -->
481 check_string(","),
482 spaces,
483 json1(H), !,
484 spaces,
485 array1(T).
486 array1(_) --> !, fail_with_error("\",\" json value or \"]\" in array").
487
488 pair(ResPair) -->
489 spaces,
490 {position_infos(true)
491 -> position_start(Pos), ResPair = '='(Name, Value, Pos)
492 ; ResPair = '='(Name, Value)},
493 (json_start_string_quote(Quote) -> {true} ; fail_with_error("string quote")),
494 string2(Codes, Quote),
495 {atom_codes(Name, Codes)},
496 spaces, check_string(":"), spaces,
497 json1(Value),
498 {position_infos(true) -> position_end(Pos) ; true},
499 spaces.
500
501 pairs(List) --> ws, !, pairs(List).
502 pairs([]) --> "}", !.
503 pairs([H|T]) -->
504 pair(H), !,
505 pairs1(T).
506 pairs(_) --> !, fail_with_error("string \":\" json value or \"}\" in object").
507
508 pairs1([]) --> "}", !.
509 pairs1([H|T]) -->
510 check_string(","),
511 pair(H), !,
512 pairs1(T).
513 pairs1(_) --> !, fail_with_error("\",\" string \":\" json value or \"}\" in object").
514
515 % check if the next char matches a given character or one of the given chars
516 check_string(List) --> [Char], {member(Char, List)}, !.
517 check_string(List) --> !, fail_with_error(List).
518
519 fail_with_error(ExpectedString, Codes, _) :- !, generate_json_error(ExpectedString, Codes), fail.
520
521 generate_json_error(ExpectedString, Char) :-
522 cur_line(LineNr),
523 number_codes(LineNr,LC),
524 convert_chars(Char,CStr),
525 append(["JSON error in line ",LC,": expected ",ExpectedString,", but got ",CStr],Codes),
526 atom_codes(Msg,Codes),
527 add_error(json_parser,Msg).
528
529 convert_chars([], Res) :- !, Res = "<EOF>".
530 convert_chars([Char|_], Res) :- !, convert_char(Char, Res).
531 convert_chars(Char, Res) :- !, convert_char(Char, Res).
532
533 convert_char(9, Res) :- !, Res = "<tab>".
534 convert_char(10, Res) :- !, Res = "<newline>".
535 convert_char(13, Res) :- !, Res = "<carriage return>".
536 convert_char(Char, Res) :- number(Char), !, Res = [Char].
537 convert_char(_, Res) :- !, Res = "<unknown>".
538
539 minus( 0'- ).
540 plus( 0'+ ).
541 sign( C ) :- minus( C ).
542 sign( C ) :- plus( C ).
543 zero( 0'0 ).
544 digit_table( C ) :- zero( C ).
545 digit_table( C ) :- digit_table_1to9( C ).
546 digit_table_1to9( 0'1 ).
547 digit_table_1to9( 0'2 ).
548 digit_table_1to9( 0'3 ).
549 digit_table_1to9( 0'4 ).
550 digit_table_1to9( 0'5 ).
551 digit_table_1to9( 0'6 ).
552 digit_table_1to9( 0'7 ).
553 digit_table_1to9( 0'8 ).
554 digit_table_1to9( 0'9 ).
555 dot( 0'. ).
556 exp( 0'e ).
557 exp( 0'E ).
558
559
560 spaces --> ws,!,spaces.
561 spaces --> "".
562
563 % whitespace
564 ws --> new_line, !.
565 ws --> " "; "\t" ; "\r".
566
567 new_line --> "\n",{inc_line_nr}.
568
569 % use a fact to keep track of line numbers
570 :- dynamic cur_line/1.
571 reset_line_nr :- retractall(cur_line(_)), assertz(cur_line(1)).
572 inc_line_nr :- cur_line(N), !, retractall(cur_line(_)), N1 is N+1, assertz(cur_line(N1)).
573
574 position_start(Start-_) :- !, cur_line(Start).
575 position_end(_-End) :- !, cur_line(End).