| 1 | :- module( json_parser, [ | |
| 2 | json_parse/2, json_parse/3, | |
| 3 | json_parse_file/2, json_parse_file/3, | |
| 4 | json_write/2, json_write/3, | |
| 5 | json_write_stream/1, json_write_stream/2, json_write_stream/3, | |
| 6 | json_write_file/2, json_write_file/3, | |
| 7 | json_escape_string_atom/2, json_escape_string_codes/2, json_escape_string_codes/3 | |
| 8 | ]). | |
| 9 | ||
| 10 | :- set_prolog_flag(double_quotes, codes). | |
| 11 | ||
| 12 | :- use_module(library(lists)). | |
| 13 | :- use_module('../tools', []). % to setup paths | |
| 14 | :- use_module(probsrc(error_manager),[add_error/3, add_error/2, add_warning/3]). | |
| 15 | :- use_module(probsrc(self_check)). | |
| 16 | ||
| 17 | % based on https://github.com/yoroto/pl-json | |
| 18 | % have added line counting and error reporting, reading directly from file | |
| 19 | % fixed a few issues with whitespace and avoid backtracking | |
| 20 | % return strings using string(.) wrapper to avoid ambiguity with arrays of integers | |
| 21 | % fix issue with unparsed suffix being ignored (toggle-able via option rest(_)) | |
| 22 | % add options to change the representation of all json values | |
| 23 | % add compact and pretty printing of a given json term | |
| 24 | ||
| 25 | % JSON prolog term format: | |
| 26 | % literals (null/true/false) are tagged with '@' | |
| 27 | % numbers are tagged with 'number' | |
| 28 | % strings are tagged with 'string' | |
| 29 | % arrays are tagged with 'array' | |
| 30 | % objects are tagged with 'json' and contain terms with the functor '=' | |
| 31 | ||
| 32 | ||
| 33 | % parse a JSON file | |
| 34 | json_parse_file(File, Json) :- | |
| 35 | json_parse_file(File, Json, []). | |
| 36 | ||
| 37 | % parse a JSON file with custom options | |
| 38 | json_parse_file(File, Json, Options) :- | |
| 39 | catch(read_file(File,Codes), error(existence_error(_,_),_E), | |
| 40 | (add_error(json_parse_file,'JSON file does not exist:',File),fail)), | |
| 41 | (json_parse(Codes,Json,Options) -> true | |
| 42 | ; add_error(json_parse_file,'Could not parse JSON file:',File),fail | |
| 43 | ). | |
| 44 | ||
| 45 | read_file(Filename,Codes) :- | |
| 46 | open(Filename,read,S,[encoding(utf8)]), | |
| 47 | read_codes(S,Codes), | |
| 48 | close(S). | |
| 49 | read_codes(S,Codes) :- | |
| 50 | get_code(S,Code), | |
| 51 | ( Code < 0 -> | |
| 52 | Codes = [] | |
| 53 | ; | |
| 54 | Codes = [Code|Rest], | |
| 55 | read_codes(S,Rest)). | |
| 56 | ||
| 57 | % -------------- | |
| 58 | ||
| 59 | % write a JSON file | |
| 60 | json_write_file(File, Json) :- | |
| 61 | json_write_file(File, Json, []). | |
| 62 | ||
| 63 | % write a JSON file with custom options | |
| 64 | json_write_file(File, Json, Options) :- | |
| 65 | open(File,write,Stream,[encoding(utf8)]), | |
| 66 | json_write_stream(Stream,Json,Options), | |
| 67 | close(Stream). | |
| 68 | ||
| 69 | % -------------- | |
| 70 | ||
| 71 | :- dynamic | |
| 72 | position_infos/1, | |
| 73 | strings_as_atoms/1, multiline_strings/1, | |
| 74 | pretty/1, ascii/1. | |
| 75 | reset_options :- | |
| 76 | % by default we expect no rest (unparsed suffix), but callers can use rest(Var) to capture the rest to do their own handling | |
| 77 | % (rest/1 handled separately) | |
| 78 | ||
| 79 | % if true we add FromLineNr-ToLineNr as extra argument to attribute key-value terms in objects (=/3 instead of =/2, default false) | |
| 80 | retractall(position_infos(_)), assertz(position_infos(false)), | |
| 81 | ||
| 82 | % if true compatible with SICStus library (converted to atom); otherwise use codes directly (might cause problems with number arrays without tags, default true) | |
| 83 | retractall(strings_as_atoms(_)), assertz(strings_as_atoms(true)), | |
| 84 | ||
| 85 | % if true allow newlines in strings when parsing (default false) | |
| 86 | retractall(multiline_strings(_)), assertz(multiline_strings(false)), | |
| 87 | ||
| 88 | % write json strings using pretty print (include new lines and indent, default false) | |
| 89 | retractall(pretty(_)), assertz(pretty(false)), | |
| 90 | ||
| 91 | % write json strings as ascii only (default false) | |
| 92 | retractall(ascii(_)), assertz(ascii(false)). | |
| 93 | ||
| 94 | process_option(rest(_)) :- !. % handled separately | |
| 95 | process_option(position_infos(X)) :- atom(X), !, retractall(position_infos(_)), assertz(position_infos(X)). | |
| 96 | process_option(strings_as_atoms(X)) :- atom(X), !, retractall(strings_as_atoms(_)), assertz(strings_as_atoms(X)). | |
| 97 | process_option(multiline_strings(X)) :- atom(X), !, retractall(multiline_strings(_)), assertz(multiline_strings(X)). | |
| 98 | process_option(pretty(X)) :- atom(X), !, retractall(pretty(_)), assertz(pretty(X)). | |
| 99 | process_option(ascii(X)) :- atom(X), !, retractall(ascii(_)), assertz(ascii(X)). | |
| 100 | process_option(O) :- add_warning(json_parser, 'Unrecognized option:', O). | |
| 101 | ||
| 102 | % -------------- | |
| 103 | ||
| 104 | json_write_stream(Json) :- | |
| 105 | json_write_stream(current_output, Json). | |
| 106 | ||
| 107 | json_write_stream(Stream, Json) :- | |
| 108 | json_write_stream(Stream, Json, []). | |
| 109 | ||
| 110 | json_write_stream(Stream, Json, Options) :- | |
| 111 | json_write(Json, Chars, Options), | |
| 112 | format(Stream, '~s', [Chars]). | |
| 113 | ||
| 114 | json_write(Json, Chars) :- | |
| 115 | json_write(Json, Chars, []). | |
| 116 | ||
| 117 | json_write(Json, Chars, Options) :- | |
| 118 | reset_options, | |
| 119 | reset_line_nr, | |
| 120 | maplist(process_option, Options), | |
| 121 | json_write0(Json, 0, Chars, []). | |
| 122 | ||
| 123 | % -------------- | |
| 124 | ||
| 125 | :- assert_must_succeed(json_parser:json_escape_string_atom('{"a"}','{\\"a\\"}')). | |
| 126 | :- assert_must_succeed(json_parser:json_escape_string_atom('{"a \\/ b"}','{\\"a \\\\/ b\\"}')). % escape \ for JSON | |
| 127 | ||
| 128 | json_escape_string_atom(Atom, EscapedAtom) :- | |
| 129 | \+ atom(Atom), | |
| 130 | !, | |
| 131 | add_error('Cannot escape: ', json_escape_string_atom(Atom, EscapedAtom)), | |
| 132 | Atom = EscapedAtom. | |
| 133 | ||
| 134 | json_escape_string_atom(Atom, EscapedAtom) :- | |
| 135 | atom_codes(Atom, Codes), | |
| 136 | json_escape_string_codes(Codes, EscapedCodes), | |
| 137 | atom_codes(EscapedAtom, EscapedCodes). | |
| 138 | ||
| 139 | json_escape_string_codes(Codes, EscapedCodes) :- | |
| 140 | json_escape_string_codes(Codes, EscapedCodes, []). | |
| 141 | ||
| 142 | json_escape_string_codes(Codes, EscapedCodes, Options) :- | |
| 143 | reset_options, | |
| 144 | reset_line_nr, | |
| 145 | maplist(process_option, Options), | |
| 146 | json_escape_string_codes0(Codes, EscapedCodes). | |
| 147 | ||
| 148 | % -------------- | |
| 149 | ||
| 150 | indent(" "). | |
| 151 | json_write_indent(_) --> {pretty(false)}, !, "". | |
| 152 | json_write_indent(0) --> !, "". | |
| 153 | json_write_indent(N) --> !, {indent(I)}, I, {N1 is N-1}, json_write_indent(N1). | |
| 154 | ||
| 155 | json_write_newline --> {pretty(false)}, !, "". | |
| 156 | json_write_newline --> !, "\n". | |
| 157 | ||
| 158 | json_write_pretty_whitespace --> {pretty(false)}, !, "". | |
| 159 | json_write_pretty_whitespace --> !, " ". | |
| 160 | ||
| 161 | json_write0(Json, Indent) --> | |
| 162 | json_write_indent(Indent), | |
| 163 | json_write1(Json, Indent), | |
| 164 | json_write_newline. | |
| 165 | ||
| 166 | json_write1(@(Literal), _Indent) --> | |
| 167 | {valid_literal(Literal), atom_codes(Literal, LiteralCodes)}, | |
| 168 | !, | |
| 169 | LiteralCodes. | |
| 170 | json_write1(number(Number), _Indent) --> | |
| 171 | {lenient_number_codes(Number, NumberCodes)}, | |
| 172 | !, | |
| 173 | NumberCodes. | |
| 174 | json_write1(string(String), _Indent) --> | |
| 175 | {lenient_atom_codes(String, StringCodes)}, | |
| 176 | !, | |
| 177 | json_write_string(StringCodes). | |
| 178 | json_write1(json(Object), Indent) --> | |
| 179 | !, | |
| 180 | "{", | |
| 181 | {Indent1 is Indent+1}, | |
| 182 | json_write_object(Object, Indent1), | |
| 183 | "}". | |
| 184 | json_write1(array(Array), Indent) --> | |
| 185 | !, | |
| 186 | "[", | |
| 187 | {Indent1 is Indent+1}, | |
| 188 | json_write_array(Array, Indent1), | |
| 189 | "]". | |
| 190 | json_write1(Json, _Indent) --> | |
| 191 | {add_error(json_write, 'Unknown json term: ', Json), fail}. | |
| 192 | ||
| 193 | json_write_string(StringCodes) --> | |
| 194 | {json_escape_string_codes0(StringCodes, EscCodes)}, | |
| 195 | """", | |
| 196 | EscCodes, | |
| 197 | """". | |
| 198 | ||
| 199 | valid_literal(null). | |
| 200 | valid_literal(true). | |
| 201 | valid_literal(false). | |
| 202 | ||
| 203 | safe_number_codes(N, C) :- | |
| 204 | catch(number_codes(N, C), _, fail), | |
| 205 | number(C). | |
| 206 | ||
| 207 | lenient_number_codes(Value, Codes) :- number(Value), !, number_codes(Value, Codes). | |
| 208 | lenient_number_codes(Value, Codes) :- | |
| 209 | atom(Value), | |
| 210 | atom_codes(Value, VCodes), | |
| 211 | safe_number_codes(_, VCodes), | |
| 212 | !, | |
| 213 | Codes = VCodes. | |
| 214 | lenient_number_codes(Value, Codes) :- | |
| 215 | Value = [C|_], | |
| 216 | number(C), | |
| 217 | safe_number_codes(_, Value), | |
| 218 | !, | |
| 219 | Codes = Value. | |
| 220 | ||
| 221 | lenient_atom_codes(Value, Codes) :- atom(Value), !, atom_codes(Value, Codes). | |
| 222 | lenient_atom_codes(Value, Codes) :- number(Value), !, number_codes(Value, Codes). | |
| 223 | lenient_atom_codes(Value, Codes) :- Value=[], !, Codes=[]. | |
| 224 | lenient_atom_codes(Value, Codes) :- Value=[C|_], number(C), !, Codes = Value. | |
| 225 | ||
| 226 | json_escape_string_codes0([], []) :- !. | |
| 227 | json_escape_string_codes0([34|T], [92,34|ET]) :- !, json_escape_string_codes0(T, ET). % double quote | |
| 228 | json_escape_string_codes0([92|T], [92,92|ET]) :- !, json_escape_string_codes0(T, ET). % backslash | |
| 229 | % it is unnecessary to escape the forward slash here | |
| 230 | json_escape_string_codes0([8|T], [92,98|ET]) :- !, json_escape_string_codes0(T, ET). % b(ackspace) | |
| 231 | json_escape_string_codes0([12|T], [92,102|ET]) :- !, json_escape_string_codes0(T, ET). % f(orm feed) | |
| 232 | json_escape_string_codes0([10|T], [92,110|ET]) :- !, json_escape_string_codes0(T, ET). % n(ew line) | |
| 233 | json_escape_string_codes0([13|T], [92,114|ET]) :- !, json_escape_string_codes0(T, ET). % (carriage) r(eturn) | |
| 234 | json_escape_string_codes0([9|T], [92,116|ET]) :- !, json_escape_string_codes0(T, ET). % (horizontal) t(ab) | |
| 235 | json_escape_string_codes0([N|T], [92,117,H4,H3,H2,H1|ET]) :- | |
| 236 | (N >= 0, N =< 31 ; ascii(true), N >= 0x7f, N =< 0x10000), % non-ascii/non-printable (u)nicode codepoints | |
| 237 | !, | |
| 238 | to_4_hex_codes(N, H4, H3, H2, H1), | |
| 239 | json_escape_string_codes0(T, ET). | |
| 240 | json_escape_string_codes0([N|T], [92,117,H4,H3,H2,H1,92,117,H8,H7,H6,H5|ET]) :- % escape as surrogate pair | |
| 241 | ascii(true), N >= 0x10000, N =< 0x10FFFF, % maximum unicode codepoint | |
| 242 | !, | |
| 243 | LowSurrogate is ((N-65536) mod 0x400) + 0xDC00, | |
| 244 | HighSurrogate is ((N-65536) div 0x400) + 0xD800, | |
| 245 | to_4_hex_codes(HighSurrogate, H4, H3, H2, H1), | |
| 246 | to_4_hex_codes(LowSurrogate, H8, H7, H6, H5), | |
| 247 | json_escape_string_codes0(T, ET). | |
| 248 | json_escape_string_codes0([N|T], [N|ET]) :- !, json_escape_string_codes0(T, ET). % the rest | |
| 249 | ||
| 250 | to_4_hex_codes(N1, H4, H3, H2, H1) :- | |
| 251 | D1 is (N1 mod 16), N2 is N1 div 16, | |
| 252 | D2 is (N2 mod 16), N3 is N2 div 16, | |
| 253 | D3 is (N3 mod 16), N4 is N3 div 16, | |
| 254 | D4 is (N4 mod 16), | |
| 255 | to_hex_code(D1, H1), | |
| 256 | to_hex_code(D2, H2), | |
| 257 | to_hex_code(D3, H3), | |
| 258 | to_hex_code(D4, H4). | |
| 259 | ||
| 260 | to_hex_code(N, C) :- N >= 0, N < 10, !, C is 48+N. % 48 is '0' | |
| 261 | to_hex_code(N, C) :- N >= 10, N < 16, !, C is 97+(N-10). % 97 is 'a' | |
| 262 | from_hex_code(C, N) :- C >= 48, C =< 57, !, N is C-48. | |
| 263 | from_hex_code(C, N) :- C >= 97, C =< 102, !, N is (C-97)+10. | |
| 264 | from_hex_code(C, N) :- C >= 65, C =< 70, !, N is (C-65)+10. | |
| 265 | ||
| 266 | json_write_array([], _Indent) --> !, []. | |
| 267 | json_write_array([H|T], Indent) --> | |
| 268 | !, | |
| 269 | json_write_newline, | |
| 270 | json_write_indent(Indent), | |
| 271 | json_write1(H, Indent), | |
| 272 | json_write_array1(T, Indent). | |
| 273 | json_write_array1([], Indent) --> | |
| 274 | !, | |
| 275 | json_write_newline, | |
| 276 | {Indent1 is Indent-1}, | |
| 277 | json_write_indent(Indent1). | |
| 278 | json_write_array1([H|T], Indent) --> | |
| 279 | !, | |
| 280 | ",", | |
| 281 | json_write_newline, | |
| 282 | json_write_indent(Indent), | |
| 283 | json_write1(H, Indent), | |
| 284 | json_write_array1(T, Indent). | |
| 285 | ||
| 286 | json_write_pair(Pair, Indent) --> | |
| 287 | json_write_newline, | |
| 288 | json_write_indent(Indent), | |
| 289 | {Pair =.. [_, K, V | _], atom_codes(K, KCodes)}, % leniency: accept every term with at least two arguments | |
| 290 | json_write_string(KCodes), | |
| 291 | ":", | |
| 292 | json_write_pretty_whitespace, | |
| 293 | json_write1(V, Indent). | |
| 294 | ||
| 295 | json_write_object([], _Indent) --> !, []. | |
| 296 | json_write_object([H|T], Indent) --> | |
| 297 | !, | |
| 298 | json_write_pair(H, Indent), | |
| 299 | json_write_object1(T, Indent). | |
| 300 | json_write_object1([], Indent) --> | |
| 301 | !, | |
| 302 | json_write_newline, | |
| 303 | {Indent1 is Indent-1}, | |
| 304 | json_write_indent(Indent1). | |
| 305 | json_write_object1([H|T], Indent) --> | |
| 306 | !, | |
| 307 | ",", | |
| 308 | json_write_pair(H, Indent), | |
| 309 | json_write_object1(T, Indent). | |
| 310 | ||
| 311 | % -------------- | |
| 312 | ||
| 313 | json_parse(Chars, Json) :- | |
| 314 | json_parse(Chars, Json, []). | |
| 315 | ||
| 316 | json_parse(Chars, Json, Options) :- | |
| 317 | reset_options, | |
| 318 | reset_line_nr, | |
| 319 | maplist(process_option, Options), | |
| 320 | (member(rest(ExpectedRest), Options) -> true ; ExpectedRest = ""), | |
| 321 | json(Json, Chars, Rest), !, | |
| 322 | (ExpectedRest = Rest | |
| 323 | -> true | |
| 324 | ; (ExpectedRest = "" -> ExpectedString = "<EOF>" ; ExpectedString = ExpectedRest), | |
| 325 | generate_json_error(ExpectedString, Rest), | |
| 326 | fail | |
| 327 | ). | |
| 328 | ||
| 329 | json(Json) --> try_json(Json), !. | |
| 330 | json(_) --> !, fail_with_error("json value"). | |
| 331 | ||
| 332 | % a version that does not print an error message | |
| 333 | try_json(Json) --> ws, !, try_json(Json). | |
| 334 | try_json(Json) --> json1(Json), !, spaces. | |
| 335 | ||
| 336 | json1(@(Literal)) --> | |
| 337 | {valid_literal(Literal), atom_codes(Literal, LiteralCodes)}, | |
| 338 | LiteralCodes, | |
| 339 | !. | |
| 340 | json1(number(Number)) --> parse_number(Number), !. | |
| 341 | json1(string(String)) --> | |
| 342 | json_start_string_quote(Quote), !, | |
| 343 | string2(Codes, Quote), | |
| 344 | {strings_as_atoms(true) -> atom_codes(String, Codes) ; String = Codes}. | |
| 345 | json1(array(Array)) --> | |
| 346 | "[", !, | |
| 347 | array(Array). | |
| 348 | json1(json(Object)) --> | |
| 349 | "{", !, | |
| 350 | pairs(Object). | |
| 351 | ||
| 352 | parse_number(Number) --> | |
| 353 | nm_token(NmCodes), !, | |
| 354 | {number_codes(Number, NmCodes)}. | |
| 355 | ||
| 356 | nm_token([H|T]) --> | |
| 357 | [H], {minus(H)}, !, | |
| 358 | nm_int(T). | |
| 359 | nm_token(T) --> !, nm_int(T). | |
| 360 | ||
| 361 | nm_int([H|T]) --> | |
| 362 | [H], {zero(H)}, !, | |
| 363 | nm_int0(T). | |
| 364 | nm_int([H|T]) --> | |
| 365 | [H], {digit_table_1to9(H)}, !, | |
| 366 | nm_int1(T). | |
| 367 | ||
| 368 | nm_int0([H|T]) --> | |
| 369 | [H], {dot(H)}, !, | |
| 370 | nm_frac(T). | |
| 371 | nm_int0([]) --> !, "". | |
| 372 | ||
| 373 | nm_int1([H|T]) --> | |
| 374 | [H], {digit_table(H)}, !, | |
| 375 | nm_int1(T). | |
| 376 | nm_int1([H|T]) --> | |
| 377 | [H], {dot(H)}, !, | |
| 378 | nm_frac(T). | |
| 379 | nm_int1([]) --> !, "". | |
| 380 | ||
| 381 | nm_frac([H|T]) --> | |
| 382 | [H], {digit_table(H)}, !, | |
| 383 | nm_frac1(T). | |
| 384 | ||
| 385 | nm_frac1([H|T]) --> | |
| 386 | [H], {exp(H)}, !, | |
| 387 | nm_exp(T). | |
| 388 | nm_frac1([H|T]) --> | |
| 389 | [H], {digit_table(H)}, !, | |
| 390 | nm_frac1(T). | |
| 391 | nm_frac1([]) --> !, "". | |
| 392 | ||
| 393 | nm_exp([H|T]) --> | |
| 394 | [H], {sign(H)}, !, | |
| 395 | nm_exp0(T). | |
| 396 | nm_exp(T) --> !, nm_exp0(T). | |
| 397 | ||
| 398 | nm_exp0([H|T]) --> | |
| 399 | [H], {digit_table(H)}, !, | |
| 400 | nm_exp1(T). | |
| 401 | ||
| 402 | nm_exp1([H|T]) --> | |
| 403 | [H], {digit_table(H)}, !, | |
| 404 | nm_exp1(T). | |
| 405 | nm_exp1([]) --> !, "". | |
| 406 | ||
| 407 | % detect valid start string quote | |
| 408 | json_start_string_quote(0'\x22\) --> """", !. % regular JSON string syntax | |
| 409 | %json_start_string_quote(39) --> "'", !. % possible ProB JSON extension, so that we do not have to quote B strings in formulas | |
| 410 | ||
| 411 | % regular JSON string between double quotes | |
| 412 | %string(X) --> string2(X,0'\x22\). | |
| 413 | ||
| 414 | string2([],EndQuote) --> | |
| 415 | [EndQuote], | |
| 416 | !. | |
| 417 | string2([EscapedChar|T],EndQuote) --> | |
| 418 | [0'\x5C\],!, % \ | |
| 419 | escape_char(EscapedChar), | |
| 420 | string2(T,EndQuote). | |
| 421 | string2([C|T],EndQuote) --> | |
| 422 | [C], | |
| 423 | {C=10;C=13}, | |
| 424 | !, | |
| 425 | ({multiline_strings(true)} -> {true} ; fail_with_error("string content")), | |
| 426 | {C=10 -> inc_line_nr ; true}, | |
| 427 | string2(T,EndQuote). | |
| 428 | string2([H|T],EndQuote) --> | |
| 429 | [H], | |
| 430 | string2(T,EndQuote). | |
| 431 | ||
| 432 | escape_char( 0'\x22\ ) --> [0'\x22\], !. %" 34 decimal | |
| 433 | escape_char( 0'\x5C\ ) --> [0'\x5C\], !. %\ | |
| 434 | escape_char( 0'\x2F\ ) --> [0'\x2F\], !. %/ | |
| 435 | escape_char( 0'\x08\ ) --> [0'\x62\], !. %b | |
| 436 | escape_char( 0'\x0C\ ) --> [0'\x66\], !. %f | |
| 437 | escape_char( 0'\x0A\ ) --> [0'\x6E\], !. %n | |
| 438 | escape_char( 0'\x0D\ ) --> [0'\x72\], !. %r | |
| 439 | escape_char( 0'\x09\ ) --> [0'\x74\], !. %t | |
| 440 | ||
| 441 | % combine surrogates (same behaviour as SICStus library) | |
| 442 | escape_char( Code ) --> | |
| 443 | "u", | |
| 444 | hex_digit_char( H1 ), | |
| 445 | hex_digit_char( H2 ), | |
| 446 | hex_digit_char( H3 ), | |
| 447 | hex_digit_char( H4 ), | |
| 448 | {HighSurrogate is (((H1 << 4 + H2) << 4 + H3) << 4 + H4)}, | |
| 449 | {HighSurrogate >= 0xD800, HighSurrogate =< 0xDBFF}, | |
| 450 | "\\u", | |
| 451 | hex_digit_char( H5 ), | |
| 452 | hex_digit_char( H6 ), | |
| 453 | hex_digit_char( H7 ), | |
| 454 | hex_digit_char( H8 ), | |
| 455 | {LowSurrogate is (((H5 << 4 + H6) << 4 + H7) << 4 + H8)}, | |
| 456 | {LowSurrogate >= 0xDC00, LowSurrogate =< 0xDFFF}, | |
| 457 | !, | |
| 458 | {Code is 0x10000 + (0x400 * (HighSurrogate - 0xD800)) + (LowSurrogate - 0xDC00)}. | |
| 459 | ||
| 460 | escape_char( Code ) --> | |
| 461 | "u", | |
| 462 | !, | |
| 463 | hex_digit_char( H1 ), | |
| 464 | hex_digit_char( H2 ), | |
| 465 | hex_digit_char( H3 ), | |
| 466 | hex_digit_char( H4 ), | |
| 467 | { Code is (((H1 << 4 + H2) << 4 + H3) << 4 + H4) }. | |
| 468 | ||
| 469 | hex_digit_char( N ) --> [C], {from_hex_code(C, N)}, !. | |
| 470 | ||
| 471 | array(Array) --> ws, !, array(Array). | |
| 472 | array([]) --> "]", !. | |
| 473 | array([H|T]) --> | |
| 474 | json1(H), !, | |
| 475 | spaces, | |
| 476 | array1(T). | |
| 477 | array(_) --> !, fail_with_error("json value or \"]\" in array"). | |
| 478 | ||
| 479 | array1([]) --> "]", !. | |
| 480 | array1([H|T]) --> | |
| 481 | check_string(","), | |
| 482 | spaces, | |
| 483 | json1(H), !, | |
| 484 | spaces, | |
| 485 | array1(T). | |
| 486 | array1(_) --> !, fail_with_error("\",\" json value or \"]\" in array"). | |
| 487 | ||
| 488 | pair(ResPair) --> | |
| 489 | spaces, | |
| 490 | {position_infos(true) | |
| 491 | -> position_start(Pos), ResPair = '='(Name, Value, Pos) | |
| 492 | ; ResPair = '='(Name, Value)}, | |
| 493 | (json_start_string_quote(Quote) -> {true} ; fail_with_error("string quote")), | |
| 494 | string2(Codes, Quote), | |
| 495 | {atom_codes(Name, Codes)}, | |
| 496 | spaces, check_string(":"), spaces, | |
| 497 | json1(Value), | |
| 498 | {position_infos(true) -> position_end(Pos) ; true}, | |
| 499 | spaces. | |
| 500 | ||
| 501 | pairs(List) --> ws, !, pairs(List). | |
| 502 | pairs([]) --> "}", !. | |
| 503 | pairs([H|T]) --> | |
| 504 | pair(H), !, | |
| 505 | pairs1(T). | |
| 506 | pairs(_) --> !, fail_with_error("string \":\" json value or \"}\" in object"). | |
| 507 | ||
| 508 | pairs1([]) --> "}", !. | |
| 509 | pairs1([H|T]) --> | |
| 510 | check_string(","), | |
| 511 | pair(H), !, | |
| 512 | pairs1(T). | |
| 513 | pairs1(_) --> !, fail_with_error("\",\" string \":\" json value or \"}\" in object"). | |
| 514 | ||
| 515 | % check if the next char matches a given character or one of the given chars | |
| 516 | check_string(List) --> [Char], {member(Char, List)}, !. | |
| 517 | check_string(List) --> !, fail_with_error(List). | |
| 518 | ||
| 519 | fail_with_error(ExpectedString, Codes, _) :- !, generate_json_error(ExpectedString, Codes), fail. | |
| 520 | ||
| 521 | generate_json_error(ExpectedString, Char) :- | |
| 522 | cur_line(LineNr), | |
| 523 | number_codes(LineNr,LC), | |
| 524 | convert_chars(Char,CStr), | |
| 525 | append(["JSON error in line ",LC,": expected ",ExpectedString,", but got ",CStr],Codes), | |
| 526 | atom_codes(Msg,Codes), | |
| 527 | add_error(json_parser,Msg). | |
| 528 | ||
| 529 | convert_chars([], Res) :- !, Res = "<EOF>". | |
| 530 | convert_chars([Char|_], Res) :- !, convert_char(Char, Res). | |
| 531 | convert_chars(Char, Res) :- !, convert_char(Char, Res). | |
| 532 | ||
| 533 | convert_char(9, Res) :- !, Res = "<tab>". | |
| 534 | convert_char(10, Res) :- !, Res = "<newline>". | |
| 535 | convert_char(13, Res) :- !, Res = "<carriage return>". | |
| 536 | convert_char(Char, Res) :- number(Char), !, Res = [Char]. | |
| 537 | convert_char(_, Res) :- !, Res = "<unknown>". | |
| 538 | ||
| 539 | minus( 0'- ). | |
| 540 | plus( 0'+ ). | |
| 541 | sign( C ) :- minus( C ). | |
| 542 | sign( C ) :- plus( C ). | |
| 543 | zero( 0'0 ). | |
| 544 | digit_table( C ) :- zero( C ). | |
| 545 | digit_table( C ) :- digit_table_1to9( C ). | |
| 546 | digit_table_1to9( 0'1 ). | |
| 547 | digit_table_1to9( 0'2 ). | |
| 548 | digit_table_1to9( 0'3 ). | |
| 549 | digit_table_1to9( 0'4 ). | |
| 550 | digit_table_1to9( 0'5 ). | |
| 551 | digit_table_1to9( 0'6 ). | |
| 552 | digit_table_1to9( 0'7 ). | |
| 553 | digit_table_1to9( 0'8 ). | |
| 554 | digit_table_1to9( 0'9 ). | |
| 555 | dot( 0'. ). | |
| 556 | exp( 0'e ). | |
| 557 | exp( 0'E ). | |
| 558 | ||
| 559 | ||
| 560 | spaces --> ws,!,spaces. | |
| 561 | spaces --> "". | |
| 562 | ||
| 563 | % whitespace | |
| 564 | ws --> new_line, !. | |
| 565 | ws --> " "; "\t" ; "\r". | |
| 566 | ||
| 567 | new_line --> "\n",{inc_line_nr}. | |
| 568 | ||
| 569 | % use a fact to keep track of line numbers | |
| 570 | :- dynamic cur_line/1. | |
| 571 | reset_line_nr :- retractall(cur_line(_)), assertz(cur_line(1)). | |
| 572 | inc_line_nr :- cur_line(N), !, retractall(cur_line(_)), N1 is N+1, assertz(cur_line(N1)). | |
| 573 | ||
| 574 | position_start(Start-_) :- !, cur_line(Start). | |
| 575 | position_end(_-End) :- !, cur_line(End). |