| 1 | | % (c) 2009-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, |
| 2 | | % Heinrich Heine Universitaet Duesseldorf |
| 3 | | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) |
| 4 | | |
| 5 | | |
| 6 | | :- module(tools_strings, |
| 7 | | [string_concatenate/3, |
| 8 | | ajoin/2, ajoin_with_limit/3, ajoin_with_sep/3, |
| 9 | | ajoin_path/3, |
| 10 | | safe_name/2, |
| 11 | | predicate_functor/3, |
| 12 | | atom_codes_with_limit/2, atom_codes_with_limit/3, |
| 13 | | truncate_atom/3, |
| 14 | | atom_prefix/2, atom_suffix/2, atom_split/4, |
| 15 | | match_atom/2, match_atom/3, |
| 16 | | convert_cli_arg/2, |
| 17 | | convert_to_number/2, |
| 18 | | convert_atom_to_number/2, |
| 19 | | get_hex_codes/2, |
| 20 | | get_hex_bytes/2, |
| 21 | | number_codes_min_length/3, |
| 22 | | is_digit_code/1, |
| 23 | | is_alphabetical_ascii_code/1, |
| 24 | | is_simple_classical_b_identifier/1, is_simple_classical_b_identifier_codes/1, |
| 25 | | is_composed_classical_b_identifier/1, |
| 26 | | strip_newlines/2, |
| 27 | | atom_tail_to_lower_case/2 |
| 28 | | ]). |
| 29 | | |
| 30 | | :- use_module(module_information). |
| 31 | | |
| 32 | | :- module_info(group,infrastructure). |
| 33 | | :- module_info(description,'A few utilities on strings/atoms seperated out from tools.pl to avoid cyclic module dependencies.'). |
| 34 | | |
| 35 | | :- set_prolog_flag(double_quotes, codes). |
| 36 | | |
| 37 | | |
| 38 | | %! string_concatenate(+X,+Y,-XY) |
| 39 | | string_concatenate(X,Y,XY) :- atom(X),atom(Y),!, atom_concat(X,Y,XY). |
| 40 | | string_concatenate(X,Y,XY) :- atom(X),number(Y),!, |
| 41 | | % convert Y to atom, atom_concat is much faster than converting to codes list and back |
| 42 | | number_codes(Y,YC), atom_codes(X,XC), |
| 43 | | append(XC,YC,XYC), |
| 44 | | atom_codes(XY,XYC). |
| 45 | | string_concatenate(X,Y,XY) :- |
| 46 | | safe_name(X,Xs),safe_name(Y,Ys),append(Xs,Ys,XYs),atom_codes(XY,XYs). |
| 47 | | |
| 48 | | % tests are stored in tools.pl to avoid cyclic module dependencies |
| 49 | | %:- assert_must_succeed((tools_strings: ajoin_with_sep([link,a,xa],'.',Text), Text == 'link.a.xa')). |
| 50 | | %:- assert_must_succeed((tools_strings: ajoin_with_sep([link],'.',Text), Text == 'link')). |
| 51 | | |
| 52 | | :- use_module(library(types), [illarg/3]). |
| 53 | | ajoin_with_sep(List,Sep,Text) :- var(List),!, |
| 54 | | illarg(var, ajoin_with_sep(List,Sep,Text), 1). |
| 55 | | ajoin_with_sep(List,Sep,Text) :- |
| 56 | | insert_sep(List,Sep,L2), |
| 57 | | ajoin(L2,Text). |
| 58 | | |
| 59 | | insert_sep([H1,H2|T],Sep,Res) :- !, Res= [H1,Sep|RT], insert_sep([H2|T],Sep,RT). |
| 60 | | insert_sep(L,_Sep,L). |
| 61 | | |
| 62 | | |
| 63 | | /* Concats a list of atoms, but checks if numbers or compound terms are |
| 64 | | involved, which are converted to simple atoms */ |
| 65 | | ajoin(List,Text) :- var(List),!, |
| 66 | | illarg(var, ajoin(List,Text), 1). |
| 67 | | ajoin([Atom],Res) :- !, Res=Atom. |
| 68 | | ajoin(ListOfAtoms,Atom) :- ajoin_with_limit(ListOfAtoms,10000000,Atom). |
| 69 | | % atom_codes with list of 100000000 takes very long, with 10000000 a few seconds |
| 70 | | |
| 71 | | % concats a directory with a tail file path |
| 72 | | % checks if directory ends in slash and adds one if necessary and ensures only one / is present |
| 73 | | % sicstus can deal paths with multiple /, so this is more for aesthetic reasons |
| 74 | | ajoin_path(Directory,File,FullPath) :- \+ atom(Directory),!, |
| 75 | | illarg(force_type(atom), ajoin_path(Directory,File,FullPath) , 1). |
| 76 | | ajoin_path(Directory,File,FullPath) :- \+ atom(File),!, |
| 77 | | illarg(force_type(atom), ajoin_path(Directory,File,FullPath) , 2). |
| 78 | | ajoin_path(Directory,File,FullPath) :- |
| 79 | | atom_codes(Directory,DC), |
| 80 | | atom_codes(File,FC), |
| 81 | | Sep = 0'/, |
| 82 | | (FC=[Sep|FC2] -> FC2=FC ; FC2=[Sep|FC]), |
| 83 | | (append(DC2,[Sep],DC) % directory ends in / |
| 84 | | -> append(DC2,FC2,FullCodes) |
| 85 | | ; append(DC,FC2,FullCodes)), |
| 86 | | atom_codes(FullPath,FullCodes). |
| 87 | | |
| 88 | | |
| 89 | | %safe_atom_concat(A,B,C,ExceptionOcc) :- |
| 90 | | % catch(atom_concat(A,B,C), |
| 91 | | % error(representation_error(max_atom_length),_), |
| 92 | | % (print(exception(max_atom_length)),nl,A=C,ExceptionOcc=true)). |
| 93 | | |
| 94 | | :- use_module(library(codesio),[write_to_codes/2]). |
| 95 | | |
| 96 | | %toAtom(Number,Atom) :- number(Number),!,number_chars(Number,C),atom_chars(Atom,C). |
| 97 | | %toAtom(Atom,Atom) :- atomic(Atom),!. |
| 98 | | %toAtom(Term,Atom) :- write_to_codes(Term,Codes),safe_atom_codes(Atom,Codes). |
| 99 | | |
| 100 | | toCodes(Number,Codes) :- number(Number),!,number_codes(Number,Codes). |
| 101 | | toCodes(Atom,Codes) :- atomic(Atom),!,atom_codes(Atom,Codes). |
| 102 | | toCodes(Term,Codes) :- write_to_codes(Term,Codes). |
| 103 | | |
| 104 | | |
| 105 | | % match an atom with a term |
| 106 | | % we want to match the atom 's(0)' with the term s(0) |
| 107 | | match_atom(Atom,Term) :- atom(Term),!,Term=Atom. |
| 108 | | match_atom(Atom,Term) :- atom_codes(Atom,AtomCodes), write_to_codes(Term,Codes),Codes=AtomCodes. |
| 109 | | |
| 110 | | % version where atom_codes is precomputed for efficiency: |
| 111 | | match_atom(Atom,_,Term) :- atom(Term),!,Term=Atom. |
| 112 | | match_atom(_Atom,AtomCodes,Term) :- write_to_codes(Term,Codes),Codes=AtomCodes. |
| 113 | | |
| 114 | | |
| 115 | | % a version of number codes with 0 padding at the left to achieve minimum length. |
| 116 | | number_codes_min_length(Nr,Min,Codes) :- number_codes(Nr,C1), length(C1,Len), |
| 117 | | pad_nrc(Len,Min,C1,Codes). |
| 118 | | pad_nrc(L,Min,C,R) :- L >= Min,!, R=C. |
| 119 | | pad_nrc(L,Min,C,[0'0 | RT]) :- L1 is L+1, |
| 120 | | pad_nrc(L1,Min,C,RT). |
| 121 | | |
| 122 | | % a copy of safe_atom_codes/2 from tools to avoid module dependency on error_manager |
| 123 | | safe_atom_codes(V,C) :- var(V),var(C),!, |
| 124 | | print_error('Variables in call: '),print_error(safe_atom_codes(V,C)), |
| 125 | | C='$VARIABLE$'. |
| 126 | | safe_atom_codes(A,C) :- |
| 127 | | catch(atom_codes(A,C), |
| 128 | | error(representation_error(max_atom_length),_), |
| 129 | | (print(exception(max_atom_length)),nl,atom_codes_with_limit(A,1000,C))). |
| 130 | | |
| 131 | | % will concatenate until the Limit is reached or exceeded; it may produce atoms longer than Limit |
| 132 | | % (if first atom already longer than limit + it adds ... |
| 133 | | %:- assert_must_succeed((tools: ajoin_with_limit(['A','B','C','D'],100,Text), Text == 'ABCD')). |
| 134 | | %:- assert_must_succeed((tools: ajoin_with_limit(['A','B','C','D'],2,Text), Text == 'AB...')). |
| 135 | | |
| 136 | | |
| 137 | | ajoin_with_limit(Atoms,Limit,Result) :- |
| 138 | | ajoin_codes_with_limit(Atoms,Limit,Codes), |
| 139 | | safe_atom_codes(Atom,Codes), Result=Atom. |
| 140 | | |
| 141 | | %:- assert_must_succeed((tools: ajoin_codes_with_limit(['A','B','C','D'],100,Text), Text == "ABCD")). |
| 142 | | ajoin_codes_with_limit([],_,[]). |
| 143 | | ajoin_codes_with_limit([Atom|TAtoms],Limit,Res) :- |
| 144 | | toCodes(Atom,AtomCodes), |
| 145 | | add_codes(AtomCodes,TAtoms,Limit,Res). |
| 146 | | |
| 147 | | add_codes([],TAtoms,Limit,Res) :- !, ajoin_codes_with_limit(TAtoms,Limit,Res). |
| 148 | | add_codes(_,_,Limit,Res) :- Limit < 1, !, Res = "...". |
| 149 | | add_codes([H|T],TAtoms,Limit,[H|TR]) :- L1 is Limit-1, |
| 150 | | add_codes(T,TAtoms,L1,TR). |
| 151 | | |
| 152 | | |
| 153 | | |
| 154 | | :- use_module(tools_printing,[print_error/1]). |
| 155 | | safe_name([], "[]") :- !. % [] is not an atom on SWI-Prolog! |
| 156 | | safe_name(X,N) :- atom(X),!, atom_codes(X,N). |
| 157 | | safe_name(X,N) :- number(X),!, number_codes(X,N). |
| 158 | | safe_name(X,N) :- var(X),!, N="var". |
| 159 | | safe_name(lambda_res(X),[114,101,115,95|N]) :- !, atom_codes(X,N). |
| 160 | | safe_name(X,N) :- functor(X,F,_),atom_codes(F,N), print_error(non_atomic_in_safe_name(X)). |
| 161 | | |
| 162 | | |
| 163 | | predicate_functor(X,F,N) :- var(X),!, print_error(var_in_predicate_functor),F='$VAR',N=0. |
| 164 | | predicate_functor(_Module:Pred,F,N) :- !,predicate_functor(Pred,F,N). |
| 165 | | predicate_functor(P,F,N) :- functor(P,F,N). |
| 166 | | |
| 167 | | |
| 168 | | |
| 169 | | atom_codes_with_limit(A,C) :- |
| 170 | | catch( |
| 171 | | atom_codes(A,C), |
| 172 | | error(representation_error(max_atom_length),_), |
| 173 | | (print(exception(max_atom_length)),nl,atom_codes_with_limit(A,1000,C))). |
| 174 | | |
| 175 | | |
| 176 | | atom_codes_with_limit(A,Limit,Codes) :- var(A), Limit >= 0, !, |
| 177 | | truncate_codes(Codes,Limit,TCodes,_), |
| 178 | | atom_codes(A,TCodes). |
| 179 | | %atom_codes_with_limit(A,Limit,Codes) :- compound(A),!, % should we catch this error? |
| 180 | | atom_codes_with_limit(A,Limit,Codes) :- Limit < 1, !, atom_codes(A,Codes). |
| 181 | | atom_codes_with_limit(A,Limit,Codes) :- atom_codes(A,Codes1), |
| 182 | | truncate_codes(Codes,Limit,Codes1,_). |
| 183 | | |
| 184 | | |
| 185 | | truncate_codes([],_,[],false). |
| 186 | | truncate_codes([H|T],Count,Res,Trunc) :- |
| 187 | | Count<1 -> Res = [46,46,46],Trunc=true /* '...' */ |
| 188 | | ; Res = [H|TT], C1 is Count-1, truncate_codes(T,C1,TT,Trunc). |
| 189 | | |
| 190 | | %:- assert_must_succeed((tools_strings:truncate_atom(abcd,100,Text), Text == 'abcd')). |
| 191 | | %:- assert_must_succeed((tools_strings:truncate_atom(abcd,2,Text), Text == 'ab...')). |
| 192 | | %:- assert_must_succeed((tools_strings:truncate_atom(abcd,0,Text), Text == '...')). |
| 193 | | % TO DO: could be made more efficient by using something like sub_atom(Atom,0,Limit,_,NewAtom) |
| 194 | | truncate_atom(Atom,Limit,NewAtom) :- |
| 195 | | atom_codes(Atom,Codes), |
| 196 | | truncate_codes(Codes,Limit,TCodes,Trunc), |
| 197 | | (Trunc=true -> atom_codes(NewAtom,TCodes) ; NewAtom=Atom). |
| 198 | | |
| 199 | | |
| 200 | | %atom_prefix(_,Atom) :- \+ atom(Atom),!, trace,fail. |
| 201 | | atom_prefix(Prefix,Atom) :- |
| 202 | | sub_atom(Atom,0,_,_,Prefix). % instead of atom_concat(Prefix,_,Atom) |
| 203 | | |
| 204 | | %atom_suffix(_,Atom) :- \+ atom(Atom),!, trace,fail. |
| 205 | | atom_suffix(Suffix,Atom) :- |
| 206 | | sub_atom(Atom,_,_,0,Suffix). % instead of atom_concat(_,Suffix,Atom) |
| 207 | | |
| 208 | | % Atom can be split into Prefix.Sep.Suffix |
| 209 | | atom_split(Prefix,Sep,Suffix,Atom) :- |
| 210 | | atom_concat(Prefix,Suffix1,Atom), |
| 211 | | atom_concat(Sep,Suffix,Suffix1). |
| 212 | | |
| 213 | | convert_cli_arg(PrefVal,Value) :- compound(PrefVal),!,Value=PrefVal. |
| 214 | | convert_cli_arg(Atom,Value) :- |
| 215 | | convert_atom_to_number(Atom,Nr),!, /* convert '12' to 12 */ |
| 216 | | Value=Nr. |
| 217 | | convert_cli_arg(V,V). |
| 218 | | |
| 219 | | convert_to_number(Nr,Res) :- number(Nr),!,Res=Nr. |
| 220 | | convert_to_number(Atom,Nr) :- convert_atom_to_number(Atom,Nr). |
| 221 | | |
| 222 | | convert_atom_to_number(Atom,Nr) :- |
| 223 | | atom(Atom), atom_codes(Atom,C), |
| 224 | | catch(number_codes(Nr,C), |
| 225 | | error(syntax_error(_N),_), |
| 226 | | % in this case safe_number_codes fails ; we cannot convert the codes into a number |
| 227 | | fail). |
| 228 | | |
| 229 | | |
| 230 | | % detect simple ASCII classical B identifiers accepted by the parser |
| 231 | | is_simple_classical_b_identifier(Atom) :- atom_codes(Atom,Codes), |
| 232 | | is_simple_classical_b_identifier_codes(Codes). |
| 233 | | is_simple_classical_b_identifier_codes(Codes) :- |
| 234 | | Codes = [Code|T], |
| 235 | ? | is_valid_id_letter_code(Code),!, |
| 236 | | legal_id_aux(T). |
| 237 | | |
| 238 | | legal_id_aux([]). |
| 239 | | legal_id_aux([0'$,0'0]) :- !. % only $0 allowed |
| 240 | ? | legal_id_aux([Code|T]) :- legal_id_code(Code),!,legal_id_aux(T). |
| 241 | | |
| 242 | | legal_id_code(0'_). |
| 243 | | % FIXME Primes should only be allowed at the end of the identifier, not in the middle! |
| 244 | | legal_id_code(0'\'). |
| 245 | | legal_id_code(0x2032). % Unicode prime |
| 246 | | legal_id_code(C) :- is_digit_code(C). |
| 247 | ? | legal_id_code(C) :- is_valid_id_letter_code(C). |
| 248 | | |
| 249 | | is_digit_code(Code) :- Code >= 48, Code =< 57. % 0-9 |
| 250 | | |
| 251 | | is_alphabetical_ascii_code(Code) :- Code >= 65, |
| 252 | | ( Code =< 90 -> true % A-Z |
| 253 | | ; Code >= 97, Code =< 122). % a-z |
| 254 | | |
| 255 | | is_greek_lower_case(Code) :- Code >= 945, % alpha |
| 256 | | Code =< 1017. % omega |
| 257 | | is_greek_upper_case(Code) :- Code >= 916, % Alpha |
| 258 | | Code =< 937. % Omega |
| 259 | | |
| 260 | | % partially taken from BParser.scc: unicode_letter definition from java-1.7.sablecc |
| 261 | | is_umlaut_code(Code) :- Code < 0xc0, !, fail. |
| 262 | | is_umlaut_code(Code) :- Code >= 0xc0, Code =< 0xd6. % [0x00c0..0x00d6] in BParser.scc ) |
| 263 | | is_umlaut_code(Code) :- Code >= 0xd8, Code =< 0xf6. % [0x00c0..0x00d6] in BParser.scc |
| 264 | | is_umlaut_code(Code) :- Code >= 0xf8, Code =< 0x01f5. % [0x00f8..0x01f5] in BParser.scc |
| 265 | | is_umlaut_code(Code) :- Code >= 0x01fa, Code =< 0x0217. |
| 266 | | is_umlaut_code(Code) :- Code >= 0x0401, Code =< 0x040c. % Cyrillic 0x0401..0x040c] + [0x040e..0x044f] |
| 267 | | is_umlaut_code(Code) :- Code >= 0x040e, Code =< 0x044f. |
| 268 | | is_umlaut_code(Code) :- Code >= 0x0451, Code =< 0x045c. % [0x0451..0x045c] + [0x045e..0x0481] |
| 269 | | is_umlaut_code(Code) :- Code >= 0x045e, Code =< 0x0481. |
| 270 | | % TO DO: add more |
| 271 | | |
| 272 | | is_greek_lambda_code(955). % special treatment as it is used as an operator |
| 273 | | |
| 274 | | is_valid_id_letter_code(Code) :- is_alphabetical_ascii_code(Code). |
| 275 | | is_valid_id_letter_code(Code) :- is_greek_lower_case(Code), % accepted by ProB, but not by Atelier-B |
| 276 | | \+ is_greek_lambda_code(Code). |
| 277 | | is_valid_id_letter_code(Code) :- is_greek_upper_case(Code). % accepted by ProB, but not by Atelier-B |
| 278 | | is_valid_id_letter_code(Code) :- is_umlaut_code(Code). % accepted by ProB, but not by Atelier-B |
| 279 | | |
| 280 | | is_composed_classical_b_identifier(Atom) :- atom_codes(Atom,Codes), |
| 281 | | Codes = [Code|T], |
| 282 | | is_valid_id_letter_code(Code), !, |
| 283 | | legal_comp_id_aux(T). |
| 284 | | |
| 285 | | legal_comp_id_aux([]). |
| 286 | | legal_comp_id_aux([0'$,0'0]). % only $0 allowed |
| 287 | | legal_comp_id_aux([0'. , Code |T]) :- !, % a dot which must be followed by a new legal id |
| 288 | | is_valid_id_letter_code(Code), !, legal_comp_id_aux(T). |
| 289 | | legal_comp_id_aux([Code|T]) :- legal_id_code(Code), !, legal_comp_id_aux(T). |
| 290 | | |
| 291 | | % strip newlines and replace by space |
| 292 | | strip_newlines(Atom,SAtom) :- atom_codes(Atom,Codes), strip_aux(Codes,SC), atom_codes(SAtom,SC). |
| 293 | | |
| 294 | | strip_aux([],R) :- !,R=[]. |
| 295 | | strip_aux([10|T],R) :- !, strip_aux(T,R). |
| 296 | | strip_aux([13|T],[32|R]) :- !, strip_aux(T,R). |
| 297 | | strip_aux([H|T],[H|R]) :- !,strip_aux(T,R). |
| 298 | | |
| 299 | | :- use_module(library(lists),[maplist/3]). |
| 300 | | |
| 301 | | % transform upper case to lower case, except for first letter |
| 302 | | atom_tail_to_lower_case(ATOM_uc,Atom_lc) :- |
| 303 | | atom_codes(ATOM_uc,[First|TC]), |
| 304 | | maplist(simple_lowcase,TC,TC2), |
| 305 | | atom_codes(Atom_lc,[First|TC2]). |
| 306 | | |
| 307 | | simple_lowcase(H,R) :- 0'A =< H, H =< 0'Z, !, R is H+0'a-0'A. |
| 308 | | simple_lowcase(Code,Code). |
| 309 | | |
| 310 | | |
| 311 | | % ---------- hex utilities: to do move to tools ? |
| 312 | | |
| 313 | | |
| 314 | | %:- assert_must_succeed(tools_strings: get_hex_codes(255,"ff"). |
| 315 | | get_hex_codes(0,Chars) :- !, Chars="0". |
| 316 | | get_hex_codes(Nr,Chars) :- get_hex_codes(Nr,[],Chars). |
| 317 | | |
| 318 | | get_hex_codes(0,Acc,R) :- !, R=Acc. |
| 319 | | get_hex_codes(Nr,Acc,R) :- |
| 320 | | DigNr is Nr mod 16, |
| 321 | | get_hex_code(DigNr,Char), |
| 322 | | N1 is Nr // 16, |
| 323 | | get_hex_codes(N1,[Char|Acc],R). |
| 324 | | |
| 325 | | %:- assert_must_succeed(tools_strings: get_hex_bytes([255,3],"ff03")). |
| 326 | | % use for converting output of sha hash library: |
| 327 | | get_hex_bytes([],[]). |
| 328 | | get_hex_bytes([Byte|T],[C1,C2|HT]) :- get_hex_byte(Byte,C1,C2), get_hex_bytes(T,HT). |
| 329 | | |
| 330 | | get_hex_byte(Byte,C1,C2) :- |
| 331 | | N1 is Byte // 16, N2 is Byte mod 16, |
| 332 | | get_hex_code(N1,C1), get_hex_code(N2,C2). |
| 333 | | get_hex_code(Nr,Digit) :- Nr<10,!, Digit is Nr+48. % "0" = [48] |
| 334 | | get_hex_code(Nr,Digit) :- Nr<16,!, Digit is Nr+87. % "A" = [65], "a" = [97] |
| 335 | | |
| 336 | | |