1 % (c) 2009-2026 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2 % Heinrich Heine Universitaet Duesseldorf
3 % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html)
4
5
6 :- module(tools_strings,
7 [string_concatenate/3,
8 ajoin/2, ajoin_with_limit/3, ajoin_with_sep/3,
9 ajoin_path/3,
10 safe_name/2,
11 predicate_functor/3,
12 atom_codes_with_limit/2, atom_codes_with_limit/3,
13 truncate_atom/3,
14 atom_prefix/2, atom_suffix/2, atom_split/4,
15 match_atom/2, match_atom/3,
16 convert_cli_arg/2,
17 convert_to_number/2,
18 convert_atom_to_number/2,
19 get_hex_codes/2,
20 get_hex_bytes/2,
21 number_codes_min_length/3,
22 is_digit_code/1,
23 is_alphabetical_ascii_code/1, is_alphabetical_ascii_codes/1,
24 is_ascii_id_codes/1,
25 is_simple_classical_b_identifier/1, is_simple_classical_b_identifier_codes/1,
26 is_composed_classical_b_identifier/1,
27 strip_newlines/2,
28 atom_tail_to_lower_case/2,
29 format_modified_info/2, format_datime_to_codes/2
30 ]).
31
32 :- use_module(module_information).
33
34 :- module_info(group,infrastructure).
35 :- module_info(description,'A few utilities on strings/atoms seperated out from tools.pl to avoid cyclic module dependencies.').
36
37 :- set_prolog_flag(double_quotes, codes).
38
39
40 %! string_concatenate(+X,+Y,-XY)
41 string_concatenate(X,Y,XY) :- atom(X),atom(Y),!, atom_concat(X,Y,XY).
42 string_concatenate(X,Y,XY) :- atom(X),number(Y),!,
43 % convert Y to atom, atom_concat is much faster than converting to codes list and back
44 number_codes(Y,YC), atom_codes(X,XC),
45 append(XC,YC,XYC),
46 atom_codes(XY,XYC).
47 string_concatenate(X,Y,XY) :-
48 safe_name(X,Xs),safe_name(Y,Ys),append(Xs,Ys,XYs),atom_codes(XY,XYs).
49
50 % tests are stored in tools.pl to avoid cyclic module dependencies
51 %:- assert_must_succeed((tools_strings: ajoin_with_sep([link,a,xa],'.',Text), Text == 'link.a.xa')).
52 %:- assert_must_succeed((tools_strings: ajoin_with_sep([link],'.',Text), Text == 'link')).
53
54 :- use_module(library(types), [illarg/3]).
55 ajoin_with_sep(List,Sep,Text) :- var(List),!,
56 illarg(var, ajoin_with_sep(List,Sep,Text), 1).
57 ajoin_with_sep(List,Sep,Text) :-
58 insert_sep(List,Sep,L2),
59 ajoin(L2,Text).
60
61 insert_sep([H1,H2|T],Sep,Res) :- !, Res= [H1,Sep|RT], insert_sep([H2|T],Sep,RT).
62 insert_sep(L,_Sep,L).
63
64
65 /* Concats a list of atoms, but checks if numbers or compound terms are
66 involved, which are converted to simple atoms */
67 ajoin(List,Text) :- var(List),!,
68 illarg(var, ajoin(List,Text), 1).
69 ajoin([Atom],Res) :- !, Res=Atom.
70 ajoin(ListOfAtoms,Atom) :- ajoin_with_limit(ListOfAtoms,10000000,Atom).
71 % atom_codes with list of 100000000 takes very long, with 10000000 a few seconds
72
73 % concats a directory with a tail file path
74 % checks if directory ends in slash and adds one if necessary and ensures only one / is present
75 % sicstus can deal paths with multiple /, so this is more for aesthetic reasons
76 ajoin_path(Directory,File,FullPath) :- \+ atom(Directory),!,
77 illarg(force_type(atom), ajoin_path(Directory,File,FullPath) , 1).
78 ajoin_path(Directory,File,FullPath) :- \+ atom(File),!,
79 illarg(force_type(atom), ajoin_path(Directory,File,FullPath) , 2).
80 ajoin_path(Directory,File,FullPath) :-
81 atom_codes(Directory,DC),
82 atom_codes(File,FC),
83 Sep = 0'/,
84 (FC=[Sep|FC2] -> FC2=FC ; FC2=[Sep|FC]),
85 (append(DC2,[Sep],DC) % directory ends in /
86 -> append(DC2,FC2,FullCodes)
87 ; append(DC,FC2,FullCodes)),
88 atom_codes(FullPath,FullCodes).
89
90
91 %safe_atom_concat(A,B,C,ExceptionOcc) :-
92 % catch(atom_concat(A,B,C),
93 % error(representation_error(max_atom_length),_),
94 % (print(exception(max_atom_length)),nl,A=C,ExceptionOcc=true)).
95
96 :- use_module(library(codesio),[write_to_codes/2]).
97
98 %toAtom(Number,Atom) :- number(Number),!,number_chars(Number,C),atom_chars(Atom,C).
99 %toAtom(Atom,Atom) :- atomic(Atom),!.
100 %toAtom(Term,Atom) :- write_to_codes(Term,Codes),safe_atom_codes(Atom,Codes).
101
102 toCodes(Number,Codes) :- number(Number),!,number_codes(Number,Codes).
103 toCodes(Atom,Codes) :- atomic(Atom),!,atom_codes(Atom,Codes).
104 toCodes(Term,Codes) :- write_to_codes(Term,Codes).
105
106
107 % match an atom with a term
108 % we want to match the atom 's(0)' with the term s(0)
109 match_atom(Atom,Term) :- atom(Term),!,Term=Atom.
110 match_atom(Atom,Term) :- atom_codes(Atom,AtomCodes), write_to_codes(Term,Codes),Codes=AtomCodes.
111
112 % version where atom_codes is precomputed for efficiency:
113 match_atom(Atom,_,Term) :- atom(Term),!,Term=Atom.
114 match_atom(_Atom,AtomCodes,Term) :- write_to_codes(Term,Codes),Codes=AtomCodes.
115
116
117 % a version of number codes with 0 padding at the left to achieve minimum length.
118 number_codes_min_length(Nr,Min,Codes) :- number_codes(Nr,C1), length(C1,Len),
119 pad_nrc(Len,Min,C1,Codes).
120 pad_nrc(L,Min,C,R) :- L >= Min,!, R=C.
121 pad_nrc(L,Min,C,[0'0 | RT]) :- L1 is L+1,
122 pad_nrc(L1,Min,C,RT).
123
124 % a copy of safe_atom_codes/2 from tools to avoid module dependency on error_manager
125 safe_atom_codes(V,C) :- var(V),var(C),!,
126 print_error('Variables in call: '),print_error(safe_atom_codes(V,C)),
127 C='$VARIABLE$'.
128 safe_atom_codes(A,C) :-
129 catch(atom_codes(A,C),
130 error(representation_error(max_atom_length),_),
131 (print(exception(max_atom_length)),nl,atom_codes_with_limit(A,1000,C))).
132
133 % will concatenate until the Limit is reached or exceeded; it may produce atoms longer than Limit
134 % (if first atom already longer than limit + it adds ...
135 %:- assert_must_succeed((tools: ajoin_with_limit(['A','B','C','D'],100,Text), Text == 'ABCD')).
136 %:- assert_must_succeed((tools: ajoin_with_limit(['A','B','C','D'],2,Text), Text == 'AB...')).
137
138
139 ajoin_with_limit(Atoms,Limit,Result) :-
140 ajoin_codes_with_limit(Atoms,Limit,Codes),
141 safe_atom_codes(Atom,Codes), Result=Atom.
142
143 %:- assert_must_succeed((tools: ajoin_codes_with_limit(['A','B','C','D'],100,Text), Text == "ABCD")).
144 ajoin_codes_with_limit([],_,[]).
145 ajoin_codes_with_limit([Atom|TAtoms],Limit,Res) :-
146 toCodes(Atom,AtomCodes),
147 add_codes(AtomCodes,TAtoms,Limit,Res).
148
149 add_codes([],TAtoms,Limit,Res) :- !, ajoin_codes_with_limit(TAtoms,Limit,Res).
150 add_codes(_,_,Limit,Res) :- Limit < 1, !, Res = "...".
151 add_codes([H|T],TAtoms,Limit,[H|TR]) :- L1 is Limit-1,
152 add_codes(T,TAtoms,L1,TR).
153
154
155
156 :- use_module(tools_printing,[print_error/1]).
157 safe_name([], "[]") :- !. % [] is not an atom on SWI-Prolog!
158 safe_name(X,N) :- atom(X),!, atom_codes(X,N).
159 safe_name(X,N) :- number(X),!, number_codes(X,N).
160 safe_name(X,N) :- var(X),!, N="var".
161 safe_name(lambda_res(X),[114,101,115,95|N]) :- !, atom_codes(X,N).
162 safe_name(X,N) :- functor(X,F,_),atom_codes(F,N), print_error(non_atomic_in_safe_name(X)).
163
164
165 predicate_functor(X,F,N) :- var(X),!, print_error(var_in_predicate_functor),F='$VAR',N=0.
166 predicate_functor(_Module:Pred,F,N) :- !,predicate_functor(Pred,F,N).
167 predicate_functor(P,F,N) :- functor(P,F,N).
168
169
170
171 atom_codes_with_limit(A,C) :-
172 catch(
173 atom_codes(A,C),
174 error(representation_error(max_atom_length),_),
175 (print(exception(max_atom_length)),nl,atom_codes_with_limit(A,1000,C))).
176
177
178 atom_codes_with_limit(A,Limit,Codes) :- var(A), Limit >= 0, !,
179 truncate_codes(Codes,Limit,TCodes,_),
180 atom_codes(A,TCodes).
181 %atom_codes_with_limit(A,Limit,Codes) :- compound(A),!, % should we catch this error?
182 atom_codes_with_limit(A,Limit,Codes) :- Limit < 1, !, atom_codes(A,Codes).
183 atom_codes_with_limit(A,Limit,Codes) :- atom_codes(A,Codes1),
184 truncate_codes(Codes,Limit,Codes1,_).
185
186
187 truncate_codes([],_,[],false).
188 truncate_codes([H|T],Count,Res,Trunc) :-
189 Count<1 -> Res = [46,46,46],Trunc=true /* '...' */
190 ; Res = [H|TT], C1 is Count-1, truncate_codes(T,C1,TT,Trunc).
191
192 %:- assert_must_succeed((tools_strings:truncate_atom(abcd,100,Text), Text == 'abcd')).
193 %:- assert_must_succeed((tools_strings:truncate_atom(abcd,2,Text), Text == 'ab...')).
194 %:- assert_must_succeed((tools_strings:truncate_atom(abcd,0,Text), Text == '...')).
195 % TO DO: could be made more efficient by using something like sub_atom(Atom,0,Limit,_,NewAtom)
196 truncate_atom(Atom,Limit,NewAtom) :-
197 atom_codes(Atom,Codes),
198 truncate_codes(Codes,Limit,TCodes,Trunc),
199 (Trunc=true -> atom_codes(NewAtom,TCodes) ; NewAtom=Atom).
200
201
202 %atom_prefix(_,Atom) :- \+ atom(Atom),!, trace,fail.
203 atom_prefix(Prefix,Atom) :-
204 sub_atom(Atom,0,_,_,Prefix). % instead of atom_concat(Prefix,_,Atom)
205
206 %atom_suffix(_,Atom) :- \+ atom(Atom),!, trace,fail.
207 atom_suffix(Suffix,Atom) :-
208 sub_atom(Atom,_,_,0,Suffix). % instead of atom_concat(_,Suffix,Atom)
209
210 % Atom can be split into Prefix.Sep.Suffix
211 atom_split(Prefix,Sep,Suffix,Atom) :-
212 atom_concat(Prefix,Suffix1,Atom),
213 atom_concat(Sep,Suffix,Suffix1).
214
215 convert_cli_arg(PrefVal,Value) :- compound(PrefVal),!,Value=PrefVal.
216 convert_cli_arg(Atom,Value) :-
217 convert_atom_to_number(Atom,Nr),!, /* convert '12' to 12 */
218 Value=Nr.
219 convert_cli_arg(V,V).
220
221 convert_to_number(Nr,Res) :- number(Nr),!,Res=Nr.
222 convert_to_number(Atom,Nr) :- convert_atom_to_number(Atom,Nr).
223
224 convert_atom_to_number(Atom,Nr) :-
225 atom(Atom), atom_codes(Atom,C),
226 catch(number_codes(Nr,C),
227 error(syntax_error(_N),_),
228 % in this case safe_number_codes fails ; we cannot convert the codes into a number
229 fail).
230
231
232 % detect simple ASCII classical B identifiers accepted by the parser
233 is_simple_classical_b_identifier(Atom) :- atom_codes(Atom,Codes),
234 is_simple_classical_b_identifier_codes(Codes).
235 is_simple_classical_b_identifier_codes(Codes) :-
236 Codes = [Code|T],
237 ? is_valid_id_letter_code(Code),!,
238 legal_id_aux(T).
239
240 legal_id_aux([]).
241 legal_id_aux([0'$,0'0]) :- !. % only $0 allowed
242 ?legal_id_aux([Code|T]) :- legal_id_code(Code),!,legal_id_aux(T).
243
244 legal_id_code(0'_).
245 % FIXME Primes should only be allowed at the end of the identifier, not in the middle!
246 legal_id_code(0'\').
247 legal_id_code(0x2032). % Unicode prime
248 legal_id_code(C) :- is_digit_code(C).
249 ?legal_id_code(C) :- is_valid_id_letter_code(C).
250
251 is_digit_code(Code) :- Code >= 48, Code =< 57. % 0-9
252
253 is_alphabetical_ascii_code(Code) :- Code >= 65,
254 ( Code =< 90 -> true % A-Z
255 ; Code >= 97, Code =< 122). % a-z
256 is_alphabetical_ascii_codes([]).
257 is_alphabetical_ascii_codes([H|T]) :- is_alphabetical_ascii_code(H), is_alphabetical_ascii_codes(T).
258
259 % the codes list represents a classical ASCII identifier starting with a letter and then digits/letters
260 is_ascii_id_codes([H|T]) :- is_alphabetical_ascii_code(H),
261 is_alphabetical_ascii_or_digit_codes(T).
262 is_alphabetical_ascii_or_digit_codes([]).
263 is_alphabetical_ascii_or_digit_codes([H|T]) :-
264 (is_alphabetical_ascii_code(H) -> true ; is_digit_code(H) -> true),
265 is_alphabetical_ascii_or_digit_codes(T).
266
267 is_greek_lower_case(Code) :- Code >= 945, % alpha
268 Code =< 1017. % omega
269 is_greek_upper_case(Code) :- Code >= 916, % Alpha
270 Code =< 937. % Omega
271
272 % partially taken from BParser.scc: unicode_letter definition from java-1.7.sablecc
273 is_umlaut_code(Code) :- Code < 0xc0, !, fail.
274 is_umlaut_code(Code) :- Code >= 0xc0, Code =< 0xd6. % [0x00c0..0x00d6] in BParser.scc )
275 is_umlaut_code(Code) :- Code >= 0xd8, Code =< 0xf6. % [0x00c0..0x00d6] in BParser.scc
276 is_umlaut_code(Code) :- Code >= 0xf8, Code =< 0x01f5. % [0x00f8..0x01f5] in BParser.scc
277 is_umlaut_code(Code) :- Code >= 0x01fa, Code =< 0x0217.
278 is_umlaut_code(Code) :- Code >= 0x0401, Code =< 0x040c. % Cyrillic 0x0401..0x040c] + [0x040e..0x044f]
279 is_umlaut_code(Code) :- Code >= 0x040e, Code =< 0x044f.
280 is_umlaut_code(Code) :- Code >= 0x0451, Code =< 0x045c. % [0x0451..0x045c] + [0x045e..0x0481]
281 is_umlaut_code(Code) :- Code >= 0x045e, Code =< 0x0481.
282 % TO DO: add more
283
284 is_greek_lambda_code(955). % special treatment as it is used as an operator
285
286 is_valid_id_letter_code(Code) :- is_alphabetical_ascii_code(Code).
287 is_valid_id_letter_code(Code) :- is_greek_lower_case(Code), % accepted by ProB, but not by Atelier-B
288 \+ is_greek_lambda_code(Code).
289 is_valid_id_letter_code(Code) :- is_greek_upper_case(Code). % accepted by ProB, but not by Atelier-B
290 is_valid_id_letter_code(Code) :- is_umlaut_code(Code). % accepted by ProB, but not by Atelier-B
291
292 is_composed_classical_b_identifier(Atom) :- atom_codes(Atom,Codes),
293 Codes = [Code|T],
294 is_valid_id_letter_code(Code), !,
295 legal_comp_id_aux(T).
296
297 legal_comp_id_aux([]).
298 legal_comp_id_aux([0'$,0'0]). % only $0 allowed
299 legal_comp_id_aux([0'. , Code |T]) :- !, % a dot which must be followed by a new legal id
300 is_valid_id_letter_code(Code), !, legal_comp_id_aux(T).
301 legal_comp_id_aux([Code|T]) :- legal_id_code(Code), !, legal_comp_id_aux(T).
302
303 % strip newlines and replace by space
304 strip_newlines(Atom,SAtom) :- atom_codes(Atom,Codes), strip_aux(Codes,SC), atom_codes(SAtom,SC).
305
306 strip_aux([],R) :- !,R=[].
307 strip_aux([10|T],R) :- !, strip_aux(T,R).
308 strip_aux([13|T],[32|R]) :- !, strip_aux(T,R).
309 strip_aux([H|T],[H|R]) :- !,strip_aux(T,R).
310
311 :- use_module(library(lists),[maplist/3]).
312
313 % transform upper case to lower case, except for first letter
314 atom_tail_to_lower_case(ATOM_uc,Atom_lc) :-
315 atom_codes(ATOM_uc,[First|TC]),
316 maplist(simple_lowcase,TC,TC2),
317 atom_codes(Atom_lc,[First|TC2]).
318
319 simple_lowcase(H,R) :- 0'A =< H, H =< 0'Z, !, R is H+0'a-0'A.
320 simple_lowcase(Code,Code).
321
322
323 % ---------- hex utilities: to do move to tools ?
324
325
326 %:- assert_must_succeed(tools_strings: get_hex_codes(255,"ff").
327 get_hex_codes(0,Chars) :- !, Chars="0".
328 get_hex_codes(Nr,Chars) :- get_hex_codes(Nr,[],Chars).
329
330 get_hex_codes(0,Acc,R) :- !, R=Acc.
331 get_hex_codes(Nr,Acc,R) :-
332 DigNr is Nr mod 16,
333 get_hex_code(DigNr,Char),
334 N1 is Nr // 16,
335 get_hex_codes(N1,[Char|Acc],R).
336
337 %:- assert_must_succeed(tools_strings: get_hex_bytes([255,3],"ff03")).
338 % use for converting output of sha hash library:
339 get_hex_bytes([],[]).
340 get_hex_bytes([Byte|T],[C1,C2|HT]) :- get_hex_byte(Byte,C1,C2), get_hex_bytes(T,HT).
341
342 get_hex_byte(Byte,C1,C2) :-
343 N1 is Byte // 16, N2 is Byte mod 16,
344 get_hex_code(N1,C1), get_hex_code(N2,C2).
345 get_hex_code(Nr,Digit) :- Nr<10,!, Digit is Nr+48. % "0" = [48]
346 get_hex_code(Nr,Digit) :- Nr<16,!, Digit is Nr+87. % "A" = [65], "a" = [97]
347
348 % ---------------------- date utilities
349
350 % a way to print modify_localtime
351 format_modified_info(Stream,ModLocTime) :-
352 format(Stream,' (modified on ',[]),
353 format_datime(Stream,ModLocTime),
354 format(Stream,')~n',[]).
355
356 format_datime(Stream,datime(Yr,Mon,Day,Hr,Min,_Sec)) :-
357 number_codes_min_length(Min,2,MC),!,
358 format(Stream,'~w/~w/~w at ~w:~s',[Day,Mon,Yr,Hr,MC]).
359 format_datime(Stream,Time) :-
360 format(Stream,'<<?? ~w ??>>',Time).
361
362 :- use_module(library(codesio),[format_to_codes/3]).
363
364 format_datime_to_codes(datime(Yr,Mon,Day,Hr,Min,_Sec),Codes) :-
365 number_codes_min_length(Min,2,MC),!,
366 format_to_codes('~w/~w/~w at ~w:~s',[Day,Mon,Yr,Hr,MC],Codes).
367 format_datime_to_codes(Time,Codes) :-
368 format('<<?? ~w ??>>',Time,Codes).
369