1		% (c) 2009-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2		% Heinrich Heine Universitaet Duesseldorf
3		% This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html)
4
5
6		:- module(tools_strings,
7		[string_concatenate/3,
8		ajoin/2, ajoin_with_limit/3, ajoin_with_sep/3,
9		ajoin_path/3,
10		safe_name/2,
11		predicate_functor/3,
12		atom_codes_with_limit/2, atom_codes_with_limit/3,
13		truncate_atom/3,
14		atom_prefix/2, atom_suffix/2, atom_split/4,
15		match_atom/2, match_atom/3,
16		convert_cli_arg/2,
17		convert_to_number/2,
18		convert_atom_to_number/2,
19		get_hex_codes/2,
20		get_hex_bytes/2,
21		number_codes_min_length/3,
22		is_digit_code/1,
23		is_alphabetical_ascii_code/1,
24		is_simple_classical_b_identifier/1, is_simple_classical_b_identifier_codes/1,
25		is_composed_classical_b_identifier/1,
26		strip_newlines/2,
27		atom_tail_to_lower_case/2
28		]).
29
30		:- use_module(module_information).
31
32		:- module_info(group,infrastructure).
33		:- module_info(description,'A few utilities on strings/atoms seperated out from tools.pl to avoid cyclic module dependencies.').
34
35		:- set_prolog_flag(double_quotes, codes).
36
37
38		%! string_concatenate(+X,+Y,-XY)
39		string_concatenate(X,Y,XY) :- atom(X),atom(Y),!, atom_concat(X,Y,XY).
40		string_concatenate(X,Y,XY) :- atom(X),number(Y),!,
41		% convert Y to atom, atom_concat is much faster than converting to codes list and back
42		number_codes(Y,YC), atom_codes(X,XC),
43		append(XC,YC,XYC),
44		atom_codes(XY,XYC).
45		string_concatenate(X,Y,XY) :-
46		safe_name(X,Xs),safe_name(Y,Ys),append(Xs,Ys,XYs),atom_codes(XY,XYs).
47
48		% tests are stored in tools.pl to avoid cyclic module dependencies
49		%:- assert_must_succeed((tools_strings: ajoin_with_sep([link,a,xa],'.',Text), Text == 'link.a.xa')).
50		%:- assert_must_succeed((tools_strings: ajoin_with_sep([link],'.',Text), Text == 'link')).
51
52		:- use_module(library(types), [illarg/3]).
53		ajoin_with_sep(List,Sep,Text) :- var(List),!,
54		illarg(var, ajoin_with_sep(List,Sep,Text), 1).
55		ajoin_with_sep(List,Sep,Text) :-
56		insert_sep(List,Sep,L2),
57		ajoin(L2,Text).
58
59		insert_sep([H1,H2\|T],Sep,Res) :- !, Res= [H1,Sep\|RT], insert_sep([H2\|T],Sep,RT).
60		insert_sep(L,_Sep,L).
61
62
63		/* Concats a list of atoms, but checks if numbers or compound terms are
64		involved, which are converted to simple atoms */
65		ajoin(List,Text) :- var(List),!,
66		illarg(var, ajoin(List,Text), 1).
67		ajoin([Atom],Res) :- !, Res=Atom.
68		ajoin(ListOfAtoms,Atom) :- ajoin_with_limit(ListOfAtoms,10000000,Atom).
69		% atom_codes with list of 100000000 takes very long, with 10000000 a few seconds
70
71		% concats a directory with a tail file path
72		% checks if directory ends in slash and adds one if necessary and ensures only one / is present
73		% sicstus can deal paths with multiple /, so this is more for aesthetic reasons
74		ajoin_path(Directory,File,FullPath) :- \+ atom(Directory),!,
75		illarg(force_type(atom), ajoin_path(Directory,File,FullPath) , 1).
76		ajoin_path(Directory,File,FullPath) :- \+ atom(File),!,
77		illarg(force_type(atom), ajoin_path(Directory,File,FullPath) , 2).
78		ajoin_path(Directory,File,FullPath) :-
79		atom_codes(Directory,DC),
80		atom_codes(File,FC),
81		Sep = 0'/,
82		(FC=[Sep\|FC2] -> FC2=FC ; FC2=[Sep\|FC]),
83		(append(DC2,[Sep],DC) % directory ends in /
84		-> append(DC2,FC2,FullCodes)
85		; append(DC,FC2,FullCodes)),
86		atom_codes(FullPath,FullCodes).
87
88
89		%safe_atom_concat(A,B,C,ExceptionOcc) :-
90		% catch(atom_concat(A,B,C),
91		% error(representation_error(max_atom_length),_),
92		% (print(exception(max_atom_length)),nl,A=C,ExceptionOcc=true)).
93
94		:- use_module(library(codesio),[write_to_codes/2]).
95
96		%toAtom(Number,Atom) :- number(Number),!,number_chars(Number,C),atom_chars(Atom,C).
97		%toAtom(Atom,Atom) :- atomic(Atom),!.
98		%toAtom(Term,Atom) :- write_to_codes(Term,Codes),safe_atom_codes(Atom,Codes).
99
100		toCodes(Number,Codes) :- number(Number),!,number_codes(Number,Codes).
101		toCodes(Atom,Codes) :- atomic(Atom),!,atom_codes(Atom,Codes).
102		toCodes(Term,Codes) :- write_to_codes(Term,Codes).
103
104
105		% match an atom with a term
106		% we want to match the atom 's(0)' with the term s(0)
107		match_atom(Atom,Term) :- atom(Term),!,Term=Atom.
108		match_atom(Atom,Term) :- atom_codes(Atom,AtomCodes), write_to_codes(Term,Codes),Codes=AtomCodes.
109
110		% version where atom_codes is precomputed for efficiency:
111		match_atom(Atom,_,Term) :- atom(Term),!,Term=Atom.
112		match_atom(_Atom,AtomCodes,Term) :- write_to_codes(Term,Codes),Codes=AtomCodes.
113
114
115		% a version of number codes with 0 padding at the left to achieve minimum length.
116		number_codes_min_length(Nr,Min,Codes) :- number_codes(Nr,C1), length(C1,Len),
117		pad_nrc(Len,Min,C1,Codes).
118		pad_nrc(L,Min,C,R) :- L >= Min,!, R=C.
119		pad_nrc(L,Min,C,[0'0 \| RT]) :- L1 is L+1,
120		pad_nrc(L1,Min,C,RT).
121
122		% a copy of safe_atom_codes/2 from tools to avoid module dependency on error_manager
123		safe_atom_codes(V,C) :- var(V),var(C),!,
124		print_error('Variables in call: '),print_error(safe_atom_codes(V,C)),
125		C='$VARIABLE$'.
126		safe_atom_codes(A,C) :-
127		catch(atom_codes(A,C),
128		error(representation_error(max_atom_length),_),
129		(print(exception(max_atom_length)),nl,atom_codes_with_limit(A,1000,C))).
130
131		% will concatenate until the Limit is reached or exceeded; it may produce atoms longer than Limit
132		% (if first atom already longer than limit + it adds ...
133		%:- assert_must_succeed((tools: ajoin_with_limit(['A','B','C','D'],100,Text), Text == 'ABCD')).
134		%:- assert_must_succeed((tools: ajoin_with_limit(['A','B','C','D'],2,Text), Text == 'AB...')).
135
136
137		ajoin_with_limit(Atoms,Limit,Result) :-
138		ajoin_codes_with_limit(Atoms,Limit,Codes),
139		safe_atom_codes(Atom,Codes), Result=Atom.
140
141		%:- assert_must_succeed((tools: ajoin_codes_with_limit(['A','B','C','D'],100,Text), Text == "ABCD")).
142		ajoin_codes_with_limit([],_,[]).
143		ajoin_codes_with_limit([Atom\|TAtoms],Limit,Res) :-
144		toCodes(Atom,AtomCodes),
145		add_codes(AtomCodes,TAtoms,Limit,Res).
146
147		add_codes([],TAtoms,Limit,Res) :- !, ajoin_codes_with_limit(TAtoms,Limit,Res).
148		add_codes(_,_,Limit,Res) :- Limit < 1, !, Res = "...".
149		add_codes([H\|T],TAtoms,Limit,[H\|TR]) :- L1 is Limit-1,
150		add_codes(T,TAtoms,L1,TR).
151
152
153
154		:- use_module(tools_printing,[print_error/1]).
155		safe_name([], "[]") :- !. % [] is not an atom on SWI-Prolog!
156		safe_name(X,N) :- atom(X),!, atom_codes(X,N).
157		safe_name(X,N) :- number(X),!, number_codes(X,N).
158		safe_name(X,N) :- var(X),!, N="var".
159		safe_name(lambda_res(X),[114,101,115,95\|N]) :- !, atom_codes(X,N).
160		safe_name(X,N) :- functor(X,F,_),atom_codes(F,N), print_error(non_atomic_in_safe_name(X)).
161
162
163		predicate_functor(X,F,N) :- var(X),!, print_error(var_in_predicate_functor),F='$VAR',N=0.
164		predicate_functor(_Module:Pred,F,N) :- !,predicate_functor(Pred,F,N).
165		predicate_functor(P,F,N) :- functor(P,F,N).
166
167
168
169		atom_codes_with_limit(A,C) :-
170		catch(
171		atom_codes(A,C),
172		error(representation_error(max_atom_length),_),
173		(print(exception(max_atom_length)),nl,atom_codes_with_limit(A,1000,C))).
174
175
176		atom_codes_with_limit(A,Limit,Codes) :- var(A), Limit >= 0, !,
177		truncate_codes(Codes,Limit,TCodes,_),
178		atom_codes(A,TCodes).
179		%atom_codes_with_limit(A,Limit,Codes) :- compound(A),!, % should we catch this error?
180		atom_codes_with_limit(A,Limit,Codes) :- Limit < 1, !, atom_codes(A,Codes).
181		atom_codes_with_limit(A,Limit,Codes) :- atom_codes(A,Codes1),
182		truncate_codes(Codes,Limit,Codes1,_).
183
184
185		truncate_codes([],_,[],false).
186		truncate_codes([H\|T],Count,Res,Trunc) :-
187		Count<1 -> Res = [46,46,46],Trunc=true /* '...' */
188		; Res = [H\|TT], C1 is Count-1, truncate_codes(T,C1,TT,Trunc).
189
190		%:- assert_must_succeed((tools_strings:truncate_atom(abcd,100,Text), Text == 'abcd')).
191		%:- assert_must_succeed((tools_strings:truncate_atom(abcd,2,Text), Text == 'ab...')).
192		%:- assert_must_succeed((tools_strings:truncate_atom(abcd,0,Text), Text == '...')).
193		% TO DO: could be made more efficient by using something like sub_atom(Atom,0,Limit,_,NewAtom)
194		truncate_atom(Atom,Limit,NewAtom) :-
195		atom_codes(Atom,Codes),
196		truncate_codes(Codes,Limit,TCodes,Trunc),
197		(Trunc=true -> atom_codes(NewAtom,TCodes) ; NewAtom=Atom).
198
199
200		%atom_prefix(_,Atom) :- \+ atom(Atom),!, trace,fail.
201		atom_prefix(Prefix,Atom) :-
202		sub_atom(Atom,0,_,_,Prefix). % instead of atom_concat(Prefix,_,Atom)
203
204		%atom_suffix(_,Atom) :- \+ atom(Atom),!, trace,fail.
205		atom_suffix(Suffix,Atom) :-
206		sub_atom(Atom,_,_,0,Suffix). % instead of atom_concat(_,Suffix,Atom)
207
208		% Atom can be split into Prefix.Sep.Suffix
209		atom_split(Prefix,Sep,Suffix,Atom) :-
210		atom_concat(Prefix,Suffix1,Atom),
211		atom_concat(Sep,Suffix,Suffix1).
212
213		convert_cli_arg(PrefVal,Value) :- compound(PrefVal),!,Value=PrefVal.
214		convert_cli_arg(Atom,Value) :-
215		convert_atom_to_number(Atom,Nr),!, /* convert '12' to 12 */
216		Value=Nr.
217		convert_cli_arg(V,V).
218
219		convert_to_number(Nr,Res) :- number(Nr),!,Res=Nr.
220		convert_to_number(Atom,Nr) :- convert_atom_to_number(Atom,Nr).
221
222		convert_atom_to_number(Atom,Nr) :-
223		atom(Atom), atom_codes(Atom,C),
224		catch(number_codes(Nr,C),
225		error(syntax_error(_N),_),
226		% in this case safe_number_codes fails ; we cannot convert the codes into a number
227		fail).
228
229
230		% detect simple ASCII classical B identifiers accepted by the parser
231		is_simple_classical_b_identifier(Atom) :- atom_codes(Atom,Codes),
232		is_simple_classical_b_identifier_codes(Codes).
233		is_simple_classical_b_identifier_codes(Codes) :-
234		Codes = [Code\|T],
235	?	is_valid_id_letter_code(Code),!,
236		legal_id_aux(T).
237
238		legal_id_aux([]).
239		legal_id_aux([0'$,0'0]) :- !. % only $0 allowed
240	?	legal_id_aux([Code\|T]) :- legal_id_code(Code),!,legal_id_aux(T).
241
242		legal_id_code(0'_).
243		% FIXME Primes should only be allowed at the end of the identifier, not in the middle!
244		legal_id_code(0'\').
245		legal_id_code(0x2032). % Unicode prime
246		legal_id_code(C) :- is_digit_code(C).
247	?	legal_id_code(C) :- is_valid_id_letter_code(C).
248
249		is_digit_code(Code) :- Code >= 48, Code =< 57. % 0-9
250
251		is_alphabetical_ascii_code(Code) :- Code >= 65,
252		( Code =< 90 -> true % A-Z
253		; Code >= 97, Code =< 122). % a-z
254
255		is_greek_lower_case(Code) :- Code >= 945, % alpha
256		Code =< 1017. % omega
257		is_greek_upper_case(Code) :- Code >= 916, % Alpha
258		Code =< 937. % Omega
259
260		% partially taken from BParser.scc: unicode_letter definition from java-1.7.sablecc
261		is_umlaut_code(Code) :- Code < 0xc0, !, fail.
262		is_umlaut_code(Code) :- Code >= 0xc0, Code =< 0xd6. % [0x00c0..0x00d6] in BParser.scc )
263		is_umlaut_code(Code) :- Code >= 0xd8, Code =< 0xf6. % [0x00c0..0x00d6] in BParser.scc
264		is_umlaut_code(Code) :- Code >= 0xf8, Code =< 0x01f5. % [0x00f8..0x01f5] in BParser.scc
265		is_umlaut_code(Code) :- Code >= 0x01fa, Code =< 0x0217.
266		is_umlaut_code(Code) :- Code >= 0x0401, Code =< 0x040c. % Cyrillic 0x0401..0x040c] + [0x040e..0x044f]
267		is_umlaut_code(Code) :- Code >= 0x040e, Code =< 0x044f.
268		is_umlaut_code(Code) :- Code >= 0x0451, Code =< 0x045c. % [0x0451..0x045c] + [0x045e..0x0481]
269		is_umlaut_code(Code) :- Code >= 0x045e, Code =< 0x0481.
270		% TO DO: add more
271
272		is_greek_lambda_code(955). % special treatment as it is used as an operator
273
274		is_valid_id_letter_code(Code) :- is_alphabetical_ascii_code(Code).
275		is_valid_id_letter_code(Code) :- is_greek_lower_case(Code), % accepted by ProB, but not by Atelier-B
276		\+ is_greek_lambda_code(Code).
277		is_valid_id_letter_code(Code) :- is_greek_upper_case(Code). % accepted by ProB, but not by Atelier-B
278		is_valid_id_letter_code(Code) :- is_umlaut_code(Code). % accepted by ProB, but not by Atelier-B
279
280		is_composed_classical_b_identifier(Atom) :- atom_codes(Atom,Codes),
281		Codes = [Code\|T],
282		is_valid_id_letter_code(Code), !,
283		legal_comp_id_aux(T).
284
285		legal_comp_id_aux([]).
286		legal_comp_id_aux([0'$,0'0]). % only $0 allowed
287		legal_comp_id_aux([0'. , Code \|T]) :- !, % a dot which must be followed by a new legal id
288		is_valid_id_letter_code(Code), !, legal_comp_id_aux(T).
289		legal_comp_id_aux([Code\|T]) :- legal_id_code(Code), !, legal_comp_id_aux(T).
290
291		% strip newlines and replace by space
292		strip_newlines(Atom,SAtom) :- atom_codes(Atom,Codes), strip_aux(Codes,SC), atom_codes(SAtom,SC).
293
294		strip_aux([],R) :- !,R=[].
295		strip_aux([10\|T],R) :- !, strip_aux(T,R).
296		strip_aux([13\|T],[32\|R]) :- !, strip_aux(T,R).
297		strip_aux([H\|T],[H\|R]) :- !,strip_aux(T,R).
298
299		:- use_module(library(lists),[maplist/3]).
300
301		% transform upper case to lower case, except for first letter
302		atom_tail_to_lower_case(ATOM_uc,Atom_lc) :-
303		atom_codes(ATOM_uc,[First\|TC]),
304		maplist(simple_lowcase,TC,TC2),
305		atom_codes(Atom_lc,[First\|TC2]).
306
307		simple_lowcase(H,R) :- 0'A =< H, H =< 0'Z, !, R is H+0'a-0'A.
308		simple_lowcase(Code,Code).
309
310
311		% ---------- hex utilities: to do move to tools ?
312
313
314		%:- assert_must_succeed(tools_strings: get_hex_codes(255,"ff").
315		get_hex_codes(0,Chars) :- !, Chars="0".
316		get_hex_codes(Nr,Chars) :- get_hex_codes(Nr,[],Chars).
317
318		get_hex_codes(0,Acc,R) :- !, R=Acc.
319		get_hex_codes(Nr,Acc,R) :-
320		DigNr is Nr mod 16,
321		get_hex_code(DigNr,Char),
322		N1 is Nr // 16,
323		get_hex_codes(N1,[Char\|Acc],R).
324
325		%:- assert_must_succeed(tools_strings: get_hex_bytes([255,3],"ff03")).
326		% use for converting output of sha hash library:
327		get_hex_bytes([],[]).
328		get_hex_bytes([Byte\|T],[C1,C2\|HT]) :- get_hex_byte(Byte,C1,C2), get_hex_bytes(T,HT).
329
330		get_hex_byte(Byte,C1,C2) :-
331		N1 is Byte // 16, N2 is Byte mod 16,
332		get_hex_code(N1,C1), get_hex_code(N2,C2).
333		get_hex_code(Nr,Digit) :- Nr<10,!, Digit is Nr+48. % "0" = [48]
334		get_hex_code(Nr,Digit) :- Nr<16,!, Digit is Nr+87. % "A" = [65], "a" = [97]
335
336