1		% (c) 2025-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2		% Heinrich Heine Universitaet Duesseldorf
3		% This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html)
4
5		:- module(source_indenter,[indent_b_file/2, indent_b_file/3,
6		indent_b_file_to_codes/2, indent_b_file_to_codes/3,
7		reformat_b_file_to_codes/2]).
8
9
10		:- use_module(probsrc(module_information),[module_info/2]).
11		:- module_info(group,misc).
12		:- module_info(description,'This module can indent B source files using a lexing approach.').
13
14		% the idea is to keep track of keywords relevant of increase/decrease of indentation level
15		% it basically requires correct lexing of these keywords, and does not require parsing
16		% it can thus deal with comments anywhere in the source code and also with files that cannot be parsed
17		% it, however, currently does not check that opening/closing tags match
18
19		% options for:
20		% - indent_spaces/Nr: number of whitespaces per indentation :
21		% - insert_new_lines_before_keywords: whether to automatically insert newlines before keywords like ELSIF, ...
22		% - insert_new_lines_after_keywords: whether to automatically insert newlines after keywords like ELSIF, ...
23		% - whether to add current indent level inside comments or not
24		% TODO: parse special unicode identifiers, ... (ideally we would conform exactly to BLexer.java)
25
26		:- use_module(library(codesio), [with_output_to_codes/4]).
27
28		reformat_b_file_to_codes(File,Codes) :-
29		indent_b_file_to_codes(File,Codes,[insert_new_lines_before_keywords,insert_new_lines_after_keywords]).
30		indent_b_file_to_codes(File,Codes) :-
31		indent_b_file_to_codes(File,Codes,[]).
32		indent_b_file_to_codes(File,Codes,Opts) :-
33		with_output_to_codes(
34		indent_b_file(File,OutStream,Opts),
35		OutStream,
36		Codes, []).
37
38		indent_b_file(File,OutStream) :-
39		indent_b_file(File,OutStream,
40		[] %
41		% [insert_new_lines_after_keywords, insert_new_lines_before_keywords, half_indent_elsif, indent_spaces/8]
42		).
43
44		indent_b_file(File,OutStream,Opts) :-
45		open(File,read,Stream,[encoding(utf8)]),
46		init_env(OutStream,Opts,Env),
47		call_cleanup(process_file(Stream,Env), close(Stream)).
48
49		process_file(Stream,Env) :-
50		read_line(Stream,Codes),
51		(Codes = end_of_file -> true
52		; % format('>~s[~w]~n',[Codes,Env]),
53		(process_line(Env,Env1,Codes,[]) -> true
54		; write(error),nl, trace, process_line(Env,Env1,Codes,[]),
55		Env2=Env),
56		output_new_line(Env1,Env2),
57		process_file(Stream,Env2)
58		).
59
60
61		identifier(ID) --> [X], {is_alpha(X)}, id_aux(T), {atom_codes(ID,[X\|T])}.
62
63		id_aux([0'_\|T]) --> "_",!, id_aux(T).
64		id_aux([X\|T]) --> [X], {is_numerical(X)},!, id_aux(T).
65		id_aux([X\|T]) --> [X], {is_alpha(X)},!, id_aux(T).
66		id_aux([]) --> "".
67
68
69		is_numerical(Code) :- Code >= 48, Code =< 57. % 0-9
70
71		is_alpha(Code) :- Code >= 65, Code =< 90. % A-Z
72		is_alpha(Code) :- Code >= 97, Code =< 122. % a-z
73
74		process_line(env(I,Stream,Open,Opts),ResEnv) -->
75		{Open = open_special_token(Kind,Close)},!, % continue processing open special token
76		{(Kind = comment -> output_indent(env(I,Stream,Open,Opts)) ; true)},
77		process_special_token(Kind,Close,env(I,Stream,no_open_special_token,Opts),ResEnv).
78		process_line(Env,ResEnv) --> process_line2(Env,ResEnv).
79
80
81		process_line2(Env,ResEnv) --> ws, !, process_line2(Env,ResEnv). % ignore any leading ws on a line
82		process_line2(Env,ResEnv) --> identifier(ID),
83		{is_b_keyword(ID,section), \+ is_indent_relevant_keyword(ID,_)},!, % No indent
84		{output_atom(Env,ID),process_id(ID,Env,Env1)},
85		process_line_rest(Env1,ResEnv).
86		process_line2(Env,ResEnv) --> identifier(ID),
87		{is_indent_relevant_keyword(ID,Delta), Delta=<0},!,
88		% END, decrement indent immediately; ELSIF decrement for this line only
89		{process_id(ID,Env,Env1),
90		output_indent_delta(Delta,Env1),
91		output_atom(Env1,ID)},
92		process_line_after_id(ID,Env1,ResEnv).
93		process_line2(Env,ResEnv) --> [C],
94		{is_ident_relevant_code(C,Delta), Delta =< 0},!, % open or closing brackets
95		{update_indent(Env,Delta,Env1),
96		output_indent_delta(Delta,Env1), output_code(Env1,C)},
97		process_line_rest(Env1,ResEnv).
98		%process_line2(Env,ResEnv,T,T) :- T \= [_\|_], !, write(xx(T)),ResEnv=Env. % finished
99		process_line2(Env,ResEnv) --> {output_indent(Env)}, process_line_rest(Env,ResEnv).
100
101		% continue processing line after an ID; optionally insert newlines
102		process_line_after_id(ID,Env,Res) -->
103		({is_indent_relevant_keyword(ID,0),
104		option_set(insert_new_lines_after_keywords,Env)},
105		\+ at_end_of_line
106		-> process_new_line(Env,Res)
107		; process_line_rest(Env,Res)
108		).
109
110		at_end_of_line --> ws,!, at_end_of_line.
111		at_end_of_line --> "".
112
113
114		% output indent for a indent_relevant_keyword with given Delta value
115		output_indent_delta(Delta,Env1) :- Delta=0,!, % things like ELSIF, ...
116		dec_indent(Env1,Env2),% move ELSIF one to the left
117		output_indent(Env2).
118		output_indent_delta(_,Env) :- output_indent(Env).
119
120		process_line_rest(Env,Res) --> identifier(ID),!,
121		{(is_indent_relevant_keyword(ID,0),
122		option_set(insert_new_lines_before_keywords,Env)
123		-> output_new_line(Env,Env1),output_indent_delta(0,Env1) ; Env1=Env),
124		output_atom(Env1,ID),
125		process_id(ID,Env1,Env2)
126		},
127		process_line_after_id(ID,Env2,Res).
128		process_line_rest(Env,Res) --> new_line,!, process_new_line(Env,Res).
129		process_line_rest(Env,Res) --> line_comment,!, {output_codes(Env,"//")}, process_line_comment(Env,Res).
130		process_line_rest(Env,Res) --> open_special_token(Kind,Open,Close),!,
131		{output_codes(Env,Open)}, process_special_token(Kind,Close,Env,Res).
132		process_line_rest(In,Out) --> get_regular_token(Ts), !, {output_codes(In,Ts)}, process_line_rest(In,Out).
133		process_line_rest(In,Out) --> [C], {is_ident_relevant_code(C,Delta)},!,
134		{output_code(In,C),update_indent(In,Delta,In2)}, process_line_rest(In2,Out).
135		process_line_rest(In,Out) --> [C], !, {output_code(In,C)}, process_line_rest(In,Out).
136		process_line_rest(Env,Env) --> "".
137		% TODO: detect other comments, strings
138		% TODO: possibly insert new-lines for sections or things like ELSIF
139
140		process_new_line(Env,Env2) --> {output_new_line(Env,Env1)}, process_line2(Env1,Env2).
141
142		process_line_comment(Env,Env2) --> new_line,!, process_new_line(Env,Env2).
143		process_line_comment(Env,Env2) --> [X],!, {output_code(Env,X)}, process_line_comment(Env,Env2).
144		process_line_comment(Env,Env) --> "".
145
146		% TODO: process special encodings \", ....
147		process_special_token(string,Close,Env,OutEnv) --> special_string_encoding(Env),!,
148		process_special_token(string,Close,Env,OutEnv).
149		process_special_token(_Kind,Close,Env,OutEnv) --> Close,!, {output_codes(Env,Close)},
150		process_line_rest(Env,OutEnv).
151		process_special_token(Kind,Close,In,Out) --> [C], !, {output_code(In,C)}, process_special_token(Kind,Close,In,Out).
152		process_special_token(Kind,Close,Env,OutEnv) --> "", {register_open_special_token(Env,Kind,Close,OutEnv)}.
153
154		special_string_encoding(Env) --> "\\", [X], % TODO: check exactly which encodings are processed
155		{output_codes(Env,"\\"),output_code(Env,X)}.
156
157		ws --> " "; "\t".
158		new_line --> "\n". %,{inc_line_nr}.
159		line_comment --> "//".
160
161		% open special token whose contents should be ignored for indentation
162		open_special_token(comment,"/","/") --> "/*".
163		open_special_token(string,"\"","\"") --> "\"".
164		open_special_token(string,"'''","'''") --> "'''".
165		open_special_token(string,"```","```") --> "```".
166		open_special_token(identifier,"`","`") --> "`". % multi-line allowed??
167
168		% recognise some tokes, in particular those that contain indent relevant tokens like vertical bar \|
169		get_regular_token("\|->") --> "\|->". % maplet
170		get_regular_token("\|\|") --> "\|\|".
171		get_regular_token("\|>>") --> "\|>>".
172		get_regular_token("\|>") --> "\|>".
173		get_regular_token("<<\|") --> "<<\|".
174		get_regular_token("<\|") --> "<\|".
175		get_regular_token("/\|\\") --> "/\|\\".
176		get_regular_token("\\\|/") --> "\\\|/".
177		get_regular_token("..") --> "..".
178		% below not really necessary?
179		get_regular_token("\\/") --> "\\/".
180		get_regular_token("/\\") --> "/\\".
181
182		% use a fact to keep track of line numbers
183		:- dynamic cur_line/1.
184		reset_line_nr :- retractall(cur_line(_)), assertz(cur_line(1)).
185		inc_line_nr :- cur_line(N), !, retractall(cur_line(_)), N1 is N+1, assertz(cur_line(N1)).
186		inc_line_nr.
187
188
189		init_env(OutStream,Opts,env(0,OutStream,no_open_special_token,Opts)).
190
191		option_set(Option,env(_,_,_,Opts)) :- member(Option,Opts).
192
193		register_open_special_token(env(C,Stream,_,Opts),Kind,CloseTag,
194		env(C,Stream,open_special_token(Kind,CloseTag),Opts)).
195
196		% output preds
197		output_indent(env(Nr,Str,K,Opts)) :- Nr>0, !, write(Str,' '), N1 is Nr-1, output_indent(env(N1,Str,K,Opts)).
198		output_indent(_).
199
200		:- dynamic linecount/1.
201		inc_linecount(X) :- retract(linecount(X)), !, X1 is X+1, assert(linecount(X1)).
202		inc_linecount(0) :- assert(linecount(1)).
203
204		output_new_line(env(E,Stream,K,Opts),env(E,Stream,K,Opts)) :-
205		%inc_linecount(LC), format('//[~w - line ~w]',[E,LC]), % for debugging
206		nl(Stream).
207
208		output_code(env(_,Stream,_,_),C) :- put_code(Stream,C).
209		output_codes(env(_,Stream,_,_),Cs) :- format(Stream,'~s',[Cs]).
210		output_atom(env(_,Stream,_,_),C) :- format(Stream,'~w',[C]).
211
212
213		:- use_module(probsrc(tools_matching),[is_b_keyword/2]).
214
215		process_id(ID,env(C,S,K,Opts),env(NC,S,K,Opts)) :- is_indent_relevant_keyword(ID,Delta),!,
216		get_indent_spaces(Opts,Nr), NC is C+Nr*Delta. % format('~w->~w ',[C,NC]).
217		process_id(ID,env(_,S,K,Opts),env(NC,S,K,Opts)) :- is_b_keyword(ID,section),!,
218		get_indent_spaces(Opts,NC). % reset to beginning of line
219		process_id(_ID,C,C). % :- format('?~w?',[_ID]).
220
221		get_indent_spaces(Opts,Nr) :- (member(indent_spaces/N,Opts) -> Nr=N ; Nr=4).
222
223		% dec indent for one line for ELSIF, ...
224		dec_indent(env(I,Stream,K,Opts),env(I1,Stream,K,Opts)) :-
225		(I>0 -> get_indent_spaces(Opts,Nr),
226		(member(half_indent_elsif,Opts) -> I1 is I-(Nr//2) % ELSIF is slightly indented
227		; I1 is I-Nr)
228		; I1=I).
229
230		update_indent(env(I,Stream,K,Opts),Delta,env(I1,Stream,K,Opts)) :- I1 is I+4*Delta.
231
232
233		is_ident_relevant_code(0'{,1).
234		is_ident_relevant_code(0'},-1).
235		is_ident_relevant_code(0'(,1).
236		is_ident_relevant_code(0'),-1).
237		is_ident_relevant_code(0'[,1).
238		is_ident_relevant_code(0'],-1).
239		is_ident_relevant_code(0'\|,0).
240		% TODO: should we treat •, . in quantifiers also like this?
241		% TODO: also treat =>, &, or, ... in a similar fashion?
242
243		is_indent_relevant_keyword('ANY',1).
244		is_indent_relevant_keyword('ASSERT',1).
245		is_indent_relevant_keyword('BEGIN',1).
246		is_indent_relevant_keyword('CASE',1).
247		is_indent_relevant_keyword('CHOICE',1).
248		is_indent_relevant_keyword('IF',1).
249		is_indent_relevant_keyword('LET',1).
250		is_indent_relevant_keyword('PRE',1).
251		is_indent_relevant_keyword('SELECT',1).
252		is_indent_relevant_keyword('VAR',1).
253		is_indent_relevant_keyword('WHILE',1).
254		% SIGMA, PI : accompanied by parentheses which will force indent
255
256		% TODO: support rules_dsl
257		% is_indent_relevant_keyword('FOR',1).
258
259		is_indent_relevant_keyword('END',-1).
260
261
262		is_indent_relevant_keyword('BE',0).
263		is_indent_relevant_keyword('DO',0).
264		is_indent_relevant_keyword('EITHER',0).
265		is_indent_relevant_keyword('ELSE',0).
266		is_indent_relevant_keyword('ELSIF',0).
267		is_indent_relevant_keyword('IN',0).
268		is_indent_relevant_keyword('OF',0).
269		is_indent_relevant_keyword('OR',0).
270		is_indent_relevant_keyword('THEN',0).
271		is_indent_relevant_keyword('WHEN',0).
272		is_indent_relevant_keyword('WHERE',0).
273		is_indent_relevant_keyword('WITH',0).
274		is_indent_relevant_keyword('WITNESS',0).
275
276
277