1 % (c) 2025-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2 % Heinrich Heine Universitaet Duesseldorf
3 % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html)
4
5 :- module(source_indenter,[indent_b_file/2, indent_b_file/3,
6 indent_b_file_to_codes/2, indent_b_file_to_codes/3,
7 reformat_b_file_to_codes/2]).
8
9
10 :- use_module(probsrc(module_information),[module_info/2]).
11 :- module_info(group,misc).
12 :- module_info(description,'This module can indent B source files using a lexing approach.').
13
14 % the idea is to keep track of keywords relevant of increase/decrease of indentation level
15 % it basically requires correct lexing of these keywords, and does not require parsing
16 % it can thus deal with comments anywhere in the source code and also with files that cannot be parsed
17 % it, however, currently does not check that opening/closing tags match
18
19 % options for:
20 % - indent_spaces/Nr: number of whitespaces per indentation :
21 % - insert_new_lines_before_keywords: whether to automatically insert newlines before keywords like ELSIF, ...
22 % - insert_new_lines_after_keywords: whether to automatically insert newlines after keywords like ELSIF, ...
23 % - whether to add current indent level inside comments or not
24 % TODO: parse special unicode identifiers, ... (ideally we would conform exactly to BLexer.java)
25
26 :- use_module(library(codesio), [with_output_to_codes/4]).
27
28 reformat_b_file_to_codes(File,Codes) :-
29 indent_b_file_to_codes(File,Codes,[insert_new_lines_before_keywords,insert_new_lines_after_keywords]).
30 indent_b_file_to_codes(File,Codes) :-
31 indent_b_file_to_codes(File,Codes,[]).
32 indent_b_file_to_codes(File,Codes,Opts) :-
33 with_output_to_codes(
34 indent_b_file(File,OutStream,Opts),
35 OutStream,
36 Codes, []).
37
38 indent_b_file(File,OutStream) :-
39 indent_b_file(File,OutStream,
40 [] %
41 % [insert_new_lines_after_keywords, insert_new_lines_before_keywords, half_indent_elsif, indent_spaces/8]
42 ).
43
44 indent_b_file(File,OutStream,Opts) :-
45 open(File,read,Stream,[encoding(utf8)]),
46 init_env(OutStream,Opts,Env),
47 call_cleanup(process_file(Stream,Env), close(Stream)).
48
49 process_file(Stream,Env) :-
50 read_line(Stream,Codes),
51 (Codes = end_of_file -> true
52 ; % format('>~s[~w]~n',[Codes,Env]),
53 (process_line(Env,Env1,Codes,[]) -> true
54 ; write(error),nl, trace, process_line(Env,Env1,Codes,[]),
55 Env2=Env),
56 output_new_line(Env1,Env2),
57 process_file(Stream,Env2)
58 ).
59
60
61 identifier(ID) --> [X], {is_alpha(X)}, id_aux(T), {atom_codes(ID,[X|T])}.
62
63 id_aux([0'_|T]) --> "_",!, id_aux(T).
64 id_aux([X|T]) --> [X], {is_numerical(X)},!, id_aux(T).
65 id_aux([X|T]) --> [X], {is_alpha(X)},!, id_aux(T).
66 id_aux([]) --> "".
67
68
69 is_numerical(Code) :- Code >= 48, Code =< 57. % 0-9
70
71 is_alpha(Code) :- Code >= 65, Code =< 90. % A-Z
72 is_alpha(Code) :- Code >= 97, Code =< 122. % a-z
73
74 process_line(env(I,Stream,Open,Opts),ResEnv) -->
75 {Open = open_special_token(Kind,Close)},!, % continue processing open special token
76 {(Kind = comment -> output_indent(env(I,Stream,Open,Opts)) ; true)},
77 process_special_token(Kind,Close,env(I,Stream,no_open_special_token,Opts),ResEnv).
78 process_line(Env,ResEnv) --> process_line2(Env,ResEnv).
79
80
81 process_line2(Env,ResEnv) --> ws, !, process_line2(Env,ResEnv).
82 process_line2(Env,ResEnv) --> identifier(ID),
83 {is_b_keyword(ID,section), \+ is_indent_relevant_keyword(ID,_)},!, % No indent
84 {output_atom(Env,ID),process_id(ID,Env,Env1)},
85 process_line_rest(Env1,ResEnv).
86 process_line2(Env,ResEnv) --> identifier(ID),
87 {is_indent_relevant_keyword(ID,Delta), Delta=<0},!,
88 % END, decrement indent immediately; ELSIF decrement for this line only
89 {process_id(ID,Env,Env1),
90 output_indent_delta(Delta,Env1),
91 output_atom(Env1,ID)},
92 process_line_after_id(ID,Env1,ResEnv).
93 process_line2(Env,ResEnv) --> [C],
94 {is_ident_relevant_code(C,Delta), Delta =< 0},!,
95 {update_indent(Env,Delta,Env1),
96 output_indent_delta(Delta,Env1), output_code(Env1,C)},
97 process_line_rest(Env1,ResEnv).
98 %process_line2(Env,ResEnv,T,T) :- T \= [_|_], !, write(xx(T)),ResEnv=Env. % finished
99 process_line2(Env,ResEnv) --> {output_indent(Env)}, process_line_rest(Env,ResEnv).
100
101 % continue processing line after an ID; optionally insert newlines
102 process_line_after_id(ID,Env,Res) -->
103 ({is_indent_relevant_keyword(ID,0),
104 option_set(insert_new_lines_after_keywords,Env)},
105 \+ at_end_of_line
106 -> process_new_line(Env,Res)
107 ; process_line_rest(Env,Res)
108 ).
109
110 at_end_of_line --> ws,!, at_end_of_line.
111 at_end_of_line --> "".
112
113
114 % output indent for a indent_relevant_keyword with given Delta value
115 output_indent_delta(Delta,Env1) :- Delta=0,!, % things like ELSIF, ...
116 dec_indent(Env1,Env2),% move ELSIF one to the left
117 output_indent(Env2).
118 output_indent_delta(_,Env) :- output_indent(Env).
119
120 process_line_rest(Env,Res) --> identifier(ID),!,
121 {(is_indent_relevant_keyword(ID,0),
122 option_set(insert_new_lines_before_keywords,Env)
123 -> output_new_line(Env,Env1),output_indent_delta(0,Env1) ; Env1=Env),
124 output_atom(Env1,ID),
125 process_id(ID,Env1,Env2)
126 },
127 process_line_after_id(ID,Env2,Res).
128 process_line_rest(Env,Res) --> new_line,!, process_new_line(Env,Res).
129 process_line_rest(Env,Res) --> line_comment,!, {output_codes(Env,"//")}, process_line_comment(Env,Res).
130 process_line_rest(Env,Res) --> open_special_token(Kind,Open,Close),!,
131 {output_codes(Env,Open)}, process_special_token(Kind,Close,Env,Res).
132 process_line_rest(In,Out) --> get_regular_token(Ts), !, {output_codes(In,Ts)}, process_line_rest(In,Out).
133 process_line_rest(In,Out) --> [C], {is_ident_relevant_code(C,Delta)},!,
134 {output_code(In,C),update_indent(In,Delta,In2)}, process_line_rest(In2,Out).
135 process_line_rest(In,Out) --> [C], !, {output_code(In,C)}, process_line_rest(In,Out).
136 process_line_rest(Env,Env) --> "".
137 % TODO: detect other comments, strings
138 % TODO: possibly insert new-lines for sections or things like ELSIF
139
140 process_new_line(Env,Env2) --> {output_new_line(Env,Env1)}, process_line2(Env1,Env2).
141
142 process_line_comment(Env,Env2) --> new_line,!, process_new_line(Env,Env2).
143 process_line_comment(Env,Env2) --> [X],!, {output_code(Env,X)}, process_line_comment(Env,Env2).
144 process_line_comment(Env,Env) --> "".
145
146 % TODO: process special encodings \", ....
147 process_special_token(string,Close,Env,OutEnv) --> special_string_encoding(Env),!,
148 process_special_token(string,Close,Env,OutEnv).
149 process_special_token(_Kind,Close,Env,OutEnv) --> Close,!, {output_codes(Env,Close)},
150 process_line_rest(Env,OutEnv).
151 process_special_token(Kind,Close,In,Out) --> [C], !, {output_code(In,C)}, process_special_token(Kind,Close,In,Out).
152 process_special_token(Kind,Close,Env,OutEnv) --> "", {register_open_special_token(Env,Kind,Close,OutEnv)}.
153
154 special_string_encoding(Env) --> "\\", [X], % TODO: check exactly which encodings are processed
155 {output_codes(Env,"\\"),output_code(Env,X)}.
156
157 ws --> " "; "\t".
158 new_line --> "\n". %,{inc_line_nr}.
159 line_comment --> "//".
160
161 % open special token whose contents should be ignored for indentation
162 open_special_token(comment,"/*","*/") --> "/*".
163 open_special_token(string,"\"","\"") --> "\"".
164 open_special_token(string,"'''","'''") --> "'''".
165 open_special_token(string,"```","```") --> "```".
166 open_special_token(identifier,"`","`") --> "`". % multi-line allowed??
167
168 % recognise some tokes, in particular those that contain indent relevant tokens like vertical bar |
169 get_regular_token("|->") --> "|->". % maplet
170 get_regular_token("||") --> "||".
171 get_regular_token("|>>") --> "|>>".
172 get_regular_token("|>") --> "|>".
173 get_regular_token("<<|") --> "<<|".
174 get_regular_token("<|") --> "<|".
175 get_regular_token("/|\\") --> "/|\\".
176 get_regular_token("\\|/") --> "\\|/".
177 get_regular_token("..") --> "..".
178 % below not really necessary?
179 get_regular_token("\\/") --> "\\/".
180 get_regular_token("/\\") --> "/\\".
181
182 % use a fact to keep track of line numbers
183 :- dynamic cur_line/1.
184 reset_line_nr :- retractall(cur_line(_)), assertz(cur_line(1)).
185 inc_line_nr :- cur_line(N), !, retractall(cur_line(_)), N1 is N+1, assertz(cur_line(N1)).
186 inc_line_nr.
187
188
189 init_env(OutStream,Opts,env(0,OutStream,no_open_special_token,Opts)).
190
191 option_set(Option,env(_,_,_,Opts)) :- member(Option,Opts).
192
193 register_open_special_token(env(C,Stream,_,Opts),Kind,CloseTag,
194 env(C,Stream,open_special_token(Kind,CloseTag),Opts)).
195
196 % output preds
197 output_indent(env(Nr,Str,K,Opts)) :- Nr>0, !, write(Str,' '), N1 is Nr-1, output_indent(env(N1,Str,K,Opts)).
198 output_indent(_).
199
200 output_new_line(env(E,Stream,K,Opts),env(E,Stream,K,Opts)) :- % format('//[~w]',[E]),
201 nl(Stream).
202
203 output_code(env(_,Stream,_,_),C) :- put_code(Stream,C).
204 output_codes(env(_,Stream,_,_),Cs) :- format(Stream,'~s',[Cs]).
205 output_atom(env(_,Stream,_,_),C) :- format(Stream,'~w',[C]).
206
207
208 :- use_module(probsrc(tools_matching),[is_b_keyword/2]).
209
210 process_id(ID,env(C,S,K,Opts),env(NC,S,K,Opts)) :- is_indent_relevant_keyword(ID,Delta),!,
211 get_indent_spaces(Opts,Nr), NC is C+Nr*Delta. % format('~w->~w ',[C,NC]).
212 process_id(ID,env(_,S,K,Opts),env(NC,S,K,Opts)) :- is_b_keyword(ID,section),!,
213 get_indent_spaces(Opts,NC). % reset to beginning of line
214 process_id(_ID,C,C). % :- format('?~w?',[_ID]).
215
216 get_indent_spaces(Opts,Nr) :- (member(indent_spaces/N,Opts) -> Nr=N ; Nr=4).
217
218 % dec indent for one line for ELSIF, ...
219 dec_indent(env(I,Stream,K,Opts),env(I1,Stream,K,Opts)) :-
220 (I>0 -> get_indent_spaces(Opts,Nr),
221 (member(half_indent_elsif,Opts) -> I1 is I-(Nr//2) % ELSIF is slightly indented
222 ; I1 is I-Nr)
223 ; I1=I).
224
225 update_indent(env(I,Stream,K,Opts),Delta,env(I1,Stream,K,Opts)) :- I1 is I+4*Delta.
226
227
228 is_ident_relevant_code(0'{,1).
229 is_ident_relevant_code(0'},-1).
230 is_ident_relevant_code(0'(,1).
231 is_ident_relevant_code(0'),-1).
232 is_ident_relevant_code(0'[,1).
233 is_ident_relevant_code(0'],-1).
234 is_ident_relevant_code(0'|,0).
235 % TODO: should we treat •, . in quantifiers also like this?
236 % TODO: also treat =>, &, or, ... in a similar fashion?
237
238 is_indent_relevant_keyword('ANY',1).
239 is_indent_relevant_keyword('ASSERT',1).
240 is_indent_relevant_keyword('BEGIN',1).
241 is_indent_relevant_keyword('CASE',1).
242 is_indent_relevant_keyword('CHOICE',1).
243 is_indent_relevant_keyword('IF',1).
244 is_indent_relevant_keyword('LET',1).
245 is_indent_relevant_keyword('PRE',1).
246 is_indent_relevant_keyword('SELECT',1).
247 is_indent_relevant_keyword('VAR',1).
248 is_indent_relevant_keyword('WHILE',1).
249 % SIGMA, PI : accompanied by parentheses which will force indent
250
251 % TODO: support rules_dsl
252 % is_indent_relevant_keyword('FOR',1).
253
254 is_indent_relevant_keyword('END',-1).
255
256
257 is_indent_relevant_keyword('BE',0).
258 is_indent_relevant_keyword('DO',0).
259 is_indent_relevant_keyword('EITHER',0).
260 is_indent_relevant_keyword('ELSE',0).
261 is_indent_relevant_keyword('ELSIF',0).
262 is_indent_relevant_keyword('IN',0).
263 is_indent_relevant_keyword('OF',0).
264 is_indent_relevant_keyword('OR',0).
265 is_indent_relevant_keyword('THEN',0).
266 is_indent_relevant_keyword('WHEN',0).
267 is_indent_relevant_keyword('WHERE',0).
268 is_indent_relevant_keyword('WITH',0).
269 is_indent_relevant_keyword('WITNESS',0).
270
271
272