1 % (c) 2025-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2 % Heinrich Heine Universitaet Duesseldorf
3 % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html)
4
5 :- module(source_indenter,[indent_b_file/2, indent_b_file_to_codes/2]).
6
7
8 :- use_module(probsrc(module_information),[module_info/2]).
9 :- module_info(group,misc).
10 :- module_info(description,'This module can indent B source files using a lexing approach.').
11
12 % the idea is to keep track of keywords relevant of increase/decrease of indentation level
13 % it basically requires correct lexing of these keywords, and does not require parsing
14 % it can thus deal with comments anywhere in the source code and also with files that cannot be parsed
15 % it, however, currently does not check that opening/closing tags match
16
17 % TODO: provide options for:
18 % - number of whitespaces per indentation,
19 % - whether to automatically insert newlines for keywords like ELSIF, ...
20 % - whether to add current indent level inside comments or not
21 % TODO: parse special unicode identifiers, ... (ideally we would conform exactly to BLexer.java)
22
23 :- use_module(library(codesio), [with_output_to_codes/4]).
24
25 indent_b_file_to_codes(File,Codes) :-
26 with_output_to_codes(
27 indent_b_file(File,OutStream),
28 OutStream,
29 Codes, []).
30
31 indent_b_file(File,OutStream) :-
32 indent_b_file(File,OutStream,
33 [] %
34 % [insert_new_lines_after_keywords, insert_new_lines_before_keywords, half_indent_elsif, indent_spaces/8]
35 ).
36
37 indent_b_file(File,OutStream,Opts) :-
38 open(File,read,Stream,[encoding(utf8)]),
39 init_env(OutStream,Opts,Env),
40 call_cleanup(process_file(Stream,Env), close(Stream)).
41
42 process_file(Stream,Env) :-
43 read_line(Stream,Codes),
44 (Codes = end_of_file -> true
45 ; % format('>~s[~w]~n',[Codes,Env]),
46 (process_line(Env,Env1,Codes,[]) -> true
47 ; write(error),nl, trace, process_line(Env,Env1,Codes,[]),
48 Env2=Env),
49 output_new_line(Env1,Env2),
50 process_file(Stream,Env2)
51 ).
52
53
54 identifier(ID) --> [X], {is_alpha(X)}, id_aux(T), {atom_codes(ID,[X|T])}.
55
56 id_aux([0'_|T]) --> "_",!, id_aux(T).
57 id_aux([X|T]) --> [X], {is_numerical(X)},!, id_aux(T).
58 id_aux([X|T]) --> [X], {is_alpha(X)},!, id_aux(T).
59 id_aux([]) --> "".
60
61
62 is_numerical(Code) :- Code >= 48, Code =< 57. % 0-9
63
64 is_alpha(Code) :- Code >= 65, Code =< 90. % A-Z
65 is_alpha(Code) :- Code >= 97, Code =< 122. % a-z
66
67 process_line(env(I,Stream,Open,Opts),ResEnv) -->
68 {Open = open_special_token(Kind,Close)},!, % continue processing open special token
69 {(Kind = comment -> output_indent(env(I,Stream,Open,Opts)) ; true)},
70 process_special_token(Kind,Close,env(I,Stream,no_open_special_token,Opts),ResEnv).
71 process_line(Env,ResEnv) --> process_line2(Env,ResEnv).
72
73
74 process_line2(Env,ResEnv) --> ws, !, process_line2(Env,ResEnv).
75 process_line2(Env,ResEnv) --> identifier(ID),
76 {is_b_keyword(ID,section), \+ is_indent_relevant_keyword(ID,_)},!, % No indent
77 {output_atom(Env,ID),process_id(ID,Env,Env1)},
78 process_line_rest(Env1,ResEnv).
79 process_line2(Env,ResEnv) --> identifier(ID),
80 {is_indent_relevant_keyword(ID,Delta), Delta=<0},!,
81 % END, decrement indent immediately; ELSIF decrement for this line only
82 {process_id(ID,Env,Env1),
83 output_indent_delta(Delta,Env1),
84 output_atom(Env1,ID)},
85 process_line_after_id(ID,Env1,ResEnv).
86 process_line2(Env,ResEnv) --> [C],
87 {is_ident_relevant_code(C,Delta), Delta =< 0},!,
88 {update_indent(Env,Delta,Env1),
89 output_indent_delta(Delta,Env1), output_code(Env1,C)},
90 process_line_rest(Env1,ResEnv).
91 %process_line2(Env,ResEnv,T,T) :- T \= [_|_], !, write(xx(T)),ResEnv=Env. % finished
92 process_line2(Env,ResEnv) --> {output_indent(Env)}, process_line_rest(Env,ResEnv).
93
94 % continue processing line after an ID; optionally insert newlines
95 process_line_after_id(ID,Env,Res) -->
96 ({is_indent_relevant_keyword(ID,0),
97 option_set(insert_new_lines_after_keywords,Env)},
98 \+ at_end_of_line
99 -> process_new_line(Env,Res)
100 ; process_line_rest(Env,Res)
101 ).
102
103 at_end_of_line --> ws,!, at_end_of_line.
104 at_end_of_line --> "".
105
106
107 % output indent for a indent_relevant_keyword with given Delta value
108 output_indent_delta(Delta,Env1) :- Delta=0,!, % things like ELSIF, ...
109 dec_indent(Env1,Env2),% move ELSIF one to the left
110 output_indent(Env2).
111 output_indent_delta(_,Env) :- output_indent(Env).
112
113 process_line_rest(Env,Res) --> identifier(ID),!,
114 {(is_indent_relevant_keyword(ID,0),
115 option_set(insert_new_lines_before_keywords,Env)
116 -> output_new_line(Env,Env1),output_indent_delta(0,Env1) ; Env1=Env),
117 output_atom(Env1,ID),
118 process_id(ID,Env1,Env2)
119 },
120 process_line_after_id(ID,Env2,Res).
121 process_line_rest(Env,Res) --> new_line,!, process_new_line(Env,Res).
122 process_line_rest(Env,Res) --> line_comment,!, {output_codes(Env,"//")}, process_line_comment(Env,Res).
123 process_line_rest(Env,Res) --> open_special_token(Kind,Open,Close),!,
124 {output_codes(Env,Open)}, process_special_token(Kind,Close,Env,Res).
125 process_line_rest(In,Out) --> get_regular_token(Ts), !, {output_codes(In,Ts)}, process_line_rest(In,Out).
126 process_line_rest(In,Out) --> [C], {is_ident_relevant_code(C,Delta)},!,
127 {output_code(In,C),update_indent(In,Delta,In2)}, process_line_rest(In2,Out).
128 process_line_rest(In,Out) --> [C], !, {output_code(In,C)}, process_line_rest(In,Out).
129 process_line_rest(Env,Env) --> "".
130 % TODO: detect other comments, strings
131 % TODO: possibly insert new-lines for sections or things like ELSIF
132
133 process_new_line(Env,Env2) --> {output_new_line(Env,Env1)}, process_line2(Env1,Env2).
134
135 process_line_comment(Env,Env2) --> new_line,!, process_new_line(Env,Env2).
136 process_line_comment(Env,Env2) --> [X],!, {output_code(Env,X)}, process_line_comment(Env,Env2).
137 process_line_comment(Env,Env) --> "".
138
139 % TODO: process special encodings \", ....
140 process_special_token(string,Close,Env,OutEnv) --> special_string_encoding(Env),!,
141 process_special_token(string,Close,Env,OutEnv).
142 process_special_token(_Kind,Close,Env,OutEnv) --> Close,!, {output_codes(Env,Close)},
143 process_line_rest(Env,OutEnv).
144 process_special_token(Kind,Close,In,Out) --> [C], !, {output_code(In,C)}, process_special_token(Kind,Close,In,Out).
145 process_special_token(Kind,Close,Env,OutEnv) --> "", {register_open_special_token(Env,Kind,Close,OutEnv)}.
146
147 special_string_encoding(Env) --> "\\", [X], % TODO: check exactly which encodings are processed
148 {output_codes(Env,"\\"),output_code(Env,X)}.
149
150 ws --> " "; "\t".
151 new_line --> "\n". %,{inc_line_nr}.
152 line_comment --> "//".
153
154 % open special token whose contents should be ignored for indentation
155 open_special_token(comment,"/*","*/") --> "/*".
156 open_special_token(string,"\"","\"") --> "\"".
157 open_special_token(string,"'''","'''") --> "'''".
158 open_special_token(string,"```","```") --> "```".
159 open_special_token(identifier,"`","`") --> "`". % multi-line allowed??
160
161 % recognise some tokes, in particular those that contain indent relevant tokens like vertical bar |
162 get_regular_token("|->") --> "|->". % maplet
163 get_regular_token("||") --> "||".
164 get_regular_token("|>>") --> "|>>".
165 get_regular_token("|>") --> "|>".
166 get_regular_token("<<|") --> "<<|".
167 get_regular_token("<|") --> "<|".
168 get_regular_token("/|\\") --> "/|\\".
169 get_regular_token("\\|/") --> "\\|/".
170 get_regular_token("..") --> "..".
171 % below not really necessary?
172 get_regular_token("\\/") --> "\\/".
173 get_regular_token("/\\") --> "/\\".
174
175 % use a fact to keep track of line numbers
176 :- dynamic cur_line/1.
177 reset_line_nr :- retractall(cur_line(_)), assertz(cur_line(1)).
178 inc_line_nr :- cur_line(N), !, retractall(cur_line(_)), N1 is N+1, assertz(cur_line(N1)).
179 inc_line_nr.
180
181
182 init_env(OutStream,Opts,env(0,OutStream,no_open_special_token,Opts)).
183
184 option_set(Option,env(_,_,_,Opts)) :- member(Option,Opts).
185
186 register_open_special_token(env(C,Stream,_,Opts),Kind,CloseTag,
187 env(C,Stream,open_special_token(Kind,CloseTag),Opts)).
188
189 % output preds
190 output_indent(env(Nr,Str,K,Opts)) :- Nr>0, !, write(Str,' '), N1 is Nr-1, output_indent(env(N1,Str,K,Opts)).
191 output_indent(_).
192
193 output_new_line(env(E,Stream,K,Opts),env(E,Stream,K,Opts)) :- % format('//[~w]',[E]),
194 nl(Stream).
195
196 output_code(env(_,Stream,_,_),C) :- put_code(Stream,C).
197 output_codes(env(_,Stream,_,_),Cs) :- format(Stream,'~s',[Cs]).
198 output_atom(env(_,Stream,_,_),C) :- format(Stream,'~w',[C]).
199
200
201 :- use_module(probsrc(tools_matching),[is_b_keyword/2]).
202
203 process_id(ID,env(C,S,K,Opts),env(NC,S,K,Opts)) :- is_indent_relevant_keyword(ID,Delta),!,
204 get_indent_spaces(Opts,Nr), NC is C+Nr*Delta. % format('~w->~w ',[C,NC]).
205 process_id(ID,env(_,S,K,Opts),env(NC,S,K,Opts)) :- is_b_keyword(ID,section),!,
206 get_indent_spaces(Opts,NC). % reset to beginning of line
207 process_id(_ID,C,C). % :- format('?~w?',[_ID]).
208
209 get_indent_spaces(Opts,Nr) :- (member(indent_spaces/N,Opts) -> Nr=N ; Nr=4).
210
211 % dec indent for one line for ELSIF, ...
212 dec_indent(env(I,Stream,K,Opts),env(I1,Stream,K,Opts)) :-
213 (I>0 -> get_indent_spaces(Opts,Nr),
214 (member(half_indent_elsif,Opts) -> I1 is I-(Nr//2) % ELSIF is slightly indented
215 ; I1 is I-Nr)
216 ; I1=I).
217
218 update_indent(env(I,Stream,K,Opts),Delta,env(I1,Stream,K,Opts)) :- I1 is I+4*Delta.
219
220
221 is_ident_relevant_code(0'{,1).
222 is_ident_relevant_code(0'},-1).
223 is_ident_relevant_code(0'(,1).
224 is_ident_relevant_code(0'),-1).
225 is_ident_relevant_code(0'[,1).
226 is_ident_relevant_code(0'],-1).
227 is_ident_relevant_code(0'|,0).
228 % TODO: should we treat •, . in quantifiers also like this?
229 % TODO: also treat =>, &, or, ... in a similar fashion?
230
231 is_indent_relevant_keyword('ANY',1).
232 is_indent_relevant_keyword('ASSERT',1).
233 is_indent_relevant_keyword('BEGIN',1).
234 is_indent_relevant_keyword('CASE',1).
235 is_indent_relevant_keyword('CHOICE',1).
236 is_indent_relevant_keyword('IF',1).
237 is_indent_relevant_keyword('LET',1).
238 is_indent_relevant_keyword('PRE',1).
239 is_indent_relevant_keyword('SELECT',1).
240 is_indent_relevant_keyword('VAR',1).
241 is_indent_relevant_keyword('WHILE',1).
242 % SIGMA, PI : accompanied by parentheses which will force indent
243
244 % TODO: support rules_dsl
245 % is_indent_relevant_keyword('FOR',1).
246
247 is_indent_relevant_keyword('END',-1).
248
249
250 is_indent_relevant_keyword('BE',0).
251 is_indent_relevant_keyword('DO',0).
252 is_indent_relevant_keyword('EITHER',0).
253 is_indent_relevant_keyword('ELSE',0).
254 is_indent_relevant_keyword('ELSIF',0).
255 is_indent_relevant_keyword('IN',0).
256 is_indent_relevant_keyword('OF',0).
257 is_indent_relevant_keyword('OR',0).
258 is_indent_relevant_keyword('THEN',0).
259 is_indent_relevant_keyword('WHEN',0).
260 is_indent_relevant_keyword('WHERE',0).
261 is_indent_relevant_keyword('WITH',0).
262 is_indent_relevant_keyword('WITNESS',0).
263
264
265