1 | % (c) 2025-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
2 | % Heinrich Heine Universitaet Duesseldorf | |
3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) | |
4 | ||
5 | :- module(source_indenter,[indent_b_file/2, indent_b_file/3, | |
6 | indent_b_file_to_codes/2, indent_b_file_to_codes/3, | |
7 | reformat_b_file_to_codes/2]). | |
8 | ||
9 | ||
10 | :- use_module(probsrc(module_information),[module_info/2]). | |
11 | :- module_info(group,misc). | |
12 | :- module_info(description,'This module can indent B source files using a lexing approach.'). | |
13 | ||
14 | % the idea is to keep track of keywords relevant of increase/decrease of indentation level | |
15 | % it basically requires correct lexing of these keywords, and does not require parsing | |
16 | % it can thus deal with comments anywhere in the source code and also with files that cannot be parsed | |
17 | % it, however, currently does not check that opening/closing tags match | |
18 | ||
19 | % options for: | |
20 | % - indent_spaces/Nr: number of whitespaces per indentation : | |
21 | % - insert_new_lines_before_keywords: whether to automatically insert newlines before keywords like ELSIF, ... | |
22 | % - insert_new_lines_after_keywords: whether to automatically insert newlines after keywords like ELSIF, ... | |
23 | % - whether to add current indent level inside comments or not | |
24 | % TODO: parse special unicode identifiers, ... (ideally we would conform exactly to BLexer.java) | |
25 | ||
26 | :- use_module(library(codesio), [with_output_to_codes/4]). | |
27 | ||
28 | reformat_b_file_to_codes(File,Codes) :- | |
29 | indent_b_file_to_codes(File,Codes,[insert_new_lines_before_keywords,insert_new_lines_after_keywords]). | |
30 | indent_b_file_to_codes(File,Codes) :- | |
31 | indent_b_file_to_codes(File,Codes,[]). | |
32 | indent_b_file_to_codes(File,Codes,Opts) :- | |
33 | with_output_to_codes( | |
34 | indent_b_file(File,OutStream,Opts), | |
35 | OutStream, | |
36 | Codes, []). | |
37 | ||
38 | indent_b_file(File,OutStream) :- | |
39 | indent_b_file(File,OutStream, | |
40 | [] % | |
41 | % [insert_new_lines_after_keywords, insert_new_lines_before_keywords, half_indent_elsif, indent_spaces/8] | |
42 | ). | |
43 | ||
44 | indent_b_file(File,OutStream,Opts) :- | |
45 | open(File,read,Stream,[encoding(utf8)]), | |
46 | init_env(OutStream,Opts,Env), | |
47 | call_cleanup(process_file(Stream,Env), close(Stream)). | |
48 | ||
49 | process_file(Stream,Env) :- | |
50 | read_line(Stream,Codes), | |
51 | (Codes = end_of_file -> true | |
52 | ; % format('>~s[~w]~n',[Codes,Env]), | |
53 | (process_line(Env,Env1,Codes,[]) -> true | |
54 | ; write(error),nl, trace, process_line(Env,Env1,Codes,[]), | |
55 | Env2=Env), | |
56 | output_new_line(Env1,Env2), | |
57 | process_file(Stream,Env2) | |
58 | ). | |
59 | ||
60 | ||
61 | identifier(ID) --> [X], {is_alpha(X)}, id_aux(T), {atom_codes(ID,[X|T])}. | |
62 | ||
63 | id_aux([0'_|T]) --> "_",!, id_aux(T). | |
64 | id_aux([X|T]) --> [X], {is_numerical(X)},!, id_aux(T). | |
65 | id_aux([X|T]) --> [X], {is_alpha(X)},!, id_aux(T). | |
66 | id_aux([]) --> "". | |
67 | ||
68 | ||
69 | is_numerical(Code) :- Code >= 48, Code =< 57. % 0-9 | |
70 | ||
71 | is_alpha(Code) :- Code >= 65, Code =< 90. % A-Z | |
72 | is_alpha(Code) :- Code >= 97, Code =< 122. % a-z | |
73 | ||
74 | process_line(env(I,Stream,Open,Opts),ResEnv) --> | |
75 | {Open = open_special_token(Kind,Close)},!, % continue processing open special token | |
76 | {(Kind = comment -> output_indent(env(I,Stream,Open,Opts)) ; true)}, | |
77 | process_special_token(Kind,Close,env(I,Stream,no_open_special_token,Opts),ResEnv). | |
78 | process_line(Env,ResEnv) --> process_line2(Env,ResEnv). | |
79 | ||
80 | ||
81 | process_line2(Env,ResEnv) --> ws, !, process_line2(Env,ResEnv). | |
82 | process_line2(Env,ResEnv) --> identifier(ID), | |
83 | {is_b_keyword(ID,section), \+ is_indent_relevant_keyword(ID,_)},!, % No indent | |
84 | {output_atom(Env,ID),process_id(ID,Env,Env1)}, | |
85 | process_line_rest(Env1,ResEnv). | |
86 | process_line2(Env,ResEnv) --> identifier(ID), | |
87 | {is_indent_relevant_keyword(ID,Delta), Delta=<0},!, | |
88 | % END, decrement indent immediately; ELSIF decrement for this line only | |
89 | {process_id(ID,Env,Env1), | |
90 | output_indent_delta(Delta,Env1), | |
91 | output_atom(Env1,ID)}, | |
92 | process_line_after_id(ID,Env1,ResEnv). | |
93 | process_line2(Env,ResEnv) --> [C], | |
94 | {is_ident_relevant_code(C,Delta), Delta =< 0},!, | |
95 | {update_indent(Env,Delta,Env1), | |
96 | output_indent_delta(Delta,Env1), output_code(Env1,C)}, | |
97 | process_line_rest(Env1,ResEnv). | |
98 | %process_line2(Env,ResEnv,T,T) :- T \= [_|_], !, write(xx(T)),ResEnv=Env. % finished | |
99 | process_line2(Env,ResEnv) --> {output_indent(Env)}, process_line_rest(Env,ResEnv). | |
100 | ||
101 | % continue processing line after an ID; optionally insert newlines | |
102 | process_line_after_id(ID,Env,Res) --> | |
103 | ({is_indent_relevant_keyword(ID,0), | |
104 | option_set(insert_new_lines_after_keywords,Env)}, | |
105 | \+ at_end_of_line | |
106 | -> process_new_line(Env,Res) | |
107 | ; process_line_rest(Env,Res) | |
108 | ). | |
109 | ||
110 | at_end_of_line --> ws,!, at_end_of_line. | |
111 | at_end_of_line --> "". | |
112 | ||
113 | ||
114 | % output indent for a indent_relevant_keyword with given Delta value | |
115 | output_indent_delta(Delta,Env1) :- Delta=0,!, % things like ELSIF, ... | |
116 | dec_indent(Env1,Env2),% move ELSIF one to the left | |
117 | output_indent(Env2). | |
118 | output_indent_delta(_,Env) :- output_indent(Env). | |
119 | ||
120 | process_line_rest(Env,Res) --> identifier(ID),!, | |
121 | {(is_indent_relevant_keyword(ID,0), | |
122 | option_set(insert_new_lines_before_keywords,Env) | |
123 | -> output_new_line(Env,Env1),output_indent_delta(0,Env1) ; Env1=Env), | |
124 | output_atom(Env1,ID), | |
125 | process_id(ID,Env1,Env2) | |
126 | }, | |
127 | process_line_after_id(ID,Env2,Res). | |
128 | process_line_rest(Env,Res) --> new_line,!, process_new_line(Env,Res). | |
129 | process_line_rest(Env,Res) --> line_comment,!, {output_codes(Env,"//")}, process_line_comment(Env,Res). | |
130 | process_line_rest(Env,Res) --> open_special_token(Kind,Open,Close),!, | |
131 | {output_codes(Env,Open)}, process_special_token(Kind,Close,Env,Res). | |
132 | process_line_rest(In,Out) --> get_regular_token(Ts), !, {output_codes(In,Ts)}, process_line_rest(In,Out). | |
133 | process_line_rest(In,Out) --> [C], {is_ident_relevant_code(C,Delta)},!, | |
134 | {output_code(In,C),update_indent(In,Delta,In2)}, process_line_rest(In2,Out). | |
135 | process_line_rest(In,Out) --> [C], !, {output_code(In,C)}, process_line_rest(In,Out). | |
136 | process_line_rest(Env,Env) --> "". | |
137 | % TODO: detect other comments, strings | |
138 | % TODO: possibly insert new-lines for sections or things like ELSIF | |
139 | ||
140 | process_new_line(Env,Env2) --> {output_new_line(Env,Env1)}, process_line2(Env1,Env2). | |
141 | ||
142 | process_line_comment(Env,Env2) --> new_line,!, process_new_line(Env,Env2). | |
143 | process_line_comment(Env,Env2) --> [X],!, {output_code(Env,X)}, process_line_comment(Env,Env2). | |
144 | process_line_comment(Env,Env) --> "". | |
145 | ||
146 | % TODO: process special encodings \", .... | |
147 | process_special_token(string,Close,Env,OutEnv) --> special_string_encoding(Env),!, | |
148 | process_special_token(string,Close,Env,OutEnv). | |
149 | process_special_token(_Kind,Close,Env,OutEnv) --> Close,!, {output_codes(Env,Close)}, | |
150 | process_line_rest(Env,OutEnv). | |
151 | process_special_token(Kind,Close,In,Out) --> [C], !, {output_code(In,C)}, process_special_token(Kind,Close,In,Out). | |
152 | process_special_token(Kind,Close,Env,OutEnv) --> "", {register_open_special_token(Env,Kind,Close,OutEnv)}. | |
153 | ||
154 | special_string_encoding(Env) --> "\\", [X], % TODO: check exactly which encodings are processed | |
155 | {output_codes(Env,"\\"),output_code(Env,X)}. | |
156 | ||
157 | ws --> " "; "\t". | |
158 | new_line --> "\n". %,{inc_line_nr}. | |
159 | line_comment --> "//". | |
160 | ||
161 | % open special token whose contents should be ignored for indentation | |
162 | open_special_token(comment,"/*","*/") --> "/*". | |
163 | open_special_token(string,"\"","\"") --> "\"". | |
164 | open_special_token(string,"'''","'''") --> "'''". | |
165 | open_special_token(string,"```","```") --> "```". | |
166 | open_special_token(identifier,"`","`") --> "`". % multi-line allowed?? | |
167 | ||
168 | % recognise some tokes, in particular those that contain indent relevant tokens like vertical bar | | |
169 | get_regular_token("|->") --> "|->". % maplet | |
170 | get_regular_token("||") --> "||". | |
171 | get_regular_token("|>>") --> "|>>". | |
172 | get_regular_token("|>") --> "|>". | |
173 | get_regular_token("<<|") --> "<<|". | |
174 | get_regular_token("<|") --> "<|". | |
175 | get_regular_token("/|\\") --> "/|\\". | |
176 | get_regular_token("\\|/") --> "\\|/". | |
177 | get_regular_token("..") --> "..". | |
178 | % below not really necessary? | |
179 | get_regular_token("\\/") --> "\\/". | |
180 | get_regular_token("/\\") --> "/\\". | |
181 | ||
182 | % use a fact to keep track of line numbers | |
183 | :- dynamic cur_line/1. | |
184 | reset_line_nr :- retractall(cur_line(_)), assertz(cur_line(1)). | |
185 | inc_line_nr :- cur_line(N), !, retractall(cur_line(_)), N1 is N+1, assertz(cur_line(N1)). | |
186 | inc_line_nr. | |
187 | ||
188 | ||
189 | init_env(OutStream,Opts,env(0,OutStream,no_open_special_token,Opts)). | |
190 | ||
191 | option_set(Option,env(_,_,_,Opts)) :- member(Option,Opts). | |
192 | ||
193 | register_open_special_token(env(C,Stream,_,Opts),Kind,CloseTag, | |
194 | env(C,Stream,open_special_token(Kind,CloseTag),Opts)). | |
195 | ||
196 | % output preds | |
197 | output_indent(env(Nr,Str,K,Opts)) :- Nr>0, !, write(Str,' '), N1 is Nr-1, output_indent(env(N1,Str,K,Opts)). | |
198 | output_indent(_). | |
199 | ||
200 | output_new_line(env(E,Stream,K,Opts),env(E,Stream,K,Opts)) :- % format('//[~w]',[E]), | |
201 | nl(Stream). | |
202 | ||
203 | output_code(env(_,Stream,_,_),C) :- put_code(Stream,C). | |
204 | output_codes(env(_,Stream,_,_),Cs) :- format(Stream,'~s',[Cs]). | |
205 | output_atom(env(_,Stream,_,_),C) :- format(Stream,'~w',[C]). | |
206 | ||
207 | ||
208 | :- use_module(probsrc(tools_matching),[is_b_keyword/2]). | |
209 | ||
210 | process_id(ID,env(C,S,K,Opts),env(NC,S,K,Opts)) :- is_indent_relevant_keyword(ID,Delta),!, | |
211 | get_indent_spaces(Opts,Nr), NC is C+Nr*Delta. % format('~w->~w ',[C,NC]). | |
212 | process_id(ID,env(_,S,K,Opts),env(NC,S,K,Opts)) :- is_b_keyword(ID,section),!, | |
213 | get_indent_spaces(Opts,NC). % reset to beginning of line | |
214 | process_id(_ID,C,C). % :- format('?~w?',[_ID]). | |
215 | ||
216 | get_indent_spaces(Opts,Nr) :- (member(indent_spaces/N,Opts) -> Nr=N ; Nr=4). | |
217 | ||
218 | % dec indent for one line for ELSIF, ... | |
219 | dec_indent(env(I,Stream,K,Opts),env(I1,Stream,K,Opts)) :- | |
220 | (I>0 -> get_indent_spaces(Opts,Nr), | |
221 | (member(half_indent_elsif,Opts) -> I1 is I-(Nr//2) % ELSIF is slightly indented | |
222 | ; I1 is I-Nr) | |
223 | ; I1=I). | |
224 | ||
225 | update_indent(env(I,Stream,K,Opts),Delta,env(I1,Stream,K,Opts)) :- I1 is I+4*Delta. | |
226 | ||
227 | ||
228 | is_ident_relevant_code(0'{,1). | |
229 | is_ident_relevant_code(0'},-1). | |
230 | is_ident_relevant_code(0'(,1). | |
231 | is_ident_relevant_code(0'),-1). | |
232 | is_ident_relevant_code(0'[,1). | |
233 | is_ident_relevant_code(0'],-1). | |
234 | is_ident_relevant_code(0'|,0). | |
235 | % TODO: should we treat •, . in quantifiers also like this? | |
236 | % TODO: also treat =>, &, or, ... in a similar fashion? | |
237 | ||
238 | is_indent_relevant_keyword('ANY',1). | |
239 | is_indent_relevant_keyword('ASSERT',1). | |
240 | is_indent_relevant_keyword('BEGIN',1). | |
241 | is_indent_relevant_keyword('CASE',1). | |
242 | is_indent_relevant_keyword('CHOICE',1). | |
243 | is_indent_relevant_keyword('IF',1). | |
244 | is_indent_relevant_keyword('LET',1). | |
245 | is_indent_relevant_keyword('PRE',1). | |
246 | is_indent_relevant_keyword('SELECT',1). | |
247 | is_indent_relevant_keyword('VAR',1). | |
248 | is_indent_relevant_keyword('WHILE',1). | |
249 | % SIGMA, PI : accompanied by parentheses which will force indent | |
250 | ||
251 | % TODO: support rules_dsl | |
252 | % is_indent_relevant_keyword('FOR',1). | |
253 | ||
254 | is_indent_relevant_keyword('END',-1). | |
255 | ||
256 | ||
257 | is_indent_relevant_keyword('BE',0). | |
258 | is_indent_relevant_keyword('DO',0). | |
259 | is_indent_relevant_keyword('EITHER',0). | |
260 | is_indent_relevant_keyword('ELSE',0). | |
261 | is_indent_relevant_keyword('ELSIF',0). | |
262 | is_indent_relevant_keyword('IN',0). | |
263 | is_indent_relevant_keyword('OF',0). | |
264 | is_indent_relevant_keyword('OR',0). | |
265 | is_indent_relevant_keyword('THEN',0). | |
266 | is_indent_relevant_keyword('WHEN',0). | |
267 | is_indent_relevant_keyword('WHERE',0). | |
268 | is_indent_relevant_keyword('WITH',0). | |
269 | is_indent_relevant_keyword('WITNESS',0). | |
270 | ||
271 | ||
272 |