1 | % (c) 2025-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
2 | % Heinrich Heine Universitaet Duesseldorf | |
3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) | |
4 | ||
5 | :- module(source_indenter,[indent_b_file/2, indent_b_file_to_codes/2]). | |
6 | ||
7 | ||
8 | :- use_module(probsrc(module_information),[module_info/2]). | |
9 | :- module_info(group,misc). | |
10 | :- module_info(description,'This module can indent B source files using a lexing approach.'). | |
11 | ||
12 | % the idea is to keep track of keywords relevant of increase/decrease of indentation level | |
13 | % it basically requires correct lexing of these keywords, and does not require parsing | |
14 | % it can thus deal with comments anywhere in the source code and also with files that cannot be parsed | |
15 | % it, however, currently does not check that opening/closing tags match | |
16 | ||
17 | % TODO: provide options for: | |
18 | % - number of whitespaces per indentation, | |
19 | % - whether to automatically insert newlines for keywords like ELSIF, ... | |
20 | % - whether to add current indent level inside comments or not | |
21 | % TODO: parse special unicode identifiers, ... (ideally we would conform exactly to BLexer.java) | |
22 | ||
23 | :- use_module(library(codesio), [with_output_to_codes/4]). | |
24 | ||
25 | indent_b_file_to_codes(File,Codes) :- | |
26 | with_output_to_codes( | |
27 | indent_b_file(File,OutStream), | |
28 | OutStream, | |
29 | Codes, []). | |
30 | ||
31 | indent_b_file(File,OutStream) :- | |
32 | indent_b_file(File,OutStream, | |
33 | [] % | |
34 | % [insert_new_lines_after_keywords, insert_new_lines_before_keywords, half_indent_elsif, indent_spaces/8] | |
35 | ). | |
36 | ||
37 | indent_b_file(File,OutStream,Opts) :- | |
38 | open(File,read,Stream,[encoding(utf8)]), | |
39 | init_env(OutStream,Opts,Env), | |
40 | call_cleanup(process_file(Stream,Env), close(Stream)). | |
41 | ||
42 | process_file(Stream,Env) :- | |
43 | read_line(Stream,Codes), | |
44 | (Codes = end_of_file -> true | |
45 | ; % format('>~s[~w]~n',[Codes,Env]), | |
46 | (process_line(Env,Env1,Codes,[]) -> true | |
47 | ; write(error),nl, trace, process_line(Env,Env1,Codes,[]), | |
48 | Env2=Env), | |
49 | output_new_line(Env1,Env2), | |
50 | process_file(Stream,Env2) | |
51 | ). | |
52 | ||
53 | ||
54 | identifier(ID) --> [X], {is_alpha(X)}, id_aux(T), {atom_codes(ID,[X|T])}. | |
55 | ||
56 | id_aux([0'_|T]) --> "_",!, id_aux(T). | |
57 | id_aux([X|T]) --> [X], {is_numerical(X)},!, id_aux(T). | |
58 | id_aux([X|T]) --> [X], {is_alpha(X)},!, id_aux(T). | |
59 | id_aux([]) --> "". | |
60 | ||
61 | ||
62 | is_numerical(Code) :- Code >= 48, Code =< 57. % 0-9 | |
63 | ||
64 | is_alpha(Code) :- Code >= 65, Code =< 90. % A-Z | |
65 | is_alpha(Code) :- Code >= 97, Code =< 122. % a-z | |
66 | ||
67 | process_line(env(I,Stream,Open,Opts),ResEnv) --> | |
68 | {Open = open_special_token(Kind,Close)},!, % continue processing open special token | |
69 | {(Kind = comment -> output_indent(env(I,Stream,Open,Opts)) ; true)}, | |
70 | process_special_token(Kind,Close,env(I,Stream,no_open_special_token,Opts),ResEnv). | |
71 | process_line(Env,ResEnv) --> process_line2(Env,ResEnv). | |
72 | ||
73 | ||
74 | process_line2(Env,ResEnv) --> ws, !, process_line2(Env,ResEnv). | |
75 | process_line2(Env,ResEnv) --> identifier(ID), | |
76 | {is_b_keyword(ID,section), \+ is_indent_relevant_keyword(ID,_)},!, % No indent | |
77 | {output_atom(Env,ID),process_id(ID,Env,Env1)}, | |
78 | process_line_rest(Env1,ResEnv). | |
79 | process_line2(Env,ResEnv) --> identifier(ID), | |
80 | {is_indent_relevant_keyword(ID,Delta), Delta=<0},!, | |
81 | % END, decrement indent immediately; ELSIF decrement for this line only | |
82 | {process_id(ID,Env,Env1), | |
83 | output_indent_delta(Delta,Env1), | |
84 | output_atom(Env1,ID)}, | |
85 | process_line_after_id(ID,Env1,ResEnv). | |
86 | process_line2(Env,ResEnv) --> [C], | |
87 | {is_ident_relevant_code(C,Delta), Delta =< 0},!, | |
88 | {update_indent(Env,Delta,Env1), | |
89 | output_indent_delta(Delta,Env1), output_code(Env1,C)}, | |
90 | process_line_rest(Env1,ResEnv). | |
91 | %process_line2(Env,ResEnv,T,T) :- T \= [_|_], !, write(xx(T)),ResEnv=Env. % finished | |
92 | process_line2(Env,ResEnv) --> {output_indent(Env)}, process_line_rest(Env,ResEnv). | |
93 | ||
94 | % continue processing line after an ID; optionally insert newlines | |
95 | process_line_after_id(ID,Env,Res) --> | |
96 | ({is_indent_relevant_keyword(ID,0), | |
97 | option_set(insert_new_lines_after_keywords,Env)}, | |
98 | \+ at_end_of_line | |
99 | -> process_new_line(Env,Res) | |
100 | ; process_line_rest(Env,Res) | |
101 | ). | |
102 | ||
103 | at_end_of_line --> ws,!, at_end_of_line. | |
104 | at_end_of_line --> "". | |
105 | ||
106 | ||
107 | % output indent for a indent_relevant_keyword with given Delta value | |
108 | output_indent_delta(Delta,Env1) :- Delta=0,!, % things like ELSIF, ... | |
109 | dec_indent(Env1,Env2),% move ELSIF one to the left | |
110 | output_indent(Env2). | |
111 | output_indent_delta(_,Env) :- output_indent(Env). | |
112 | ||
113 | process_line_rest(Env,Res) --> identifier(ID),!, | |
114 | {(is_indent_relevant_keyword(ID,0), | |
115 | option_set(insert_new_lines_before_keywords,Env) | |
116 | -> output_new_line(Env,Env1),output_indent_delta(0,Env1) ; Env1=Env), | |
117 | output_atom(Env1,ID), | |
118 | process_id(ID,Env1,Env2) | |
119 | }, | |
120 | process_line_after_id(ID,Env2,Res). | |
121 | process_line_rest(Env,Res) --> new_line,!, process_new_line(Env,Res). | |
122 | process_line_rest(Env,Res) --> line_comment,!, {output_codes(Env,"//")}, process_line_comment(Env,Res). | |
123 | process_line_rest(Env,Res) --> open_special_token(Kind,Open,Close),!, | |
124 | {output_codes(Env,Open)}, process_special_token(Kind,Close,Env,Res). | |
125 | process_line_rest(In,Out) --> get_regular_token(Ts), !, {output_codes(In,Ts)}, process_line_rest(In,Out). | |
126 | process_line_rest(In,Out) --> [C], {is_ident_relevant_code(C,Delta)},!, | |
127 | {output_code(In,C),update_indent(In,Delta,In2)}, process_line_rest(In2,Out). | |
128 | process_line_rest(In,Out) --> [C], !, {output_code(In,C)}, process_line_rest(In,Out). | |
129 | process_line_rest(Env,Env) --> "". | |
130 | % TODO: detect other comments, strings | |
131 | % TODO: possibly insert new-lines for sections or things like ELSIF | |
132 | ||
133 | process_new_line(Env,Env2) --> {output_new_line(Env,Env1)}, process_line2(Env1,Env2). | |
134 | ||
135 | process_line_comment(Env,Env2) --> new_line,!, process_new_line(Env,Env2). | |
136 | process_line_comment(Env,Env2) --> [X],!, {output_code(Env,X)}, process_line_comment(Env,Env2). | |
137 | process_line_comment(Env,Env) --> "". | |
138 | ||
139 | % TODO: process special encodings \", .... | |
140 | process_special_token(string,Close,Env,OutEnv) --> special_string_encoding(Env),!, | |
141 | process_special_token(string,Close,Env,OutEnv). | |
142 | process_special_token(_Kind,Close,Env,OutEnv) --> Close,!, {output_codes(Env,Close)}, | |
143 | process_line_rest(Env,OutEnv). | |
144 | process_special_token(Kind,Close,In,Out) --> [C], !, {output_code(In,C)}, process_special_token(Kind,Close,In,Out). | |
145 | process_special_token(Kind,Close,Env,OutEnv) --> "", {register_open_special_token(Env,Kind,Close,OutEnv)}. | |
146 | ||
147 | special_string_encoding(Env) --> "\\", [X], % TODO: check exactly which encodings are processed | |
148 | {output_codes(Env,"\\"),output_code(Env,X)}. | |
149 | ||
150 | ws --> " "; "\t". | |
151 | new_line --> "\n". %,{inc_line_nr}. | |
152 | line_comment --> "//". | |
153 | ||
154 | % open special token whose contents should be ignored for indentation | |
155 | open_special_token(comment,"/*","*/") --> "/*". | |
156 | open_special_token(string,"\"","\"") --> "\"". | |
157 | open_special_token(string,"'''","'''") --> "'''". | |
158 | open_special_token(string,"```","```") --> "```". | |
159 | open_special_token(identifier,"`","`") --> "`". % multi-line allowed?? | |
160 | ||
161 | % recognise some tokes, in particular those that contain indent relevant tokens like vertical bar | | |
162 | get_regular_token("|->") --> "|->". % maplet | |
163 | get_regular_token("||") --> "||". | |
164 | get_regular_token("|>>") --> "|>>". | |
165 | get_regular_token("|>") --> "|>". | |
166 | get_regular_token("<<|") --> "<<|". | |
167 | get_regular_token("<|") --> "<|". | |
168 | get_regular_token("/|\\") --> "/|\\". | |
169 | get_regular_token("\\|/") --> "\\|/". | |
170 | get_regular_token("..") --> "..". | |
171 | % below not really necessary? | |
172 | get_regular_token("\\/") --> "\\/". | |
173 | get_regular_token("/\\") --> "/\\". | |
174 | ||
175 | % use a fact to keep track of line numbers | |
176 | :- dynamic cur_line/1. | |
177 | reset_line_nr :- retractall(cur_line(_)), assertz(cur_line(1)). | |
178 | inc_line_nr :- cur_line(N), !, retractall(cur_line(_)), N1 is N+1, assertz(cur_line(N1)). | |
179 | inc_line_nr. | |
180 | ||
181 | ||
182 | init_env(OutStream,Opts,env(0,OutStream,no_open_special_token,Opts)). | |
183 | ||
184 | option_set(Option,env(_,_,_,Opts)) :- member(Option,Opts). | |
185 | ||
186 | register_open_special_token(env(C,Stream,_,Opts),Kind,CloseTag, | |
187 | env(C,Stream,open_special_token(Kind,CloseTag),Opts)). | |
188 | ||
189 | % output preds | |
190 | output_indent(env(Nr,Str,K,Opts)) :- Nr>0, !, write(Str,' '), N1 is Nr-1, output_indent(env(N1,Str,K,Opts)). | |
191 | output_indent(_). | |
192 | ||
193 | output_new_line(env(E,Stream,K,Opts),env(E,Stream,K,Opts)) :- % format('//[~w]',[E]), | |
194 | nl(Stream). | |
195 | ||
196 | output_code(env(_,Stream,_,_),C) :- put_code(Stream,C). | |
197 | output_codes(env(_,Stream,_,_),Cs) :- format(Stream,'~s',[Cs]). | |
198 | output_atom(env(_,Stream,_,_),C) :- format(Stream,'~w',[C]). | |
199 | ||
200 | ||
201 | :- use_module(probsrc(tools_matching),[is_b_keyword/2]). | |
202 | ||
203 | process_id(ID,env(C,S,K,Opts),env(NC,S,K,Opts)) :- is_indent_relevant_keyword(ID,Delta),!, | |
204 | get_indent_spaces(Opts,Nr), NC is C+Nr*Delta. % format('~w->~w ',[C,NC]). | |
205 | process_id(ID,env(_,S,K,Opts),env(NC,S,K,Opts)) :- is_b_keyword(ID,section),!, | |
206 | get_indent_spaces(Opts,NC). % reset to beginning of line | |
207 | process_id(_ID,C,C). % :- format('?~w?',[_ID]). | |
208 | ||
209 | get_indent_spaces(Opts,Nr) :- (member(indent_spaces/N,Opts) -> Nr=N ; Nr=4). | |
210 | ||
211 | % dec indent for one line for ELSIF, ... | |
212 | dec_indent(env(I,Stream,K,Opts),env(I1,Stream,K,Opts)) :- | |
213 | (I>0 -> get_indent_spaces(Opts,Nr), | |
214 | (member(half_indent_elsif,Opts) -> I1 is I-(Nr//2) % ELSIF is slightly indented | |
215 | ; I1 is I-Nr) | |
216 | ; I1=I). | |
217 | ||
218 | update_indent(env(I,Stream,K,Opts),Delta,env(I1,Stream,K,Opts)) :- I1 is I+4*Delta. | |
219 | ||
220 | ||
221 | is_ident_relevant_code(0'{,1). | |
222 | is_ident_relevant_code(0'},-1). | |
223 | is_ident_relevant_code(0'(,1). | |
224 | is_ident_relevant_code(0'),-1). | |
225 | is_ident_relevant_code(0'[,1). | |
226 | is_ident_relevant_code(0'],-1). | |
227 | is_ident_relevant_code(0'|,0). | |
228 | % TODO: should we treat •, . in quantifiers also like this? | |
229 | % TODO: also treat =>, &, or, ... in a similar fashion? | |
230 | ||
231 | is_indent_relevant_keyword('ANY',1). | |
232 | is_indent_relevant_keyword('ASSERT',1). | |
233 | is_indent_relevant_keyword('BEGIN',1). | |
234 | is_indent_relevant_keyword('CASE',1). | |
235 | is_indent_relevant_keyword('CHOICE',1). | |
236 | is_indent_relevant_keyword('IF',1). | |
237 | is_indent_relevant_keyword('LET',1). | |
238 | is_indent_relevant_keyword('PRE',1). | |
239 | is_indent_relevant_keyword('SELECT',1). | |
240 | is_indent_relevant_keyword('VAR',1). | |
241 | is_indent_relevant_keyword('WHILE',1). | |
242 | % SIGMA, PI : accompanied by parentheses which will force indent | |
243 | ||
244 | % TODO: support rules_dsl | |
245 | % is_indent_relevant_keyword('FOR',1). | |
246 | ||
247 | is_indent_relevant_keyword('END',-1). | |
248 | ||
249 | ||
250 | is_indent_relevant_keyword('BE',0). | |
251 | is_indent_relevant_keyword('DO',0). | |
252 | is_indent_relevant_keyword('EITHER',0). | |
253 | is_indent_relevant_keyword('ELSE',0). | |
254 | is_indent_relevant_keyword('ELSIF',0). | |
255 | is_indent_relevant_keyword('IN',0). | |
256 | is_indent_relevant_keyword('OF',0). | |
257 | is_indent_relevant_keyword('OR',0). | |
258 | is_indent_relevant_keyword('THEN',0). | |
259 | is_indent_relevant_keyword('WHEN',0). | |
260 | is_indent_relevant_keyword('WHERE',0). | |
261 | is_indent_relevant_keyword('WITH',0). | |
262 | is_indent_relevant_keyword('WITNESS',0). | |
263 | ||
264 | ||
265 |