% (c) 2025-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
% Heinrich Heine Universitaet Duesseldorf
% This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html)

:- module(source_indenter,[indent_b_file/2, indent_b_file/3,
                           indent_b_file_to_codes/2, indent_b_file_to_codes/3,
                           reformat_b_file_to_codes/2]).


:- use_module(probsrc(module_information),[module_info/2]).
:- module_info(group,misc).
:- module_info(description,'This module can indent B source files using a lexing approach.').

% The idea is to keep track of the keywords that are relevant to increasing/decreasing the indentation level.
% This only requires correct lexing of these keywords; no parsing is needed.
% The indenter can thus deal with comments anywhere in the source code and also with files that cannot be parsed.
% However, it currently does not check that opening/closing tags match.
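% Illustrative sketch (assuming the default of 4 spaces per level; not meant as byte-exact output):
% an input fragment such as
%    IF x>0 THEN
%    y := 1
%    END
% is reindented so that the body between IF and its matching END gains one indentation level:
%    IF x>0 THEN
%        y := 1
%    END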

% Options (see the example calls below):
% - indent_spaces/Nr: number of spaces per indentation level
% - insert_new_lines_before_keywords: whether to automatically insert newlines before keywords like ELSIF, ...
% - insert_new_lines_after_keywords: whether to automatically insert newlines after keywords like ELSIF, ...
% - whether to add the current indent level inside comments or not
% TODO: parse special unicode identifiers, ... (ideally we would conform exactly to BLexer.java)
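% Example calls (illustrative sketch; 'Lift.mch' is a placeholder file name):
% ?- indent_b_file_to_codes('Lift.mch', Codes).                    % default: 4 spaces per level
% ?- indent_b_file_to_codes('Lift.mch', Codes, [indent_spaces/2]). % 2 spaces per level
% ?- reformat_b_file_to_codes('Lift.mch', Codes).                  % additionally insert newlines around keywords
% ?- indent_b_file('Lift.mch', user_output, [half_indent_elsif]).  % write the reindented source to a stream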

:- use_module(library(codesio), [with_output_to_codes/4]).
:- use_module(library(lists), [member/2]). % member/2 is used by option_set/2 and get_indent_spaces/2

reformat_b_file_to_codes(File,Codes) :-
    indent_b_file_to_codes(File,Codes,[insert_new_lines_before_keywords,insert_new_lines_after_keywords]).
indent_b_file_to_codes(File,Codes) :-
    indent_b_file_to_codes(File,Codes,[]).
indent_b_file_to_codes(File,Codes,Opts) :-
    with_output_to_codes(
        indent_b_file(File,OutStream,Opts),
        OutStream,
        Codes, []).

indent_b_file(File,OutStream) :-
    indent_b_file(File,OutStream,
                  [] %
                  % [insert_new_lines_after_keywords, insert_new_lines_before_keywords, half_indent_elsif, indent_spaces/8]
                  ).

indent_b_file(File,OutStream,Opts) :-
    open(File,read,Stream,[encoding(utf8)]),
    init_env(OutStream,Opts,Env),
    call_cleanup(process_file(Stream,Env), close(Stream)).

process_file(Stream,Env) :-
    read_line(Stream,Codes),
    (Codes = end_of_file -> true
    ; % format('>~s[~w]~n',[Codes,Env]),
      (process_line(Env,Env1,Codes,[]) -> true
      ; write(error),nl, trace, process_line(Env,Env1,Codes,[]),
        Env2=Env),
      output_new_line(Env1,Env2),
      process_file(Stream,Env2)
    ).


identifier(ID) --> [X], {is_alpha(X)}, id_aux(T), {atom_codes(ID,[X|T])}.

id_aux([0'_|T]) --> "_",!, id_aux(T).
id_aux([X|T]) --> [X], {is_numerical(X)},!, id_aux(T).
id_aux([X|T]) --> [X], {is_alpha(X)},!, id_aux(T).
id_aux([]) --> "".
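% Example query for the identifier//1 grammar rule (illustrative):
% ?- phrase(identifier(ID), "ABC_12 rest", Rest).
%    binds ID to the atom 'ABC_12' and leaves Rest as the codes of " rest";
%    note that an identifier must start with a letter (is_alpha/1) here.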


is_numerical(Code) :- Code >= 48, Code =< 57. % 0-9

is_alpha(Code) :- Code >= 65, Code =< 90.  % A-Z
is_alpha(Code) :- Code >= 97, Code =< 122. % a-z

process_line(env(I,Stream,Open,Opts),ResEnv) -->
    {Open = open_special_token(Kind,Close)},!, % continue processing open special token
    {(Kind = comment -> output_indent(env(I,Stream,Open,Opts)) ; true)},
    process_special_token(Kind,Close,env(I,Stream,no_open_special_token,Opts),ResEnv).
process_line(Env,ResEnv) --> process_line2(Env,ResEnv).


process_line2(Env,ResEnv) --> ws, !, process_line2(Env,ResEnv). % ignore any leading ws on a line
process_line2(Env,ResEnv) --> identifier(ID),
    {is_b_keyword(ID,section), \+ is_indent_relevant_keyword(ID,_)},!, % No indent
    {output_atom(Env,ID),process_id(ID,Env,Env1)},
    process_line_rest(Env1,ResEnv).
process_line2(Env,ResEnv) --> identifier(ID),
    {is_indent_relevant_keyword(ID,Delta), Delta=<0},!,
    % END, decrement indent immediately; ELSIF decrement for this line only
    {process_id(ID,Env,Env1),
     output_indent_delta(Delta,Env1),
     output_atom(Env1,ID)},
    process_line_after_id(ID,Env1,ResEnv).
process_line2(Env,ResEnv) --> [C],
    {is_ident_relevant_code(C,Delta), Delta =< 0},!, % closing brackets (or the vertical bar, with Delta=0)
    {update_indent(Env,Delta,Env1),
     output_indent_delta(Delta,Env1), output_code(Env1,C)},
    process_line_rest(Env1,ResEnv).
%process_line2(Env,ResEnv,T,T) :- T \= [_|_], !, write(xx(T)),ResEnv=Env. % finished
process_line2(Env,ResEnv) --> {output_indent(Env)}, process_line_rest(Env,ResEnv).

% continue processing line after an ID; optionally insert newlines
process_line_after_id(ID,Env,Res) -->
    ({is_indent_relevant_keyword(ID,0),
      option_set(insert_new_lines_after_keywords,Env)},
     \+ at_end_of_line
     -> process_new_line(Env,Res)
     ;  process_line_rest(Env,Res)
    ).

at_end_of_line --> ws,!, at_end_of_line.
at_end_of_line([],[]). % succeed only when the input is exhausted, so that \+ at_end_of_line can detect remaining content


% output the indent for an indent_relevant_keyword with the given Delta value
output_indent_delta(Delta,Env1) :- Delta=0,!, % things like ELSIF, ...
    dec_indent(Env1,Env2), % move ELSIF one to the left
    output_indent(Env2).
output_indent_delta(_,Env) :- output_indent(Env).

process_line_rest(Env,Res) --> identifier(ID),!,
    {(is_indent_relevant_keyword(ID,0),
      option_set(insert_new_lines_before_keywords,Env)
      -> output_new_line(Env,Env1),output_indent_delta(0,Env1) ; Env1=Env),
     output_atom(Env1,ID),
     process_id(ID,Env1,Env2)
    },
    process_line_after_id(ID,Env2,Res).
process_line_rest(Env,Res) --> new_line,!, process_new_line(Env,Res).
process_line_rest(Env,Res) --> line_comment,!, {output_codes(Env,"//")}, process_line_comment(Env,Res).
process_line_rest(Env,Res) --> open_special_token(Kind,Open,Close),!,
    {output_codes(Env,Open)}, process_special_token(Kind,Close,Env,Res).
process_line_rest(In,Out) --> get_regular_token(Ts), !, {output_codes(In,Ts)}, process_line_rest(In,Out).
process_line_rest(In,Out) --> [C], {is_ident_relevant_code(C,Delta)},!,
    {output_code(In,C),update_indent(In,Delta,In2)}, process_line_rest(In2,Out).
process_line_rest(In,Out) --> [C], !, {output_code(In,C)}, process_line_rest(In,Out).
process_line_rest(Env,Env) --> "".
% TODO: detect other comments, strings
% TODO: possibly insert new-lines for sections or things like ELSIF

process_new_line(Env,Env2) --> {output_new_line(Env,Env1)}, process_line2(Env1,Env2).

process_line_comment(Env,Env2) --> new_line,!, process_new_line(Env,Env2).
process_line_comment(Env,Env2) --> [X],!, {output_code(Env,X)}, process_line_comment(Env,Env2).
process_line_comment(Env,Env) --> "".

% TODO: process special encodings \", ....
process_special_token(string,Close,Env,OutEnv) --> special_string_encoding(Env),!,
    process_special_token(string,Close,Env,OutEnv).
process_special_token(_Kind,Close,Env,OutEnv) --> Close,!, {output_codes(Env,Close)},
    process_line_rest(Env,OutEnv).
process_special_token(Kind,Close,In,Out) --> [C], !, {output_code(In,C)}, process_special_token(Kind,Close,In,Out).
process_special_token(Kind,Close,Env,OutEnv) --> "", {register_open_special_token(Env,Kind,Close,OutEnv)}.

special_string_encoding(Env) --> "\\", [X], % TODO: check exactly which encodings are processed
    {output_codes(Env,"\\"),output_code(Env,X)}.

ws --> " "; "\t".
new_line --> "\n". %,{inc_line_nr}.
line_comment --> "//".

% open special token whose contents should be ignored for indentation
open_special_token(comment,"/*","*/") --> "/*".
open_special_token(string,"\"","\"") --> "\"".
open_special_token(string,"'''","'''") --> "'''".
open_special_token(string,"```","```") --> "```".
open_special_token(identifier,"`","`") --> "`". % multi-line allowed??
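% Example query for open_special_token//3 (illustrative):
% ?- phrase(open_special_token(Kind,Open,Close), "/* note */ x", Rest).
%    binds Kind = comment, Open and Close to the codes of "/*" and "*/",
%    and leaves Rest as the codes of " note */ x".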

% recognise some tokens, in particular those that contain indent-relevant characters like the vertical bar |
get_regular_token("|->") --> "|->". % maplet
get_regular_token("||") --> "||".
get_regular_token("|>>") --> "|>>".
get_regular_token("|>") --> "|>".
get_regular_token("<<|") --> "<<|".
get_regular_token("<|") --> "<|".
get_regular_token("/|\\") --> "/|\\".
get_regular_token("\\|/") --> "\\|/".
get_regular_token("..") --> "..".
% below not really necessary?
get_regular_token("\\/") --> "\\/".
get_regular_token("/\\") --> "/\\".
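% Example query for get_regular_token//1 (illustrative):
% ?- phrase(get_regular_token(T), "|->y", Rest).
%    binds T to the codes of "|->" and Rest to the codes of "y",
%    so the maplet symbol is consumed as a whole before the character-level rules are tried.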

% use a fact to keep track of line numbers
:- dynamic cur_line/1.
reset_line_nr :- retractall(cur_line(_)), assertz(cur_line(1)).
inc_line_nr :- cur_line(N), !, retractall(cur_line(_)), N1 is N+1, assertz(cur_line(N1)).
inc_line_nr.


init_env(OutStream,Opts,env(0,OutStream,no_open_special_token,Opts)).

option_set(Option,env(_,_,_,Opts)) :- member(Option,Opts).

register_open_special_token(env(C,Stream,_,Opts),Kind,CloseTag,
                            env(C,Stream,open_special_token(Kind,CloseTag),Opts)).

% output preds
output_indent(env(Nr,Str,K,Opts)) :- Nr>0, !, write(Str,' '), N1 is Nr-1, output_indent(env(N1,Str,K,Opts)).
output_indent(_).

:- dynamic linecount/1.
inc_linecount(X) :- retract(linecount(X)), !, X1 is X+1, assert(linecount(X1)).
inc_linecount(0) :- assert(linecount(1)).

output_new_line(env(E,Stream,K,Opts),env(E,Stream,K,Opts)) :-
    %inc_linecount(LC), format('//[~w - line ~w]',[E,LC]), % for debugging
    nl(Stream).

output_code(env(_,Stream,_,_),C) :- put_code(Stream,C).
output_codes(env(_,Stream,_,_),Cs) :- format(Stream,'~s',[Cs]).
output_atom(env(_,Stream,_,_),C) :- format(Stream,'~w',[C]).


:- use_module(probsrc(tools_matching),[is_b_keyword/2]).

process_id(ID,env(C,S,K,Opts),env(NC,S,K,Opts)) :- is_indent_relevant_keyword(ID,Delta),!,
    get_indent_spaces(Opts,Nr), NC is C+Nr*Delta. % format('~w->~w ',[C,NC]).
process_id(ID,env(_,S,K,Opts),env(NC,S,K,Opts)) :- is_b_keyword(ID,section),!,
    get_indent_spaces(Opts,NC). % reset to beginning of line
process_id(_ID,C,C). % :- format('?~w?',[_ID]).

get_indent_spaces(Opts,Nr) :- (member(indent_spaces/N,Opts) -> Nr=N ; Nr=4).
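% Example queries for get_indent_spaces/2 (illustrative):
% ?- get_indent_spaces([indent_spaces/2],N).   % N = 2
% ?- get_indent_spaces([],N).                  % N = 4 (default)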

% decrease the indent for one line only (for ELSIF, ...)
dec_indent(env(I,Stream,K,Opts),env(I1,Stream,K,Opts)) :-
    (I>0 -> get_indent_spaces(Opts,Nr),
            (member(half_indent_elsif,Opts) -> I1 is I-(Nr//2) % ELSIF is slightly indented
            ; I1 is I-Nr)
    ; I1=I).
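% Example queries for dec_indent/2 (illustrative; user_output is just a placeholder stream):
% ?- dec_indent(env(8,user_output,no_open_special_token,[]),E).
%    E = env(4,user_output,no_open_special_token,[])                  (full outdent by the default 4 spaces)
% ?- dec_indent(env(8,user_output,no_open_special_token,[half_indent_elsif]),E).
%    E = env(6,user_output,no_open_special_token,[half_indent_elsif]) (ELSIF etc. only half-outdented)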

% note: bracket-based indentation currently uses a fixed width of 4, independent of the indent_spaces option
update_indent(env(I,Stream,K,Opts),Delta,env(I1,Stream,K,Opts)) :- I1 is I+4*Delta.


is_ident_relevant_code(0'{,1).
is_ident_relevant_code(0'},-1).
is_ident_relevant_code(0'(,1).
is_ident_relevant_code(0'),-1).
is_ident_relevant_code(0'[,1).
is_ident_relevant_code(0'],-1).
is_ident_relevant_code(0'|,0).
% TODO: should we treat •, . in quantifiers also like this?
% TODO: also treat =>, &, or, ... in a similar fashion?

is_indent_relevant_keyword('ANY',1).
is_indent_relevant_keyword('ASSERT',1).
is_indent_relevant_keyword('BEGIN',1).
is_indent_relevant_keyword('CASE',1).
is_indent_relevant_keyword('CHOICE',1).
is_indent_relevant_keyword('IF',1).
is_indent_relevant_keyword('LET',1).
is_indent_relevant_keyword('PRE',1).
is_indent_relevant_keyword('SELECT',1).
is_indent_relevant_keyword('VAR',1).
is_indent_relevant_keyword('WHILE',1).
% SIGMA, PI : accompanied by parentheses which will force indent

% TODO: support rules_dsl
% is_indent_relevant_keyword('FOR',1).

is_indent_relevant_keyword('END',-1).


is_indent_relevant_keyword('BE',0).
is_indent_relevant_keyword('DO',0).
is_indent_relevant_keyword('EITHER',0).
is_indent_relevant_keyword('ELSE',0).
is_indent_relevant_keyword('ELSIF',0).
is_indent_relevant_keyword('IN',0).
is_indent_relevant_keyword('OF',0).
is_indent_relevant_keyword('OR',0).
is_indent_relevant_keyword('THEN',0).
is_indent_relevant_keyword('WHEN',0).
is_indent_relevant_keyword('WHERE',0).
is_indent_relevant_keyword('WITH',0).
is_indent_relevant_keyword('WITNESS',0).
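% Example queries against the keyword table (illustrative):
% ?- is_indent_relevant_keyword('IF',D).     % D = 1:  the indent increases after the IF line
% ?- is_indent_relevant_keyword('ELSIF',D).  % D = 0:  the indent is unchanged, but the ELSIF line itself
%                                            %         is printed one level to the left (see output_indent_delta/2)
% ?- is_indent_relevant_keyword('END',D).    % D = -1: the indent decreases before END is printed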