1 | % (c) 2019-2022 Universitaet Duesseldorf | |
2 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) | |
3 | ||
4 | :- module(operation_data_generator, [generate_operation_data_from_machine_path/4, | |
5 | generate_data_from_machine_operation/6]). | |
6 | ||
7 | :- use_module(library(sets), [subtract/3]). | |
8 | :- use_module(library(random), [random/3]). | |
9 | :- use_module(library(lists), [append/2,maplist/3,remove_dups/2,select/3]). | |
10 | ||
11 | :- use_module(probsrc(bmachine)). | |
12 | :- use_module(probsrc(solver_interface), [solve_predicate/5]). | |
13 | :- use_module(probsrc(translate), [translate_bexpression/2]). | |
14 | :- use_module(probsrc(preferences), [set_preference/2]). | |
15 | :- use_module(probsrc(tools), [flatten/2]). | |
16 | :- use_module(probsrc(module_information), [module_info/2]). | |
17 | :- use_module(extrasrc(before_after_predicates), [before_after_predicate_for_operation/2]). | |
18 | :- use_module(probsrc(bsyntaxtree), [conjunct_predicates/2,add_texpr_infos/3]). | |
19 | ||
20 | :- use_module(synthesis('deep_learning/ground_truth')). | |
21 | :- use_module(synthesis('deep_learning/b_machine_identifier_normalization')). | |
22 | :- use_module(synthesis(synthesis_util), [get_precondition_from_operation_body/2, | |
23 | b_get_typed_invariant_from_machine/1, | |
24 | get_valid_inputs_for_operation/3, | |
25 | create_equality_nodes_from_example/2, | |
26 | get_output_nodes_from_bindings/2, | |
27 | reduce_input_to_violating_vars/3]). | |
28 | ||
29 | :- module_info(group, synthesis). | |
30 | :- module_info(description, 'This module generates data to train a neural network that predicts the library components that are necessary to synthesize a B operation\'s substitution for given I/O examples.'). | |
31 | ||
32 | min_amount_of_examples(3). | |
33 | max_amount_of_examples(16). | |
34 | ||
35 | load_b_or_eventb_machine(AbsoluteFilePath) :- | |
36 | is_classical_b_machine_path(AbsoluteFilePath), | |
37 | !, | |
38 | b_load_machine_from_file(AbsoluteFilePath), | |
39 | b_machine_precompile. | |
40 | load_b_or_eventb_machine(AbsoluteFilePath) :- | |
41 | b_load_eventb_project(AbsoluteFilePath), | |
42 | b_machine_precompile. | |
43 | ||
44 | is_classical_b_machine_path(Path) :- | |
45 | atom(Path), | |
46 | atom_concat(_, Ext, Path), | |
47 | Ext == '.mch'. | |
48 | ||
49 | load_machine_for_data_generation(AbsoluteFilePath) :- | |
50 | b_machine_reset, | |
51 | set_preference(use_solver_on_load, prob), | |
52 | load_b_or_eventb_machine(AbsoluteFilePath), | |
53 | \+ current_machine_uses_records, | |
54 | b_get_machine_variables(MachineVars), | |
55 | MachineVars \== []. | |
56 | ||
57 | get_all_operation_bodies_from_current_machine(NOperationBodies) :- | |
58 | findall((OperationName,OpReturnVars,OperationBody), b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody), OperationBodies), | |
59 | skip_operations_using_records(OperationBodies, [], NOperationBodies). | |
60 | ||
61 | % We do not need records and we do not synthesize them by now. | |
62 | skip_operations_using_records([], Acc, Acc). | |
63 | skip_operations_using_records([OperationBody|T], Acc, NOperationBodies) :- | |
64 | \+ contains_record_type(OperationBody), | |
65 | !, | |
66 | skip_operations_using_records(T, [OperationBody|Acc], NOperationBodies). | |
67 | skip_operations_using_records([_|T], Acc, NOperationBodies) :- | |
68 | skip_operations_using_records(T, Acc, NOperationBodies). | |
69 | ||
70 | get_number_random_list(0, Acc, Acc) :- | |
71 | !. | |
72 | get_number_random_list(C, Acc, L) :- | |
73 | C1 is C-1, | |
74 | get_random_amount_of_examples(R), | |
75 | \+ member(R, Acc), | |
76 | !, | |
77 | get_number_random_list(C1, [R|Acc], L). | |
78 | get_number_random_list(C, Acc, L) :- | |
79 | get_number_random_list(C, Acc, L). | |
80 | ||
81 | b_get_machine_operation_any_parameters(OperationName, Ids) :- | |
82 | b_get_machine_operation(OperationName, _, _, Body), | |
83 | b_get_machine_operation_any_parameters_aux([], Body, Ids), | |
84 | !. | |
85 | b_get_machine_operation_any_parameters(_, []). | |
86 | ||
87 | b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :- | |
88 | Node =.. [Functor,AnyIds|Args], | |
89 | ( Functor = any | |
90 | ; Functor = let | |
91 | ), | |
92 | !, | |
93 | maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds), | |
94 | flatten(TArgIds, ArgIds), | |
95 | append([Acc,ArgIds,AnyIds], Ids). | |
96 | b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :- | |
97 | Node =.. [_|Args], | |
98 | maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds), | |
99 | flatten(TArgIds, ArgIds), | |
100 | append(Acc, ArgIds, Ids). | |
101 | b_get_machine_operation_any_parameters_aux(Acc, _, Acc). | |
102 | ||
103 | %%% | |
104 | generate_operation_data_from_machine_path(AbsoluteFilePath, Augmentations, SolverTimeoutMs, OperationData) :- | |
105 | retractall(tools:id_counter(_)), | |
106 | load_machine_for_data_generation(AbsoluteFilePath), | |
107 | get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames), | |
108 | get_all_operation_bodies_from_current_machine(OperationBodies), | |
109 | b_machine_name(MachineName), | |
110 | Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames], | |
111 | set_preference(time_out, SolverTimeoutMs), | |
112 | generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, OperationBodies, _NewEnv, [], OperationData). | |
113 | ||
114 | %% generate_data_from_machine_operation(+OperationName, +Augmentations, +SolverTimeOut, +AbsoluteFilePath, +MachineName, -OperationData). | |
115 | % Assuming that the corresponding machine is loaded. | |
116 | generate_data_from_machine_operation(OperationName, Augmentations, SolverTimeoutMs, AbsoluteFilePath, MachineName, OperationData) :- | |
117 | retractall(tools:id_counter(_)), | |
118 | load_b_machine_if_unloaded(AbsoluteFilePath), | |
119 | set_preference(time_out, SolverTimeoutMs), | |
120 | b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody), | |
121 | get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames), | |
122 | Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames], | |
123 | generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)], _, [], OperationData). | |
124 | ||
125 | generate_data_from_operation_substitutions(_, _, _, [], _, Acc, Acc) :- | |
126 | !. | |
127 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)|T], NewEnv, Acc, OperationData) :- | |
128 | format('.', []), | |
129 | b_get_machine_operation_typed_parameters(OperationName, Parameters), | |
130 | b_get_machine_operation_any_parameters(OperationName, AnyParameters), | |
131 | append(Parameters, AnyParameters, TNParameters), | |
132 | findall(NP, ( member(P, TNParameters), | |
133 | add_texpr_infos(P, [parameter], NP) | |
134 | ), NParameters), | |
135 | get_ground_truth_from_operation_body(OpReturnVars, OperationBody, NParameters, GroundTruth), | |
136 | !, | |
137 | get_number_random_list(AR, [], RandomA), | |
138 | length(NParameters, ParameterAmount), | |
139 | select((global_ground_truth_params,TGlobalGt), GroundTruth, RestGt), | |
140 | extend_ground_truth(parameter, ParameterAmount, TGlobalGt, GlobalGt), | |
141 | enumerate_ids([], 'P', NParameters, EParameters), | |
142 | Env = [NormalizedStrings,NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames], | |
143 | append(EParameters, NormalizedIds, NNormalizedIds), | |
144 | Env1 = [NormalizedStrings,NormalizedSets,NNormalizedIds,NOperationNames,NRecordFieldNames], | |
145 | normalize_ids_in_ground_truth(NormalizedIds, [(global_ground_truth_params,GlobalGt)|RestGt], NGroundTruth), | |
146 | generate_data_from_operation_substitutions_aux(Env1, 0, RandomA, AbsoluteFilePath-MachineName, OperationName, Env2, [], Data), | |
147 | generate_data_from_operation_substitutions(Env2, AR, AbsoluteFilePath-MachineName, T, NewEnv, [(Data,OperationName,NGroundTruth)|Acc], OperationData), | |
148 | !. | |
149 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(_,_,_)|T], NewEnv, Acc, OperationData) :- | |
150 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, T, NewEnv, Acc, OperationData), | |
151 | !. | |
152 | ||
153 | load_b_machine_if_unloaded(MachinePath) :- | |
154 | bmachine_is_precompiled, | |
155 | b_get_main_filename(LoadedMachinePath), | |
156 | atom_concat(_, MachinePath, LoadedMachinePath), | |
157 | !. | |
158 | load_b_machine_if_unloaded(MachinePath) :- | |
159 | b_load_machine_from_file(MachinePath), | |
160 | b_machine_precompile. | |
161 | ||
162 | % Generate several records for each operation using a varying amount of examples in order to obtain more data. | |
163 | generate_data_from_operation_substitutions_aux(Env, _, [], _, _, Env, Acc, Acc) :- | |
164 | !. | |
165 | generate_data_from_operation_substitutions_aux(Env, _, [AmountOfExamples|AT], AbsoluteFilePath-MachineName, OperationName, NewEnv, Acc, Data) :- | |
166 | get_valid_inputs_for_operation(OperationName, AmountOfExamples, InputExamples), | |
167 | remove_dups(InputExamples, InputExamplesSet), | |
168 | InputExamplesSet \== [], | |
169 | InputExamplesSet \== [[]], | |
170 | get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, Env1), | |
171 | !, | |
172 | length(InputOutputTuples, Size), | |
173 | min_amount_of_examples(MinExamples), | |
174 | ( ( InputOutputTuples \== [], | |
175 | Size >= MinExamples | |
176 | ) | |
177 | -> | |
178 | AtLeastOneSolution = 1, | |
179 | NewAcc = [InputOutputTuples|Acc] | |
180 | ; % skip this operation if no output examples could be generated or no ground truth can be inferred | |
181 | AtLeastOneSolution = 0, | |
182 | NewAcc = Acc | |
183 | ), | |
184 | generate_data_from_operation_substitutions_aux(Env1, AtLeastOneSolution, AT, AbsoluteFilePath-MachineName, OperationName, NewEnv, NewAcc, Data). | |
185 | % no valid inputs can be generated | |
186 | generate_data_from_operation_substitutions_aux(Env, AtLeastOneSolution, _, AbsoluteFilePath-_, OperationName, Env, Acc, Data) :- | |
187 | AtLeastOneSolution == 0, | |
188 | !, | |
189 | format('~n~nWarning: No valid inputs could be generated for operation ~w in machine ~w.~n~n', [OperationName,AbsoluteFilePath]), | |
190 | Data = Acc. | |
191 | % at least one record could be generated but by excluding prior solutions we might not be able to generate as much records as initially planed | |
192 | generate_data_from_operation_substitutions_aux(Env, _, _, _, _, Env, Acc, Acc). | |
193 | %%% | |
194 | normalize_ids_in_ground_truth(_, [], []). | |
195 | normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(NVarName,GroundTruth)|NT]) :- | |
196 | member((b(identifier(VarName),_,_),b(identifier(NVarName),_,_)), NormalizedIds), | |
197 | !, | |
198 | normalize_ids_in_ground_truth(NormalizedIds, T, NT). | |
199 | normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(VarName,GroundTruth)|NT]) :- | |
200 | !, | |
201 | normalize_ids_in_ground_truth(NormalizedIds, T, NT). | |
202 | normalize_ids_in_ground_truth(_, [H|_], _) :- | |
203 | format("~nError: Ground truth accumulator ~w is not properly wrapped. Abort.~n~n", [H]), | |
204 | !, | |
205 | fail. | |
206 | ||
207 | get_random_amount_of_examples(AmountOfExamples) :- | |
208 | min_amount_of_examples(MinAmountOfExamples), | |
209 | max_amount_of_examples(TempMaxAmountOfExamples), | |
210 | MaxAmountOfExamples is TempMaxAmountOfExamples+1, | |
211 | random(MinAmountOfExamples, MaxAmountOfExamples, AmountOfExamples), | |
212 | !. | |
213 | ||
214 | create_example_triple(Env, Ast, (NVarName,NType,Value)) :- | |
215 | % ( | |
216 | Env = [_,NormalizedSets,_,_,_], | |
217 | b_machine_identifier_normalization:normalize_ids_in_b_ast(Env, Ast, NAst, _), % TODO update env? | |
218 | % ; trace , normalize_ids_in_b_ast(NormalizedSets,NormalizedIds,Ast,NAst)) , | |
219 | NAst = b(_,Type,NInfo), | |
220 | normalize_id_type(NormalizedSets, Type, NType), | |
221 | member(synthesis(machinevar,NVarName), NInfo), | |
222 | translate_bexpression(NAst, Value). | |
223 | ||
224 | get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, NewEnv) :- | |
225 | b_get_machine_operation(OperationName, _, _, Substitution), | |
226 | get_precondition_from_operation_body(Substitution, Precondition), | |
227 | b_get_typed_invariant_from_machine(Invariant), | |
228 | conjunct_predicates([Invariant,Precondition], InvariantAndPrecondition), | |
229 | get_outputs_for_input_examples(Env, InvariantAndPrecondition, InputExamples, OperationName, [], InputOutputTuples, NewEnv). | |
230 | ||
231 | % Compute the corresponding output for each input example and return a list of I/O tuples. | |
232 | get_outputs_for_input_examples(Env, _, [], _, Acc, Acc, Env). | |
233 | get_outputs_for_input_examples(Env, InvariantAndPrecondition, [InputExampleAst|T], OperationName, Acc, InputOutputTuples, NewEnv) :- | |
234 | create_equality_nodes_from_example(InputExampleAst, EqualityNodes), | |
235 | conjunct_predicates(EqualityNodes, EqualityPredicate), | |
236 | before_after_predicate_for_operation(OperationName, BeforeAfterPredicate), | |
237 | conjunct_predicates([InvariantAndPrecondition,EqualityPredicate,BeforeAfterPredicate], BeforeAfterPredicateWithEqualities), | |
238 | solve_predicate(BeforeAfterPredicateWithEqualities, _, 1, [force_evaluation], Solution), | |
239 | Solution = solution(SolutionBindings), | |
240 | get_output_nodes_from_bindings(SolutionBindings, OutputExampleAst), | |
241 | \+ contains_symbolic_value(OutputExampleAst), | |
242 | maplist(create_example_triple(Env), InputExampleAst, InputExample), | |
243 | findall(Name, member((Name,_,_), InputExample), InputVarNames), | |
244 | b_machine_identifier_normalization:map_normalize_ids_in_b_ast(Env, OutputExampleAst, OutputExampleAst2, Env1), | |
245 | reduce_input_to_violating_vars(InputVarNames, OutputExampleAst2, OutputExampleAst3), | |
246 | findall(N, ( member(b(_,_,I), OutputExampleAst3), | |
247 | member(synthesis(machinevar,N), I) | |
248 | ), OutputVarNames), | |
249 | subtract(InputVarNames, OutputVarNames, Sub), | |
250 | ( Sub == [] | |
251 | -> | |
252 | maplist(create_example_triple(Env1), OutputExampleAst3, OutputExample), | |
253 | get_outputs_for_input_examples(Env1, InvariantAndPrecondition, T, OperationName, [(InputExample,OutputExample)|Acc], InputOutputTuples, NewEnv) | |
254 | ; format("Error: Missing Output value for at least one variable. Probably a bug in normalize_ids_in_b_ast/4.~n", []), | |
255 | InputOutputTuples = Acc, | |
256 | NewEnv = Env1 | |
257 | ). | |
258 | get_outputs_for_input_examples(Env, _, _, _, Acc, Acc, Env). | |
259 | ||
260 | contains_symbolic_value(LoE) :- | |
261 | % TO DO: extend for lambda functions? | |
262 | member(E, LoE), | |
263 | contains_symbolic_value_aux(E). | |
264 | ||
265 | contains_symbolic_value_aux(b(value(closure(_,_,b(member(_,_),_,_))),_,_)). | |
266 | contains_symbolic_value_aux(b(Node,_,_)) :- | |
267 | Node =.. [_|Args], | |
268 | member(Arg, Args), | |
269 | contains_symbolic_value_aux(Arg). |