1 | :- module(data_generator, [generate_operation_data_from_machine_path/4]). | |
2 | ||
3 | :- use_module(library(sets), [subtract/3]). | |
4 | :- use_module(library(random), [random/3]). | |
5 | :- use_module(library(lists), [append/2,maplist/3,remove_dups/2,select/3]). | |
6 | ||
7 | :- use_module(probsrc(bmachine)). | |
8 | :- use_module(probsrc(solver_interface), [solve_predicate/5]). | |
9 | :- use_module(probsrc(translate), [translate_bexpression/2]). | |
10 | :- use_module(probsrc(preferences), [set_preference/2]). | |
11 | :- use_module(probsrc(tools), [flatten/2]). | |
12 | :- use_module(probsrc(module_information), [module_info/2]). | |
13 | :- use_module(probsrc(before_after_predicates), [before_after_predicate_for_operation/2]). | |
14 | :- use_module(probsrc(bsyntaxtree), [conjunct_predicates/2,add_texpr_infos/3]). | |
15 | ||
16 | :- use_module(synthesis('deep_learning/ground_truth')). | |
17 | :- use_module(synthesis('deep_learning/b_machine_identifier_normalization')). | |
18 | :- use_module(synthesis(synthesis_util), [get_precondition_from_operation_body/2, | |
19 | b_get_typed_invariant_from_machine/1, | |
20 | get_valid_inputs_for_operation/3, | |
21 | create_equality_nodes_from_example/2, | |
22 | get_output_nodes_from_bindings/2, | |
23 | reduce_input_to_violating_vars/3]). | |
24 | ||
25 | :- module_info(group, synthesis). | |
26 | :- module_info(description, 'This module generates data to train a neural network that predicts the library components that are necessary to synthesize a B operation\'s substitution for given I/O examples.'). | |
27 | ||
28 | min_amount_of_examples(3). | |
29 | max_amount_of_examples(16). | |
30 | ||
31 | load_b_or_eventb_machine(AbsoluteFilePath) :- | |
32 | is_classical_b_machine_path(AbsoluteFilePath), | |
33 | !, | |
34 | b_load_machine_from_file(AbsoluteFilePath), | |
35 | b_machine_precompile. | |
36 | load_b_or_eventb_machine(AbsoluteFilePath) :- | |
37 | b_load_eventb_project(AbsoluteFilePath), | |
38 | b_machine_precompile. | |
39 | % TODO: set desired timeout from java | |
40 | is_classical_b_machine_path(Path) :- | |
41 | atom(Path), | |
42 | atom_concat(_, Ext, Path), | |
43 | Ext == '.mch'. | |
44 | ||
45 | load_machine_for_data_generation(AbsoluteFilePath) :- | |
46 | b_machine_reset, | |
47 | set_preference(try_kodkod_on_load, false), | |
48 | load_b_or_eventb_machine(AbsoluteFilePath), | |
49 | \+ current_machine_uses_records, | |
50 | b_get_machine_variables(MachineVars), | |
51 | MachineVars \== []. | |
52 | ||
53 | get_all_operation_bodies_from_current_machine(NOperationBodies) :- | |
54 | findall((OperationName,OpReturnVars,OperationBody), b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody), OperationBodies), | |
55 | skip_operations_using_records(OperationBodies, [], NOperationBodies). | |
56 | ||
57 | % We do not need records and we do not synthesize them by now. | |
58 | skip_operations_using_records([], Acc, Acc). | |
59 | skip_operations_using_records([OperationBody|T], Acc, NOperationBodies) :- | |
60 | \+ contains_record_type(OperationBody), | |
61 | !, | |
62 | skip_operations_using_records(T, [OperationBody|Acc], NOperationBodies). | |
63 | skip_operations_using_records([_|T], Acc, NOperationBodies) :- | |
64 | skip_operations_using_records(T, Acc, NOperationBodies). | |
65 | ||
66 | get_number_random_list(0, Acc, Acc) :- | |
67 | !. | |
68 | get_number_random_list(C, Acc, L) :- | |
69 | C1 is C-1, | |
70 | get_random_amount_of_examples(R), | |
71 | \+ member(R, Acc), | |
72 | !, | |
73 | get_number_random_list(C1, [R|Acc], L). | |
74 | get_number_random_list(C, Acc, L) :- | |
75 | get_number_random_list(C, Acc, L). | |
76 | ||
77 | b_get_machine_operation_any_parameters(OperationName, Ids) :- | |
78 | b_get_machine_operation(OperationName, _, _, Body), | |
79 | b_get_machine_operation_any_parameters_aux([], Body, Ids), | |
80 | !. | |
81 | b_get_machine_operation_any_parameters(_, []). | |
82 | ||
83 | b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :- | |
84 | Node =.. [Functor,AnyIds|Args], | |
85 | ( Functor = any | |
86 | ; Functor = let | |
87 | ), | |
88 | !, | |
89 | maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds), | |
90 | flatten(TArgIds, ArgIds), | |
91 | append([Acc,ArgIds,AnyIds], Ids). | |
92 | b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :- | |
93 | Node =.. [_|Args], | |
94 | maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds), | |
95 | flatten(TArgIds, ArgIds), | |
96 | append(Acc, ArgIds, Ids). | |
97 | b_get_machine_operation_any_parameters_aux(Acc, _, Acc). | |
98 | ||
99 | %%% | |
100 | generate_operation_data_from_machine_path(AbsoluteFilePath, Augmentations, SolverTimeoutMs, OperationData) :- | |
101 | retractall(tools:id_counter(_)), | |
102 | load_machine_for_data_generation(AbsoluteFilePath), | |
103 | get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames), | |
104 | get_all_operation_bodies_from_current_machine(OperationBodies), | |
105 | b_machine_name(MachineName), | |
106 | Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames], | |
107 | set_preference(time_out, SolverTimeoutMs), | |
108 | generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, OperationBodies, _NewEnv, [], OperationData). | |
109 | ||
110 | generate_data_from_operation_substitutions(_, _, _, [], _, Acc, Acc) :- | |
111 | !. | |
112 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)|T], NewEnv, Acc, OperationData) :- | |
113 | format('.', []), | |
114 | b_get_machine_operation_typed_parameters(OperationName, Parameters), | |
115 | b_get_machine_operation_any_parameters(OperationName, AnyParameters), | |
116 | append(Parameters, AnyParameters, TNParameters), | |
117 | findall(NP, ( member(P, TNParameters), | |
118 | add_texpr_infos(P, [parameter], NP) | |
119 | ), NParameters), | |
120 | get_ground_truth_from_operation_body(OpReturnVars, OperationBody, NParameters, GroundTruth), | |
121 | !, | |
122 | get_number_random_list(AR, [], RandomA), | |
123 | length(NParameters, ParameterAmount), | |
124 | select((global_ground_truth_params,TGlobalGt), GroundTruth, RestGt), | |
125 | extend_ground_truth(parameter, ParameterAmount, TGlobalGt, GlobalGt), | |
126 | enumerate_ids([], 'P', NParameters, EParameters), | |
127 | Env = [NormalizedStrings,NormalizedSets,NormalizedIds,NOperationNames], | |
128 | append(EParameters, NormalizedIds, NNormalizedIds), | |
129 | Env1 = [NormalizedStrings,NormalizedSets,NNormalizedIds,NOperationNames], | |
130 | normalize_ids_in_ground_truth(NormalizedIds, [(global_ground_truth_params,GlobalGt)|RestGt], NGroundTruth), | |
131 | generate_data_from_operation_substitutions_aux(Env1, 0, RandomA, AbsoluteFilePath-MachineName, OperationName, Env2, [], Data), | |
132 | generate_data_from_operation_substitutions(Env2, AR, AbsoluteFilePath-MachineName, T, NewEnv, [(Data,OperationName,NGroundTruth)|Acc], OperationData), | |
133 | !. | |
134 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(_,_,_)|T], NewEnv, Acc, OperationData) :- | |
135 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, T, NewEnv, Acc, OperationData), | |
136 | !. | |
137 | ||
138 | % Generate several records for each operation using a varying amount of examples in order to obtain more data. | |
139 | generate_data_from_operation_substitutions_aux(Env, _, [], _, _, Env, Acc, Acc) :- | |
140 | !. | |
141 | generate_data_from_operation_substitutions_aux(Env, _, [AmountOfExamples|AT], AbsoluteFilePath-MachineName, OperationName, NewEnv, Acc, Data) :- | |
142 | get_valid_inputs_for_operation(OperationName, AmountOfExamples, InputExamples), | |
143 | remove_dups(InputExamples, InputExamplesSet), | |
144 | InputExamplesSet \== [], | |
145 | InputExamplesSet \== [[]], | |
146 | get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, Env1), | |
147 | !, | |
148 | length(InputOutputTuples, Size), | |
149 | min_amount_of_examples(MinExamples), | |
150 | ( ( InputOutputTuples \== [], | |
151 | Size >= MinExamples | |
152 | ) | |
153 | -> | |
154 | AtLeastOneSolution = 1, | |
155 | NewAcc = [InputOutputTuples|Acc] | |
156 | ; % skip this operation if no output examples could be generated or no ground truth can be inferred | |
157 | AtLeastOneSolution = 0, | |
158 | NewAcc = Acc | |
159 | ), | |
160 | generate_data_from_operation_substitutions_aux(Env1, AtLeastOneSolution, AT, AbsoluteFilePath-MachineName, OperationName, NewEnv, NewAcc, Data). | |
161 | % no valid inputs can be generated | |
162 | generate_data_from_operation_substitutions_aux(Env, AtLeastOneSolution, _, AbsoluteFilePath-_, OperationName, Env, Acc, Data) :- | |
163 | AtLeastOneSolution == 0, | |
164 | !, | |
165 | format('~n~nWarning: No valid inputs could be generated for operation ~w in machine ~w.~n~n', [OperationName,AbsoluteFilePath]), | |
166 | Data = Acc. | |
167 | % at least one record could be generated but by excluding prior solutions we might not be able to generate as much records as initially planed | |
168 | generate_data_from_operation_substitutions_aux(Env, _, _, _, _, Env, Acc, Acc). | |
169 | %%% | |
170 | normalize_ids_in_ground_truth(_, [], []). | |
171 | normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(NVarName,GroundTruth)|NT]) :- | |
172 | member((b(identifier(VarName),_,_),b(identifier(NVarName),_,_)), NormalizedIds), | |
173 | !, | |
174 | normalize_ids_in_ground_truth(NormalizedIds, T, NT). | |
175 | normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(VarName,GroundTruth)|NT]) :- | |
176 | !, | |
177 | normalize_ids_in_ground_truth(NormalizedIds, T, NT). | |
178 | normalize_ids_in_ground_truth(_, [H|_], _) :- | |
179 | format("~nError: Ground truth accumulator ~w is not properly wrapped. Abort.~n~n", [H]), | |
180 | !, | |
181 | fail. | |
182 | ||
183 | get_random_amount_of_examples(AmountOfExamples) :- | |
184 | min_amount_of_examples(MinAmountOfExamples), | |
185 | max_amount_of_examples(TempMaxAmountOfExamples), | |
186 | MaxAmountOfExamples is TempMaxAmountOfExamples+1, | |
187 | random(MinAmountOfExamples, MaxAmountOfExamples, AmountOfExamples), | |
188 | !. | |
189 | ||
190 | create_example_triple(Env, Ast, (Value,NType,NVarName)) :- | |
191 | % ( | |
192 | Env = [_,NormalizedSets,_,_], | |
193 | b_machine_identifier_normalization:normalize_ids_in_b_ast(Env, Ast, NAst, _), % TODO update env? | |
194 | % ; trace , normalize_ids_in_b_ast(NormalizedSets,NormalizedIds,Ast,NAst)) , | |
195 | NAst = b(_,Type,NInfo), | |
196 | normalize_id_type(NormalizedSets, Type, NType), | |
197 | member(synthesis(machinevar,NVarName), NInfo), | |
198 | translate_bexpression(NAst, Value). | |
199 | ||
200 | get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, NewEnv) :- | |
201 | b_get_machine_operation(OperationName, _, _, Substitution), | |
202 | get_precondition_from_operation_body(Substitution, Precondition), | |
203 | b_get_typed_invariant_from_machine(Invariant), | |
204 | conjunct_predicates([Invariant,Precondition], InvariantAndPrecondition), | |
205 | get_outputs_for_input_examples(Env, InvariantAndPrecondition, InputExamples, OperationName, [], InputOutputTuples, NewEnv). | |
206 | ||
207 | % Compute the corresponding output for each input example and return a list of I/O tuples. | |
208 | get_outputs_for_input_examples(Env, _, [], _, Acc, Acc, Env). | |
209 | get_outputs_for_input_examples(Env, InvariantAndPrecondition, [InputExampleAst|T], OperationName, Acc, InputOutputTuples, NewEnv) :- | |
210 | create_equality_nodes_from_example(InputExampleAst, EqualityNodes), | |
211 | conjunct_predicates(EqualityNodes, EqualityPredicate), | |
212 | before_after_predicate_for_operation(OperationName, BeforeAfterPredicate), | |
213 | conjunct_predicates([InvariantAndPrecondition,EqualityPredicate,BeforeAfterPredicate], BeforeAfterPredicateWithEqualities), | |
214 | solve_predicate(BeforeAfterPredicateWithEqualities, _, 1, [force_evaluation], Solution), | |
215 | Solution = solution(SolutionBindings), | |
216 | get_output_nodes_from_bindings(SolutionBindings, OutputExampleAst), | |
217 | \+ contains_symbolic_value(OutputExampleAst), | |
218 | maplist(create_example_triple(Env), InputExampleAst, InputExample), | |
219 | findall(Name, member((_,_,Name), InputExample), InputVarNames), | |
220 | b_machine_identifier_normalization:map_normalize_ids_in_b_ast(Env, OutputExampleAst, OutputExampleAst2, Env1), | |
221 | reduce_input_to_violating_vars(InputVarNames, OutputExampleAst2, OutputExampleAst3), | |
222 | findall(N, ( member(b(_,_,I), OutputExampleAst3), | |
223 | member(synthesis(machinevar,N), I) | |
224 | ), OutputVarNames), | |
225 | subtract(InputVarNames, OutputVarNames, Sub), | |
226 | ( Sub == [] | |
227 | -> | |
228 | maplist(create_example_triple(Env1), OutputExampleAst3, OutputExample), | |
229 | get_outputs_for_input_examples(Env1, InvariantAndPrecondition, T, OperationName, [(InputExample,OutputExample)|Acc], InputOutputTuples, NewEnv) | |
230 | ; format("Error: Missing Output value for at least one variable. Probably a bug in normalize_ids_in_b_ast/4.~n", []), | |
231 | InputOutputTuples = Acc, | |
232 | NewEnv = Env1 | |
233 | ). | |
234 | get_outputs_for_input_examples(Env, _, _, _, Acc, Acc, Env). | |
235 | ||
236 | contains_symbolic_value(LoE) :- | |
237 | % TO DO: extend for lambda functions? | |
238 | member(E, LoE), | |
239 | contains_symbolic_value_aux(E). | |
240 | ||
241 | contains_symbolic_value_aux(b(value(closure(_,_,b(member(_,_),_,_))),_,_)). | |
242 | contains_symbolic_value_aux(b(Node,_,_)) :- | |
243 | Node =.. [_|Args], | |
244 | member(Arg, Args), | |
245 | contains_symbolic_value_aux(Arg). |