| 1 | :- module(data_generator, [generate_operation_data_from_machine_path/4]). | |
| 2 | ||
| 3 | :- use_module(library(sets), [subtract/3]). | |
| 4 | :- use_module(library(random), [random/3]). | |
| 5 | :- use_module(library(lists), [append/2,maplist/3,remove_dups/2,select/3]). | |
| 6 | ||
| 7 | :- use_module(probsrc(bmachine)). | |
| 8 | :- use_module(probsrc(solver_interface), [solve_predicate/5]). | |
| 9 | :- use_module(probsrc(translate), [translate_bexpression/2]). | |
| 10 | :- use_module(probsrc(preferences), [set_preference/2]). | |
| 11 | :- use_module(probsrc(tools), [flatten/2]). | |
| 12 | :- use_module(probsrc(module_information), [module_info/2]). | |
| 13 | :- use_module(probsrc(before_after_predicates), [before_after_predicate_for_operation/2]). | |
| 14 | :- use_module(probsrc(bsyntaxtree), [conjunct_predicates/2,add_texpr_infos/3]). | |
| 15 | ||
| 16 | :- use_module(synthesis('deep_learning/ground_truth')). | |
| 17 | :- use_module(synthesis('deep_learning/b_machine_identifier_normalization')). | |
| 18 | :- use_module(synthesis(synthesis_util), [get_precondition_from_operation_body/2, | |
| 19 | b_get_typed_invariant_from_machine/1, | |
| 20 | get_valid_inputs_for_operation/3, | |
| 21 | create_equality_nodes_from_example/2, | |
| 22 | get_output_nodes_from_bindings/2, | |
| 23 | reduce_input_to_violating_vars/3]). | |
| 24 | ||
| 25 | :- module_info(group, synthesis). | |
| 26 | :- module_info(description, 'This module generates data to train a neural network that predicts the library components that are necessary to synthesize a B operation\'s substitution for given I/O examples.'). | |
| 27 | ||
| 28 | min_amount_of_examples(3). | |
| 29 | max_amount_of_examples(16). | |
| 30 | ||
| 31 | load_b_or_eventb_machine(AbsoluteFilePath) :- | |
| 32 | is_classical_b_machine_path(AbsoluteFilePath), | |
| 33 | !, | |
| 34 | b_load_machine_from_file(AbsoluteFilePath), | |
| 35 | b_machine_precompile. | |
| 36 | load_b_or_eventb_machine(AbsoluteFilePath) :- | |
| 37 | b_load_eventb_project(AbsoluteFilePath), | |
| 38 | b_machine_precompile. | |
| 39 | % TODO: set desired timeout from java | |
| 40 | is_classical_b_machine_path(Path) :- | |
| 41 | atom(Path), | |
| 42 | atom_concat(_, Ext, Path), | |
| 43 | Ext == '.mch'. | |
| 44 | ||
| 45 | load_machine_for_data_generation(AbsoluteFilePath) :- | |
| 46 | b_machine_reset, | |
| 47 | set_preference(try_kodkod_on_load, false), | |
| 48 | load_b_or_eventb_machine(AbsoluteFilePath), | |
| 49 | \+ current_machine_uses_records, | |
| 50 | b_get_machine_variables(MachineVars), | |
| 51 | MachineVars \== []. | |
| 52 | ||
| 53 | get_all_operation_bodies_from_current_machine(NOperationBodies) :- | |
| 54 | findall((OperationName,OpReturnVars,OperationBody), b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody), OperationBodies), | |
| 55 | skip_operations_using_records(OperationBodies, [], NOperationBodies). | |
| 56 | ||
| 57 | % We do not need records and we do not synthesize them by now. | |
| 58 | skip_operations_using_records([], Acc, Acc). | |
| 59 | skip_operations_using_records([OperationBody|T], Acc, NOperationBodies) :- | |
| 60 | \+ contains_record_type(OperationBody), | |
| 61 | !, | |
| 62 | skip_operations_using_records(T, [OperationBody|Acc], NOperationBodies). | |
| 63 | skip_operations_using_records([_|T], Acc, NOperationBodies) :- | |
| 64 | skip_operations_using_records(T, Acc, NOperationBodies). | |
| 65 | ||
| 66 | get_number_random_list(0, Acc, Acc) :- | |
| 67 | !. | |
| 68 | get_number_random_list(C, Acc, L) :- | |
| 69 | C1 is C-1, | |
| 70 | get_random_amount_of_examples(R), | |
| 71 | \+ member(R, Acc), | |
| 72 | !, | |
| 73 | get_number_random_list(C1, [R|Acc], L). | |
| 74 | get_number_random_list(C, Acc, L) :- | |
| 75 | get_number_random_list(C, Acc, L). | |
| 76 | ||
| 77 | b_get_machine_operation_any_parameters(OperationName, Ids) :- | |
| 78 | b_get_machine_operation(OperationName, _, _, Body), | |
| 79 | b_get_machine_operation_any_parameters_aux([], Body, Ids), | |
| 80 | !. | |
| 81 | b_get_machine_operation_any_parameters(_, []). | |
| 82 | ||
| 83 | b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :- | |
| 84 | Node =.. [Functor,AnyIds|Args], | |
| 85 | ( Functor = any | |
| 86 | ; Functor = let | |
| 87 | ), | |
| 88 | !, | |
| 89 | maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds), | |
| 90 | flatten(TArgIds, ArgIds), | |
| 91 | append([Acc,ArgIds,AnyIds], Ids). | |
| 92 | b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :- | |
| 93 | Node =.. [_|Args], | |
| 94 | maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds), | |
| 95 | flatten(TArgIds, ArgIds), | |
| 96 | append(Acc, ArgIds, Ids). | |
| 97 | b_get_machine_operation_any_parameters_aux(Acc, _, Acc). | |
| 98 | ||
| 99 | %%% | |
| 100 | generate_operation_data_from_machine_path(AbsoluteFilePath, Augmentations, SolverTimeoutMs, OperationData) :- | |
| 101 | retractall(tools:id_counter(_)), | |
| 102 | load_machine_for_data_generation(AbsoluteFilePath), | |
| 103 | get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames), | |
| 104 | get_all_operation_bodies_from_current_machine(OperationBodies), | |
| 105 | b_machine_name(MachineName), | |
| 106 | Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames], | |
| 107 | set_preference(time_out, SolverTimeoutMs), | |
| 108 | generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, OperationBodies, _NewEnv, [], OperationData). | |
| 109 | ||
| 110 | generate_data_from_operation_substitutions(_, _, _, [], _, Acc, Acc) :- | |
| 111 | !. | |
| 112 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)|T], NewEnv, Acc, OperationData) :- | |
| 113 | format('.', []), | |
| 114 | b_get_machine_operation_typed_parameters(OperationName, Parameters), | |
| 115 | b_get_machine_operation_any_parameters(OperationName, AnyParameters), | |
| 116 | append(Parameters, AnyParameters, TNParameters), | |
| 117 | findall(NP, ( member(P, TNParameters), | |
| 118 | add_texpr_infos(P, [parameter], NP) | |
| 119 | ), NParameters), | |
| 120 | get_ground_truth_from_operation_body(OpReturnVars, OperationBody, NParameters, GroundTruth), | |
| 121 | !, | |
| 122 | get_number_random_list(AR, [], RandomA), | |
| 123 | length(NParameters, ParameterAmount), | |
| 124 | select((global_ground_truth_params,TGlobalGt), GroundTruth, RestGt), | |
| 125 | extend_ground_truth(parameter, ParameterAmount, TGlobalGt, GlobalGt), | |
| 126 | enumerate_ids([], 'P', NParameters, EParameters), | |
| 127 | Env = [NormalizedStrings,NormalizedSets,NormalizedIds,NOperationNames], | |
| 128 | append(EParameters, NormalizedIds, NNormalizedIds), | |
| 129 | Env1 = [NormalizedStrings,NormalizedSets,NNormalizedIds,NOperationNames], | |
| 130 | normalize_ids_in_ground_truth(NormalizedIds, [(global_ground_truth_params,GlobalGt)|RestGt], NGroundTruth), | |
| 131 | generate_data_from_operation_substitutions_aux(Env1, 0, RandomA, AbsoluteFilePath-MachineName, OperationName, Env2, [], Data), | |
| 132 | generate_data_from_operation_substitutions(Env2, AR, AbsoluteFilePath-MachineName, T, NewEnv, [(Data,OperationName,NGroundTruth)|Acc], OperationData), | |
| 133 | !. | |
| 134 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(_,_,_)|T], NewEnv, Acc, OperationData) :- | |
| 135 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, T, NewEnv, Acc, OperationData), | |
| 136 | !. | |
| 137 | ||
| 138 | % Generate several records for each operation using a varying amount of examples in order to obtain more data. | |
| 139 | generate_data_from_operation_substitutions_aux(Env, _, [], _, _, Env, Acc, Acc) :- | |
| 140 | !. | |
| 141 | generate_data_from_operation_substitutions_aux(Env, _, [AmountOfExamples|AT], AbsoluteFilePath-MachineName, OperationName, NewEnv, Acc, Data) :- | |
| 142 | get_valid_inputs_for_operation(OperationName, AmountOfExamples, InputExamples), | |
| 143 | remove_dups(InputExamples, InputExamplesSet), | |
| 144 | InputExamplesSet \== [], | |
| 145 | InputExamplesSet \== [[]], | |
| 146 | get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, Env1), | |
| 147 | !, | |
| 148 | length(InputOutputTuples, Size), | |
| 149 | min_amount_of_examples(MinExamples), | |
| 150 | ( ( InputOutputTuples \== [], | |
| 151 | Size >= MinExamples | |
| 152 | ) | |
| 153 | -> | |
| 154 | AtLeastOneSolution = 1, | |
| 155 | NewAcc = [InputOutputTuples|Acc] | |
| 156 | ; % skip this operation if no output examples could be generated or no ground truth can be inferred | |
| 157 | AtLeastOneSolution = 0, | |
| 158 | NewAcc = Acc | |
| 159 | ), | |
| 160 | generate_data_from_operation_substitutions_aux(Env1, AtLeastOneSolution, AT, AbsoluteFilePath-MachineName, OperationName, NewEnv, NewAcc, Data). | |
| 161 | % no valid inputs can be generated | |
| 162 | generate_data_from_operation_substitutions_aux(Env, AtLeastOneSolution, _, AbsoluteFilePath-_, OperationName, Env, Acc, Data) :- | |
| 163 | AtLeastOneSolution == 0, | |
| 164 | !, | |
| 165 | format('~n~nWarning: No valid inputs could be generated for operation ~w in machine ~w.~n~n', [OperationName,AbsoluteFilePath]), | |
| 166 | Data = Acc. | |
| 167 | % at least one record could be generated but by excluding prior solutions we might not be able to generate as much records as initially planed | |
| 168 | generate_data_from_operation_substitutions_aux(Env, _, _, _, _, Env, Acc, Acc). | |
| 169 | %%% | |
| 170 | normalize_ids_in_ground_truth(_, [], []). | |
| 171 | normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(NVarName,GroundTruth)|NT]) :- | |
| 172 | member((b(identifier(VarName),_,_),b(identifier(NVarName),_,_)), NormalizedIds), | |
| 173 | !, | |
| 174 | normalize_ids_in_ground_truth(NormalizedIds, T, NT). | |
| 175 | normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(VarName,GroundTruth)|NT]) :- | |
| 176 | !, | |
| 177 | normalize_ids_in_ground_truth(NormalizedIds, T, NT). | |
| 178 | normalize_ids_in_ground_truth(_, [H|_], _) :- | |
| 179 | format("~nError: Ground truth accumulator ~w is not properly wrapped. Abort.~n~n", [H]), | |
| 180 | !, | |
| 181 | fail. | |
| 182 | ||
| 183 | get_random_amount_of_examples(AmountOfExamples) :- | |
| 184 | min_amount_of_examples(MinAmountOfExamples), | |
| 185 | max_amount_of_examples(TempMaxAmountOfExamples), | |
| 186 | MaxAmountOfExamples is TempMaxAmountOfExamples+1, | |
| 187 | random(MinAmountOfExamples, MaxAmountOfExamples, AmountOfExamples), | |
| 188 | !. | |
| 189 | ||
| 190 | create_example_triple(Env, Ast, (Value,NType,NVarName)) :- | |
| 191 | % ( | |
| 192 | Env = [_,NormalizedSets,_,_], | |
| 193 | b_machine_identifier_normalization:normalize_ids_in_b_ast(Env, Ast, NAst, _), % TODO update env? | |
| 194 | % ; trace , normalize_ids_in_b_ast(NormalizedSets,NormalizedIds,Ast,NAst)) , | |
| 195 | NAst = b(_,Type,NInfo), | |
| 196 | normalize_id_type(NormalizedSets, Type, NType), | |
| 197 | member(synthesis(machinevar,NVarName), NInfo), | |
| 198 | translate_bexpression(NAst, Value). | |
| 199 | ||
| 200 | get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, NewEnv) :- | |
| 201 | b_get_machine_operation(OperationName, _, _, Substitution), | |
| 202 | get_precondition_from_operation_body(Substitution, Precondition), | |
| 203 | b_get_typed_invariant_from_machine(Invariant), | |
| 204 | conjunct_predicates([Invariant,Precondition], InvariantAndPrecondition), | |
| 205 | get_outputs_for_input_examples(Env, InvariantAndPrecondition, InputExamples, OperationName, [], InputOutputTuples, NewEnv). | |
| 206 | ||
| 207 | % Compute the corresponding output for each input example and return a list of I/O tuples. | |
| 208 | get_outputs_for_input_examples(Env, _, [], _, Acc, Acc, Env). | |
| 209 | get_outputs_for_input_examples(Env, InvariantAndPrecondition, [InputExampleAst|T], OperationName, Acc, InputOutputTuples, NewEnv) :- | |
| 210 | create_equality_nodes_from_example(InputExampleAst, EqualityNodes), | |
| 211 | conjunct_predicates(EqualityNodes, EqualityPredicate), | |
| 212 | before_after_predicate_for_operation(OperationName, BeforeAfterPredicate), | |
| 213 | conjunct_predicates([InvariantAndPrecondition,EqualityPredicate,BeforeAfterPredicate], BeforeAfterPredicateWithEqualities), | |
| 214 | solve_predicate(BeforeAfterPredicateWithEqualities, _, 1, [force_evaluation], Solution), | |
| 215 | Solution = solution(SolutionBindings), | |
| 216 | get_output_nodes_from_bindings(SolutionBindings, OutputExampleAst), | |
| 217 | \+ contains_symbolic_value(OutputExampleAst), | |
| 218 | maplist(create_example_triple(Env), InputExampleAst, InputExample), | |
| 219 | findall(Name, member((_,_,Name), InputExample), InputVarNames), | |
| 220 | b_machine_identifier_normalization:map_normalize_ids_in_b_ast(Env, OutputExampleAst, OutputExampleAst2, Env1), | |
| 221 | reduce_input_to_violating_vars(InputVarNames, OutputExampleAst2, OutputExampleAst3), | |
| 222 | findall(N, ( member(b(_,_,I), OutputExampleAst3), | |
| 223 | member(synthesis(machinevar,N), I) | |
| 224 | ), OutputVarNames), | |
| 225 | subtract(InputVarNames, OutputVarNames, Sub), | |
| 226 | ( Sub == [] | |
| 227 | -> | |
| 228 | maplist(create_example_triple(Env1), OutputExampleAst3, OutputExample), | |
| 229 | get_outputs_for_input_examples(Env1, InvariantAndPrecondition, T, OperationName, [(InputExample,OutputExample)|Acc], InputOutputTuples, NewEnv) | |
| 230 | ; format("Error: Missing Output value for at least one variable. Probably a bug in normalize_ids_in_b_ast/4.~n", []), | |
| 231 | InputOutputTuples = Acc, | |
| 232 | NewEnv = Env1 | |
| 233 | ). | |
| 234 | get_outputs_for_input_examples(Env, _, _, _, Acc, Acc, Env). | |
| 235 | ||
| 236 | contains_symbolic_value(LoE) :- | |
| 237 | % TO DO: extend for lambda functions? | |
| 238 | member(E, LoE), | |
| 239 | contains_symbolic_value_aux(E). | |
| 240 | ||
| 241 | contains_symbolic_value_aux(b(value(closure(_,_,b(member(_,_),_,_))),_,_)). | |
| 242 | contains_symbolic_value_aux(b(Node,_,_)) :- | |
| 243 | Node =.. [_|Args], | |
| 244 | member(Arg, Args), | |
| 245 | contains_symbolic_value_aux(Arg). |