| 1 | % (c) 2019-2022 Universitaet Duesseldorf | |
| 2 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) | |
| 3 | ||
| 4 | :- module(operation_data_generator, [generate_operation_data_from_machine_path/4, | |
| 5 | generate_data_from_machine_operation/6]). | |
| 6 | ||
| 7 | :- use_module(library(sets), [subtract/3]). | |
| 8 | :- use_module(library(random), [random/3]). | |
| 9 | :- use_module(library(lists), [append/2,maplist/3,remove_dups/2,select/3]). | |
| 10 | ||
| 11 | :- use_module(probsrc(bmachine)). | |
| 12 | :- use_module(probsrc(solver_interface), [solve_predicate/5]). | |
| 13 | :- use_module(probsrc(translate), [translate_bexpression/2]). | |
| 14 | :- use_module(probsrc(preferences), [set_preference/2]). | |
| 15 | :- use_module(probsrc(tools), [flatten/2]). | |
| 16 | :- use_module(probsrc(module_information), [module_info/2]). | |
| 17 | :- use_module(extrasrc(before_after_predicates), [before_after_predicate_for_operation/2]). | |
| 18 | :- use_module(probsrc(bsyntaxtree), [conjunct_predicates/2,add_texpr_infos/3]). | |
| 19 | ||
| 20 | :- use_module(synthesis('deep_learning/ground_truth')). | |
| 21 | :- use_module(synthesis('deep_learning/b_machine_identifier_normalization')). | |
| 22 | :- use_module(synthesis(synthesis_util), [get_precondition_from_operation_body/2, | |
| 23 | b_get_typed_invariant_from_machine/1, | |
| 24 | get_valid_inputs_for_operation/3, | |
| 25 | create_equality_nodes_from_example/2, | |
| 26 | get_output_nodes_from_bindings/2, | |
| 27 | reduce_input_to_violating_vars/3]). | |
| 28 | ||
| 29 | :- module_info(group, synthesis). | |
| 30 | :- module_info(description, 'This module generates data to train a neural network that predicts the library components that are necessary to synthesize a B operation\'s substitution for given I/O examples.'). | |
| 31 | ||
| 32 | min_amount_of_examples(3). | |
| 33 | max_amount_of_examples(16). | |
| 34 | ||
| 35 | load_b_or_eventb_machine(AbsoluteFilePath) :- | |
| 36 | is_classical_b_machine_path(AbsoluteFilePath), | |
| 37 | !, | |
| 38 | b_load_machine_from_file(AbsoluteFilePath), | |
| 39 | b_machine_precompile. | |
| 40 | load_b_or_eventb_machine(AbsoluteFilePath) :- | |
| 41 | b_load_eventb_project(AbsoluteFilePath), | |
| 42 | b_machine_precompile. | |
| 43 | ||
| 44 | is_classical_b_machine_path(Path) :- | |
| 45 | atom(Path), | |
| 46 | atom_concat(_, Ext, Path), | |
| 47 | Ext == '.mch'. | |
| 48 | ||
| 49 | load_machine_for_data_generation(AbsoluteFilePath) :- | |
| 50 | b_machine_reset, | |
| 51 | set_preference(use_solver_on_load, prob), | |
| 52 | load_b_or_eventb_machine(AbsoluteFilePath), | |
| 53 | \+ current_machine_uses_records, | |
| 54 | b_get_machine_variables(MachineVars), | |
| 55 | MachineVars \== []. | |
| 56 | ||
| 57 | get_all_operation_bodies_from_current_machine(NOperationBodies) :- | |
| 58 | findall((OperationName,OpReturnVars,OperationBody), b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody), OperationBodies), | |
| 59 | skip_operations_using_records(OperationBodies, [], NOperationBodies). | |
| 60 | ||
| 61 | % We do not need records and we do not synthesize them by now. | |
| 62 | skip_operations_using_records([], Acc, Acc). | |
| 63 | skip_operations_using_records([OperationBody|T], Acc, NOperationBodies) :- | |
| 64 | \+ contains_record_type(OperationBody), | |
| 65 | !, | |
| 66 | skip_operations_using_records(T, [OperationBody|Acc], NOperationBodies). | |
| 67 | skip_operations_using_records([_|T], Acc, NOperationBodies) :- | |
| 68 | skip_operations_using_records(T, Acc, NOperationBodies). | |
| 69 | ||
| 70 | get_number_random_list(0, Acc, Acc) :- | |
| 71 | !. | |
| 72 | get_number_random_list(C, Acc, L) :- | |
| 73 | C1 is C-1, | |
| 74 | get_random_amount_of_examples(R), | |
| 75 | \+ member(R, Acc), | |
| 76 | !, | |
| 77 | get_number_random_list(C1, [R|Acc], L). | |
| 78 | get_number_random_list(C, Acc, L) :- | |
| 79 | get_number_random_list(C, Acc, L). | |
| 80 | ||
| 81 | b_get_machine_operation_any_parameters(OperationName, Ids) :- | |
| 82 | b_get_machine_operation(OperationName, _, _, Body), | |
| 83 | b_get_machine_operation_any_parameters_aux([], Body, Ids), | |
| 84 | !. | |
| 85 | b_get_machine_operation_any_parameters(_, []). | |
| 86 | ||
| 87 | b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :- | |
| 88 | Node =.. [Functor,AnyIds|Args], | |
| 89 | ( Functor = any | |
| 90 | ; Functor = let | |
| 91 | ), | |
| 92 | !, | |
| 93 | maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds), | |
| 94 | flatten(TArgIds, ArgIds), | |
| 95 | append([Acc,ArgIds,AnyIds], Ids). | |
| 96 | b_get_machine_operation_any_parameters_aux(Acc, b(Node,_,_), Ids) :- | |
| 97 | Node =.. [_|Args], | |
| 98 | maplist(b_get_machine_operation_any_parameters_aux([]), Args, TArgIds), | |
| 99 | flatten(TArgIds, ArgIds), | |
| 100 | append(Acc, ArgIds, Ids). | |
| 101 | b_get_machine_operation_any_parameters_aux(Acc, _, Acc). | |
| 102 | ||
| 103 | %%% | |
| 104 | generate_operation_data_from_machine_path(AbsoluteFilePath, Augmentations, SolverTimeoutMs, OperationData) :- | |
| 105 | retractall(tools:id_counter(_)), | |
| 106 | load_machine_for_data_generation(AbsoluteFilePath), | |
| 107 | get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames), | |
| 108 | get_all_operation_bodies_from_current_machine(OperationBodies), | |
| 109 | b_machine_name(MachineName), | |
| 110 | Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames], | |
| 111 | set_preference(time_out, SolverTimeoutMs), | |
| 112 | generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, OperationBodies, _NewEnv, [], OperationData). | |
| 113 | ||
| 114 | %% generate_data_from_machine_operation(+OperationName, +Augmentations, +SolverTimeOut, +AbsoluteFilePath, +MachineName, -OperationData). | |
| 115 | % Assuming that the corresponding machine is loaded. | |
| 116 | generate_data_from_machine_operation(OperationName, Augmentations, SolverTimeoutMs, AbsoluteFilePath, MachineName, OperationData) :- | |
| 117 | retractall(tools:id_counter(_)), | |
| 118 | load_b_machine_if_unloaded(AbsoluteFilePath), | |
| 119 | set_preference(time_out, SolverTimeoutMs), | |
| 120 | b_get_machine_operation(OperationName, OpReturnVars, _, OperationBody), | |
| 121 | get_normalized_id_name_mapping(NormalizedSets, NormalizedIds, NOperationNames, NRecordFieldNames), | |
| 122 | Env = [[],NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames], | |
| 123 | generate_data_from_operation_substitutions(Env, Augmentations, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)], _, [], OperationData). | |
| 124 | ||
| 125 | generate_data_from_operation_substitutions(_, _, _, [], _, Acc, Acc) :- | |
| 126 | !. | |
| 127 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(OperationName,OpReturnVars,OperationBody)|T], NewEnv, Acc, OperationData) :- | |
| 128 | format('.', []), | |
| 129 | b_get_machine_operation_typed_parameters(OperationName, Parameters), | |
| 130 | b_get_machine_operation_any_parameters(OperationName, AnyParameters), | |
| 131 | append(Parameters, AnyParameters, TNParameters), | |
| 132 | findall(NP, ( member(P, TNParameters), | |
| 133 | add_texpr_infos(P, [parameter], NP) | |
| 134 | ), NParameters), | |
| 135 | get_ground_truth_from_operation_body(OpReturnVars, OperationBody, NParameters, GroundTruth), | |
| 136 | !, | |
| 137 | get_number_random_list(AR, [], RandomA), | |
| 138 | length(NParameters, ParameterAmount), | |
| 139 | select((global_ground_truth_params,TGlobalGt), GroundTruth, RestGt), | |
| 140 | extend_ground_truth(parameter, ParameterAmount, TGlobalGt, GlobalGt), | |
| 141 | enumerate_ids([], 'P', NParameters, EParameters), | |
| 142 | Env = [NormalizedStrings,NormalizedSets,NormalizedIds,NOperationNames,NRecordFieldNames], | |
| 143 | append(EParameters, NormalizedIds, NNormalizedIds), | |
| 144 | Env1 = [NormalizedStrings,NormalizedSets,NNormalizedIds,NOperationNames,NRecordFieldNames], | |
| 145 | normalize_ids_in_ground_truth(NormalizedIds, [(global_ground_truth_params,GlobalGt)|RestGt], NGroundTruth), | |
| 146 | generate_data_from_operation_substitutions_aux(Env1, 0, RandomA, AbsoluteFilePath-MachineName, OperationName, Env2, [], Data), | |
| 147 | generate_data_from_operation_substitutions(Env2, AR, AbsoluteFilePath-MachineName, T, NewEnv, [(Data,OperationName,NGroundTruth)|Acc], OperationData), | |
| 148 | !. | |
| 149 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, [(_,_,_)|T], NewEnv, Acc, OperationData) :- | |
| 150 | generate_data_from_operation_substitutions(Env, AR, AbsoluteFilePath-MachineName, T, NewEnv, Acc, OperationData), | |
| 151 | !. | |
| 152 | ||
| 153 | load_b_machine_if_unloaded(MachinePath) :- | |
| 154 | bmachine_is_precompiled, | |
| 155 | b_get_main_filename(LoadedMachinePath), | |
| 156 | atom_concat(_, MachinePath, LoadedMachinePath), | |
| 157 | !. | |
| 158 | load_b_machine_if_unloaded(MachinePath) :- | |
| 159 | b_load_machine_from_file(MachinePath), | |
| 160 | b_machine_precompile. | |
| 161 | ||
| 162 | % Generate several records for each operation using a varying amount of examples in order to obtain more data. | |
| 163 | generate_data_from_operation_substitutions_aux(Env, _, [], _, _, Env, Acc, Acc) :- | |
| 164 | !. | |
| 165 | generate_data_from_operation_substitutions_aux(Env, _, [AmountOfExamples|AT], AbsoluteFilePath-MachineName, OperationName, NewEnv, Acc, Data) :- | |
| 166 | get_valid_inputs_for_operation(OperationName, AmountOfExamples, InputExamples), | |
| 167 | remove_dups(InputExamples, InputExamplesSet), | |
| 168 | InputExamplesSet \== [], | |
| 169 | InputExamplesSet \== [[]], | |
| 170 | get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, Env1), | |
| 171 | !, | |
| 172 | length(InputOutputTuples, Size), | |
| 173 | min_amount_of_examples(MinExamples), | |
| 174 | ( ( InputOutputTuples \== [], | |
| 175 | Size >= MinExamples | |
| 176 | ) | |
| 177 | -> | |
| 178 | AtLeastOneSolution = 1, | |
| 179 | NewAcc = [InputOutputTuples|Acc] | |
| 180 | ; % skip this operation if no output examples could be generated or no ground truth can be inferred | |
| 181 | AtLeastOneSolution = 0, | |
| 182 | NewAcc = Acc | |
| 183 | ), | |
| 184 | generate_data_from_operation_substitutions_aux(Env1, AtLeastOneSolution, AT, AbsoluteFilePath-MachineName, OperationName, NewEnv, NewAcc, Data). | |
| 185 | % no valid inputs can be generated | |
| 186 | generate_data_from_operation_substitutions_aux(Env, AtLeastOneSolution, _, AbsoluteFilePath-_, OperationName, Env, Acc, Data) :- | |
| 187 | AtLeastOneSolution == 0, | |
| 188 | !, | |
| 189 | format('~n~nWarning: No valid inputs could be generated for operation ~w in machine ~w.~n~n', [OperationName,AbsoluteFilePath]), | |
| 190 | Data = Acc. | |
| 191 | % at least one record could be generated but by excluding prior solutions we might not be able to generate as much records as initially planed | |
| 192 | generate_data_from_operation_substitutions_aux(Env, _, _, _, _, Env, Acc, Acc). | |
| 193 | %%% | |
| 194 | normalize_ids_in_ground_truth(_, [], []). | |
| 195 | normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(NVarName,GroundTruth)|NT]) :- | |
| 196 | member((b(identifier(VarName),_,_),b(identifier(NVarName),_,_)), NormalizedIds), | |
| 197 | !, | |
| 198 | normalize_ids_in_ground_truth(NormalizedIds, T, NT). | |
| 199 | normalize_ids_in_ground_truth(NormalizedIds, [(VarName,GroundTruth)|T], [(VarName,GroundTruth)|NT]) :- | |
| 200 | !, | |
| 201 | normalize_ids_in_ground_truth(NormalizedIds, T, NT). | |
| 202 | normalize_ids_in_ground_truth(_, [H|_], _) :- | |
| 203 | format("~nError: Ground truth accumulator ~w is not properly wrapped. Abort.~n~n", [H]), | |
| 204 | !, | |
| 205 | fail. | |
| 206 | ||
| 207 | get_random_amount_of_examples(AmountOfExamples) :- | |
| 208 | min_amount_of_examples(MinAmountOfExamples), | |
| 209 | max_amount_of_examples(TempMaxAmountOfExamples), | |
| 210 | MaxAmountOfExamples is TempMaxAmountOfExamples+1, | |
| 211 | random(MinAmountOfExamples, MaxAmountOfExamples, AmountOfExamples), | |
| 212 | !. | |
| 213 | ||
| 214 | create_example_triple(Env, Ast, (NVarName,NType,Value)) :- | |
| 215 | % ( | |
| 216 | Env = [_,NormalizedSets,_,_,_], | |
| 217 | b_machine_identifier_normalization:normalize_ids_in_b_ast(Env, Ast, NAst, _), % TODO update env? | |
| 218 | % ; trace , normalize_ids_in_b_ast(NormalizedSets,NormalizedIds,Ast,NAst)) , | |
| 219 | NAst = b(_,Type,NInfo), | |
| 220 | normalize_id_type(NormalizedSets, Type, NType), | |
| 221 | member(synthesis(machinevar,NVarName), NInfo), | |
| 222 | translate_bexpression(NAst, Value). | |
| 223 | ||
| 224 | get_outputs_for_input_examples(Env, InputExamples, OperationName, InputOutputTuples, NewEnv) :- | |
| 225 | b_get_machine_operation(OperationName, _, _, Substitution), | |
| 226 | get_precondition_from_operation_body(Substitution, Precondition), | |
| 227 | b_get_typed_invariant_from_machine(Invariant), | |
| 228 | conjunct_predicates([Invariant,Precondition], InvariantAndPrecondition), | |
| 229 | get_outputs_for_input_examples(Env, InvariantAndPrecondition, InputExamples, OperationName, [], InputOutputTuples, NewEnv). | |
| 230 | ||
| 231 | % Compute the corresponding output for each input example and return a list of I/O tuples. | |
| 232 | get_outputs_for_input_examples(Env, _, [], _, Acc, Acc, Env). | |
| 233 | get_outputs_for_input_examples(Env, InvariantAndPrecondition, [InputExampleAst|T], OperationName, Acc, InputOutputTuples, NewEnv) :- | |
| 234 | create_equality_nodes_from_example(InputExampleAst, EqualityNodes), | |
| 235 | conjunct_predicates(EqualityNodes, EqualityPredicate), | |
| 236 | before_after_predicate_for_operation(OperationName, BeforeAfterPredicate), | |
| 237 | conjunct_predicates([InvariantAndPrecondition,EqualityPredicate,BeforeAfterPredicate], BeforeAfterPredicateWithEqualities), | |
| 238 | solve_predicate(BeforeAfterPredicateWithEqualities, _, 1, [force_evaluation], Solution), | |
| 239 | Solution = solution(SolutionBindings), | |
| 240 | get_output_nodes_from_bindings(SolutionBindings, OutputExampleAst), | |
| 241 | \+ contains_symbolic_value(OutputExampleAst), | |
| 242 | maplist(create_example_triple(Env), InputExampleAst, InputExample), | |
| 243 | findall(Name, member((Name,_,_), InputExample), InputVarNames), | |
| 244 | b_machine_identifier_normalization:map_normalize_ids_in_b_ast(Env, OutputExampleAst, OutputExampleAst2, Env1), | |
| 245 | reduce_input_to_violating_vars(InputVarNames, OutputExampleAst2, OutputExampleAst3), | |
| 246 | findall(N, ( member(b(_,_,I), OutputExampleAst3), | |
| 247 | member(synthesis(machinevar,N), I) | |
| 248 | ), OutputVarNames), | |
| 249 | subtract(InputVarNames, OutputVarNames, Sub), | |
| 250 | ( Sub == [] | |
| 251 | -> | |
| 252 | maplist(create_example_triple(Env1), OutputExampleAst3, OutputExample), | |
| 253 | get_outputs_for_input_examples(Env1, InvariantAndPrecondition, T, OperationName, [(InputExample,OutputExample)|Acc], InputOutputTuples, NewEnv) | |
| 254 | ; format("Error: Missing Output value for at least one variable. Probably a bug in normalize_ids_in_b_ast/4.~n", []), | |
| 255 | InputOutputTuples = Acc, | |
| 256 | NewEnv = Env1 | |
| 257 | ). | |
| 258 | get_outputs_for_input_examples(Env, _, _, _, Acc, Acc, Env). | |
| 259 | ||
| 260 | contains_symbolic_value(LoE) :- | |
| 261 | % TO DO: extend for lambda functions? | |
| 262 | member(E, LoE), | |
| 263 | contains_symbolic_value_aux(E). | |
| 264 | ||
| 265 | contains_symbolic_value_aux(b(value(closure(_,_,b(member(_,_),_,_))),_,_)). | |
| 266 | contains_symbolic_value_aux(b(Node,_,_)) :- | |
| 267 | Node =.. [_|Args], | |
| 268 | member(Arg, Args), | |
| 269 | contains_symbolic_value_aux(Arg). |