1 | % (c) 2014-2024 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
2 | % Heinrich Heine Universitaet Duesseldorf | |
3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) | |
4 | ||
5 | :- module(disprover_test_runner, [run_disprover_on_po/1, run_disprover_on_all_pos/1, | |
6 | get_loaded_po_file/1, load_po_file/1, | |
7 | set_disprover_timeout/1, reset_disprover_timeout/0, | |
8 | get_disprover_options/1, set_disprover_options/1, | |
9 | get_disprover_po/6, get_disprover_result/3, | |
10 | print_disprover_stats/0]). | |
11 | ||
12 | :- use_module(probsrc(prob_startup), [startup_prob/0]). | |
13 | %:- use_module(pathes,[set_search_pathes/0]). % imported first to set_compile_time_search_pathes, now loaded by prob_startup | |
14 | ||
15 | :- multifile user:portray_message/2. | |
16 | user:portray_message(informational, _) :- !. | |
17 | ||
18 | :- use_module(probsrc(module_information)). | |
19 | :- module_info(group,cbc). | |
20 | :- module_info(description,'Rodin Prover / Disprover'). | |
21 | ||
22 | ||
23 | :- use_module(disprover). | |
24 | :- use_module(probsrc(error_manager)). | |
25 | :- use_module(probsrc(tools)). | |
26 | :- use_module(probsrc(debug)). | |
27 | ||
28 | :- use_module(library(system)). | |
29 | :- use_module(library(lists)). | |
30 | ||
31 | ||
32 | ||
33 | %:- use_module(eventhandling, [announce_event/1]). | |
34 | ||
35 | print_disprover_stats :- | |
36 | (debug_mode(on) -> print_prover_result_stats ; true), % statistics about various provers | |
37 | (disprover_timeout(TO) -> true ; TO = 'UNKNOWN'), | |
38 | project_name(N), format('Project ~w~n',[N]), | |
39 | generated(_,D), | |
40 | machine_name(M), format('Machine ~w (exported ~w)~n',[M,D]), | |
41 | print_nr_pos('',_,_), | |
42 | print_nr_pos('proven (within Rodin) ',true,_), | |
43 | format('ProB Disprover Analysis with timeout ~w ms:~n',[TO]), | |
44 | print_nr_pos('proven (by ProB) ',_,true), | |
45 | (disprover_result(_,_,contradiction_in_hypotheses) | |
46 | -> print_nr_pos('those with contradiction in hypotheses ',_,true,contradiction_in_hypotheses) ; true), | |
47 | print_nr_pos('disproven (by ProB) ',_,false), | |
48 | print_nr_pos('unproven (by ProB) ',_,unknown), | |
49 | (raw_result_exists(failure) -> print_nr_pos('failure (of ProB) ',_,failure) ; true). | |
50 | ||
51 | raw_result_exists(RawResult) :- disprover_po_with_info(_,_,_,RawResult). | |
52 | print_nr_pos(Msg,RodinStatus,ProBStatus) :- print_nr_pos(Msg,RodinStatus,ProBStatus,_). | |
53 | print_nr_pos(Msg,RodinStatus,ProBStatus,RawResult) :- | |
54 | findall(Name,disprover_po_with_info(Name,RodinStatus,ProBStatus,RawResult),L), | |
55 | length(L,Len), | |
56 | (Len<12 | |
57 | -> format(' Number of ~wPOs: ~w (~w)~n',[Msg,Len,L]) | |
58 | ; (L = [First|_],last(L,Last)) | |
59 | -> format(' Number of ~wPOs: ~w (~w -- ~w)~n',[Msg,Len,First,Last]) | |
60 | ; format(' Number of ~wPOs: ~w~n',[Msg,Len]) | |
61 | ). | |
62 | ||
63 | disprover_po_with_info(Name,RodinStatus,ProBStatus,RawResult) :- | |
64 | get_disprover_po(Name,_Context,_Goal,_All,_Sel,RodinStatus), | |
65 | (disprover_result(Name,ProBStatus,RawResult) -> true ; ProBStatus='not_run', RawResult='not_run'). | |
66 | ||
67 | :- dynamic disprover_timeout/1. | |
68 | :- if(current_prolog_flag(dialect, swi)). | |
69 | disprover_timeout(1000). | |
70 | :- else. | |
71 | disprover_timeout(300). | |
72 | :- endif. | |
73 | set_disprover_timeout(N) :- retractall(disprover_timeout(_)), assertz(disprover_timeout(N)). | |
74 | reset_disprover_timeout :- retractall(disprover_timeout(_)), assertz(disprover_timeout(300)). | |
75 | ||
76 | :- use_module(probsrc(tools_lists),[count_occurences/2]). | |
77 | ||
78 | run_disprover_on_po(PO) :- | |
79 | disprover_timeout(TO), | |
80 | run_on_po(PO,TO). | |
81 | ||
82 | run_disprover_on_all_pos(_) :- retractall(disprover_result(_,_,_)), disprover_timeout(TO),run_on_po(_,TO), fail. | |
83 | run_disprover_on_all_pos(Summary) :- findall(R,disprover_result(_,R,_),L), | |
84 | count_occurences(L,Occs), | |
85 | format('~nDisprover Run Summary: ~w~n',[Occs]), | |
86 | add_info(true,Occs,O1), add_info(false,O1,O2), add_info(unknown,O2,O3), | |
87 | add_info(failure,O3,Summary). | |
88 | ||
89 | add_info(Info,List,Result) :- member(Info-_,List) -> List=Result ; Result = [Info-0|List]. | |
90 | ||
91 | ||
92 | :- use_module(probsrc(tools_printing),[format_with_colour_nl/4]). | |
93 | :- use_module(probsrc(prob2_interface), [start_animation/0, load_event_b_project/4]). | |
94 | ||
95 | run_on_po(PO_Name,TimeOut) :- | |
96 | get_disprover_po(PO_Name,Context,Goal,AllHypotheses,SelectedHypotheses,Status), | |
97 | formatsilent('Checking PO ~w (~w in Rodin)~n',[PO_Name,Status]), | |
98 | load_event_b_project([],[Context],[],_), | |
99 | start_animation, | |
100 | debug_println(5,loaded_context), | |
101 | statistics(walltime,[T1,_]), | |
102 | (my_disprove(PO_Name,Goal,AllHypotheses,SelectedHypotheses,TimeOut,OutResult) | |
103 | -> translate_result_for_po(OutResult,ProverResult,Color), | |
104 | (silent_mode(on) -> true | |
105 | ; statistics(walltime,[T2,_]), WT is T2-T1, | |
106 | %format(user_output,'Full result for PO ~w is ~w~n',[PO_Name,OutResult]), | |
107 | format(user_output,'Disprover Result (~w ms): PO ~w is ',[WT,PO_Name]), | |
108 | format_with_colour_nl(user_output,Color,'~w',[ProverResult]) | |
109 | ), | |
110 | print_solution(OutResult), | |
111 | functor(OutResult,RawOutFunctor,_), | |
112 | assertz(disprover_result(PO_Name,ProverResult,RawOutFunctor)), | |
113 | check_soundness(ProverResult,Status,PO_Name) | |
114 | ; add_error(disprover,'Disprover failed: ',(PO_Name:Status)), | |
115 | assertz(disprover_result(PO_Name,'failure',unknown)) | |
116 | ). | |
117 | run_on_po(PO_Name,_TimeOut) :- nonvar(PO_Name), | |
118 | \+ get_disprover_po(PO_Name,_,_,_,_,_), | |
119 | format('PO ~w does not exist !~n',[PO_Name]), | |
120 | findall(PO,get_disprover_po(PO,_,_,_,_,_),L), | |
121 | format('Available POs: ~w~n',[L]), | |
122 | fail. | |
123 | ||
124 | :- use_module(library(lists), [maplist/2]). | |
125 | print_solution(solution(L)) :- | |
126 | formatsilent('Counterexample using all hypotheses:~n',[]), maplist(print_binding,L),!. | |
127 | print_solution(solution_on_selected_hypotheses(L)) :- | |
128 | formatsilent('Counterexample using selected hypotheses:~n',[]), maplist(print_binding,L),!. | |
129 | print_solution(L) :- (debug_mode(on) -> formatsilent(' ~w~n',[L]) ; true). | |
130 | print_binding(binding(ID,_Val,VS)) :- formatsilent(' ~w = ~w~n',[ID,VS]). | |
131 | ||
132 | translate_result_for_po(solution(_),R,Col) :- !, R=false,Col=[bold,red]. | |
133 | translate_result_for_po(contradiction_found,R,Col) :- !,R=true,Col=[bold,green]. | |
134 | translate_result_for_po(contradiction_in_hypotheses,R,Col) :- !,R=true,Col=[green]. | |
135 | translate_result_for_po(solution_on_selected_hypotheses(_),R,Col) :- !, R=unknown,Col=[bold,red]. | |
136 | translate_result_for_po(_,unknown,[red]). | |
137 | ||
138 | check_soundness(unknown,_,_) :- !. | |
139 | check_soundness(Status,unknown,PO_Name) :- !, | |
140 | formatsilent('ProB Disprover result ~w improves upon unknown Rodin prover status for ~w!~n',[Status,PO_Name]). | |
141 | check_soundness(X,X,_) :- !. | |
142 | check_soundness(Disprover,Prover,PO_Name) :- | |
143 | ajoin(['Disprover result ',Disprover, ' incompatible with Rodin proof status ',Prover,' for PO:'],Msg), | |
144 | add_error(disprover_unsound,Msg,PO_Name). | |
145 | ||
146 | ||
147 | :- dynamic extra_opts/1. | |
148 | extra_opts([]). | |
149 | get_disprover_options(L) :- extra_opts(L). | |
150 | set_disprover_options(L) :- | |
151 | retractall(extra_opts(_)), | |
152 | assertz(extra_opts(L)). | |
153 | ||
154 | my_disprove(PO_Name,Goal,AllHypotheses,SelectedHypotheses,Timeout,OutResult) :- | |
155 | get_disprover_default_opts(DOpts), | |
156 | Opts = [disprover_option(po_label(PO_Name))|DOpts], | |
157 | % interesting extra opts : disprover_option(unsat_core),unsat_core_algorithm/linear | |
158 | extra_opts(X), merge_opts(X,Opts,AllOpts), | |
159 | % Note: disprover_mode is always set to true | |
160 | disprove_with_opts(Goal,AllHypotheses,SelectedHypotheses,Timeout,AllOpts,OutResult). | |
161 | ||
162 | merge_opts([],Opts,Opts). | |
163 | merge_opts([Opt/Val|T],Other,[Opt/Val|ResT]) :- !, | |
164 | delete(Other,Opt/_,Other2), | |
165 | merge_opts(T,Other2,ResT). | |
166 | merge_opts([Opt|T],Other,[Opt|ResT]) :- | |
167 | merge_opts(T,Other,ResT). | |
168 | ||
169 | get_disprover_po(FName,Context,Goal,All,Sel,Status) :- | |
170 | disprover_po(Name,Context,Goal,All,Sel,Status), | |
171 | fix_rodin_name(Name,FName). | |
172 | ||
173 | linefeed(10). | |
174 | linefeed(13). | |
175 | % sometimes Rodin POs have a newline before the slash; we remove this (see e.g., test 2023) | |
176 | fix_rodin_name(POName,FixedName) :- atom_codes(POName,Cs), | |
177 | exclude(linefeed,Cs,NewCs), | |
178 | atom_codes(FixedName,NewCs). | |
179 | ||
180 | :- dynamic project_name/1. | |
181 | :- dynamic machine_name/1. | |
182 | :- dynamic generated/2. | |
183 | :- dynamic disprover_po/6. | |
184 | :- dynamic disprover_result/3. | |
185 | project_name(unknown). | |
186 | machine_name(unknown). | |
187 | generated(unknown,unknown). | |
188 | disprover_po(unknown,unknown,unknown,unknown,unknown,unknown). | |
189 | disprover_result(unknown,unknown,unknown). | |
190 | ||
191 | reset_po_facts :- retractall(loaded_po_file(_)), | |
192 | retractall(project_name(_)), | |
193 | retractall(machine_name(_)), retractall(generated(_,_)), | |
194 | retractall(disprover_po(_,_,_,_,_,_)), retractall(disprover_result(_,_,_)). | |
195 | ||
196 | get_disprover_result(PO_Name,Result,RawResult) :- | |
197 | (disprover_result(PO_Name,R,Raw) -> Result=R,RawResult=Raw | |
198 | ; Result = 'not_yet_run', RawResult='not_yet_run'). | |
199 | ||
200 | :- dynamic loaded_po_file/1. | |
201 | get_loaded_po_file(File) :- loaded_po_file(File). | |
202 | load_po_file(File) :- reset_po_facts, | |
203 | % consult_without_redefine_warning | |
204 | get_set_optional_prolog_flag(redefine_warnings, Old, off), | |
205 | (% load in dynamic mode: so that we can retract later | |
206 | my_po_consult(File) | |
207 | -> OK=true ; OK=false), | |
208 | get_set_optional_prolog_flag(redefine_warnings, _, Old), | |
209 | (OK=true -> assertz(loaded_po_file(File)) ; | |
210 | add_error_and_fail(disprover_test_runner,'Consulting of File failed: ',File)). | |
211 | ||
212 | my_po_consult(File) :- | |
213 | catch( | |
214 | load_files(File,[compilation_mode(assert_all)]), | |
215 | error(existence_error(_,_),_), | |
216 | add_error_fail(my_consult,'File does not exist: ',File)). |