1 % (c) 2018-2024 Lehrstuhl fuer Softwaretechnik und Programmiersprachen,
2 % Heinrich Heine Universitaet Duesseldorf
3 % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html)
4
5 :- module(regexp, [regexp_init/0,
6 regexp_match/2, regexp_match/3,
7 is_regexp/1,
8 regexp_replace/4, regexp_replace/5,
9 regexp_search_first/3, regexp_search_first/4,
10 regexp_search_first_detailed/4, regexp_search_first_detailed/5,
11 regexp_search_all/3, regexp_isearch_all/3, regexp_search_all/4,
12 get_cpp_version/1]).
13
14
/* Foreign-language interface of this module.
   Each foreign/2 fact maps one function of the regexp foreign resource onto
   a Prolog predicate of the same name. Names containing an extra "i"
   (imatch, ireplace, isearch) are the ignore-case variants. */

% Matching: (String, Pattern, Flag, Status).
% This module interprets Status as 1 = match, 0 = no match, >1 = illegal pattern.
% NOTE(review): Flag is passed as 1 by regexp_match/3 and 0 by is_regexp/1;
% presumably it controls error reporting -- confirm against the C++ source.
foreign(cpp_regex_match,
        cpp_regex_match(+string,+string,+integer,[-integer])).
foreign(cpp_regex_imatch,
        cpp_regex_imatch(+string,+string,+integer,[-integer])).  % i for ignore case

% Replacing: (String, Pattern, Replacement, ResultAtom).
foreign(cpp_regex_replace,
        cpp_regex_replace(+string,+string,+string,[-atom])).
foreign(cpp_regex_ireplace,
        cpp_regex_ireplace(+string,+string,+string,[-atom])).    % i for ignore case

% Searching for all matches: (String, Pattern, ResultTerm).
foreign(cpp_regex_search_all_str,
        cpp_regex_search_all_str(+string,+string,[-term])).
foreign(cpp_regex_isearch_all_str,
        cpp_regex_isearch_all_str(+string,+string,[-term])).     % i for ignore case

% Searching for the first match, result as an atom: (String, Pattern, ResultAtom).
foreign(cpp_regex_search_first_str,
        cpp_regex_search_first_str(+string,+string,[-atom])).
foreign(cpp_regex_isearch_first_str,
        cpp_regex_isearch_first_str(+string,+string,[-atom])).   % i for ignore case

% Searching for the first match from an offset: (String, Offset, Pattern, ResultTerm).
foreign(cpp_regex_search_first,
        cpp_regex_search_first(+string,+integer,+string,[-term])).
foreign(cpp_regex_isearch_first,
        cpp_regex_isearch_first(+string,+integer,+string,[-term])). % i for ignore case

% Version information of the C++ side: (VersionTerm).
foreign(cpp_version, cpp_version([-term])).
29
% The foreign resource "regexp" bundles all functions declared via foreign/2.
foreign_resource(regexp, [
    % full match
    cpp_regex_match, cpp_regex_imatch,
    % replace
    cpp_regex_replace, cpp_regex_ireplace,
    % first match, returned as atom
    cpp_regex_search_first_str, cpp_regex_isearch_first_str,
    % first match from an offset, returned as term
    cpp_regex_search_first, cpp_regex_isearch_first,
    % all matches
    cpp_regex_search_all_str, cpp_regex_isearch_all_str,
    % version information
    cpp_version
  ]).
44
45 :- use_module(probsrc(error_manager)).
46
% regexp_match(+String,+Pattern):
% case-sensitive match; delegates to regexp_match/3 with mode match_case.
regexp_match(String,Pattern) :-
    regexp_match(String,Pattern,match_case).
48
% regexp_match(+String,+Pattern,+IgnoreCase):
% match an atom against a regular expression in ECMA syntax, which is also
% represented as an atom. IgnoreCase is either ignore_case or match_case.
%  - succeeds if the foreign match function reports status 1 (match)
%  - fails silently on status 0 (no match)
%  - on a status >1 reports an illegal pattern via add_error/3 and fails
%  - if the guards (atom arguments, successful regexp_init) do not hold,
%    prints an 'Illegal call' message on user_error and fails
regexp_match(String,Pattern,IgnoreCase) :-
    atom(String), atom(Pattern),
    regexp_init,
    !,
    %format('regexp_match: Checking String ~w for Pattern ~w with IgnoreCase=~w~n',[String,Pattern,IgnoreCase]),
    (ignore_case(IgnoreCase)
     -> cpp_regex_imatch(String,Pattern,1,Res)
     ;  cpp_regex_match(String,Pattern,1,Res)
    ),
    %format('String ~w for Pattern ~w: Result=~w~n',[String,Pattern,Res]),
    % no else branch on purpose: Res=0 (no match) simply fails
    (Res=1 -> true
     ; Res>1
     -> add_error(regexp,'Illegal Regular Expression Pattern (use ECMAScript syntax): ',Pattern),
        fail).
regexp_match(String,Pattern,IgnoreCase) :-
    % report the call under the predicate's real name (was misspelled regepx_match)
    format(user_error,'Illegal call: ~w~n',[regexp_match(String,Pattern,IgnoreCase)]),
    fail.
67
% is_regexp(+Pattern):
% succeeds if the atom Pattern is accepted as a regular expression pattern.
% The pattern is tried against the empty atom; only the status code is used.
% A call that does not pass the guards is reported on user_error and fails.
is_regexp(Pattern) :-
    atom(Pattern), regexp_init,
    !,
    cpp_regex_match('',Pattern,0,Status), % TO DO: will still print error messages
    Status =< 1. % status 0 or 1 means the pattern was ok
is_regexp(Pattern) :-
    format(user_error,'Illegal call: ~w~n',[is_regexp(Pattern)]),
    fail.
77
% regexp_replace(+String,+Pattern,+ReplStr,-Result):
% case-sensitive variant; delegates to regexp_replace/5 with mode match_case.
regexp_replace(String,Pattern,ReplStr,Result) :-
    regexp_replace(String, Pattern, match_case, ReplStr, Result).
80
% regexp_replace(+String,+Pattern,+IgnoreCase,+ReplStr,-Result):
% Result is the atom returned by the foreign replace function for the atoms
% String, Pattern (regular expression) and ReplStr (replacement).
% IgnoreCase (ignore_case or match_case) selects the foreign function.
% NOTE(review): whether all or only the first occurrence is replaced is
% decided on the C++ side -- confirm there.
% A call that does not pass the guards is reported on user_error and fails.
regexp_replace(String,Pattern,IgnoreCase,ReplStr,Result) :-
    atom(String),
    atom(Pattern),
    atom(ReplStr),
    regexp_init,
    !,
    ( ignore_case(IgnoreCase)
    -> cpp_regex_ireplace(String,Pattern,ReplStr,Result)
    ;  cpp_regex_replace(String,Pattern,ReplStr,Result)
    ).
    %,format('Replace ~w for Pattern ~w with ~w: Result=~w~n',[String,Pattern,ReplStr,Result]).
regexp_replace(String,Pattern,IgnoreCase,ReplStr,Result) :-
    format(user_error,'Illegal call: ~w~n',
           [regexp_replace(String,Pattern,IgnoreCase,ReplStr,Result)]),
    fail.
93
% regexp_search_first(+String,+Pattern,-Result):
% case-sensitive variant; delegates to regexp_search_first/4 with mode match_case.
regexp_search_first(String,Pattern,Result) :-
    regexp_search_first(String, Pattern, match_case, Result).
95
% regexp_search_first(+String,+Pattern,+IgnoreCase,-Result):
% search for a first match of a pattern; returns empty string if no match found.
% String and Pattern are atoms; IgnoreCase is ignore_case or match_case.
% A call that does not pass the guards (atom arguments, successful
% regexp_init) is reported on user_error and fails.
regexp_search_first(String,Pattern,IgnoreCase,Result) :-
    atom(String), atom(Pattern), regexp_init,
    !,
    (ignore_case(IgnoreCase)
     -> cpp_regex_isearch_first_str(String,Pattern,Result)
     ;  cpp_regex_search_first_str(String,Pattern,Result)
    ).
    %,format('Search ~w for Pattern ~w: Result=~w~n',[String,Pattern,Result]).
regexp_search_first(String,Pattern,IgnoreCase,Result) :-
    % print the arguments in the order the caller supplied them
    % (the diagnostic used to swap IgnoreCase and Result)
    format(user_error,'Illegal call: ~w~n',[regexp_search_first(String,Pattern,IgnoreCase,Result)]),
    fail.
108
% regexp_search_first_detailed(+String,+FromPosition,+Pattern,-Result):
% case-sensitive variant; delegates to regexp_search_first_detailed/5
% with mode match_case.
regexp_search_first_detailed(String,FromPosition,Pattern,Result) :-
    regexp_search_first_detailed(String, FromPosition, Pattern, match_case, Result).
% regexp_search_first_detailed(+String,+FromPosition,+Pattern,+IgnoreCase,-Result):
% search for a first match of a pattern, starting at the 1-based index
% FromPosition of the atom String;
% returns either match(Pos,Len,[MatchAtom|SubMatches]) or no-match
% (the result term is built by the foreign function).
% IgnoreCase is ignore_case or match_case.
% FromPosition must be an integer >= 1: an integer below 1 yields a dedicated
% message on user_error, any other illegal call (non-atom String or Pattern,
% unbound or non-integer index) yields 'Illegal call'; both cases fail.
regexp_search_first_detailed(String,FromPosition,Pattern,IgnoreCase,Result) :-
    atom(String), atom(Pattern),
    % check the type first so that an unbound or non-numeric index cannot
    % raise an exception in the arithmetic comparison
    integer(FromPosition), FromPosition>0,
    regexp_init,
    !,
    Offset is FromPosition-1, % the foreign function receives FromPosition-1
    (ignore_case(IgnoreCase)
     -> cpp_regex_isearch_first(String,Offset,Pattern,Result)
     ;  cpp_regex_search_first(String,Offset,Pattern,Result)
    ).
    %, format('Search ~w for Pattern ~w: Result=~w~n',[String,Pattern,Result]).
regexp_search_first_detailed(String,From,Pattern,IgnoreCase,Result) :-
    integer(From), From<1, !,
    format(user_error,'From index must be at least 1: ~w~n',[regexp_search_first_detailed(String,From,Pattern,IgnoreCase,Result)]),
    fail.
regexp_search_first_detailed(String,From,Pattern,IgnoreCase,Result) :-
    format(user_error,'Illegal call: ~w~n',[regexp_search_first_detailed(String,From,Pattern,IgnoreCase,Result)]),
    fail.
129
130 % ---------------------
131
132 :- use_module(library(lists),[reverse/2]).
% regexp_search_all(+String,+Pattern,-Result):
% search for all matches of a pattern, respecting case;
% delegates to regexp_search_all/4 with mode match_case.
regexp_search_all(String,Pattern,Result) :-
    regexp_search_all(String, Pattern, match_case, Result).
% regexp_isearch_all(+String,+Pattern,-Result):
% search for all matches of a pattern, ignoring case;
% delegates to regexp_search_all/4 with mode ignore_case.
regexp_isearch_all(String,Pattern,Result) :-
    regexp_search_all(String, Pattern, ignore_case, Result).
136
% regexp_search_all(+String,+Pattern,+IgnoreCase,-Result):
% Result is the list delivered by the foreign search-all function for the
% atoms String and Pattern, in reversed order (reverse/2 is applied to it).
% IgnoreCase (ignore_case or match_case) selects the foreign function.
% A call that does not pass the guards is reported on user_error and fails.
regexp_search_all(String,Pattern,IgnoreCase,Result) :-
    atom(String),
    atom(Pattern),
    regexp_init,
    !,
    ( ignore_case(IgnoreCase)
    -> cpp_regex_isearch_all_str(String,Pattern,Raw)
    ;  cpp_regex_search_all_str(String,Pattern,Raw)
    ),
    reverse(Raw,Result).
    %,format('Search ~w for Pattern ~w: Result=~w~n',[String,Pattern,Result]).
regexp_search_all(String,Pattern,IgnoreCase,Result) :-
    format(user_error,'Illegal call: ~w~n',
           [regexp_search_all(String,Pattern,IgnoreCase,Result)]),
    fail.
149
% ignore_case(?Mode):
% succeeds iff Mode is (or can be unified with) ignore_case;
% fails silently for match_case; any other value is reported on
% user_error before failing.
ignore_case(Mode) :-
    (   Mode = ignore_case -> true
    ;   Mode = match_case  -> fail
    ;   format(user_error,'Illegal case: ~w~n',[Mode]),
        fail
    ).
153
154 % ---------------------
155
% get_cpp_version(-Version):
% Version is the term returned by the foreign function cpp_version/1;
% the foreign resource is loaded first if necessary.
%get_cpp_version(unknown) :- !.   % disabled stub clause, kept for reference
get_cpp_version(Version) :-
    regexp_init,
    cpp_version(Version).
159
160 % ---------------------
161
162 :- dynamic loaded/0.
163
164 :- use_module(probsrc(pathes_lib),[safe_load_foreign_resource/2]).
165
% regexp_init:
% loads the foreign resource regexp on the first call; later calls succeed
% immediately. The flag loaded/0 is asserted before the load is attempted,
% so the load is tried at most once, whatever its outcome.
regexp_init :-
    loaded,
    !.
regexp_init :-
    assertz(loaded),
    safe_load_foreign_resource(regexp,regexp).
174
175 %assert_dir :- (user:library_directory('.') -> true ; assertz(user:library_directory('.'))).
176
177