1 | % (c) 2018-2024 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
2 | % Heinrich Heine Universitaet Duesseldorf | |
3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) | |
4 | ||
5 | :- module(regexp, [regexp_init/0, | |
6 | regexp_match/2, regexp_match/3, | |
7 | is_regexp/1, | |
8 | regexp_replace/4, regexp_replace/5, | |
9 | regexp_search_first/3, regexp_search_first/4, | |
10 | regexp_search_first_detailed/4, regexp_search_first_detailed/5, | |
11 | regexp_search_all/3, regexp_isearch_all/3, regexp_search_all/4, | |
12 | get_cpp_version/1]). | |
13 | ||
14 | ||
/* Interface to the regular expression functions implemented in C++
   (foreign resource 'regexp', loaded on demand by regexp_init/0).

   Each foreign/2 fact maps a C function name to the Prolog predicate that
   calls it. The argument specifications follow the SICStus foreign language
   interface: +string and +integer are inputs, and the bracketed last
   argument ([-integer], [-atom] or [-term]) receives the C function's
   return value.

   NOTE(review): the +integer argument of cpp_regex_match/cpp_regex_imatch is
   passed as 1 by regexp_match/3 and as 0 by is_regexp/1; it looks like a flag
   controlling error reporting -- confirm against the C++ source. */

foreign(cpp_regex_match,cpp_regex_match(+string,+string,+integer,[-integer])).
foreign(cpp_regex_imatch,cpp_regex_imatch(+string,+string,+integer,[-integer])). % i for ignore case
foreign(cpp_regex_replace,cpp_regex_replace(+string,+string,+string,[-atom])).
foreign(cpp_regex_ireplace,cpp_regex_ireplace(+string,+string,+string,[-atom])). % i for ignore case
foreign(cpp_regex_search_all_str,cpp_regex_search_all_str(+string,+string,[-term])).
foreign(cpp_regex_isearch_all_str,cpp_regex_isearch_all_str(+string,+string,[-term])). % i for ignore case
foreign(cpp_regex_search_first_str,cpp_regex_search_first_str(+string,+string,[-atom])).
foreign(cpp_regex_isearch_first_str,cpp_regex_isearch_first_str(+string,+string,[-atom])). % i for ignore case
foreign(cpp_regex_search_first,cpp_regex_search_first(+string,+integer,+string,[-term])).
foreign(cpp_regex_isearch_first,cpp_regex_isearch_first(+string,+integer,+string,[-term])). % i for ignore case
foreign(cpp_version,cpp_version([-term])).
29 | ||
% Declares the foreign resource 'regexp' and lists the C functions it
% provides; every name listed here has a matching foreign/2 fact in this
% module. The resource itself is loaded lazily by regexp_init/0.
foreign_resource(regexp,
                 [
                  cpp_regex_match,
                  cpp_regex_imatch,
                  cpp_regex_replace,
                  cpp_regex_ireplace,
                  cpp_regex_search_first_str,
                  cpp_regex_isearch_first_str,
                  cpp_regex_search_first,
                  cpp_regex_isearch_first,
                  cpp_regex_search_all_str,
                  cpp_regex_isearch_all_str,
                  cpp_version
                 ]).
44 | ||
45 | :- use_module(probsrc(error_manager)). | |
46 | ||
% regexp_match(+String,+Pattern): case-sensitive variant of regexp_match/3.
regexp_match(String,Pattern) :- regexp_match(String,Pattern,match_case).

% regexp_match(+String,+Pattern,+IgnoreCase):
% match an atom against a regular expression in ECMA syntax, also represented
% as an atom. IgnoreCase is ignore_case or match_case.
% Succeeds iff the foreign call reports result code 1 (match).
% A result code greater than 1 is reported via add_error/3 as an illegal
% pattern; any other code (no match) fails silently.
% If String or Pattern is not an atom (or regexp_init fails), an
% 'Illegal call' message is written to user_error and the call fails.
regexp_match(String,Pattern,IgnoreCase) :-
    atom(String), atom(Pattern),
    regexp_init,
    !,
    %format('regexp_match: Checking String ~w for Pattern ~w with IgnoreCase=~w~n',[String,Pattern,IgnoreCase]),
    (   ignore_case(IgnoreCase)
    ->  cpp_regex_imatch(String,Pattern,1,Res)
    ;   cpp_regex_match(String,Pattern,1,Res)
    ),
    %format('String ~w for Pattern ~w: Result=~w~n',[String,Pattern,Res]),
    (   Res=1
    ->  true
    ;   Res>1
    ->  add_error(regexp,'Illegal Regular Expression Pattern (use ECMAScript syntax): ',Pattern),
        fail
    ).
regexp_match(String,Pattern,IgnoreCase) :-
    % report the offending call under its real predicate name
    format(user_error,'Illegal call: ~w~n',[regexp_match(String,Pattern,IgnoreCase)]),
    fail.
67 | ||
% is_regexp(+Pattern):
% succeeds iff the atom Pattern is accepted as a regular expression.
% The pattern is tried against the empty atom; a result code of 0 or 1
% means the pattern itself was ok, anything from 2 upwards means it was not.
% A non-atom Pattern is reported on user_error as an illegal call.
is_regexp(Pattern) :-
    atom(Pattern), regexp_init, !,
    % TO DO: will still print error messages
    cpp_regex_match('',Pattern,0,Status),
    Status < 2.
is_regexp(Pattern) :-
    format(user_error,'Illegal call: ~w~n',[is_regexp(Pattern)]),
    fail.
77 | ||
% regexp_replace(+String,+Pattern,+ReplStr,-Result):
% case-sensitive variant of regexp_replace/5.
regexp_replace(String,Pattern,ReplStr,Result) :-
    regexp_replace(String,Pattern,match_case,ReplStr,Result).

% regexp_replace(+String,+Pattern,+IgnoreCase,+ReplStr,-Result):
% hand String, Pattern and ReplStr (all atoms) to the C++ replace function
% and unify Result with the atom it returns.
% IgnoreCase (ignore_case or match_case) selects the case-insensitive or
% case-sensitive foreign function.
% If one of the three inputs is not an atom (or regexp_init fails), an
% 'Illegal call' message is written to user_error and the call fails.
regexp_replace(String,Pattern,IgnoreCase,ReplStr,Result) :-
    atom(String),
    atom(Pattern),
    atom(ReplStr),
    regexp_init,
    !,
    (   ignore_case(IgnoreCase)
    ->  cpp_regex_ireplace(String,Pattern,ReplStr,Result)
    ;   cpp_regex_replace(String,Pattern,ReplStr,Result)
    ).
    %,format('Replace ~w for Pattern ~w with ~w: Result=~w~n',[String,Pattern,ReplStr,Result]).
regexp_replace(String,Pattern,IgnoreCase,ReplStr,Result) :-
    format(user_error,'Illegal call: ~w~n',[regexp_replace(String,Pattern,IgnoreCase,ReplStr,Result)]),
    fail.
93 | ||
% regexp_search_first(+String,+Pattern,-Result):
% case-sensitive variant of regexp_search_first/4.
regexp_search_first(String,Pattern,Result) :- regexp_search_first(String,Pattern,match_case,Result).

% regexp_search_first(+String,+Pattern,+IgnoreCase,-Result):
% search for a first match of a pattern; returns empty string if no match found.
% String and Pattern must be atoms; IgnoreCase is ignore_case or match_case.
% Otherwise an 'Illegal call' message is written to user_error and the call fails.
regexp_search_first(String,Pattern,IgnoreCase,Result) :-
    atom(String), atom(Pattern), regexp_init,
    !,
    (   ignore_case(IgnoreCase)
    ->  cpp_regex_isearch_first_str(String,Pattern,Result)
    ;   cpp_regex_search_first_str(String,Pattern,Result)
    ).
    %,format('Search ~w for Pattern ~w: Result=~w~n',[String,Pattern,Result]).
regexp_search_first(String,Pattern,IgnoreCase,Result) :-
    % print the arguments in the order in which they were actually passed
    format(user_error,'Illegal call: ~w~n',[regexp_search_first(String,Pattern,IgnoreCase,Result)]),
    fail.
108 | ||
% regexp_search_first_detailed(+String,+FromPosition,+Pattern,-Result):
% case-sensitive variant of regexp_search_first_detailed/5.
regexp_search_first_detailed(String,FromPosition,Pattern,Result) :-
    regexp_search_first_detailed(String,FromPosition,Pattern,match_case,Result).

% regexp_search_first_detailed(+String,+FromPosition,+Pattern,+IgnoreCase,-Result):
% search for a first match of a pattern, starting at the 1-based position
% FromPosition (the foreign function receives the 0-based offset FromPosition-1);
% returns either match(Pos,Len,[MatchAtom|SubMatches]) or no-match.
% String and Pattern must be atoms, FromPosition an integer >= 1 and
% IgnoreCase one of ignore_case / match_case.
% Invalid calls print a message on user_error and fail; a FromPosition that is
% unbound or not an integer is reported as an illegal call rather than raising
% an arithmetic exception in the comparison.
regexp_search_first_detailed(String,FromPosition,Pattern,IgnoreCase,Result) :-
    atom(String), atom(Pattern),
    integer(FromPosition), FromPosition>0,
    regexp_init,
    !,
    Offset is FromPosition-1,
    (   ignore_case(IgnoreCase)
    ->  cpp_regex_isearch_first(String,Offset,Pattern,Result)
    ;   cpp_regex_search_first(String,Offset,Pattern,Result)
    ).
    %, format('Search ~w for Pattern ~w: Result=~w~n',[String,Pattern,Result]).
regexp_search_first_detailed(String,FromPosition,Pattern,IgnoreCase,Result) :-
    % the type test guards the comparison against unbound / non-numeric input
    integer(FromPosition), FromPosition<1, !,
    format(user_error,'From index must be at least 1: ~w~n',[regexp_search_first_detailed(String,FromPosition,Pattern,IgnoreCase,Result)]),
    fail.
regexp_search_first_detailed(String,FromPosition,Pattern,IgnoreCase,Result) :-
    format(user_error,'Illegal call: ~w~n',[regexp_search_first_detailed(String,FromPosition,Pattern,IgnoreCase,Result)]),
    fail.
129 | ||
130 | % --------------------- | |
131 | ||
132 | :- use_module(library(lists),[reverse/2]). | |
% regexp_search_all(+String,+Pattern,-Result):
% case-sensitive search for all matches, see regexp_search_all/4.
regexp_search_all(String,Pattern,Result) :-
    regexp_search_all(String,Pattern,match_case,Result).

% regexp_isearch_all(+String,+Pattern,-Result):
% case-insensitive search for all matches, see regexp_search_all/4.
regexp_isearch_all(String,Pattern,Result) :-
    regexp_search_all(String,Pattern,ignore_case,Result).

% regexp_search_all(+String,+Pattern,+IgnoreCase,-Result):
% call the C++ search-all function on the atoms String and Pattern and unify
% Result with the reversed list it returns.
% IgnoreCase (ignore_case or match_case) selects the foreign function.
% NOTE(review): the reversal suggests the C++ side builds its list
% last-match-first -- confirm against the C++ source.
% If String or Pattern is not an atom (or regexp_init fails), an
% 'Illegal call' message is written to user_error and the call fails.
regexp_search_all(String,Pattern,IgnoreCase,Result) :-
    atom(String),
    atom(Pattern),
    regexp_init,
    !,
    (   ignore_case(IgnoreCase)
    ->  cpp_regex_isearch_all_str(String,Pattern,ReversedMatches)
    ;   cpp_regex_search_all_str(String,Pattern,ReversedMatches)
    ),
    reverse(ReversedMatches,Result).
    %,format('Search ~w for Pattern ~w: Result=~w~n',[String,Pattern,Result]).
regexp_search_all(String,Pattern,IgnoreCase,Result) :-
    format(user_error,'Illegal call: ~w~n',[regexp_search_all(String,Pattern,IgnoreCase,Result)]),
    fail.
149 | ||
% ignore_case(+Mode):
% succeeds for ignore_case, fails silently for match_case; any other value
% is reported on user_error before failing.
ignore_case(Mode) :-
    (   Mode = ignore_case
    ->  true
    ;   Mode = match_case
    ->  fail
    ;   format(user_error,'Illegal case: ~w~n',[Mode]),
        fail
    ).
153 | ||
154 | % --------------------- | |
155 | ||
% get_cpp_version(-Version):
% make sure the foreign resource is loaded, then unify Version with the term
% returned by the foreign function cpp_version.
%get_cpp_version(unknown) :- !.
get_cpp_version(Version) :-
    regexp_init,
    cpp_version(Version).
159 | ||
160 | % --------------------- | |
161 | ||
162 | :- dynamic loaded/0. | |
163 | ||
164 | :- use_module(probsrc(pathes_lib),[safe_load_foreign_resource/2]). | |
165 | ||
% regexp_init:
% load the foreign resource 'regexp' the first time it is called.
% The loaded/0 flag is asserted before the load is attempted, so later calls
% succeed immediately without trying to load the resource again.
regexp_init :-
    loaded,
    !.
regexp_init :-
    assertz(loaded),
    safe_load_foreign_resource(regexp,regexp).
174 | ||
175 | %assert_dir :- (user:library_directory('.') -> true ; assertz(user:library_directory('.'))). | |
176 | ||
177 |