| 1 | % (c) 2018-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
| 2 | % Heinrich Heine Universitaet Duesseldorf | |
| 3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) | |
| 4 | ||
| 5 | :- module(regexp, [regexp_init/0, | |
| 6 | regexp_match/2, regexp_match/4, | |
| 7 | is_regexp/1, | |
| 8 | regexp_replace/4, regexp_replace/5, | |
| 9 | regexp_search_first/3, regexp_search_first/4, | |
| 10 | regexp_search_first_detailed/4, regexp_search_first_detailed/5, | |
| 11 | regexp_search_all/3, regexp_isearch_all/3, regexp_search_all/4, | |
| 12 | get_cpp_version/1]). | |
| 13 | ||
| 14 | ||
/* Declarations of the foreign predicates supplied by the `regexp` foreign
   resource (see foreign_resource/2 below).  Variants whose name contains an
   extra `i` (imatch, ireplace, isearch) ignore case.

   Argument conventions as used by the wrappers in this module:
     cpp_regex_(i)match(String, Pattern, Flag, Res)
         Res is 1 for a match, 0 for no match, and a value above 1 when the
         pattern is rejected.  Flag is passed as 1 by regexp_match/4 and as 0
         by is_regexp/1 (presumably an error-reporting switch -- TODO confirm
         against the foreign sources).
     cpp_regex_(i)replace(String, Pattern, Replacement, ResultAtom)
     cpp_regex_(i)search_all_str(String, Pattern, ResultTerm)
     cpp_regex_(i)search_first_str(String, Pattern, ResultAtom)
     cpp_regex_(i)search_first(String, Offset, Pattern, ResultTerm)
         Offset is zero-based (the Prolog wrapper subtracts 1 from its
         one-based start position).
     cpp_version(VersionTerm)
*/

% full match
foreign(cpp_regex_match,
        cpp_regex_match(+string, +string, +integer, [-integer])).
foreign(cpp_regex_imatch,
        cpp_regex_imatch(+string, +string, +integer, [-integer])).

% replacement
foreign(cpp_regex_replace,
        cpp_regex_replace(+string, +string, +string, [-atom])).
foreign(cpp_regex_ireplace,
        cpp_regex_ireplace(+string, +string, +string, [-atom])).

% all matches
foreign(cpp_regex_search_all_str,
        cpp_regex_search_all_str(+string, +string, [-term])).
foreign(cpp_regex_isearch_all_str,
        cpp_regex_isearch_all_str(+string, +string, [-term])).

% first match, returned as an atom
foreign(cpp_regex_search_first_str,
        cpp_regex_search_first_str(+string, +string, [-atom])).
foreign(cpp_regex_isearch_first_str,
        cpp_regex_isearch_first_str(+string, +string, [-atom])).

% first match from a given offset, returned as a term
foreign(cpp_regex_search_first,
        cpp_regex_search_first(+string, +integer, +string, [-term])).
foreign(cpp_regex_isearch_first,
        cpp_regex_isearch_first(+string, +integer, +string, [-term])).

% version information of the foreign code
foreign(cpp_version,
        cpp_version([-term])).
| 29 | ||
% The `regexp` foreign resource and the foreign functions it exports;
% every entry has a matching foreign/2 declaration in this file.
foreign_resource(regexp,
                 [ cpp_regex_match,            cpp_regex_imatch,
                   cpp_regex_replace,          cpp_regex_ireplace,
                   cpp_regex_search_first_str, cpp_regex_isearch_first_str,
                   cpp_regex_search_first,     cpp_regex_isearch_first,
                   cpp_regex_search_all_str,   cpp_regex_isearch_all_str,
                   cpp_version
                 ]).
| 44 | ||
| 45 | :- use_module(probsrc(error_manager)). | |
| 46 | ||
% regexp_match(+String, +Pattern)
% Case-sensitive match without source position information
% (the span passed on for error reporting is `unknown`).
regexp_match(String,Pattern) :- regexp_match(String,Pattern,match_case,unknown).

% regexp_match(+String, +Pattern, +IgnoreCase, +Span)
% Match an atom against a regular expression in ECMAScript syntax, which is
% also represented as an atom.
%   IgnoreCase: ignore_case or match_case (see ignore_case/1).
%   Span:       passed to add_error/4 when the pattern is rejected.
% Succeeds iff the foreign matcher returns 1.  A result of 0 (no match) fails
% silently; a result above 1 means the pattern was rejected, which is reported
% via add_error/4 before failing.  If String or Pattern is not an atom, or the
% foreign resource cannot be initialised, a diagnostic is written to user_error
% and the call fails.
regexp_match(String,Pattern,IgnoreCase,Span) :-
    atom(String), atom(Pattern),
    regexp_init,
    !,
    (   ignore_case(IgnoreCase)
    ->  cpp_regex_imatch(String,Pattern,1,Res)
    ;   cpp_regex_match(String,Pattern,1,Res)
    ),
    (   Res=1
    ->  true
    ;   Res>1
    ->  add_error(regexp,'Illegal Regular Expression Pattern (use ECMAScript syntax): ',Pattern,Span),
        fail
    ).
regexp_match(String,Pattern,IgnoreCase,_) :-
    % the functor printed here used to be misspelt as regepx_match
    format(user_error,'Illegal call: ~w~n',[regexp_match(String,Pattern,IgnoreCase)]),
    fail.
| 67 | ||
% is_regexp(+Pattern)
% Succeeds iff the atom Pattern is accepted as a regular expression by the
% foreign matcher.  The check matches the empty atom against Pattern and looks
% only at the status: 0 (no match) or 1 (match) both mean the pattern was ok.
% For a non-atom argument, or when the foreign resource cannot be initialised,
% a diagnostic is written to user_error and the call fails.
is_regexp(Pattern) :-
    (   atom(Pattern),
        regexp_init
    ->  cpp_regex_match('',Pattern,0,Status), % TO DO: will still print error messages
        Status<2
    ;   format(user_error,'Illegal call: ~w~n',[is_regexp(Pattern)]),
        fail
    ).
| 77 | ||
% regexp_replace(+String, +Pattern, +ReplStr, -Result)
% Case-sensitive variant of regexp_replace/5.
regexp_replace(String,Pattern,ReplStr,Result) :-
    regexp_replace(String,Pattern,match_case,ReplStr,Result).

% regexp_replace(+String, +Pattern, +IgnoreCase, +ReplStr, -Result)
% Apply the foreign regex replacement with pattern Pattern and replacement
% ReplStr to the atom String; Result is the atom returned by the foreign code.
% IgnoreCase is ignore_case or match_case (see ignore_case/1).
% All three inputs must be atoms; otherwise (or when the foreign resource
% cannot be initialised) a diagnostic is written to user_error and the call
% fails.
regexp_replace(String,Pattern,IgnoreCase,ReplStr,Result) :-
    atom(String),
    atom(Pattern),
    atom(ReplStr),
    regexp_init,
    !,
    do_regexp_replace(IgnoreCase,String,Pattern,ReplStr,Result).
regexp_replace(String,Pattern,IC,ReplStr,Result) :-
    format(user_error,'Illegal call: ~w~n',[regexp_replace(String,Pattern,IC,ReplStr,Result)]),
    fail.

% do_regexp_replace(+IgnoreCase, +String, +Pattern, +ReplStr, -Result)
% Private helper: pick the case-insensitive or case-sensitive foreign
% replacement function according to IgnoreCase.
do_regexp_replace(IgnoreCase,String,Pattern,ReplStr,Result) :-
    ignore_case(IgnoreCase),
    !,
    cpp_regex_ireplace(String,Pattern,ReplStr,Result).
do_regexp_replace(_,String,Pattern,ReplStr,Result) :-
    cpp_regex_replace(String,Pattern,ReplStr,Result).
| 93 | ||
% regexp_search_first(+String, +Pattern, -Result)
% Case-sensitive variant of regexp_search_first/4.
regexp_search_first(String,Pattern,Result) :- regexp_search_first(String,Pattern,match_case,Result).

% regexp_search_first(+String, +Pattern, +IgnoreCase, -Result)
% Search the atom String for a first match of Pattern; Result is the atom
% returned by the foreign code (the empty atom if no match is found).
% IgnoreCase is ignore_case or match_case (see ignore_case/1).
% If String or Pattern is not an atom, or the foreign resource cannot be
% initialised, a diagnostic is written to user_error and the call fails.
regexp_search_first(String,Pattern,IgnoreCase,Result) :-
    atom(String), atom(Pattern), regexp_init,
    !,
    (   ignore_case(IgnoreCase)
    ->  cpp_regex_isearch_first_str(String,Pattern,Result)
    ;   cpp_regex_search_first_str(String,Pattern,Result)
    ).
regexp_search_first(String,Pattern,IgnoreCase,Result) :-
    % print the arguments in the order of the actual call
    % (Result and IgnoreCase used to be swapped in this message)
    format(user_error,'Illegal call: ~w~n',[regexp_search_first(String,Pattern,IgnoreCase,Result)]),
    fail.
| 108 | ||
% regexp_search_first_detailed(+String, +FromPosition, +Pattern, -Result)
% Case-sensitive variant of regexp_search_first_detailed/5.
regexp_search_first_detailed(String,FromPosition,Pattern,Result) :-
    regexp_search_first_detailed(String,FromPosition,Pattern,match_case,Result).

% regexp_search_first_detailed(+String, +FromPosition, +Pattern, +IgnoreCase, -Result)
% Search for a first match of Pattern in the atom String, starting at the
% one-based position FromPosition.
% Result is the term produced by the foreign code: either
% match(Pos,Len,[MatchAtom|SubMatches]) or no-match.
% IgnoreCase is ignore_case or match_case (see ignore_case/1).
% FromPosition must be an integer >= 1.  The integer/1 guards make sure that
% an unbound or non-integer position ends up in the 'Illegal call' clause
% instead of raising an arithmetic exception in the comparison.
regexp_search_first_detailed(String,FromPosition,Pattern,IgnoreCase,Result) :-
    atom(String), atom(Pattern),
    integer(FromPosition), FromPosition>0,
    regexp_init,
    !,
    Offset is FromPosition-1,  % the foreign function expects a zero-based offset
    (   ignore_case(IgnoreCase)
    ->  cpp_regex_isearch_first(String,Offset,Pattern,Result)
    ;   cpp_regex_search_first(String,Offset,Pattern,Result)
    ).
regexp_search_first_detailed(String,From,Pattern,IgnoreCase,Result) :-
    integer(From), From<1, !,
    format(user_error,'From index must be at least 1: ~w~n',[regexp_search_first_detailed(String,From,Pattern,IgnoreCase,Result)]),
    fail.
regexp_search_first_detailed(String,From,Pattern,IgnoreCase,Result) :-
    format(user_error,'Illegal call: ~w~n',[regexp_search_first_detailed(String,From,Pattern,IgnoreCase,Result)]),
    fail.
| 129 | ||
| 130 | % --------------------- | |
| 131 | ||
| 132 | :- use_module(library(lists),[reverse/2]). | |
% regexp_search_all(+String, +Pattern, -Result)
% Case-sensitive search for all matches of Pattern in String.
regexp_search_all(String,Pattern,Result) :-
    regexp_search_all(String,Pattern,match_case,Result).

% regexp_isearch_all(+String, +Pattern, -Result)
% Same as regexp_search_all/3, but ignoring case.
regexp_isearch_all(String,Pattern,Result) :-
    regexp_search_all(String,Pattern,ignore_case,Result).

% regexp_search_all(+String, +Pattern, +IgnoreCase, -Result)
% Search the atom String for all matches of the atom Pattern.
% The list delivered by the foreign code is reversed before it is returned
% (presumably the foreign code collects the matches last-first -- TODO confirm).
% IgnoreCase is ignore_case or match_case (see ignore_case/1).
% If String or Pattern is not an atom, or the foreign resource cannot be
% initialised, a diagnostic is written to user_error and the call fails.
regexp_search_all(String,Pattern,IgnoreCase,Result) :-
    (   atom(String),
        atom(Pattern),
        regexp_init
    ->  (   ignore_case(IgnoreCase)
        ->  cpp_regex_isearch_all_str(String,Pattern,ForeignMatches)
        ;   cpp_regex_search_all_str(String,Pattern,ForeignMatches)
        ),
        reverse(ForeignMatches,Result)
    ;   format(user_error,'Illegal call: ~w~n',[regexp_search_all(String,Pattern,IgnoreCase,Result)]),
        fail
    ).
| 149 | ||
% ignore_case(+Flag)
% Succeeds iff Flag is the atom ignore_case; fails silently for match_case.
% Any other value, including an unbound variable, is reported on user_error
% and treated as "do not ignore case" (the call fails).
% The flag is compared with ==/2 rather than unified, so that an unbound
% argument is neither bound to ignore_case nor silently accepted as such.
ignore_case(Flag) :- Flag == ignore_case, !.
ignore_case(Flag) :- Flag == match_case, !, fail.
ignore_case(Flag) :- format(user_error,'Illegal case: ~w~n',[Flag]), fail.
| 153 | ||
| 154 | % --------------------- | |
| 155 | ||
% get_cpp_version(-Version)
% Version is the term reported by the foreign function cpp_version/1; the
% foreign resource is initialised first if necessary.
% (Disabled stub, kept for reference:  get_cpp_version(unknown) :- !. )
get_cpp_version(Version) :-
    regexp_init,
    cpp_version(Version).
| 159 | ||
| 160 | % --------------------- | |
| 161 | ||
% loaded/0 records that regexp_init/0 has already been run once.
:- dynamic loaded/0.

:- use_module(probsrc(pathes_lib),[safe_load_foreign_resource/2]).

% regexp_init
% Load the `regexp` foreign resource on the first call; later calls succeed
% immediately.  Note that the loaded/0 flag is recorded before the load is
% attempted, so the load is tried at most once, whatever its outcome.
regexp_init :-
    loaded,
    !.
regexp_init :-
    assertz(loaded),
    safe_load_foreign_resource(regexp,regexp).
| 174 | ||
| 175 | %assert_dir :- (user:library_directory('.') -> true ; assertz(user:library_directory('.'))). | |
| 176 | ||
| 177 |