| 1 | % (c) 2009-2025 Lehrstuhl fuer Softwaretechnik und Programmiersprachen, | |
| 2 | % Heinrich Heine Universitaet Duesseldorf | |
| 3 | % This software is licenced under EPL 1.0 (http://www.eclipse.org/org/documents/epl-v10.html) | |
| 4 | ||
| 5 | :- module(hashing,[my_term_hash/2, term_depth_hash/3, sdbm_term_hash/2, efficient_index/1]). | |
| 6 | ||
| 7 | :- use_module(library(types),[must_be/4]). | |
| 8 | :- use_module(tools_platform, [is_tagged_integer/1, platform_is_64_bit/0]). | |
| 9 | ||
| 10 | :- use_module(module_information,[module_info/2]). | |
| 11 | :- module_info(group,state_space). | |
| 12 | :- module_info(description,'This module computes hash values (mainly for states) for efficient indexing.'). | |
| 13 | ||
| 14 | % -------------------------------------------- | |
| 15 | ||
| 16 | :- use_module(self_check). | |
| 17 | ||
| 18 | :- assert_must_succeed(( hashing:my_term_hash([1,2,3],X), hashing:efficient_index(X) )). | |
| 19 | :- assert_must_succeed(( hashing:my_term_hash([1,2,3],X), hashing:my_term_hash([1,2,4],Y), X\=Y )). | |
| 20 | ||
| 21 | :- if(current_prolog_flag(dialect, sicstus)). | |
| 22 | ||
| 23 | % On SICStus, use term_hash/3 with specific options. | |
| 24 | :- use_module(library(terms),[term_hash/3]). | |
| 25 | :- if(platform_is_64_bit). | |
| 26 | my_term_hash(T,Res) :- super_hash(T,Res). | |
| 27 | :- else. | |
| 28 | my_term_hash(T,Res) :- term_hash(T, | |
| 29 | [range(smallint), /* will use full 32 bit on 64 bit systems, 28 bit on 32 bit system */ | |
| 30 | algorithm(sdbm), % other options jenkins, hsieh, default | |
| 31 | depth(infinite),if_var(ignore)],Res). | |
| 32 | % collisions seem with more recent versions of SICStus to no longer be a problem !? | |
| 33 | % sdbm seems to generate less collisions | |
| 34 | :- endif. | |
| 35 | ||
| 36 | % On 64-bit machines we can use the following for efficient indexing: | |
| 37 | :- public super_hash/2. | |
| 38 | super_hash(Term,Hash) :- %write(super_hash(Term)),nl, | |
| 39 | %term_hash(Term,[range(smallint),algorithm(sdbm),depth(infinite),if_var(ignore)],H1), | |
| 40 | %term_hash(Term,[range(smallint32),algorithm(jenkins),depth(infinite),if_var(ignore)],H2), | |
| 41 | sdbm_term_hash(Term,H1), | |
| 42 | jenkins_term_hash(Term,H2), | |
| 43 | Hash is H1 + (H2 << 32). % should be in range 0.. 2**60 | |
| 44 | % we could still try and use negative numbers | |
| 45 | ||
| 46 | % see B/PerformanceTests/ModelChecking/EnumSetLookups.mch or B/SysML2B/BModules_081214/OutputInterface_1.mch | |
| 47 | % for examples where the optimisations below make a noticeable difference: | |
| 48 | ||
| 49 | % an optimized version of term_hash(Term,[range(smallint),algorithm(sdbm),depth(infinite),if_var(ignore)],HashValue): | |
| 50 | % to avoid running term_hash_options which can add considerable overhead for many small terms: | |
| 51 | % WARNING: needs to be updated in case SICStus changes internals of library(terms)!! | |
| 52 | sdbm_term_hash(Term, HashValue) :- | |
| 53 | Goal = sdbm_term_hash(Term, HashValue), | |
| 54 | Which=3, Range = -4, Depth = -1, VarHandling = ignore, TermArgNo = 1, | |
| 55 | terms:term_hash1(Term, HashValue, Which, Range, Depth, VarHandling, Goal, TermArgNo). | |
| 56 | ||
| 57 | % an optimized version of term_hash(Term,[range(smallint32),algorithm(jenkins),depth(infinite),if_var(ignore)],HashValue): | |
| 58 | % WARNING: needs to be updated in case SICStus changes internals of library(terms)!! | |
| 59 | jenkins_term_hash(Term, HashValue) :- | |
| 60 | Goal = jenkins_term_hash(Term, HashValue), | |
| 61 | Which=4, Range = -2, Depth = -1, VarHandling = ignore, TermArgNo = 1, | |
| 62 | terms:term_hash1(Term, HashValue, Which, Range, Depth, VarHandling, Goal, TermArgNo). | |
| 63 | ||
| 64 | % the code from terms.pl for term_hash/3 with a write to see the options: | |
| 65 | %term_hash(Term, Options, HashValue) :- | |
| 66 | % Goal = term_hash(Term, Options, HashValue), | |
| 67 | % OptionsArgNo = 2, | |
| 68 | % terms:term_hash_options(Options, Which, Range, Depth, VarHandling, Goal, OptionsArgNo), | |
| 69 | % TermArgNo = 1, | |
| 70 | % write(term_hash1(Which,Range,Depth,VarHandling,Goal)),nl,trace, | |
| 71 | % terms:term_hash1(Term, HashValue, Which, Range, Depth, VarHandling, Goal, TermArgNo). | |
| 72 | ||
| 73 | ||
| 74 | term_depth_hash(Term, Depth, Hash) :- | |
| 75 | term_hash(Term,[range(smallint),algorithm(sdbm),depth(Depth),if_var(ignore)],H1), | |
| 76 | term_hash(Term,[range(smallint32),algorithm(jenkins),depth(Depth),if_var(ignore)],H2), | |
| 77 | Hash is H1 + (H2 << 32). | |
| 78 | %term_depth_hash(Term, Depth, Hash) :- | |
| 79 | % term_hash(Term, [if_var(ignore), depth(Depth)], Hash). | |
| 80 | ||
| 81 | :- else. % end of sicstus | |
| 82 | ||
| 83 | :- if(predicate_property(variant_hash(_, _), _)). | |
| 84 | % Use SWI's variant_hash/2 if possible, which can handle variables in T. | |
| 85 | my_term_hash(T,Res) :- variant_hash(T,Res). | |
| 86 | % Note: in ProB's state space only ground terms should be stored; but my_term_hash is also used in other circumstances | |
| 87 | % One could also use variant_sha1; reduces collisions but is more expensive | |
| 88 | % currently SWI's term_hash generates collisions, e.g., when model checking: | |
| 89 | % prob_examples/public_examples/B/PerformanceTests/ModelChecking/Lift_1Million_v2.mch | |
| 90 | :- else. | |
| 91 | % Fallback to basic term_hash/2 if no version with variable support is available. | |
| 92 | my_term_hash(T,Res) :- | |
| 93 | term_hash(T,Res), | |
| 94 | (var(Res) -> must_be(T, ground, my_term_hash(T,Res), 1) ; true). | |
| 95 | :- endif. | |
| 96 | ||
| 97 | sdbm_term_hash(T,Res) :- my_term_hash(T,Res). | |
| 98 | ||
| 99 | % term_depth_hash/3 allows hashing with a depth limit like term_hash/4, | |
| 100 | % but tries to not limit the range. | |
| 101 | :- use_module(library(terms),[term_hash/4]). | |
| 102 | :- if((current_prolog_flag(dialect, swi), current_prolog_flag(max_tagged_integer, MaxTI), MaxTI >= 2147483647)). | |
| 103 | term_depth_hash(Term, Depth, Hash) :- | |
| 104 | % 2**31 - 1 is the maximum Range allowed by SWI. | |
| 105 | % (We only use this maximum if max_tagged_integer is large enough to handle it, i. e. on 64-bit.) | |
| 106 | term_hash(Term, Depth, 2147483647, Hash), | |
| 107 | (var(Hash) -> must_be(Term, ground, term_depth_hash(Term,Depth,Hash), 1) ; true). | |
| 108 | :- else. | |
| 109 | term_depth_hash(Term, Depth, Hash) :- | |
| 110 | % 2**24 - 1 is a relatively safe portable maximum | |
| 111 | % and is also max_tagged_integer on 32-bit SWI. | |
| 112 | term_hash(Term, Depth, 16777215, Hash), | |
| 113 | (var(Hash) -> must_be(Term, ground, term_depth_hash(Term,Depth,Hash), 1) ; true). | |
| 114 | :- endif. | |
| 115 | ||
| 116 | :- endif. | |
| 117 | ||
| 118 | % check if a hash value is an efficient for first argument indexing | |
| 119 | efficient_index(X) :- number(X), is_tagged_integer(X). | |
| 120 |