| 1 | :- module(code2vec, [ | |
| 2 | leaf_paths/2 | |
| 3 | ]). | |
| 4 | ||
| 5 | % The code2vec module contains predicates to split a B AST into a list of | |
| 6 | % paths leading from leaf to leaf. | |
| 7 | % Such paths are the base for the code2vec algorithm by Alon et al. | |
| 8 | % | |
| 9 | % Uri Alon, Meital Zilberstein, Omer Levy, and Eran Yahav. 2019. | |
| 10 | % Code2vec: learning distributed representations of code. | |
| 11 | % Proc. ACM Program. Lang. 3, POPL, Article 40 (January 2019), 29 pages. | |
| 12 | % https://doi.org/10.1145/3290353 | |
| 13 | ||
| 14 | ||
| 15 | :- use_module(probsrc(bsyntaxtree), [safe_syntaxelement/5, | |
| 16 | syntaxtraversion/6]). | |
| 17 | :- use_module(library(lists)). | |
| 18 | ||
| 19 | ||
| 20 | %% leaf_paths(+AST, -Paths). | |
| 21 | % | |
| 22 | % AST is a B AST. Paths is a list of paths leading from leaf to leaf. | |
| 23 | leaf_paths(BAst, Paths) :- | |
| 24 | unwrapped_b(BAst, Expr, Subs, Constants), | |
| 25 | paths_and_fragments(Expr, Subs, Constants, Paths, _). | |
| 26 | ||
| 27 | %% paths_and_fragments(RootExpression, Subexpressions, Constants, Paths, Fragments). | |
| 28 | % | |
| 29 | % RootExpression: Node type of the currently considered AST's root node. | |
| 30 | % Subexpressions: List of sub-ASTs from the current node. | |
| 31 | % constants: Constants that belong to the current root node. | |
| 32 | % Paths: List of paths from leaf to leaf in the considered AST. | |
| 33 | % Fragments: List of path fragments (only leaf to root sequences). | |
| 34 | % | |
| 35 | % Note: If no constants are given, the empty list is used. | |
| 36 | % However, if a constant is given, it is usually not wrapped in a list. | |
| 37 | paths_and_fragments(Expr, [], [], [], [Expr]). | |
| 38 | paths_and_fragments(Expr, [], C, [], Fragments) :- | |
| 39 | constant_fragments(Expr, C, Fragments). | |
| 40 | paths_and_fragments(Expr, [Sub], [], Paths, Fragments) :- | |
| 41 | % Here we only have a linear sub-path, so no new paths are generated. | |
| 42 | unwrapped_b(Sub, SubExpr, SubSubs, SubConstants), | |
| 43 | paths_and_fragments(SubExpr, SubSubs, SubConstants, Paths, SubFragments), | |
| 44 | % Need to add the expression to all of the fragments. | |
| 45 | maplist(couple_fragment(Expr), SubFragments, Fragments). | |
| 46 | paths_and_fragments(Expr, [Sub1W, Sub2W | Subs], Constants, Paths, Fragments) :- | |
| 47 | % Here we have multiple sub-paths so we need to actually generate new paths. | |
| 48 | % Idea is to get all paths and fragments, then join their fragments over the root. | |
| 49 | sub_paths_and_fragments(Expr, [Sub1W, Sub2W | Subs], SubPaths, SubFragments), | |
| 50 | joined_sub_paths(Expr, SubFragments, NewPaths), | |
| 51 | % Consider constants if necessary. | |
| 52 | (Constants \= [] -> | |
| 53 | paths_and_fragments(Expr, [], Constants, _, CFragments) | |
| 54 | ; | |
| 55 | CFragments = [] | |
| 56 | ), | |
| 57 | % Note: SubFragments is list of lists. | |
| 58 | append([CFragments | SubFragments], Fragments), | |
| 59 | append(SubPaths, NewPaths, Paths). | |
| 60 | ||
| 61 | sub_paths_and_fragments(Expression, Subs, Paths, Fragments) :- | |
| 62 | sub_paths_and_fragments_aux(Subs, Expression, Paths, Fragments). | |
| 63 | ||
| 64 | sub_paths_and_fragments_aux([], _, [], []). | |
| 65 | sub_paths_and_fragments_aux([Sub1 | Subs], Expr, SubPaths, [Sub1Fragments | SubFragments]) :- | |
| 66 | paths_and_fragments(Expr, [Sub1], [], Sub1Paths, Sub1Fragments), | |
| 67 | % Append to add variable at end of list, then finish list in tail recursion. | |
| 68 | append(Sub1Paths, SubRPaths, SubPaths), | |
| 69 | sub_paths_and_fragments_aux(Subs, Expr, SubRPaths, SubFragments). | |
| 70 | ||
| 71 | ||
| 72 | %% unwrapped_b(BAst, Expr, Subs, Constants). | |
| 73 | % | |
| 74 | % BAst: B AST to unwrap. | |
| 75 | % Expr: Node type of the currently considered AST's root node, e.g. 'member'. | |
| 76 | % Subs: List of sub-ASTs from the current node. | |
| 77 | % Constants: Constants that belong to the current root node. | |
| 78 | unwrapped_b(Sub, Expr, Subs, Constants) :- | |
| 79 | syntaxtraversion(Sub, BExpr, _, _, _, _), | |
| 80 | safe_syntaxelement(BExpr, Subs, _, _, Constants), | |
| 81 | BExpr =.. [Expr | _]. | |
| 82 | ||
| 83 | ||
| 84 | % Needed solely for the maplist call in paths_and_fragments/5. | |
| 85 | couple_fragment(F1, [F2], F2-F1) :- !. % Special case that can arise. | |
| 86 | couple_fragment(F1, F2, F2-F1). | |
| 87 | ||
| 88 | %% joined_paths_over_root(Root, Fragments1, Fragments2, Paths). | |
| 89 | % | |
| 90 | % Generates paths from the given fragments. | |
| 91 | % This works as joined_paths/4, but only considers fragments of the form | |
| 92 | % 'a-...-n-Root'. | |
| 93 | % The fragments are then joined at the given root into paths. | |
| 94 | joined_paths_over_root(Root, Sub1Fragments, Sub2Fragments, Sub12Paths) :- | |
| 95 | % Strip the root and pass it to joined_paths/4. | |
| 96 | findall(Frag1, member(Frag1-Root, Sub1Fragments), Frag1s), | |
| 97 | findall(Frag2, member(Frag2-Root, Sub2Fragments), Frag2s), | |
| 98 | joined_paths(Root, Frag1s, Frag2s, Sub12Paths). | |
| 99 | ||
| 100 | ||
| 101 | %% joined_sub_paths(Root, SubFragments, Paths). | |
| 102 | % | |
| 103 | % Joins each set of sub fragments with each other over the root as path. | |
| 104 | % | |
| 105 | % Root: Root node of the AST. | |
| 106 | % SubFragments: List of list of fragments of the form 'a-...-n-Root'. | |
| 107 | % Paths: List of resulting paths connected at the Root. | |
| 108 | joined_sub_paths(Root, [Right | Left], Paths) :- | |
| 109 | joined_sub_paths_aux([], Right, Left, [], Root, Paths). | |
| 110 | ||
| 111 | %% joined_sub_paths_aux(Remaining, Curr, LeftOrCurr, RightOfCurr, Root, Paths). | |
| 112 | joined_sub_paths_aux([], _, [], _, _, []). | |
| 113 | joined_sub_paths_aux([], OldCurrent, [NewCurrent | Left], Right, Root, Paths) :- | |
| 114 | NewRight = [OldCurrent | Right], | |
| 115 | joined_sub_paths_aux(NewRight, NewCurrent, Left, NewRight, Root, Paths). | |
| 116 | joined_sub_paths_aux([Frags|Rest], Cur, Left, Right, Root, Paths) :- | |
| 117 | joined_paths_over_root(Root, Cur, Frags, NewPaths), | |
| 118 | % Append to add variable at end of list, then finish list in tail recursion. | |
| 119 | append(NewPaths, NextPaths, Paths), | |
| 120 | joined_sub_paths_aux(Rest, Cur, Left, Right, Root, NextPaths). | |
| 121 | ||
| 122 | ||
| 123 | ||
| 124 | %% joined_paths(Root, LeftFragments, RightFragments, Paths). | |
| 125 | % | |
| 126 | % Generates paths from the given fragments. | |
| 127 | % It is assumed that all fragments on the left should lead up to the root | |
| 128 | % and the right fragments should lead down from the root. | |
| 129 | % | |
| 130 | % For a root 'q', left fragment 'l-frag' and right fragment 'r-frag', | |
| 131 | % the generated path is 'l-frag-root(q)-(frag-r)'. | |
| 132 | joined_paths(Expr, Sub1Fragments, Sub2Fragments, Sub12Paths) :- | |
| 133 | joined_paths_aux([], [], Expr, Sub1Fragments, Sub2Fragments, Sub12Paths). | |
| 134 | ||
| 135 | joined_paths_aux([], _, _, _, [], []). | |
| 136 | joined_paths_aux([], _, Expr, F1s, [F2|F2s], Paths) :- | |
| 137 | % Still some F2 fragments left, so we need to couple them with all F1s. | |
| 138 | joined_paths_aux(F1s, [F2|F2s], Expr, F1s, F2s, Paths). | |
| 139 | joined_paths_aux([_|F1s], [], Expr, AllF1s, F2s, Paths) :- | |
| 140 | % Exhausted all F2s for our current F1, so consider remaining F1s. | |
| 141 | joined_paths_aux(F1s, F2s, Expr, AllF1s, F2s, Paths). | |
| 142 | joined_paths_aux([F1 | Sub1Fragments], [F2 | Sub2Fragments], Expr, AllSub1, RestSub2, [Path | Paths]) :- | |
| 143 | reverse_fragment(F1, RevF1), | |
| 144 | Path = (F2)-root(Expr)-(RevF1), | |
| 145 | joined_paths_aux(Sub1Fragments, [F2 | Sub2Fragments], Expr, AllSub1, RestSub2, Paths). | |
| 146 | ||
| 147 | ||
| 148 | %% reverse_fragment(Fragment, ReversedFragment). | |
| 149 | % | |
| 150 | % Reverses a fragment of the form a-...-n to n-...-a. | |
| 151 | reverse_fragment(Fs-F, RevFragment) :- | |
| 152 | !, | |
| 153 | reverse_fragment_aux(Fs, F, RevFragment). | |
| 154 | reverse_fragment(F, F). | |
| 155 | ||
| 156 | reverse_fragment_aux(Fs-F, Rev, RevFragment) :- | |
| 157 | !, | |
| 158 | reverse_fragment_aux(Fs, Rev-F, RevFragment). | |
| 159 | reverse_fragment_aux(F, Rev, Rev-F). | |
| 160 | ||
| 161 | ||
| 162 | %% constant_fragments(Expr, Constants, Fragments). | |
| 163 | ||
| 164 | % Generates fragments for the given expression with all constants. | |
| 165 | % As constants via bsyntaxtree:syntaxelement/5 may be wrapped as a list, | |
| 166 | % the case of unwrapping constants in such cases is handled here as well. | |
| 167 | constant_fragments(_, [], []) :- !. | |
| 168 | constant_fragments(Expr, [C|Cs], [C-Expr|Fragments]) :- | |
| 169 | !, | |
| 170 | constant_fragments(Expr, Cs, Fragments). | |
| 171 | constant_fragments(Expr, C, [C-Expr]). |