Filename | /usr/local/src/github.com/foswiki/core/lib/Foswiki/Query/HoistREs.pm |
Statements | Executed 20 statements in 3.26ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 59µs | 192µs | BEGIN@58 | Foswiki::Query::HoistREs::
1 | 1 | 1 | 34µs | 43µs | hoist | Foswiki::Query::HoistREs::
1 | 1 | 1 | 28µs | 35µs | BEGIN@52 | Foswiki::Query::HoistREs::
1 | 1 | 1 | 16µs | 34µs | BEGIN@53 | Foswiki::Query::HoistREs::
1 | 1 | 1 | 9µs | 9µs | _hoistAND | Foswiki::Query::HoistREs::
1 | 1 | 1 | 9µs | 9µs | BEGIN@55 | Foswiki::Query::HoistREs::
1 | 1 | 1 | 9µs | 9µs | BEGIN@56 | Foswiki::Query::HoistREs::
0 | 0 | 0 | 0s | 0s | _hoistConstant | Foswiki::Query::HoistREs::
0 | 0 | 0 | 0s | 0s | _hoistDOT | Foswiki::Query::HoistREs::
0 | 0 | 0 | 0s | 0s | _hoistEQ | Foswiki::Query::HoistREs::
0 | 0 | 0 | 0s | 0s | _hoistOR | Foswiki::Query::HoistREs::
0 | 0 | 0 | 0s | 0s | _monTerm | Foswiki::Query::HoistREs::
0 | 0 | 0 | 0s | 0s | _monitor | Foswiki::Query::HoistREs::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # See bottom of file for license and copyright information | ||||
2 | |||||
3 | =begin TML | ||||
4 | |||||
5 | ---+ package Foswiki::Query::HoistREs | ||||
6 | |||||
7 | Static functions to extract regular expressions from queries. The REs can | ||||
8 | be used in caching stores that use the Foswiki standard inline meta-data | ||||
9 | representation to pre-filter topic lists for more efficient query matching. | ||||
10 | |||||
11 | See =Store/QueryAlgorithms/BruteForce.pm= for an example of usage. | ||||
12 | |||||
13 | Note that this hoisting is very crude. At this point in time the | ||||
14 | functions don't attempt to do anything complicated, like re-ordering | ||||
15 | the query. They simply hoist up expressions on either side of an AND, | ||||
16 | where the expressions apply to a single domain. | ||||
17 | |||||
18 | The ideal would be to rewrite the query for AND/OR evaluation i.e. an | ||||
19 | expression of the form (A and B) or (C and D). However this is | ||||
20 | complicated by the fact that there are three search domains (the web | ||||
21 | name, the topic name, and the topic text) that may be freely | ||||
22 | intermixed in the query, but cannot be mixed in the generated search | ||||
23 | expressions. The problem becomes one of rewriting the query to | ||||
24 | separate these three sets. For example, a query such as: | ||||
25 | |||||
26 | name='Topic' OR Field='maes' OR web='Trash' | ||||
27 | |||||
28 | requires three searches. We have to filter on name='Topic', separately | ||||
29 | filter on Field='maes' and on web='Trash', and then union the sets. | ||||
30 | |||||
31 | This gets complicated when the sets are intermixed; for example, | ||||
32 | |||||
33 | (name='Topic' OR Field='maes') AND (web='Trash' OR Maes="field") | ||||
34 | |||||
35 | Because the Field= terms on each side of the AND could potentially | ||||
36 | match any topic, we can't usefully hoist the name= or web= sub-terms. | ||||
37 | We can, however, hoist the Field subqueries. Now, what happens when we | ||||
38 | have an expression like this? | ||||
39 | |||||
40 | (name='Topic' OR Field='maes') AND (web='Trash') | ||||
41 | |||||
42 | Obviously we can pre-filter on the web='Trash' term, but we can't | ||||
43 | filter on name="Topic" because it is part of an OR. | ||||
44 | |||||
45 | If you think I'm making this too complicated, please feel free to | ||||
46 | implement your own superior heuristics! | ||||
47 | |||||
48 | =cut | ||||
49 | |||||
50 | package Foswiki::Query::HoistREs; | ||||
51 | |||||
52 | 2 | 50µs | 2 | 43µs | # spent 35µs (28+8) within Foswiki::Query::HoistREs::BEGIN@52 which was called:
# once (28µs+8µs) by Foswiki::Store::Interfaces::QueryAlgorithm::BEGIN@17 at line 52 # spent 35µs making 1 call to Foswiki::Query::HoistREs::BEGIN@52
# spent 8µs making 1 call to strict::import |
53 | 2 | 43µs | 2 | 52µs | # spent 34µs (16+18) within Foswiki::Query::HoistREs::BEGIN@53 which was called:
# once (16µs+18µs) by Foswiki::Store::Interfaces::QueryAlgorithm::BEGIN@17 at line 53 # spent 34µs making 1 call to Foswiki::Query::HoistREs::BEGIN@53
# spent 18µs making 1 call to warnings::import |
54 | |||||
55 | 2 | 42µs | 1 | 9µs | # spent 9µs within Foswiki::Query::HoistREs::BEGIN@55 which was called:
# once (9µs+0s) by Foswiki::Store::Interfaces::QueryAlgorithm::BEGIN@17 at line 55 # spent 9µs making 1 call to Foswiki::Query::HoistREs::BEGIN@55 |
56 | 2 | 78µs | 1 | 9µs | # spent 9µs within Foswiki::Query::HoistREs::BEGIN@56 which was called:
# once (9µs+0s) by Foswiki::Store::Interfaces::QueryAlgorithm::BEGIN@17 at line 56 # spent 9µs making 1 call to Foswiki::Query::HoistREs::BEGIN@56 |
57 | |||||
58 | 2 | 2.98ms | 2 | 324µs | # spent 192µs (59+133) within Foswiki::Query::HoistREs::BEGIN@58 which was called:
# once (59µs+133µs) by Foswiki::Store::Interfaces::QueryAlgorithm::BEGIN@17 at line 58 # spent 192µs making 1 call to Foswiki::Query::HoistREs::BEGIN@58
# spent 133µs making 1 call to constant::import |
59 | |||||
# Current nesting depth of the hoisting recursion. It is only used to
# indent the diagnostic lines written by _monitor().
our $indent = 0;

# Debugging aid: writes its arguments to STDERR as a single line, separated
# by spaces and prefixed with $indent spaces. Arguments that are references
# are rendered by calling their stringify() method; everything else is
# printed as given.
sub _monitor {
    my @parts;
    foreach my $arg (@_) {
        push( @parts, ref($arg) ? $arg->stringify() : $arg );
    }
    my $line = ( ' ' x $indent ) . join( ' ', @parts ) . "\n";
    print STDERR $line;
}
66 | |||||
67 | =begin TML | ||||
68 | |||||
69 | ---++ StaticMethod hoist($query) -> \%regex_lists | ||||
70 | |||||
71 | Main entry point for the hoister. | ||||
72 | |||||
73 | Returns a reference to a hash whose keys are the aspects to be tested | ||||
74 | (web|name|text) and whose values are the AND terms, represented as | ||||
75 | lists of regexes, each of which is one OR term. | ||||
76 | |||||
77 | There are also keys named "(web|name|text)_source" where the list | ||||
78 | contains what the user entered for that term. | ||||
79 | |||||
80 | =cut | ||||
81 | |||||
# Main entry point. Hoists every term that _hoistAND() can extract from the
# query rooted at $node and groups them by the field they apply to.
#
# Returns a hash reference. For each field seen, the key <field> holds the
# list of regexes and the key <field>_source holds the matching list of
# source strings, in the order the terms were hoisted.
sub hoist {
    my ($node) = @_;
    my %collation;

    # Gather up all the terms applicable to a particular field
    foreach my $term ( _hoistAND($node) ) {
        my $field = $term->{field};
        push( @{ $collation{$field} },             $term->{regex} );
        push( @{ $collation{ $field . '_source' } }, $term->{source} );
    }

    # To inspect the result while debugging:
    #use Data::Dumper;
    #print STDERR "--- hoisted: ".Dumper(\%collation)."\n" if MONITOR_HOIST;
    return \%collation;
}
97 | |||||
# Used for MONITOR_HOIST: renders a hoisted term (a hash with 'field' and
# 'regex' entries) as "<field> => /<regex>/" for the debug trace.
sub _monTerm {
    my ($term) = @_;
    return sprintf( '%s => /%s/', $term->{field}, $term->{regex} );
}
103 | |||||
# Hoists the terms of a (possibly parenthesised) 'and' expression.
#
# Returns a list of term hashes, or the empty list if nothing could be
# hoisted. Each term contains the field the regex is for, a regex string,
# and the source string that the user entered. e.g.
# {
#    field => 'web|name|text',
#    regex => 'Web.*'
#    source => 'Web*'
# }
sub _hoistAND {
    my ($node) = @_;

    my $op = $node->{op};
    return () unless ref($op);

    my $opname = $op->{name};

    # Parentheses are transparent: hoist whatever they enclose.
    return _hoistAND( $node->{params}[0] ) if $opname eq '(';

    if ( $opname eq 'and' ) {

        # An 'and' conjunction yields a set of individual expressions,
        # each of which must match the data. The first operand may itself
        # be an 'and'; every further operand is hoisted as an OR term, and
        # operands that cannot be hoisted are simply left out.
        my ( $first, @rest ) = @{ $node->{params} };
        $indent++;
        my @collect = _hoistAND($first);
        foreach my $operand (@rest) {
            my $term = _hoistOR($operand);
            push( @collect, $term ) if $term;
        }
        $indent--;
        _monitor( "hoistAND ", $node,
            join( ', ', map { _monTerm($_) } @collect ) )
          if MONITOR_HOIST;
        return @collect;
    }

    # Not a conjunction: the whole node is a single OR term, if hoistable.
    my $or = _hoistOR($node);
    return ($or) if $or;

    _monitor( "hoistAND ", $node, " FAILED" ) if MONITOR_HOIST;
    return ();
}
146 | |||||
# depth 1; we can handle a sequence of ORs, which we collapse into
# a common regular expression when they apply to the same field.
#
# Returns a single term hash (field, regex, source), or nothing when the
# expression cannot be hoisted.
sub _hoistOR {
    my ($node) = @_;

    my $op = $node->{op};
    return unless ref($op);

    # Parentheses are transparent.
    return _hoistOR( $node->{params}[0] ) if $op->{name} eq '(';

    # Anything other than an 'or' is treated as a single comparison.
    return _hoistEQ($node) unless $op->{name} eq 'or';

    my %by_field;
    $indent++;
    foreach my $operand ( @{ $node->{params} } ) {
        my $term = _hoistEQ($operand);

        # If we fail to hoist the subexpression then it can't
        # be expressed using simple regexes. In this event we can't
        # account for this term in a top-level and, so we have
        # to abort the entire hoist.
        unless ($term) {
            %by_field = ();
            last;
        }

        my $field = $term->{field};
        if ( my $seen = $by_field{$field} ) {

            # Same field as an earlier operand: merge into its term
            $seen->{regex}  .= '|' . $term->{regex};
            $seen->{source} .= ',' . $term->{source};
        }
        else {
            $by_field{$field} = $term;
        }
    }
    $indent--;
    _monitor( "hoistOR ", $node,
        join( ', ', map { _monTerm($_) } values %by_field ) )
      if MONITOR_HOIST;

    # At this point we have collected terms for all the domains, and
    # if there is only one we can just return it. However if the
    # expression involved more than one domain, we have a "mixed or"
    # and we can't hoist.
    my @merged = values %by_field;
    return $merged[0] if scalar(@merged) == 1;

    _monitor( "hoistOR ", $node, " FAILED" ) if MONITOR_HOIST;
    return;
}
204 | |||||
# $PHOLD is a placeholder for the RHS term in the regex templates built by
# _hoistDOT(); _hoistEQ() substitutes the actual constant for it.
our $PHOLD = "\000RHS\001";

# depth 2: can handle =, ~ and =~ expressions that compare a field access
# with a constant.
#
# Returns the term hash produced by _hoistDOT() with the placeholder in its
# regex replaced and 'source' set to the constant as entered, or nothing if
# the expression cannot be hoisted.
sub _hoistEQ {
    my ($node) = @_;

    my $op = $node->{op};
    return unless ref($op);

    my $opname = $op->{name};

    # Parentheses are transparent.
    return _hoistEQ( $node->{params}[0] ) if $opname eq '(';

    if ( $opname eq '=' ) {

        # = is symmetric, so try field=constant first and then
        # constant=field. Each entry is
        # [ field operand index, constant operand index, monitor tag ].
        foreach my $order ( [ 0, 1, " =>" ], [ 1, 0, " <=" ] ) {
            my ( $field_at, $const_at, $tag ) = @{$order};
            $indent++;
            my $lhs = _hoistDOT( $node->{params}[$field_at] );
            my $rhs = _hoistConstant( $node->{params}[$const_at] );
            $indent--;
            next unless $lhs && defined $rhs;

            # Equality matches the constant literally
            $rhs = quotemeta($rhs);
            $lhs->{regex} =~ s/$PHOLD/$rhs/g;
            $lhs->{source} = _hoistConstant( $node->{params}[$const_at] );
            _monitor( "hoistEQ ", $node, $tag ) if MONITOR_HOIST;
            return $lhs;
        }
    }
    elsif ( $opname eq '~' ) {
        $indent++;
        my $lhs = _hoistDOT( $node->{params}[0] );
        my $rhs = _hoistConstant( $node->{params}[1] );
        $indent--;
        if ( $lhs && defined $rhs ) {

            # Quote everything, then turn the (now escaped) wildcards
            # ? and * into their regex equivalents
            $rhs = quotemeta($rhs);
            $rhs =~ s/\\\?/./g;
            $rhs =~ s/\\\*/.*/g;
            $lhs->{regex} =~ s/$PHOLD/$rhs/g;
            $lhs->{source} = _hoistConstant( $node->{params}[1] );
            _monitor( "hoistEQ ", $node, " ~" )
              if MONITOR_HOIST;
            return $lhs;
        }
    }
    elsif ( $opname eq '=~' ) {
        $indent++;
        my $lhs = _hoistDOT( $node->{params}[0] );
        my $rhs = _hoistConstant( $node->{params}[1] );
        $indent--;
        if ( $lhs && defined $rhs ) {

            # When the template is more than the bare placeholder, the
            # user's regex gets embedded inside another regex. In that
            # case pad it with .* on any side that is not already
            # anchored or padded, then drop the ^ and $ anchors, which
            # have no place inside the embedding regex.
            if ( $lhs->{regex} ne $PHOLD ) {
                $rhs = '.*' . $rhs
                  unless $rhs =~ /^\^/ || $rhs =~ /^\.\*/;

                $rhs = $rhs . '.*'
                  unless $rhs =~ /\$$/ || $rhs =~ /\.\*$/;

                $rhs =~ s/^\^//;
                $rhs =~ s/\$$//;
            }
            $lhs->{regex} =~ s/$PHOLD/$rhs/g;
            $lhs->{source} = _hoistConstant( $node->{params}[1] );
            _monitor( "hoistEQ ", $node, " =~" )
              if MONITOR_HOIST;
            return $lhs;
        }
    }

    _monitor( "hoistEQ ", $node, " FAILED" ) if MONITOR_HOIST;
    return;
}
298 | |||||
# Expecting a (root level) field access expression. This must be of the form
# <name>
# or
# <rootfield>.<name>
# <rootfield> may be aliased
#
# Returns a hash with two entries, or nothing if $node is not such an
# expression:
#    field => 'web', 'name' or 'text' - the domain the regex applies to
#    regex => a regex template in which $PHOLD marks where the caller
#             must substitute the value being compared against
#
# The literal '{' that opens a %META:...{ attribute list is escaped in the
# generated regexes: perl deprecates (and in some versions rejects) an
# unescaped literal left brace in a pattern.
sub _hoistDOT {
    my $node = shift;

    # Parentheses are transparent.
    if ( ref( $node->{op} ) && $node->{op}->{name} eq '(' ) {
        return _hoistDOT( $node->{params}[0] );
    }

    if ( ref( $node->{op} ) && $node->{op}->{name} eq '.' ) {
        my $lhs = $node->{params}[0];
        my $rhs = $node->{params}[1];

        # Only a plain <name>.<name> pair can be hoisted
        if (   !ref( $lhs->{op} )
            && !ref( $rhs->{op} )
            && $lhs->{op} eq Foswiki::Infix::Node::NAME
            && $rhs->{op} eq Foswiki::Infix::Node::NAME )
        {
            $lhs = $lhs->{params}[0];
            $rhs = $rhs->{params}[0];

            # Expand the root field if it is a known alias
            if ( $Foswiki::Query::Node::aliases{$lhs} ) {
                $lhs = $Foswiki::Query::Node::aliases{$lhs};
            }
            if ( $lhs =~ /^META:/ ) {

                _monitor( "hoist DOT ", $node, " => $rhs" )
                  if MONITOR_HOIST;

                # $PHOLD is a placeholder for the RHS term
                return {
                    field => 'text',
                    regex => '^%'
                      . $lhs
                      . '\\{.*\\b'
                      . $rhs
                      . "=\\\"$PHOLD\\\""
                };
            }

            # Otherwise assume the term before the dot is the form name
            if ( $rhs eq 'text' ) {

                _monitor( "hoist DOT ", $node, " => formname" )
                  if MONITOR_HOIST;

                # Special case for the text body
                return { field => 'text', regex => $PHOLD };
            }
            else {
                _monitor( "hoist DOT ", $node, " => fieldname" )
                  if MONITOR_HOIST;
                return {
                    field => 'text',
                    regex =>
"^%META:FIELD\\{name=\\\"$rhs\\\".*\\bvalue=\\\"$PHOLD\\\""
                };
            }

        }
    }
    elsif ( !ref( $node->{op} ) && $node->{op} eq Foswiki::Infix::Node::NAME ) {
        my $name = $node->{params}[0];
        if ( $name eq 'name' ) {

            # Special case for the topic name
            _monitor( "hoist DOT ", $node, " => topic" )
              if MONITOR_HOIST;
            return { field => 'name', regex => $PHOLD };
        }
        elsif ( $name eq 'web' ) {

            # Special case for the web name
            _monitor( "hoist DOT ", $node, " => web" )
              if MONITOR_HOIST;
            return { field => 'web', regex => $PHOLD };
        }
        elsif ( $name eq 'text' ) {

            # Special case for the text body
            _monitor( "hoist DOT ", $node, " => text" )
              if MONITOR_HOIST;
            return { field => 'text', regex => $PHOLD };
        }
        else {

            # Any other bare name is taken to be a form field name
            _monitor( "hoist DOT ", $node, " => field" )
              if MONITOR_HOIST;
            return {
                field => 'text',
                regex =>
"^%META:FIELD\\{name=\\\"$name\\\".*\\bvalue=\\\"$PHOLD\\\""
            };
        }
    }

    _monitor( "hoistDOT ", $node, " FAILED" ) if MONITOR_HOIST;
    return;
}
393 | |||||
# Expecting a constant: a leaf node whose op is the STRING or NUMBER type
# from Foswiki::Infix::Node. Returns the constant's value (the node's first
# param), or nothing if $node is not such a node.
sub _hoistConstant {
    my ($node) = @_;

    my $type = $node->{op};

    # Operator nodes carry a reference in {op}; they are never constants
    return if ref($type);

    return
      unless ( $type eq Foswiki::Infix::Node::STRING
        || $type eq Foswiki::Infix::Node::NUMBER );

    _monitor( "hoist CONST ", $node, " => $node->{params}[0]" )
      if MONITOR_HOIST;
    return $node->{params}[0];
}
410 | |||||
411 | 1 | 6µs | 1; | ||
412 | __END__ |