@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implement the "present" and "absent" operators in the Ferret execution engine

Summary:
Ref T13509. Now that the compiler can parse these queries, actually implement them.

These are fairly easy to implement:

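- For "present", just "JOIN" the field's table. If the join matches a row, the field is present.
- For "absent", we "LEFT JOIN" the field's table and then add "WHERE any_column IS NULL", which keeps only the documents that have no matching row for that field.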

Test Plan: Searched for various documents with and without fields present, got sensible results in Maniphest. For example, "body:-" finds tasks with no body, "body:- duck" finds tasks with no body and "duck" elsewhere in the content, and so on.

Maniphest Tasks: T13509

Differential Revision: https://secure.phabricator.com/D21110

+52 -8
+52 -8
src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
··· 1801 1801 $this->ferretEngine = $engine; 1802 1802 $this->ferretTokens = $fulltext_tokens; 1803 1803 1804 + $op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT; 1805 + 1804 1806 $default_function = $engine->getDefaultFunctionKey(); 1805 1807 $table_map = array(); 1806 1808 $idx = 1; 1807 1809 foreach ($this->ferretTokens as $fulltext_token) { 1808 1810 $raw_token = $fulltext_token->getToken(); 1809 - $function = $raw_token->getFunction(); 1810 1811 1812 + $function = $raw_token->getFunction(); 1811 1813 if ($function === null) { 1812 1814 $function = $default_function; 1813 1815 } 1814 1816 1815 1817 $raw_field = $engine->getFieldForFunction($function); 1816 1818 1819 + // NOTE: The query compiler guarantees that a query can not make a 1820 + // field both "present" and "absent", so it's safe to just use the 1821 + // first operator we encounter to determine whether the table is 1822 + // optional or not. 1823 + 1824 + $operator = $raw_token->getOperator(); 1825 + $is_optional = ($operator === $op_absent); 1826 + 1817 1827 if (!isset($table_map[$function])) { 1818 1828 $alias = 'ftfield_'.$idx++; 1819 1829 $table_map[$function] = array( 1820 1830 'alias' => $alias, 1821 1831 'key' => $raw_field, 1832 + 'optional' => $is_optional, 1822 1833 ); 1823 1834 } 1824 1835 } ··· 1966 1977 1967 1978 $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING; 1968 1979 $op_not = PhutilSearchQueryCompiler::OPERATOR_NOT; 1980 + $op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT; 1981 + $op_present = PhutilSearchQueryCompiler::OPERATOR_PRESENT; 1969 1982 1970 1983 $engine = $this->ferretEngine; 1971 1984 $stemmer = $engine->newStemmer(); ··· 1976 1989 foreach ($this->ferretTokens as $fulltext_token) { 1977 1990 $raw_token = $fulltext_token->getToken(); 1978 1991 1992 + $operator = $raw_token->getOperator(); 1993 + 1979 1994 // If this is a negated term like "-pomegranate", don't join the ngram 1980 1995 // table since we aren't looking for documents with this term. 
(We could 1981 1996 // LEFT JOIN the table and require a NULL row, but this is probably more 1982 1997 // trouble than it's worth.) 1983 - if ($raw_token->getOperator() == $op_not) { 1998 + if ($operator === $op_not) { 1999 + continue; 2000 + } 2001 + 2002 + // Neither the "present" nor "absent" operators benefit from joining 2003 + // the ngram table. 2004 + if ($operator === $op_absent || $operator === $op_present) { 1984 2005 continue; 1985 2006 } 1986 2007 ··· 2143 2164 $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING; 2144 2165 $op_not = PhutilSearchQueryCompiler::OPERATOR_NOT; 2145 2166 $op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT; 2167 + $op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT; 2168 + $op_present = PhutilSearchQueryCompiler::OPERATOR_PRESENT; 2146 2169 2147 2170 $where = array(); 2148 - $current_function = 'all'; 2171 + $default_function = $engine->getDefaultFunctionKey(); 2149 2172 foreach ($this->ferretTokens as $fulltext_token) { 2150 2173 $raw_token = $fulltext_token->getToken(); 2151 2174 $value = $raw_token->getValue(); 2152 2175 2153 2176 $function = $raw_token->getFunction(); 2154 2177 if ($function === null) { 2155 - $function = $current_function; 2178 + $function = $default_function; 2156 2179 } 2157 - $current_function = $function; 2180 + 2181 + $operator = $raw_token->getOperator(); 2158 2182 2159 2183 $table_alias = $table_map[$function]['alias']; 2160 2184 2161 - $is_not = ($raw_token->getOperator() == $op_not); 2185 + // If this is a "field is present" operator, we've already implicitly 2186 + // guaranteed this by JOINing the table. We don't need to do any 2187 + // more work. 2188 + $is_present = ($operator === $op_present); 2189 + if ($is_present) { 2190 + continue; 2191 + } 2192 + 2193 + // If this is a "field is absent" operator, we just want documents 2194 + // which failed to match to a row when we LEFT JOINed the table. 
This 2195 + // means there's no index for the field. 2196 + $is_absent = ($operator === $op_absent); 2197 + if ($is_absent) { 2198 + $where[] = qsprintf( 2199 + $conn, 2200 + '(%T.rawCorpus IS NULL)', 2201 + $table_alias); 2202 + continue; 2203 + } 2204 + 2205 + $is_not = ($operator === $op_not); 2206 + 2207 + if ($operator == $op_sub) { 2164 2208 $is_substring = true; 2165 2209 } else { 2166 2210 $is_substring = false; 2167 2211 } 2168 2212 2169 2213 // If we're doing exact search, just test the raw corpus. 2170 - $is_exact = ($raw_token->getOperator() == $op_exact); 2214 + $is_exact = ($operator === $op_exact); 2171 2215 if ($is_exact) { 2172 2216 if ($is_not) { 2173 2217 $where[] = qsprintf(