search.extender.inc

Search query extender and helper functions.

File

modules/search/search.extender.inc

View source
<?php


/**
 * @file
 * Search query extender and helper functions.
 */

/**
 * Do a query on the full-text search index for a word or words.
 *
 * This function is normally only called by each module that supports the
 * indexed search (and thus, implements hook_update_index()).
 *
 * Results are retrieved in two logical passes. However, the two passes are
 * joined together into a single query. And in the case of most simple
 * queries the second pass is not even used.
 *
 * The first pass selects a set of all possible matches, which has the benefit
 * of also providing the exact result set for simple "AND" or "OR" searches.
 *
 * The second portion of the query further refines this set by verifying
 * advanced text conditions (such as negative or phrase matches).
 *
 * The used query object has the tag 'search_$module' and can be further
 * extended with hook_query_alter().
 */
class SearchQuery extends SelectQueryExtender {
    
    /**
     * The search expression (keywords and options) that is being searched for.
     *
     * Set by searchExpression(), rewritten by setOption() and tokenized by
     * parseSearchExpression().
     *
     * @var string
     */
    protected $searchExpression;
    
    /**
     * Type of search (search module).
     *
     * This maps to the value of the type column in search_index, and is equal
     * to the machine-readable name of the module that implements
     * hook_search_info().
     *
     * @var string
     */
    protected $type;
    
    /**
     * Positive and negative search keys.
     *
     * Filled by parseSearchExpression(). Each entry of 'positive' is either a
     * single simplified word or phrase (an AND term) or an array of them (a
     * group of ORed terms). 'negative' is a flat list of excluded words or
     * phrases.
     *
     * @var array
     */
    protected $keys = array(
        'positive' => array(),
        'negative' => array(),
    );
    
    /**
     * Indicates whether the search is simple.
     *
     * TRUE means the first pass query does not need the complex (LIKE)
     * conditions. parseSearchExpression() sets this to FALSE for phrases,
     * negative keys, a mix of AND terms and OR groups, and AND terms that
     * contain no valid word.
     *
     * @var boolean
     */
    protected $simple = TRUE;
    
    /**
     * Conditions that are used for exact searches.
     *
     * This is always used for the second pass query but not for the first pass,
     * unless $this->simple is FALSE.
     *
     * It stays unset (NULL) until parseSearchExpression() has found at least
     * one keyword token in the search expression.
     *
     * @var DatabaseCondition
     */
    protected $conditions;
    
    /**
     * Indicates how many matches for a search query are necessary.
     *
     * Used as the lower bound of the HAVING COUNT(*) clause that
     * executeFirstPass() adds to the query.
     *
     * @var int
     */
    protected $matches = 0;
    
    /**
     * Array of search words.
     *
     * These words have to match against {search_index}.word. The array is keyed
     * by the word itself, so every word is stored only once.
     *
     * @var array
     */
    protected $words = array();
    
    /**
     * Normalization base for the keyword relevance score.
     *
     * This is the highest SUM(i.score * t.count) value found by the first pass
     * query. execute() multiplies the score of a specific word with the
     * reciprocal of this value to make sure that the resulting calculated
     * score is between 0 and 1.
     *
     * @var float
     */
    protected $normalize;
    
    /**
     * Indicates whether the first pass query has been executed.
     *
     * @var boolean
     */
    protected $executedFirstPass = FALSE;
    
    /**
     * Stores score expressions.
     *
     * @var array
     *
     * @see addScore()
     */
    protected $scores = array();
    
    /**
     * Stores arguments for score expressions.
     *
     * @var array
     */
    protected $scoresArguments = array();
    
    /**
     * Stores multipliers for score expressions.
     *
     * The position of a multiplier in this array is the number used in the
     * :multiply_N and :total_N placeholders of its score expression.
     *
     * @var array
     */
    protected $multiply = array();
    
    /**
     * Whether or not search expressions were ignored.
     *
     * The maximum number of AND/OR combinations is configurable (variable
     * 'search_and_or_limit') to avoid Denial-of-Service attacks. Expressions
     * beyond that limit are ignored, and this flag is set to TRUE.
     *
     * @var boolean
     */
    protected $expressionsIgnored = FALSE;
    
    /**
     * Stores the search expression and search type, and tags the query.
     *
     * @param $expression
     *   A search query string, which can contain options.
     * @param $module
     *   The search module. This maps to {search_index}.type in the database.
     *
     * @return
     *   The SearchQuery object.
     */
    public function searchExpression($expression, $module) {
        // The search_* tag has to be in place before any preExecute() method of
        // a decorating query runs: once $this->prepared is TRUE, tags added
        // later (for instance in execute()) are never used. With a query that
        // is extended by 'SearchQuery' and then by 'PagerDefault', a tag added
        // at execution time would arrive after PagerDefault has already called
        // preExecute(), so hook_query_alter() implementations would not see it
        // and the matching hook_query_TAG_alter() implementations would not be
        // invoked. See node_search_execute().
        $this->addTag("search_{$module}");
        $this->type = $module;
        $this->searchExpression = $expression;
        return $this;
    }
    
    /**
     * Turns an option of the search expression into a query condition.
     *
     * Options have the form option:value,value2,value3. When the option is
     * present, its values are ORed together as conditions on $column and the
     * option is taken out of the stored search expression.
     *
     * @param $option
     *   Name of the option.
     * @param $column
     *   Name of the database column to which the value should be applied.
     *
     * @return
     *   TRUE if a value for that option was found, FALSE if not.
     */
    public function setOption($option, $column) {
        $raw_values = search_expression_extract($this->searchExpression, $option);
        // Nothing (or only an empty value) was given for this option.
        if (!$raw_values) {
            return FALSE;
        }
        // Any one of the comma-separated values may match the column.
        $alternatives = db_or();
        foreach (explode(',', $raw_values) as $candidate) {
            $alternatives->condition($column, $candidate);
        }
        $this->condition($alternatives);
        // Write the expression back without passing a value for the option, so
        // the option is not treated as a keyword later on.
        $this->searchExpression = search_expression_insert($this->searchExpression, $option);
        return TRUE;
    }
    
    /**
     * Parses the search expression into keys, words and SQL conditions.
     *
     * The expression is split into tokens, which are sorted into positive keys
     * (AND terms and groups of ORed terms) and negative keys. From those keys
     * this method fills $this->keys and $this->words (through parseWord()),
     * builds the LIKE / NOT LIKE conditions on d.data in $this->conditions,
     * counts the required matches in $this->matches and clears $this->simple
     * when the LIKE conditions are needed in the first pass already.
     *
     * If the expression contains no token at all, the method returns without
     * setting $this->conditions.
     */
    protected function parseSearchExpression() {
        // Matches words optionally prefixed by a dash. A word in this case is
        // something between two spaces, optionally quoted. A space is prepended
        // to the expression so that the first word is matched as well.
        preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);
        if (count($keywords) == 0) {
            return;
        }
        // Classify tokens. $or is TRUE while the previous token was an
        // uppercase OR, i.e. the next word belongs to the current OR group.
        $or = FALSE;
        $warning = '';
        $limit_combinations = variable_get('search_and_or_limit', 7);
        // The first search expression does not count as AND.
        $and_count = -1;
        $or_count = 0;
        foreach ($keywords as $match) {
            if ($or_count && $and_count + $or_count >= $limit_combinations) {
                // Ignore all further search expressions to prevent Denial-of-Service
                // attacks using a high number of AND/OR combinations.
                $this->expressionsIgnored = TRUE;
                break;
            }
            $phrase = FALSE;
            // Strip off phrase quotes. A phrase can only be verified with a LIKE
            // condition, so the search is no longer simple.
            if ($match[2][0] == '"') {
                $match[2] = substr($match[2], 1, -1);
                $phrase = TRUE;
                $this->simple = FALSE;
            }
            // Simplify keyword according to indexing rules and external
            // preprocessors. Use same process as during search indexing, so it
            // will match search index.
            $words = search_simplify($match[2]);
            // Re-explode in case simplification added more words, except when
            // matching a phrase.
            $words = $phrase ? array(
                $words,
            ) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
            // Negative matches.
            if ($match[1] == '-') {
                $this->keys['negative'] = array_merge($this->keys['negative'], $words);
            }
            // Uppercase OR after at least one positive key: turn the last
            // positive key into an OR group (or keep the existing group).
            elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
                $last = array_pop($this->keys['positive']);
                // Starting a new OR?
                if (!is_array($last)) {
                    $last = array(
                        $last,
                    );
                }
                $this->keys['positive'][] = $last;
                $or = TRUE;
                $or_count++;
                continue;
            }
            // AND is implied between keys, so the token itself is dropped.
            elseif ($match[2] == 'AND' || $match[2] == 'and') {
                $warning = $match[2];
                continue;
            }
            else {
                // A lowercase 'or' is searched for as a normal word, but is
                // remembered so that a hint can be shown below.
                if ($match[2] == 'or') {
                    $warning = $match[2];
                }
                if ($or) {
                    // Add to last element (which is an array).
                    $this->keys['positive'][count($this->keys['positive']) - 1] = array_merge($this->keys['positive'][count($this->keys['positive']) - 1], $words);
                }
                else {
                    $this->keys['positive'] = array_merge($this->keys['positive'], $words);
                    $and_count++;
                }
            }
            // Reached for negative keys and plain words only; OR and AND tokens
            // skip this through 'continue'.
            $or = FALSE;
        }
        // Convert keywords into SQL statements.
        $this->conditions = db_and();
        $simple_and = FALSE;
        $simple_or = FALSE;
        // Positive matches.
        foreach ($this->keys['positive'] as $key) {
            // Group of ORed terms.
            if (is_array($key) && count($key)) {
                $simple_or = TRUE;
                $any = FALSE;
                $queryor = db_or();
                foreach ($key as $or) {
                    list($num_new_scores) = $this->parseWord($or);
                    $any |= $num_new_scores;
                    $queryor->condition('d.data', "% {$or} %", 'LIKE');
                }
                if (count($queryor)) {
                    $this->conditions
                        ->condition($queryor);
                    // A group of OR keywords only needs to match once.
                    $this->matches += $any > 0;
                }
            }
            else {
                $simple_and = TRUE;
                list($num_new_scores, $num_valid_words) = $this->parseWord($key);
                $this->conditions
                    ->condition('d.data', "% {$key} %", 'LIKE');
                // A key without any valid word adds nothing to $this->words, so
                // it can only be checked through its LIKE condition.
                if (!$num_valid_words) {
                    $this->simple = FALSE;
                }
                // Each AND keyword needs to match at least once.
                $this->matches += $num_new_scores;
            }
        }
        // A mix of AND terms and OR groups also needs the LIKE conditions in
        // the first pass.
        if ($simple_and && $simple_or) {
            $this->simple = FALSE;
        }
        // Negative matches.
        foreach ($this->keys['negative'] as $key) {
            $this->conditions
                ->condition('d.data', "% {$key} %", 'NOT LIKE');
            $this->simple = FALSE;
        }
        // Only a lowercase 'or' results in a hint; nothing is shown here for
        // 'AND' or 'and', although $warning is set for them as well.
        if ($warning == 'or') {
            drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
        }
    }
    
    /**
     * Helper function for parseSearchExpression().
     *
     * Splits a word or phrase on spaces and registers every valid word in
     * $this->words. A word is valid if it is numeric or at least as long as
     * the 'minimum_word_size' variable (default 3).
     *
     * @param $word
     *   A simplified search word, or a phrase of space-separated words.
     *
     * @return
     *   An array with two numbers: the number of words that were newly added
     *   to $this->words, and the number of valid words in $word (including
     *   those that had been added before).
     */
    protected function parseWord($word) {
        $num_new_scores = 0;
        $num_valid_words = 0;
        // The minimum length is the same for every word, so look it up once.
        $minimum_word_size = variable_get('minimum_word_size', 3);
        // Determine the scorewords of this word/phrase.
        foreach (explode(' ', $word) as $s) {
            // Numbers are always valid, regardless of their length.
            if (is_numeric($s) || drupal_strlen($s) >= $minimum_word_size) {
                if (!isset($this->words[$s])) {
                    $this->words[$s] = $s;
                    $num_new_scores++;
                }
                $num_valid_words++;
            }
        }
        // Return the number of newly added words and the number of valid words.
        return array(
            $num_new_scores,
            $num_valid_words,
        );
    }
    
    /**
     * Runs the first pass query, which determines the normalization value.
     *
     * The first pass may be triggered explicitly, which allows additional
     * scores and conditions to be applied to the second pass query afterwards,
     * or implicitly through execute().
     *
     * @return
     *   TRUE if search items exist, FALSE if not.
     */
    public function executeFirstPass() {
        $this->parseSearchExpression();
        // Without a single valid positive keyword there is nothing to look up.
        if (count($this->words) == 0) {
            $minimum_word_size = variable_get('minimum_word_size', 3);
            $message = format_plural($minimum_word_size, 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.');
            form_set_error('keys', $message);
            return FALSE;
        }
        // Tell the user when part of the expression was dropped by the limit.
        if ($this->expressionsIgnored) {
            $replacements = array(
                '@count' => variable_get('search_and_or_limit', 7),
            );
            drupal_set_message(t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', $replacements), 'warning');
        }
        $this->executedFirstPass = TRUE;
        // At least one word exists at this point: match any of them against
        // the index.
        $word_filter = db_or();
        foreach ($this->words as $indexed_word) {
            $word_filter->condition('i.word', $indexed_word);
        }
        $this->condition($word_filter);
        // Build query for keyword normalization.
        $this->join('search_total', 't', 'i.word = t.word');
        $having_arguments = array(
            ':matches' => $this->matches,
        );
        $this->condition('i.type', $this->type)
            ->groupBy('i.type')
            ->groupBy('i.sid')
            ->having('COUNT(*) >= :matches', $having_arguments);
        // The first pass runs on a copy, so the query itself stays untouched.
        $first_pass = clone $this->query;
        // Complex searches need the LIKE conditions in the first pass as well.
        if (!$this->simple) {
            $first_pass->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
            $first_pass->condition($this->conditions);
        }
        // The highest keyword relevance is the value scores are normalized by.
        $first_pass->addExpression('SUM(i.score * t.count)', 'calculated_score');
        $this->normalize = $first_pass->range(0, 1)
            ->orderBy('calculated_score', 'DESC')
            ->execute()
            ->fetchField();
        return (bool) $this->normalize;
    }
    
    /**
     * Registers a custom score expression for ordering the search results.
     *
     * Search results are ordered by the sum of all score expressions. As long
     * as addScore() has not been called, a default keyword relevance score is
     * used; after the first call, keyword relevance is no longer added
     * automatically.
     *
     * When the SearchQuery extender is used, ordering has to be added through
     * this method rather than by calling orderBy() directly, because the
     * SearchQuery class normalizes scores in a two-pass system.
     *
     * @param $score
     *   The score expression, which should evaluate to a number between 0 and 1.
     *   The string 'i.relevance' in a score expression will be replaced by a
     *   measure of keyword relevance between 0 and 1.
     * @param $arguments
     *   Query arguments needed to provide values to the score expression.
     * @param $multiply
     *   If set, the score is multiplied with this value. However, all scores
     *   with multipliers are then divided by the total of all multipliers, so
     *   that overall, the normalization is maintained.
     *
     * @return object
     *   The updated query object.
     */
    public function addScore($score, $arguments = array(), $multiply = FALSE) {
        if ($multiply) {
            // The position of this multiplier numbers its two placeholders.
            $index = count($this->multiply);
            $this->multiply[] = $multiply;
            // Only the multiplier argument is known here. The :total_N
            // argument is supplied by execute(), once the sum of all
            // multipliers can be calculated.
            $arguments[':multiply_' . $index] = $multiply;
            // Wrap the expression: multiply it by the multiplier, and divide it
            // by the total to renormalize.
            $score = 'CAST(:multiply_' . $index . ' AS DECIMAL) * COALESCE(( ' . $score . '), 0) / CAST(:total_' . $index . ' AS DECIMAL)';
        }
        $this->scoresArguments += $arguments;
        $this->scores[] = $score;
        return $this;
    }
    
    /**
     * Executes the search.
     *
     * If not already done, this executes the first pass query. Then the complex
     * conditions are applied to the query including score expressions and
     * ordering.
     *
     * @return
     *   An empty result set (a DatabaseStatementEmpty object) if the first pass
     *   query returned no results, and the database result set of the search
     *   query if there were results.
     */
    public function execute() {
        if (!$this->executedFirstPass) {
            $this->executeFirstPass();
        }
        // No normalization value means that the first pass found nothing (or
        // could not run). This also rules out a division by zero below.
        if (!$this->normalize) {
            return new DatabaseStatementEmpty();
        }
        // Add conditions to query.
        $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
        $this->condition($this->conditions);
        if (empty($this->scores)) {
            // Add default score.
            $this->addScore('i.relevance');
        }
        $multiplier_count = count($this->multiply);
        if ($multiplier_count) {
            // Re-normalize scores with multipliers by dividing by the total of all
            // multipliers. The expressions were altered in addScore(), so here just
            // add the arguments for the total: one :total_N placeholder per
            // multiplier, numbered from 0 like in addScore().
            $sum = array_sum($this->multiply);
            for ($i = 0; $i < $multiplier_count; $i++) {
                $this->scoresArguments[':total_' . $i] = $sum;
            }
        }
        // Replace the pseudo-expression 'i.relevance' with a measure of keyword
        // relevance in all score expressions, using string replacement. Careful
        // though! If you just print out a float, some locales use ',' as the
        // decimal separator in PHP, while SQL always uses '.'. So, make sure to
        // set the number format correctly.
        $relevance = number_format(1.0 / $this->normalize, 10, '.', '');
        $this->scores = str_replace('i.relevance', '(' . $relevance . ' * i.score * t.count)', $this->scores);
        // Add all scores together to form a query field.
        $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);
        // If an order has not yet been set for this query, add a default order
        // that sorts by the calculated sum of scores.
        if (count($this->getOrderBy()) == 0) {
            $this->orderBy('calculated_score', 'DESC');
        }
        // Add useful metadata.
        $this->addMetaData('normalize', $this->normalize)
            ->fields('i', array(
            'type',
            'sid',
        ));
        return $this->query
            ->execute();
    }
    
    /**
     * Builds the default count query for SearchQuery.
     *
     * Since SearchQuery always uses GROUP BY, we can default to a subquery. We
     * also add the same conditions as execute() because countQuery() is called
     * first.
     *
     * The exact-match conditions are only added if the search expression has
     * been parsed and produced at least one condition.
     *
     * @return
     *   A select query that counts the rows of the search query.
     */
    public function countQuery() {
        // Clone the inner query.
        $inner = clone $this->query;
        // Add conditions to query.
        $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
        // $this->conditions is not set before the expression has been parsed,
        // and can be an empty condition group. Neither can be turned into a
        // valid SQL condition, so they are skipped.
        if (isset($this->conditions) && count($this->conditions)) {
            $inner->condition($this->conditions);
        }
        // Remove existing fields and expressions, they are not needed for a count
        // query. Both getters return references, so assigning to the variables
        // empties the lists inside the cloned query.
        $fields =& $inner->getFields();
        $fields = array();
        $expressions =& $inner->getExpressions();
        $expressions = array();
        // Add the sid as the only field and count them as a subquery.
        $count = db_select($inner->fields('i', array(
            'sid',
        )), NULL, array(
            'target' => 'slave',
        ));
        // Add the COUNT() expression.
        $count->addExpression('COUNT(*)');
        return $count;
    }

}

Classes

Title Deprecated Summary
SearchQuery Do a query on the full-text search index for a word or words.

Buggy or inaccurate documentation? Please file an issue. Need support? Need help programming? Connect with the Drupal community.