function taxonomy_update_7005

Migrate {taxonomy_term_node} table to field storage.

@todo: This function can possibly be made much faster by wrapping a transaction around all the inserts.

File

modules/taxonomy/taxonomy.install, line 527

Code

/**
 * Migrate {taxonomy_term_node} table to field storage.
 *
 * This is a multi-pass update driven by $sandbox:
 * - On the first call it counts the term-node relationships to migrate,
 *   builds the vocabulary/node type to field name map, and copies the
 *   relationships, in a stable order, into the temporary
 *   {taxonomy_update_7005} table.
 * - On every later call it reads the next batch of 1000 rows from that
 *   temporary table and writes them into the field revision table, and,
 *   for current revisions, into the field data table and (for published
 *   nodes) into {taxonomy_index}.
 * - Once 'count' has reached 'total' it drops the legacy and temporary
 *   tables and removes the 'taxonomyextra' overflow field, or its unused
 *   instances.
 *
 * @param $sandbox
 *   Array passed by reference that persists state between passes; its keys
 *   are described at the top of the function body. $sandbox['#finished'] is
 *   set to FALSE while rows remain and to TRUE when the migration is done.
 */
function taxonomy_update_7005(&$sandbox) {
    // $sandbox contents:
    // - total: The total number of term_node relationships to migrate.
    // - count: The number of term_node relationships that have been
    //   migrated so far.
    // - last: The db_query_range() offset to use when querying the
    //   temporary table; it is incremented by $batch (1000) at the end of
    //   every batch pass, so after a final, partial batch it can be larger
    //   than count.
    // - vocabularies: An associative array mapping vocabulary id and node
    //   type to field name. If a voc id/node type pair does not appear
    //   in this array but a term_node relationship exists mapping a
    //   term in voc id to node of that type, the relationship is
    //   assigned to the taxonomyextra field which allows terms of all
    //   vocabularies.
    // - cursor[values], cursor[deltas]: The contents of $values and
    //   $deltas at the end of the previous call to this function. These
    //   need to be preserved across calls because a single batch of
    //   1000 rows from term_node may end in the middle of the terms for
    //   a single node revision.
    //
    // $values is the array of values about to be/most recently inserted
    // into the SQL data table for the taxonomy_term_reference
    // field. Before $values is constructed for each record, the
    // $values from the previous insert is checked to see if the two
    // records are for the same node revision id; this enables knowing
    // when to reset the delta counters which are incremented across all
    // terms for a single field on a single revision, but reset for each
    // new field and revision.
    //
    // $deltas is an associative array mapping field name to the number
    // of term references stored so far for the current revision, which
    // provides the delta value for each term reference data insert. The
    // deltas are reset for each new revision.
    $conditions = array(
        'type' => 'taxonomy_term_reference',
        'deleted' => 0,
    );
    // Field definitions for the non-deleted term reference fields; indexed
    // below by field name.
    $field_info = _update_7000_field_read_fields($conditions, 'field_name');
    // This is a multi-pass update. On the first call we need to initialize some
    // variables.
    if (!isset($sandbox['total'])) {
        $sandbox['last'] = 0;
        $sandbox['count'] = 0;
        // Run the same joins as the query that is used later to retrieve the
        // term_node data, this ensures that bad records in that table - for
        // tids which aren't in taxonomy_term_data or nids which aren't in {node}
        // are not included in the count.
        $sandbox['total'] = db_query('SELECT COUNT(*) FROM {taxonomy_term_data} td INNER JOIN {taxonomy_term_node} tn ON td.tid = tn.tid INNER JOIN {node} n ON tn.nid = n.nid LEFT JOIN {node} n2 ON tn.vid = n2.vid')->fetchField();
        // Use an inline version of Drupal 6 taxonomy_get_vocabularies() here since
        // we can no longer rely on $vocabulary->nodes from the API function.
        $result = db_query('SELECT v.vid, v.machine_name, n.type FROM {taxonomy_vocabulary} v INNER JOIN {taxonomy_vocabulary_node_type} n ON v.vid = n.vid');
        $vocabularies = array();
        foreach ($result as $record) {
            // The INNER JOIN above only returns vocabularies that have at least
            // one node type associated, so type is expected to be set here; the
            // check is kept as a safeguard.
            if (isset($record->type)) {
                $vocabularies[$record->vid][$record->type] = 'taxonomy_' . $record->machine_name;
            }
        }
        if (!empty($vocabularies)) {
            $sandbox['vocabularies'] = $vocabularies;
        }
        // Temporary staging table. The serial column 'n' records the order in
        // which rows were inserted so that later passes can page through them
        // with a stable ORDER BY.
        db_create_table('taxonomy_update_7005', array(
            'description' => 'Stores temporary data for taxonomy_update_7005.',
            'fields' => array(
                'n' => array(
                    'description' => 'Preserve order.',
                    'type' => 'serial',
                    'unsigned' => TRUE,
                    'not null' => TRUE,
                ),
                'vocab_id' => array(
                    'type' => 'int',
                    'unsigned' => TRUE,
                    'not null' => TRUE,
                    'default' => 0,
                ),
                'tid' => array(
                    'type' => 'int',
                    'unsigned' => TRUE,
                    'not null' => TRUE,
                ),
                'nid' => array(
                    'type' => 'int',
                    'unsigned' => TRUE,
                    'not null' => TRUE,
                ),
                'vid' => array(
                    'type' => 'int',
                    'unsigned' => TRUE,
                    'not null' => FALSE,
                    'default' => NULL,
                ),
                'type' => array(
                    'type' => 'varchar',
                    'length' => 32,
                    'not null' => TRUE,
                    'default' => '',
                ),
                'created' => array(
                    'type' => 'int',
                    'not null' => FALSE,
                ),
                'sticky' => array(
                    'type' => 'int',
                    'not null' => FALSE,
                ),
                'status' => array(
                    'type' => 'int',
                    'not null' => FALSE,
                ),
                'is_current' => array(
                    'type' => 'int',
                    'unsigned' => TRUE,
                    'not null' => FALSE,
                ),
            ),
            'primary key' => array(
                'n',
            ),
        ));
        // Query selects all revisions at once and processes them in revision and
        // term weight order.
        $query = db_select('taxonomy_term_data', 'td');
        // We are migrating term-node relationships. If there are none for a
        // term, we do not need the term_data row.
        $query->join('taxonomy_term_node', 'tn', 'td.tid = tn.tid');
        // If a term-node relationship exists for a nid that does not exist, we
        // cannot migrate it as we have no node to relate it to; thus we do not
        // need that row from term_node.
        $query->join('node', 'n', 'tn.nid = n.nid');
        // If the current term-node relationship is for the current revision of
        // the node, this left join will match and is_current will be non-NULL
        // (we also get the current sticky and created in this case). This
        // tells us whether to insert into the current data tables in addition
        // to the revision data tables.
        $query->leftJoin('node', 'n2', 'tn.vid = n2.vid');
        $query->addField('td', 'vid', 'vocab_id');
        $query->addField('td', 'tid');
        $query->addField('tn', 'nid');
        $query->addField('tn', 'vid');
        $query->addField('n', 'type');
        $query->addField('n2', 'created');
        $query->addField('n2', 'sticky');
        $query->addField('n2', 'status');
        $query->addField('n2', 'nid', 'is_current');
        // This query must return a consistent ordering across multiple calls.
        // We need them ordered by node vid (since we use that to decide when
        // to reset the delta counters) and by term weight so they appear
        // within each node in weight order. However, tn.vid,td.weight is not
        // guaranteed to be unique, so we add tn.tid as an additional sort key
        // because tn.tid,tn.vid is the primary key of the D6 term_node table
        // and so is guaranteed unique. Unfortunately it also happens to be in
        // the wrong order which is less efficient, but c'est la vie.
        $query->orderBy('tn.vid');
        $query->orderBy('td.weight');
        $query->orderBy('tn.tid');
        // Work around a bug in the PostgreSQL driver that would result in fatal
        // errors when this subquery is used in the insert query below. See
        // https://drupal.org/node/2057693.
        $fields =& $query->getFields();
        unset($fields['td.weight']);
        unset($fields['tn.tid']);
        db_insert('taxonomy_update_7005')->from($query)
            ->execute();
    }
    else {
        // We do each pass in batches of 1000.
        $batch = 1000;
        $result = db_query_range('SELECT vocab_id, tid, nid, vid, type, created, sticky, status, is_current FROM {taxonomy_update_7005} ORDER BY n', $sandbox['last'], $batch);
        if (isset($sandbox['cursor'])) {
            $values = $sandbox['cursor']['values'];
            $deltas = $sandbox['cursor']['deltas'];
        }
        else {
            // No previous insert yet. $values must still be defined because it
            // is stored in the sandbox after the loop even when this pass reads
            // no rows; NULL keeps the isset($values) check below FALSE.
            $values = NULL;
            $deltas = array();
        }
        // NOTE(review): if a pass reads no rows while count is still below
        // total, count never advances and the update is rescheduled
        // indefinitely. This looks unreachable as long as the COUNT(*) query
        // and the staging insert see the same data - confirm.
        foreach ($result as $record) {
            $sandbox['count'] += 1;
            // Use the valid field for this vocabulary and node type or use the
            // overflow vocabulary if there is no valid field.
            $field_name = isset($sandbox['vocabularies'][$record->vocab_id][$record->type]) ? $sandbox['vocabularies'][$record->vocab_id][$record->type] : 'taxonomyextra';
            $field = $field_info[$field_name];
            // Start deltas from 0, and increment by one for each term attached to a
            // node.
            if (!isset($deltas[$field_name])) {
                $deltas[$field_name] = 0;
            }
            if (isset($values)) {
                // If the last inserted revision_id is the same as the current record,
                // use the previous deltas to calculate the next delta. Index 2 of
                // $values is the revision_id column (see $columns below).
                if ($record->vid == $values[2]) {
                    // For limited cardinality fields, the delta must not be allowed to
                    // exceed the cardinality during the update. So ensure that the
                    // delta about to be inserted is within this limit.
                    // @see field_default_validate().
                    if ($field['cardinality'] != FIELD_CARDINALITY_UNLIMITED && $deltas[$field_name] + 1 > $field['cardinality']) {
                        // For excess values of a single-term vocabulary, switch over to
                        // the overflow field.
                        $field_name = 'taxonomyextra';
                        $field = $field_info[$field_name];
                        if (!isset($deltas[$field_name])) {
                            $deltas[$field_name] = 0;
                        }
                    }
                }
                else {
                    // When the record is a new revision, empty the deltas array.
                    $deltas = array(
                        $field_name => 0,
                    );
                }
            }
            // Table and column found in the field's storage details. During upgrades,
            // it's always SQL.
            $table_name = "field_data_{$field_name}";
            $revision_name = "field_revision_{$field_name}";
            $value_column = $field_name . '_tid';
            // Column names and values in field storage are the same for current and
            // revision.
            $columns = array(
                'entity_type',
                'entity_id',
                'revision_id',
                'bundle',
                'language',
                'delta',
                $value_column,
            );
            // The post-increment stores the current delta for this row and
            // advances the counter for the next term on the same field.
            $values = array(
                'node',
                $record->nid,
                $record->vid,
                $record->type,
                LANGUAGE_NONE,
                $deltas[$field_name]++,
                $record->tid,
            );
            // Insert rows into the revision table.
            db_insert($revision_name)->fields($columns)
                ->values($values)
                ->execute();
            // is_current column is a node ID if this revision is also current.
            if ($record->is_current) {
                db_insert($table_name)->fields($columns)
                    ->values($values)
                    ->execute();
                // Only insert a record in the taxonomy index if the node is published.
                if ($record->status) {
                    // Update the {taxonomy_index} table.
                    db_insert('taxonomy_index')->fields(array(
                        'nid',
                        'tid',
                        'sticky',
                        'created',
                    ))
                        ->values(array(
                        $record->nid,
                        $record->tid,
                        $record->sticky,
                        $record->created,
                    ))
                        ->execute();
                }
            }
        }
        // Store the set of inserted values and the current revision's deltas in the
        // sandbox.
        $sandbox['cursor'] = array(
            'values' => $values,
            'deltas' => $deltas,
        );
        $sandbox['last'] += $batch;
    }
    // This check runs on every call, including the first one, so an empty
    // migration (total of 0) is finished immediately.
    if ($sandbox['count'] < $sandbox['total']) {
        $sandbox['#finished'] = FALSE;
    }
    else {
        // Everything has been migrated: drop the legacy tables.
        db_drop_table('taxonomy_vocabulary_node_type');
        db_drop_table('taxonomy_term_node');
        // The temporary staging table is no longer needed either.
        db_drop_table('taxonomy_update_7005');
        $sandbox['#finished'] = TRUE;
        // Determine necessity of taxonomyextras field.
        $field = $field_info['taxonomyextra'];
        $revision_name = 'field_revision_' . $field['field_name'];
        // Distinct bundles (node types) that have rows in the overflow
        // field's revision table.
        $node_types = db_select($revision_name)->distinct()
            ->fields($revision_name, array(
            'bundle',
        ))
            ->execute()
            ->fetchCol();
        if (empty($node_types)) {
            // Delete the overflow field if there are no rows in the revision table.
            _update_7000_field_delete_field('taxonomyextra');
        }
        else {
            // Remove instances which are not actually used.
            $bundles = db_query('SELECT bundle FROM {field_config_instance} WHERE field_name = :field_name', array(
                ':field_name' => 'taxonomyextra',
            ))->fetchCol();
            $bundles = array_diff($bundles, $node_types);
            foreach ($bundles as $bundle) {
                _update_7000_field_delete_instance('taxonomyextra', 'node', $bundle);
            }
        }
    }
}

Buggy or inaccurate documentation? Please file an issue. Need support? Need help programming? Connect with the Drupal community.