function Xss::attributes

Same name in other branches
  1. 9 core/lib/Drupal/Component/Utility/Xss.php \Drupal\Component\Utility\Xss::attributes()
  2. 8.9.x core/lib/Drupal/Component/Utility/Xss.php \Drupal\Component\Utility\Xss::attributes()
  3. 10 core/lib/Drupal/Component/Utility/Xss.php \Drupal\Component\Utility\Xss::attributes()

Processes a string of HTML attributes.

Parameters

string $attributes: The string of HTML attributes to process.

Return value

array Cleaned-up version of the HTML attributes, one array item per attribute that was kept.

File

core/lib/Drupal/Component/Utility/Xss.php, line 202

Class

Xss
Provides helper to filter for cross-site scripting.

Namespace

Drupal\Component\Utility

Code

/**
 * Processes a string of HTML attributes.
 *
 * The string is consumed from left to right by a small state machine that
 * alternates between expecting an attribute name (mode 0), an equals sign or
 * the end of a valueless attribute (mode 1), and an attribute value (mode 2).
 * Input that does not fit the expected state is stripped and parsing resumes
 * in mode 0.
 *
 * Attribute names are lowercased. An attribute is left out of the result when
 * its name is "style", starts with "on" or "-", or is longer than 96
 * characters. Values are passed through UrlHelper::filterBadProtocol() unless
 * the attribute is a "data-" attribute or one of title, alt, rel, property,
 * class or datetime. Unquoted values are re-emitted in double quotes.
 *
 * @param string $attributes
 *   The string of HTML attributes to process.
 *
 * @return string[]
 *   The attributes that were kept, in input order, each formatted as
 *   name="value", name='value' or, for valueless attributes, just the name.
 *
 * @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
 */
protected static function attributes($attributes) {
    $attributes_array = [];
    $mode = 0;
    $attribute_name = '';
    $skip = FALSE;
    $skip_protocol_filtering = FALSE;
    // Recognized attribute value forms, tried in this order, each paired with
    // the quote character used when the attribute is written back out:
    // double-quoted, single-quoted, and unquoted (re-emitted double-quoted).
    $value_patterns = [
        ['/^"([^"]*)"(\\s+|$)/', '"'],
        ["/^'([^']*)'(\\s+|\$)/", "'"],
        ["%^([^\\s\"']+)(\\s+|\$)%", '"'],
    ];
    while (strlen($attributes) !== 0) {
        // Did the current state recognize what is at the front of the input?
        $working = FALSE;
        switch ($mode) {
            case 0:
                // Attribute name, href for instance.
                if (preg_match('/^[-a-zA-Z][-a-zA-Z0-9]*/', $attributes, $match)) {
                    $attribute_name = strtolower($match[0]);
                    // Attributes flagged here are still parsed so that their
                    // value is consumed, but they are never added to the
                    // result.
                    $skip = $attribute_name === 'style'
                        || str_starts_with($attribute_name, 'on')
                        || str_starts_with($attribute_name, '-')
                        || strlen($attribute_name) > 96;
                    // Values for attributes of type URI should be filtered for
                    // potentially malicious protocols (for example, an href-attribute
                    // starting with "javascript:"). However, for some non-URI
                    // attributes performing this filtering causes valid and safe data
                    // to be mangled. We prevent this by skipping protocol filtering on
                    // such attributes.
                    // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
                    // @see https://www.w3.org/TR/html4/index/attributes.html
                    $skip_protocol_filtering = str_starts_with($attribute_name, 'data-') || in_array($attribute_name, [
                        'title',
                        'alt',
                        'rel',
                        'property',
                        'class',
                        'datetime',
                    ], TRUE);
                    $working = TRUE;
                    $mode = 1;
                    // Drop the name that was just read from the front of the
                    // input.
                    $attributes = substr($attributes, strlen($match[0]));
                }
                break;
            case 1:
                // Equals sign or valueless ("selected").
                if (preg_match('/^\\s*=\\s*/', $attributes, $match)) {
                    $working = TRUE;
                    $mode = 2;
                    $attributes = substr($attributes, strlen($match[0]));
                    break;
                }
                if (preg_match('/^\\s+/', $attributes, $match)) {
                    // Whitespace directly after the name: the attribute has no
                    // value, so it is complete as it stands.
                    $working = TRUE;
                    $mode = 0;
                    if (!$skip) {
                        $attributes_array[] = $attribute_name;
                    }
                    $attributes = substr($attributes, strlen($match[0]));
                }
                break;
            case 2:
                // Once we've finished processing the attribute value continue to look
                // for attributes. This state always counts as successful: if no
                // value pattern matches, nothing is consumed here and the
                // remaining input is re-examined in mode 0.
                $mode = 0;
                $working = TRUE;
                // Attribute value, a URL after href= for instance.
                foreach ($value_patterns as [$pattern, $quote]) {
                    if (preg_match($pattern, $attributes, $match)) {
                        $value = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);
                        if (!$skip) {
                            $attributes_array[] = "{$attribute_name}={$quote}{$value}{$quote}";
                        }
                        // Drop the value and the whitespace that followed it.
                        $attributes = substr($attributes, strlen($match[0]));
                        break;
                    }
                }
                break;
        }
        if (!$working) {
            // Not well-formed; remove and try again.
            // NOTE(review): the two consecutive "|" separators in front of \S
            // form an empty alternative inside the group. The pattern is kept
            // byte-for-byte; confirm whether that is intentional.
            // preg_replace() returns NULL on a PCRE error. That is mapped to an
            // empty string so the loop ends, which is what strlen(NULL) led to
            // before, without passing NULL to strlen().
            $attributes = preg_replace('/
          ^
          (
          "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
          |               # or
          \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
          |               # or
          \\S              # - a non-whitespace character
          )*              # any number of the above three
          \\s*             # any number of whitespaces
          /x', '', $attributes) ?? '';
            $mode = 0;
        }
    }
    // The attribute list ends with a valueless attribute like "selected".
    if ($mode === 1 && !$skip) {
        $attributes_array[] = $attribute_name;
    }
    return $attributes_array;
}

Buggy or inaccurate documentation? Please file an issue. Need support? Need help programming? Connect with the Drupal community.