Просмотр архива SilverStripe


<?php
/**
 * Represents a large text field that contains HTML content.
 * This behaves similarly to {@link Text}, but the template processor won't escape any HTML content within it.
 *
 * @see HTMLVarchar
 * @see Text
 * @see Varchar
 *
 * @package framework
 * @subpackage model
 */
class HTMLText extends Text {
    /**
     * Escape type declared for this field type. 'xml' marks the stored value as markup
     * (presumably consumed by the template casting layer - confirm against the parent classes).
     */
    private static $escape_type = 'xml';

    /**
     * Return types of the helper methods, so results that may still contain markup are cast
     * as HTMLText and the plain-text result of NoHTML is cast as Text.
     */
    private static $casting = array(
        "AbsoluteLinks" => "HTMLText",
        "BigSummary" => "HTMLText",
        "ContextSummary" => "HTMLText",
        "FirstParagraph" => "HTMLText",
        "FirstSentence" => "HTMLText",
        "LimitCharacters" => "HTMLText",
        "LimitSentences" => "HTMLText",
        "Lower" => "HTMLText",
        "LowerCase" => "HTMLText",
        "Summary" => "HTMLText",
        "Upper" => "HTMLText",
        "UpperCase" => "HTMLText",
        'EscapeXML' => 'HTMLText',
        'LimitWordCount' => 'HTMLText',
        'LimitWordCountXML' => 'HTMLText',
        'NoHTML' => 'Text',
    );

    /**
     * @var bool Whether forTemplate() runs the value through the shortcode parser
     */
    protected $processShortcodes = true;

    /**
     * @var array|false List of allowed element names (optionally including 'text()'),
     *                  or false when no whitelist filtering is applied
     */
    protected $whitelist = false;

    /**
     * @param string $name Field name, passed through to the parent constructor
     * @param array|string $options Option map (see {@link setOptions()}). A plain string is treated
     *                              as shorthand for array('whitelist' => $options)
     */
    public function __construct($name = null, $options = array()) {
        if(is_string($options)) {
            $options = array('whitelist' => $options);
        }

        // Constructors have no meaningful return value, so the parent call is not returned
        parent::__construct($name, $options);
    }

    /**
     * @param array $options
     *
     * Options accepted in addition to those provided by Text:
     *
     *   - shortcodes: If true, shortcodes will be turned into the appropriate HTML.
     *                 If false, shortcodes will not be processed.
     *
     *   - whitelist: If provided, an array or a comma-separated list of elements that will be allowed to be
     *                stored (be careful on relying on this for XSS protection - some seemingly-safe elements
     *                allow attributes that can be exploited, for instance
     *                <img onload="exploiting_code();" src="..." />)
     *                Text nodes outside of HTML tags are filtered out by default, but may be included by adding
     *                the text() directive. E.g. 'link,meta,text()' will allow only <link /> <meta /> and text at
     *                the root level.
     */
    public function setOptions(array $options = array()) {
        parent::setOptions($options);

        if(array_key_exists("shortcodes", $options)) {
            $this->processShortcodes = !!$options["shortcodes"];
        }

        if(array_key_exists("whitelist", $options)) {
            if(is_array($options['whitelist'])) {
                $this->whitelist = $options['whitelist'];
            }
            else {
                // Split on commas with optional trailing whitespace. Empty entries are dropped so an empty
                // string yields an empty (falsy) whitelist rather than a bogus '' tag name
                $this->whitelist = preg_split('/,\s*/', $options['whitelist'], -1, PREG_SPLIT_NO_EMPTY);
            }
        }
    }

    /**
     * Create a summary of the content. This will be some section of the first paragraph, limited by
     * $maxWords. All internal tags are stripped out - the return value is a string
     *
     * This is sort of the HTML aware equivalent to Text#Summary, although the logic for summarising is not exactly
     * the same
     *
     * @param int $maxWords Approximate maximum number of words to return. The search for a sentence ending starts
     *                      at word index $maxWords, so the result can hold up to $maxWords + 1 words.
     *                      Pass -1 for no limit
     * @param int $flex Number of words to search through when looking for a nice cut point
     * @param string $add What to add to the end of the summary if we cut at a less-than-ideal cut point
     * @return string A nice(ish) summary with no html tags (but possibly still some html entities)
     *
     * @see framework/core/model/fieldtypes/Text#Summary($maxWords)
     */
    public function Summary($maxWords = 50, $flex = 15, $add = '...') {
        $str = false;

        /* First we need the text of the first paragraph, without tags. Try using SimpleXML first */
        if (class_exists('SimpleXMLElement')) {
            $doc = new DOMDocument();

            // Catch warnings thrown by loadHTML and turn them into a failure boolean rather than a SilverStripe
            // error. A closure is used because create_function() is deprecated in PHP 7.2 and removed in 8.0
            set_error_handler(function($severity, $message) {
                throw new Exception("HTML Parse Error: ".$message);
            }, E_ALL);
            //  Nonbreaking spaces get converted into weird characters, so strip them
            $value = str_replace('&nbsp;', ' ', $this->value);
            try {
                // The meta tag is prepended so the parser reads the fragment as UTF-8
                $res = $doc->loadHTML('<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>' . $value);
            }
            catch (Exception $e) { $res = false; }
            restore_error_handler();

            if ($res) {
                $xml = simplexml_import_dom($doc);
                $res = $xml->xpath('//p');
                if (!empty($res)) $str = strip_tags($res[0]->asXML());
            }
        }

        /* If that failed, most likely the passed HTML is broken. use a simple regex + a custom more brutal strip_tags.
         * We don't use strip_tags because that does very badly on broken HTML */
        if (!$str) {
            /* See if we can pull a paragraph out*/

            // Strip out any images in case there's one at the beginning. Not doing this will return a blank paragraph
            $str = preg_replace('{^\s*(<.+?>)*<img[^>]*>}', '', $this->value);
            if (preg_match('{<p(\s[^<>]*)?>(.*[A-Za-z]+.*)</p>}', $str, $matches)) $str = $matches[2];

            /* If _that_ failed, just use the whole text */
            if (!$str) $str = $this->value;

            /* Now pull out all the html-alike stuff */
            /* Take out anything that is obviously a tag */
            $str = preg_replace('{</?[a-zA-Z]+[^<>]*>}', '', $str);
            /* Strip out any left over looking bits. Textual < or > should already be encoded to &lt; or &gt; */
            $str = preg_replace('{</|<|>}', '', $str);
        }

        /* Now split into words. If we are under the maxWords limit, just return the whole string (re-implode for
         * whitespace normalization) */
        $words = preg_split('/\s+/', $str);
        if ($maxWords == -1 || count($words) <= $maxWords) return implode(' ', $words);

        /* Otherwise work backwards for a looking for a sentence ending (we try to avoid abbreviations, but aren't
         * very good at it). $words[$maxWords] is guaranteed to exist here because count($words) > $maxWords */
        for ($i = $maxWords; $i >= $maxWords - $flex && $i >= 0; $i--) {
            if (preg_match('/\.$/', $words[$i]) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $words[$i])) {
                return implode(' ', array_slice($words, 0, $i+1));
            }
        }

        // If we didn't find a sentence ending quickly enough, just cut at the maxWords point and add '...' to the end
        return implode(' ', array_slice($words, 0, $maxWords)) . $add;
    }

    /**
     * Returns the first sentence from the first paragraph. If it can't figure out what the first paragraph is (or
     * there isn't one), it returns the same as Summary()
     *
     * This is the HTML aware equivalent to Text#FirstSentence
     *
     * @return string
     *
     * @see framework/core/model/fieldtypes/Text#FirstSentence()
     */
    public function FirstSentence() {
        /* Use summary's html processing logic to get the first paragraph */
        $paragraph = $this->Summary(-1);

        /* Then look for the first word ending in '!', '?' or '.' that is not a known abbreviation.
         * We could probably use a nice regex, but for now this will do */
        $words = preg_split('/\s+/', $paragraph);
        foreach ($words as $i => $word) {
            if (preg_match('/(!|\?|\.)$/', $word) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $word)) {
                return implode(' ', array_slice($words, 0, $i+1));
            }
        }

        /* If we didn't find a sentence ending, use the summary. We re-call rather than using paragraph so that
         * Summary will limit the result this time */
        return $this->Summary();
    }

    /**
     * Return the value of the field with relative links converted to absolute urls (with placeholders parsed).
     * @return string
     */
    public function AbsoluteLinks() {
        return HTTP::absoluteURLs($this->forTemplate());
    }

    /**
     * Value used for template output: parsed by the active ShortcodeParser when shortcode
     * processing is enabled, otherwise the raw stored value.
     *
     * @return string
     */
    public function forTemplate() {
        if ($this->processShortcodes) {
            return ShortcodeParser::get_active()->parse($this->value);
        }
        else {
            return $this->value;
        }
    }

    /**
     * Applies the whitelist filter to the value before handing it to the parent for database preparation.
     *
     * @param string $value
     * @return mixed Whatever the parent prepValueForDB() returns for the filtered value
     */
    public function prepValueForDB($value) {
        return parent::prepValueForDB($this->whitelistContent($value));
    }

    /**
     * Filter the given $value string through the whitelist filter
     *
     * @param string $value Input html content
     * @return string Value with all non-whitelisted content stripped (if applicable)
     */
    public function whitelistContent($value) {
        if($this->whitelist) {
            $dom = Injector::inst()->create('HTMLValue', $value);

            $query = array();
            // By default text nodes directly under <body> are removed as well
            $textFilter = ' | //body/text()';
            foreach ($this->whitelist as $tag) {
                if($tag === 'text()') {
                    $textFilter = ''; // Disable text filter if allowed
                } else {
                    $query[] = 'not(self::'.$tag.')';
                }
            }

            // When no element names are whitelisted (e.g. only 'text()'), an empty "[]" predicate would be
            // invalid XPath; omit it so that every element under <body> is selected for removal
            $predicate = $query ? '['.implode(' and ', $query).']' : '';

            foreach($dom->query('//body//*'.$predicate.$textFilter) as $el) {
                // A node may already be detached if one of its ancestors was removed earlier in this loop
                if ($el->parentNode) $el->parentNode->removeChild($el);
            }

            $value = $dom->getContent();
        }
        return $value;
    }

    /**
     * Returns true if the field has meaningful content.
     * Excludes null content like <h1></h1>, <p></p> ,etc
     *
     * @return boolean
     */
    public function exists() {
        // If it's blank, it's blank
        if(!parent::exists()) {
            return false;
        }

        // If it's got a content tag
        if(preg_match('/<(img|embed|object|iframe|meta|source|link)[^>]*>/i', $this->value)) {
            return true;
        }

        // If it's just one or two tags on its own (and not the above) it's empty.
        // This might be <p></p> or <h1></h1> or whatever.
        if(preg_match('/^[\s]*(<[^>]+>[\s]*){1,2}$/', $this->value)) {
            return false;
        }

        // Otherwise its content is genuine content
        return true;
    }

    /**
     * Builds the default editing field for this type: an HtmlEditorField named after this field.
     *
     * @param string $title Optional field title
     * @param array $params Unused here
     * @return HtmlEditorField
     */
    public function scaffoldFormField($title = null, $params = null) {
        return new HtmlEditorField($this->name, $title);
    }

    /**
     * Builds the default search field for this type: a plain TextField named after this field.
     *
     * @param string $title Optional field title
     * @param array $params Unused here
     * @return TextField
     */
    public function scaffoldSearchField($title = null, $params = null) {
        return new TextField($this->name, $title);
    }

}