Вход Регистрация
Файл: htmlpurifier-4.3.0/tests/HTMLPurifier/LexerTest.php
Строк: 895
<?php

class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
{

    protected 
$_has_pear false;

    public function 
__construct() {
        
parent::__construct();
        if (
$GLOBALS['HTMLPurifierTest']['PEAR'] &&
        
// PEARSax3 is not maintained and throws loads of DEPRECATED
        // errors in PHP 5.3
        
version_compare(PHP_VERSION'5.3''<')) {
            require_once 
'HTMLPurifier/Lexer/PEARSax3.php';
            
$this->_has_pear true;
        }
        if (
$GLOBALS['HTMLPurifierTest']['PH5P']) {
            require_once 
'HTMLPurifier/Lexer/PH5P.php';
        }
    }

    
// HTMLPurifier_Lexer::create() --------------------------------------------

    
function test_create() {
        
$this->config->set('Core.MaintainLineNumbers'true);
        
$lexer HTMLPurifier_Lexer::create($this->config);
        
$this->assertIsA($lexer'HTMLPurifier_Lexer_DirectLex');
    }

    function 
test_create_objectLexerImpl() {
        
$this->config->set('Core.LexerImpl', new HTMLPurifier_Lexer_DirectLex());
        
$lexer HTMLPurifier_Lexer::create($this->config);
        
$this->assertIsA($lexer'HTMLPurifier_Lexer_DirectLex');
    }

    function 
test_create_unknownLexer() {
        
$this->config->set('Core.LexerImpl''AsdfAsdf');
        
$this->expectException(new HTMLPurifier_Exception('Cannot instantiate unrecognized Lexer type AsdfAsdf'));
        
HTMLPurifier_Lexer::create($this->config);
    }

    function 
test_create_incompatibleLexer() {
        
$this->config->set('Core.LexerImpl''DOMLex');
        
$this->config->set('Core.MaintainLineNumbers'true);
        
$this->expectException(new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'));
        
HTMLPurifier_Lexer::create($this->config);
    }

    
// HTMLPurifier_Lexer->parseData() -----------------------------------------

    
function assertParseData($input$expect true) {
        if (
$expect === true$expect $input;
        
$lexer = new HTMLPurifier_Lexer();
        
$this->assertIdentical($expect$lexer->parseData($input));
    }

    function 
test_parseData_plainText() {
        
$this->assertParseData('asdf');
    }

    function 
test_parseData_ampersandEntity() {
        
$this->assertParseData('&amp;''&');
    }

    function 
test_parseData_quotEntity() {
        
$this->assertParseData('&quot;''"');
    }

    function 
test_parseData_aposNumericEntity() {
        
$this->assertParseData('&#039;'"'");
    }

    function 
test_parseData_aposCompactNumericEntity() {
        
$this->assertParseData('&#39;'"'");
    }

    function 
test_parseData_adjacentAmpersandEntities() {
        
$this->assertParseData('&amp;&amp;&amp;''&&&');
    }

    function 
test_parseData_trailingUnescapedAmpersand() {
        
$this->assertParseData('&amp;&''&&');
    }

    function 
test_parseData_internalUnescapedAmpersand() {
        
$this->assertParseData('Procter & Gamble');
    }

    function 
test_parseData_improperEntityFaultToleranceTest() {
        
$this->assertParseData('&#x2D;');
    }

    
// HTMLPurifier_Lexer->extractBody() ---------------------------------------

    
function assertExtractBody($text$extract true) {
        
$lexer = new HTMLPurifier_Lexer();
        
$result $lexer->extractBody($text);
        if (
$extract === true$extract $text;
        
$this->assertIdentical($extract$result);
    }

    function 
test_extractBody_noBodyTags() {
        
$this->assertExtractBody('<b>Bold</b>');
    }

    function 
test_extractBody_lowercaseBodyTags() {
        
$this->assertExtractBody('<html><body><b>Bold</b></body></html>''<b>Bold</b>');
    }

    function 
test_extractBody_uppercaseBodyTags() {
        
$this->assertExtractBody('<HTML><BODY><B>Bold</B></BODY></HTML>''<B>Bold</B>');
    }

    function 
test_extractBody_realisticUseCase() {
        
$this->assertExtractBody(
'<?xml version="1.0"
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
      <title>xyz</title>
   </head>
   <body>
      <form method="post" action="whatever1">
         <div>
            <input type="text" name="username" />
            <input type="text" name="password" />
            <input type="submit" />
         </div>
      </form>
   </body>
</html>'
,
    
'
      <form method="post" action="whatever1">
         <div>
            <input type="text" name="username" />
            <input type="text" name="password" />
            <input type="submit" />
         </div>
      </form>
   '
);
    }

    function 
test_extractBody_bodyWithAttributes() {
        
$this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>''<b>Bold</b>');
    }

    function 
test_extractBody_preserveUnclosedBody() {
        
$this->assertExtractBody('<body>asdf'); // not closed, don't accept
    
}

    function 
test_extractBody_useLastBody() {
        
$this->assertExtractBody('<body>foo</body>bar</body>''foo</body>bar');
    }

    
// HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------

    
function assertTokenization($input$expect$alt_expect = array()) {
        
$lexers = array();
        
$lexers['DirectLex']  = new HTMLPurifier_Lexer_DirectLex();
        if (
$this->_has_pear$lexers['PEARSax3']   = new HTMLPurifier_Lexer_PEARSax3();
        if (
class_exists('DOMDocument')) {
            
$lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
            
$lexers['PH5P']   = new HTMLPurifier_Lexer_PH5P();
        }
        foreach (
$lexers as $name => $lexer) {
            
$result $lexer->tokenizeHTML($input$this->config$this->context);
            if (isset(
$alt_expect[$name])) {
                if (
$alt_expect[$name] === false) continue;
                
$t_expect $alt_expect[$name];
                
$this->assertIdentical($result$alt_expect[$name], "$name: %s");
            } else {
                
$t_expect $expect;
                
$this->assertIdentical($result$expect"$name: %s");
            }
            if (
$t_expect != $result) {
                
printTokens($result);
            }
        }
    }

    function 
test_tokenizeHTML_emptyInput() {
        
$this->assertTokenization('', array());
    }

    function 
test_tokenizeHTML_plainText() {
        
$this->assertTokenization(
            
'This is regular text.',
            array(
                new 
HTMLPurifier_Token_Text('This is regular text.')
            )
        );
    }

    function 
test_tokenizeHTML_textAndTags() {
        
$this->assertTokenization(
            
'This is <b>bold</b> text',
            array(
                new 
HTMLPurifier_Token_Text('This is '),
                new 
HTMLPurifier_Token_Start('b', array()),
                new 
HTMLPurifier_Token_Text('bold'),
                new 
HTMLPurifier_Token_End('b'),
                new 
HTMLPurifier_Token_Text(' text'),
            )
        );
    }

    function 
test_tokenizeHTML_normalizeCase() {
        
$this->assertTokenization(
            
'<DIV>Totally rad dude. <b>asdf</b></div>',
            array(
                new 
HTMLPurifier_Token_Start('DIV', array()),
                new 
HTMLPurifier_Token_Text('Totally rad dude. '),
                new 
HTMLPurifier_Token_Start('b', array()),
                new 
HTMLPurifier_Token_Text('asdf'),
                new 
HTMLPurifier_Token_End('b'),
                new 
HTMLPurifier_Token_End('div'),
            )
        );
    }

    function 
test_tokenizeHTML_notWellFormed() {
        
$this->assertTokenization(
            
'<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>',
            array(
                new 
HTMLPurifier_Token_Start('asdf'),
                new 
HTMLPurifier_Token_End('asdf'),
                new 
HTMLPurifier_Token_Start('d'),
                new 
HTMLPurifier_Token_End('d'),
                new 
HTMLPurifier_Token_Start('poOloka'),
                new 
HTMLPurifier_Token_Start('poolasdf'),
                new 
HTMLPurifier_Token_Start('ds'),
                new 
HTMLPurifier_Token_End('asdf'),
                new 
HTMLPurifier_Token_End('ASDF'),
            ),
            array(
                
'DOMLex' => $alt = array(
                    new 
HTMLPurifier_Token_Empty('asdf'),
                    new 
HTMLPurifier_Token_Empty('d'),
                    new 
HTMLPurifier_Token_Start('pooloka'),
                    new 
HTMLPurifier_Token_Start('poolasdf'),
                    new 
HTMLPurifier_Token_Empty('ds'),
                    new 
HTMLPurifier_Token_End('poolasdf'),
                    new 
HTMLPurifier_Token_End('pooloka'),
                ),
                
'PH5P' => $alt,
            )
        );
    }

    function 
test_tokenizeHTML_whitespaceInTag() {
        
$this->assertTokenization(
            
'<a'."t".'href="foobar.php"'."n".'title="foo!">Link to <b id="asdf">foobar</b></a>',
            array(
                new 
HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')),
                new 
HTMLPurifier_Token_Text('Link to '),
                new 
HTMLPurifier_Token_Start('b',array('id'=>'asdf')),
                new 
HTMLPurifier_Token_Text('foobar'),
                new 
HTMLPurifier_Token_End('b'),
                new 
HTMLPurifier_Token_End('a'),
            )
        );
    }

    function 
test_tokenizeHTML_singleAttribute() {
        
$this->assertTokenization(
            
'<br style="&amp;" />',
            array(
                new 
HTMLPurifier_Token_Empty('br', array('style' => '&'))
            )
        );
    }

    function 
test_tokenizeHTML_emptyTag() {
        
$this->assertTokenization(
            
'<br />',
            array( new 
HTMLPurifier_Token_Empty('br') )
        );
    }

    function 
test_tokenizeHTML_comment() {
        
$this->assertTokenization(
            
'<!-- Comment -->',
            array( new 
HTMLPurifier_Token_Comment(' Comment ') )
        );
    }

    function 
test_tokenizeHTML_malformedComment() {
        
$this->assertTokenization(
            
'<!-- not so well formed --->',
            array( new 
HTMLPurifier_Token_Comment(' not so well formed -') )
        );
    }

    function 
test_tokenizeHTML_unterminatedTag() {
        
$this->assertTokenization(
            
'<a href=""',
            array( new 
HTMLPurifier_Token_Text('<a href=""') ),
            array(
                
// I like our behavior better, but it's non-standard
                
'DOMLex'   => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),
                
'PEARSax3' => array( new HTMLPurifier_Token_Start('a', array('href'=>'')) ),
                
'PH5P' => false// total barfing, grabs scaffolding too
            
)
        );
    }

    function 
test_tokenizeHTML_specialEntities() {
        
$this->assertTokenization(
            
'&lt;b&gt;',
            array(
                new 
HTMLPurifier_Token_Text('<b>')
            ),
            array(
                
// some parsers will separate entities out
                
'PEARSax3' => $split = array(
                    new 
HTMLPurifier_Token_Text('<'),
                    new 
HTMLPurifier_Token_Text('b'),
                    new 
HTMLPurifier_Token_Text('>'),
                ),
                
'PH5P' => $split,
            )
        );
    }

    function 
test_tokenizeHTML_earlyQuote() {
        
$this->assertTokenization(
            
'<a "=>',
            array( new 
HTMLPurifier_Token_Empty('a') ),
            array(
                
// we barf on this input
                
'DirectLex' => $tokens = array(
                    new 
HTMLPurifier_Token_Start('a', array('"' => ''))
                ),
                
'PEARSax3' => $tokens,
                
'PH5P' => false// behavior varies; handle this personally
            
)
        );
    }

    function 
test_tokenizeHTML_earlyQuote_PH5P() {
        if (!
class_exists('DOMDocument')) return;
        
$lexer = new HTMLPurifier_Lexer_PH5P();
        
$result $lexer->tokenizeHTML('<a "=>'$this->config$this->context);
        if (
$this->context->get('PH5PError'true)) {
            
$this->assertIdentical(array(
                new 
HTMLPurifier_Token_Start('a', array('"' => ''))
            ), 
$result);
        } else {
            
$this->assertIdentical(array(
                new 
HTMLPurifier_Token_Empty('a', array('"' => ''))
            ), 
$result);
        }
    }

    function 
test_tokenizeHTML_unescapedQuote() {
        
$this->assertTokenization(
            
'"',
            array( new 
HTMLPurifier_Token_Text('"') )
        );
    }

    function 
test_tokenizeHTML_escapedQuote() {
        
$this->assertTokenization(
            
'&quot;',
            array( new 
HTMLPurifier_Token_Text('"') ),
            array(
                
'PEARSax3' => false// PEAR barfs on this
            
)
        );
    }

    function 
test_tokenizeHTML_cdata() {
        
$this->assertTokenization(
            
'<![CDATA[You <b>can&#39;t</b> get me!]]>',
            array( new 
HTMLPurifier_Token_Text('You <b>can&#39;t</b> get me!') ),
            array(
                
// PEAR splits up all of the CDATA
                
'PEARSax3' => $split = array(
                    new 
HTMLPurifier_Token_Text('You '),
                    new 
HTMLPurifier_Token_Text('<'),
                    new 
HTMLPurifier_Token_Text('b'),
                    new 
HTMLPurifier_Token_Text('>'),
                    new 
HTMLPurifier_Token_Text('can'),
                    new 
HTMLPurifier_Token_Text('&'),
                    new 
HTMLPurifier_Token_Text('#39;t'),
                    new 
HTMLPurifier_Token_Text('<'),
                    new 
HTMLPurifier_Token_Text('/b'),
                    new 
HTMLPurifier_Token_Text('>'),
                    new 
HTMLPurifier_Token_Text(' get me!'),
                ),
                
'PH5P' => $split,
            )
        );
    }

    function 
test_tokenizeHTML_characterEntity() {
        
$this->assertTokenization(
            
'&theta;',
            array( new 
HTMLPurifier_Token_Text("xCExB8") )
        );
    }

    function 
test_tokenizeHTML_characterEntityInCDATA() {
        
$this->assertTokenization(
            
'<![CDATA[&rarr;]]>',
            array( new 
HTMLPurifier_Token_Text("&rarr;") ),
            array(
                
'PEARSax3' => $split = array(
                    new 
HTMLPurifier_Token_Text('&'),
                    new 
HTMLPurifier_Token_Text('rarr;'),
                ),
                
'PH5P' => $split,
            )
        );
    }

    function 
test_tokenizeHTML_entityInAttribute() {
        
$this->assertTokenization(
            
'<a href="index.php?title=foo&amp;id=bar">Link</a>',
            array(
                new 
HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')),
                new 
HTMLPurifier_Token_Text('Link'),
                new 
HTMLPurifier_Token_End('a'),
            )
        );
    }

    function 
test_tokenizeHTML_preserveUTF8() {
        
$this->assertTokenization(
            
"xCExB8",
            array( new 
HTMLPurifier_Token_Text("xCExB8") )
        );
    }

    function 
test_tokenizeHTML_specialEntityInAttribute() {
        
$this->assertTokenization(
            
'<br test="x &lt; 6" />',
            array( new 
HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) )
        );
    }

    function 
test_tokenizeHTML_emoticonProtection() {
        
$this->assertTokenization(
            
'<b>Whoa! <3 That's not good >.></b>',
            array(
                new HTMLPurifier_Token_Start('b'),
                new HTMLPurifier_Token_Text('
Whoa'),
                new HTMLPurifier_Token_Text('
<'),
                new HTMLPurifier_Token_Text('
3 That's not good >.>'),
                new 
HTMLPurifier_Token_End('b')
            ),
            array(
                
// text is absorbed together
                
'DOMLex' => array(
                    new 
HTMLPurifier_Token_Start('b'),
                    new 
HTMLPurifier_Token_Text('Whoa! <3 That's not good >.>'),
                    new HTMLPurifier_Token_End('b'),
                ),
                '
PEARSax3' => false, // totally mangled
                '
PH5P' => array( // interesting grouping
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('
Whoa'),
                    new HTMLPurifier_Token_Text('
<'),
                    new HTMLPurifier_Token_Text('
3 That's not good >.>'),
                    new 
HTMLPurifier_Token_End('b'),
                ),
            )
        );
    }

    function 
test_tokenizeHTML_commentWithFunkyChars() {
        
$this->assertTokenization(
            
'<!-- This >< comment --><br />',
            array(
                new 
HTMLPurifier_Token_Comment(' This >< comment '),
                new 
HTMLPurifier_Token_Empty('br'),
            ),
            array(
                
'PEARSax3' => false,
            )
        );
    }

    function 
test_tokenizeHTML_unterminatedComment() {
        
$this->assertTokenization(
            
'<!-- This >< comment',
            array( new 
HTMLPurifier_Token_Comment(' This >< comment') ),
            array(
                
'DOMLex'   => false,
                
'PEARSax3' => false,
                
'PH5P'     => false,
            )
        );
    }

    function 
test_tokenizeHTML_scriptCDATAContents() {
        
$this->config->set('HTML.Trusted'true);
        
$this->assertTokenization(
            
'Foo: <script>alert("<foo>");</script>',
            array(
                new 
HTMLPurifier_Token_Text('Foo: '),
                new 
HTMLPurifier_Token_Start('script'),
                new 
HTMLPurifier_Token_Text('alert("<foo>");'),
                new 
HTMLPurifier_Token_End('script'),
            ),
            array(
                
'PEARSax3' => false,
                
// PH5P, for some reason, bubbles the script to <head>
                
'PH5P' => false,
            )
        );
    }

    function 
test_tokenizeHTML_entitiesInComment() {
        
$this->assertTokenization(
            
'<!-- This comment < &lt; & -->',
            array( new 
HTMLPurifier_Token_Comment(' This comment < &lt; & ') ),
            array(
                
'PEARSax3' => false
            
)
        );
    }

    function 
test_tokenizeHTML_attributeWithSpecialCharacters() {
        
$this->assertTokenization(
            
'<a href="><>">',
            array( new 
HTMLPurifier_Token_Empty('a', array('href' => '><>')) ),
            array(
                
'DirectLex' => array(
                    new 
HTMLPurifier_Token_Start('a', array('href' => '')),
                    new 
HTMLPurifier_Token_Text('<'),
                    new 
HTMLPurifier_Token_Text('">'),
                ),
                
'PEARSax3' => false,
            )
        );
    }

    function 
test_tokenizeHTML_emptyTagWithSlashInAttribute() {
        
$this->assertTokenization(
            
'<param name="src" value="http://example.com/video.wmv" />',
            array( new 
HTMLPurifier_Token_Empty('param', array('name' => 'src''value' => 'http://example.com/video.wmv')) )
        );
    }

    function 
test_tokenizeHTML_style() {
        
$extra = array(
                
// PH5P doesn't seem to like style tags
                
'PH5P' => false,
                
// DirectLex defers to RemoveForeignElements for textification
                
'DirectLex' => array(
                    new 
HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
                    new 
HTMLPurifier_Token_Comment("ndiv {}n"),
                    new 
HTMLPurifier_Token_End('style'),
                ),
            );
        if (!
defined('LIBXML_VERSION')) {
            
// LIBXML_VERSION is missing in early versions of PHP
            // prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise,
            // this translates to 5.0.x. In such cases, punt the test entirely.
            
return;
        } elseif (
LIBXML_VERSION 20628) {
            
// libxml's behavior is wrong prior to this version, so make
            // appropriate accomodations
            
$extra['DOMLex'] = $extra['DirectLex'];
        }
        
$this->assertTokenization(
'<style type="text/css"><!--
div {}
--></style>'
,
            array(
                new 
HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
                new 
HTMLPurifier_Token_Text("ndiv {}n"),
                new 
HTMLPurifier_Token_End('style'),
            ),
            
$extra
        
);
    }

    function 
test_tokenizeHTML_tagWithAtSignAndExtraGt() {
        
$alt_expect = array(
            
// Technically this is invalid, but it won't be a
            // problem with invalid element removal; also, this
            // mimics Mozilla's parsing of the tag.
            
new HTMLPurifier_Token_Start('a@'),
            new 
HTMLPurifier_Token_Text('>'),
        );
        
$this->assertTokenization(
            
'<a@>>',
            array(
                new 
HTMLPurifier_Token_Start('a'),
                new 
HTMLPurifier_Token_Text('>'),
                new 
HTMLPurifier_Token_End('a'),
            ),
            array(
                
'DirectLex' => $alt_expect,
                
'PEARSax3' => $alt_expect,
            )
        );
    }

    function 
test_tokenizeHTML_emoticonHeart() {
        
$this->assertTokenization(
            
'<br /><3<br />',
            array(
                new 
HTMLPurifier_Token_Empty('br'),
                new 
HTMLPurifier_Token_Text('<'),
                new 
HTMLPurifier_Token_Text('3'),
                new 
HTMLPurifier_Token_Empty('br'),
            ),
            array(
                
'DOMLex' => array(
                    new 
HTMLPurifier_Token_Empty('br'),
                    new 
HTMLPurifier_Token_Text('<3'),
                    new 
HTMLPurifier_Token_Empty('br'),
                ),
                
'PEARSax3' => array(
                    
// bah too lazy to fix this
                    
new HTMLPurifier_Token_Empty('br'),
                    new 
HTMLPurifier_Token_Empty('3<br'),
                ),
            )
        );
    }

    function 
test_tokenizeHTML_emoticonShiftyEyes() {
        
$this->assertTokenization(
            
'<b><<</b>',
            array(
                new 
HTMLPurifier_Token_Start('b'),
                new 
HTMLPurifier_Token_Text('<'),
                new 
HTMLPurifier_Token_Text('<'),
                new 
HTMLPurifier_Token_End('b'),
            ),
            array(
                
'DOMLex' => array(
                    new 
HTMLPurifier_Token_Start('b'),
                    new 
HTMLPurifier_Token_Text('<<'),
                    new 
HTMLPurifier_Token_End('b'),
                ),
                
'PEARSax3' => array(
                    
// also too lazy to fix
                    
new HTMLPurifier_Token_Start('b'),
                    new 
HTMLPurifier_Token_Empty('<<'),
                    new 
HTMLPurifier_Token_Text('b>'),
                ),
            )
        );
    }

    function 
test_tokenizeHTML_eon1996() {
        
$this->assertTokenization(
            
'< <b>test</b>',
            array(
                new 
HTMLPurifier_Token_Text('<'),
                new 
HTMLPurifier_Token_Text(' '),
                new 
HTMLPurifier_Token_Start('b'),
                new 
HTMLPurifier_Token_Text('test'),
                new 
HTMLPurifier_Token_End('b'),
            ),
            array(
                
'DOMLex' => array(
                    new 
HTMLPurifier_Token_Text('< '),
                    new 
HTMLPurifier_Token_Start('b'),
                    new 
HTMLPurifier_Token_Text('test'),
                    new 
HTMLPurifier_Token_End('b'),
                ),
                
'PEARSax3' => array(
                    
// totally doing the wrong thing here
                    
new HTMLPurifier_Token_Text(' '),
                    new 
HTMLPurifier_Token_Start('b'),
                    new 
HTMLPurifier_Token_Text('test'),
                    new 
HTMLPurifier_Token_End('b'),
                ),
            )
        );
    }

    function 
test_tokenizeHTML_bodyInCDATA() {
        
$alt_tokens = array(
            new 
HTMLPurifier_Token_Text('<'),
            new 
HTMLPurifier_Token_Text('body'),
            new 
HTMLPurifier_Token_Text('>'),
            new 
HTMLPurifier_Token_Text('Foo'),
            new 
HTMLPurifier_Token_Text('<'),
            new 
HTMLPurifier_Token_Text('/body'),
            new 
HTMLPurifier_Token_Text('>'),
        );
        
$this->assertTokenization(
            
'<![CDATA[<body>Foo</body>]]>',
            array(
                new 
HTMLPurifier_Token_Text('<body>Foo</body>'),
            ),
            array(
                
'PH5P' => $alt_tokens,
                
'PEARSax3' => $alt_tokens,
            )
        );
    }

    function 
test_tokenizeHTML_() {
        
$this->assertTokenization(
            
'<a><img /></a>',
            array(
                new 
HTMLPurifier_Token_Start('a'),
                new 
HTMLPurifier_Token_Empty('img'),
                new 
HTMLPurifier_Token_End('a'),
            )
        );
    }

    function 
test_tokenizeHTML_ignoreIECondComment() {
        
$this->assertTokenization(
            
'<!--[if IE]>foo<a>bar<!-- baz --><![endif]-->',
            array()
        );
    }

    function 
test_tokenizeHTML_removeProcessingInstruction() {
        
$this->config->set('Core.RemoveProcessingInstructions'true);
        
$this->assertTokenization(
            
'<?xml blah blah ?>',
            array()
        );
    }

   function 
test_tokenizeHTML_removeNewline() {
        
$this->config->set('Core.NormalizeNewlines'true);
        
$this->assertTokenization(
            
"plainrtextrn",
            array(
                new 
HTMLPurifier_Token_Text("plainntextn")
            )
        );
   }

   function 
test_tokenizeHTML_noRemoveNewline() {
        
$this->config->set('Core.NormalizeNewlines'false);
        
$this->assertTokenization(
            
"plainrtextrn",
            array(
                new 
HTMLPurifier_Token_Text("plainrtextrn")
            )
        );
     }

    function 
test_tokenizeHTML_conditionalCommentUngreedy() {
        
$this->assertTokenization(
            
'<!--[if gte mso 9]>a<![endif]-->b<!--[if gte mso 9]>c<![endif]-->',
            array(
                new 
HTMLPurifier_Token_Text("b")
            )
        );
    }

    function 
test_tokenizeHTML_imgTag() {
        
$start = array(
                        new 
HTMLPurifier_Token_Start('img',
                            array(
                                
'src' => 'img_11775.jpg',
                                
'alt' => '[Img #11775]',
                                
'id' => 'EMBEDDED_IMG_11775',
                            )
                        )
                    );
        
$this->assertTokenization(
            
'<img src="img_11775.jpg" alt="[Img #11775]" id="EMBEDDED_IMG_11775" >',
            array(
                new 
HTMLPurifier_Token_Empty('img',
                    array(
                        
'src' => 'img_11775.jpg',
                        
'alt' => '[Img #11775]',
                        
'id' => 'EMBEDDED_IMG_11775',
                    )
                )
            ),
            array(
                
'DirectLex' => $start,
                
'PEARSax3' => $start,
                )
        );
    }


    
/*

    function test_tokenizeHTML_() {
        $this->assertTokenization(
            ,
            array(

            )
        );
    }
    */

}

// vim: et sw=4 sts=4
Онлайн: 1
Реклама