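// [source-viewer page chrome] Login | Register
// File: error-kitty/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js
// Lines: 651
// (the viewer's "<?php" wrapper tag was here; the content below is JavaScript)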
module.exports = Tokenizer;

var decodeCodePoint = require("entities/lib/decode_codepoint.js"),
    entityMap = require("entities/maps/entities.json"),
    legacyMap = require("entities/maps/legacy.json"),
    xmlMap    = require("entities/maps/xml.json"),

    //running counter: every tokenizer state below gets the next integer
    i = 0,

    TEXT                      = i++,
    BEFORE_TAG_NAME           = i++, //after <
    IN_TAG_NAME               = i++,
    IN_SELF_CLOSING_TAG       = i++,
    BEFORE_CLOSING_TAG_NAME   = i++,
    IN_CLOSING_TAG_NAME       = i++,
    AFTER_CLOSING_TAG_NAME    = i++,

    //attributes
    BEFORE_ATTRIBUTE_NAME     = i++,
    IN_ATTRIBUTE_NAME         = i++,
    AFTER_ATTRIBUTE_NAME      = i++,
    BEFORE_ATTRIBUTE_VALUE    = i++,
    IN_ATTRIBUTE_VALUE_DQ     = i++, // "
    IN_ATTRIBUTE_VALUE_SQ     = i++, // '
    IN_ATTRIBUTE_VALUE_NQ     = i++,

    //declarations
    BEFORE_DECLARATION        = i++, // !
    IN_DECLARATION            = i++,

    //processing instructions
    IN_PROCESSING_INSTRUCTION = i++, // ?

    //comments
    BEFORE_COMMENT            = i++,
    IN_COMMENT                = i++,
    AFTER_COMMENT_1           = i++,
    AFTER_COMMENT_2           = i++,

    //cdata
    BEFORE_CDATA_1            = i++, // [
    BEFORE_CDATA_2            = i++, // C
    BEFORE_CDATA_3            = i++, // D
    BEFORE_CDATA_4            = i++, // A
    BEFORE_CDATA_5            = i++, // T
    BEFORE_CDATA_6            = i++, // A
    IN_CDATA                  = i++, // [
    AFTER_CDATA_1             = i++, // ]
    AFTER_CDATA_2             = i++, // ]

    //special tags
    BEFORE_SPECIAL            = i++, //S
    BEFORE_SPECIAL_END        = i++,   //S

    BEFORE_SCRIPT_1           = i++, //C
    BEFORE_SCRIPT_2           = i++, //R
    BEFORE_SCRIPT_3           = i++, //I
    BEFORE_SCRIPT_4           = i++, //P
    BEFORE_SCRIPT_5           = i++, //T
    AFTER_SCRIPT_1            = i++, //C
    AFTER_SCRIPT_2            = i++, //R
    AFTER_SCRIPT_3            = i++, //I
    AFTER_SCRIPT_4            = i++, //P
    AFTER_SCRIPT_5            = i++, //T

    BEFORE_STYLE_1            = i++, //T
    BEFORE_STYLE_2            = i++, //Y
    BEFORE_STYLE_3            = i++, //L
    BEFORE_STYLE_4            = i++, //E
    AFTER_STYLE_1             = i++, //T
    AFTER_STYLE_2             = i++, //Y
    AFTER_STYLE_3             = i++, //L
    AFTER_STYLE_4             = i++, //E

    BEFORE_ENTITY             = i++, //&
    BEFORE_NUMERIC_ENTITY     = i++, //#
    IN_NAMED_ENTITY           = i++,
    IN_NUMERIC_ENTITY         = i++,
    IN_HEX_ENTITY             = i++, //X

    //separate counter for the "inside a special tag" flag kept in `_special`
    j = 0,

    SPECIAL_NONE              = j++,
    SPECIAL_SCRIPT            = j++,
    SPECIAL_STYLE             = j++;

//Returns true when `c` is one of the five characters the tokenizer treats as
//whitespace: space, line feed, tab, form feed or carriage return.
function whitespace(c){
    return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
}

//Builds a state handler that moves to SUCCESS when the current character
//equals `char`; any other character leaves the state untouched.
//The returned function must be invoked with the tokenizer as `this`.
function characterState(char, SUCCESS){
    return function(c){
        if(c === char) this._state = SUCCESS;
    };
}

//Builds a state handler that matches one character case-insensitively.
//  upper   - the character to match (its lower-case form is also accepted)
//  SUCCESS - state entered when the character matches
//  FAILURE - state entered otherwise; the index is stepped back so the
//            character is processed again by the FAILURE state
//The returned function must be invoked with the tokenizer as `this`.
function ifElseState(upper, SUCCESS, FAILURE){
    var lower = upper.toLowerCase();

    if(upper === lower){
        //no distinct lower-case form (e.g. "#"): a single comparison suffices
        return function(c){
            if(c === lower){
                this._state = SUCCESS;
            } else {
                this._state = FAILURE;
                this._index--;
            }
        };
    } else {
        return function(c){
            if(c === lower || c === upper){
                this._state = SUCCESS;
            } else {
                this._state = FAILURE;
                this._index--;
            }
        };
    }
}

//Builds a state handler for one letter of a special tag name (script/style).
//A case-insensitive match on `upper` moves to NEXT_STATE; anything else falls
//back to IN_TAG_NAME and steps the index back so the character is re-read.
//The returned function must be invoked with the tokenizer as `this`.
function consumeSpecialNameChar(upper, NEXT_STATE){
    var lower = upper.toLowerCase();

    return function(c){
        if(c === lower || c === upper){
            this._state = NEXT_STATE;
        } else {
            this._state = IN_TAG_NAME;
            this._index--; //consume the token again
        }
    };
}

//Tokenizer constructor.
//  options - optional object; `xmlMode` and `decodeEntities` are read and
//            coerced to booleans (a missing `options` yields false for both)
//  cbs     - object whose callbacks (ontext, onopentagname, ...) receive the
//            tokens; stored as-is
function Tokenizer(options, cbs){
    this._state = TEXT;
    this._buffer = "";
    this._sectionStart = 0;    //start of the token currently being collected
    this._index = 0;           //position of the character being processed
    this._baseState = TEXT;    //state to return to once an entity is finished
    this._special = SPECIAL_NONE;
    this._cbs = cbs;
    this._running = true;
    this._ended = false;
    this._xmlMode = !!(options && options.xmlMode);
    this._decodeEntities = !!(options && options.decodeEntities);
}

//TEXT state. "<" starts a tag; "&" starts an entity when entity decoding is
//enabled and no special tag (script/style) is open. In both cases any text
//collected so far is emitted through `ontext` first and a new section starts
//at the current index. All other characters are simply accumulated.
Tokenizer.prototype._stateText = function(c){
    if(c === "<"){
        if(this._index > this._sectionStart){
            this._cbs.ontext(this._getSection());
        }
        this._state = BEFORE_TAG_NAME;
        this._sectionStart = this._index;
    } else if(this._decodeEntities && this._special === SPECIAL_NONE && c === "&"){
        if(this._index > this._sectionStart){
            this._cbs.ontext(this._getSection());
        }
        this._baseState = TEXT;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};

//BEFORE_TAG_NAME state: decides what the character right after "<" starts.
//  "/"            -> closing tag
//  ">" / whitespace / inside a special tag -> not a tag, back to TEXT
//  "!" and "?"    -> declaration / processing instruction (section starts
//                    after the marker character)
//  "<"            -> the previous "<" was plain text: emit it and restart
//  anything else  -> tag name; "s"/"S" outside xml mode may begin a special
//                    tag, so it is routed through BEFORE_SPECIAL
Tokenizer.prototype._stateBeforeTagName = function(c){
    if(c === "/"){
        this._state = BEFORE_CLOSING_TAG_NAME;
    } else if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
        this._state = TEXT;
    } else if(c === "!"){
        this._state = BEFORE_DECLARATION;
        this._sectionStart = this._index + 1;
    } else if(c === "?"){
        this._state = IN_PROCESSING_INSTRUCTION;
        this._sectionStart = this._index + 1;
    } else if(c === "<"){
        this._cbs.ontext(this._getSection());
        this._sectionStart = this._index;
    } else {
        this._state = (!this._xmlMode && (c === "s" || c === "S")) ?
                        BEFORE_SPECIAL : IN_TAG_NAME;
        this._sectionStart = this._index;
    }
};

//IN_TAG_NAME state: the name ends at "/", ">" or whitespace. The name is then
//emitted as `onopentagname` and the terminating character is re-read in
//BEFORE_ATTRIBUTE_NAME (hence the index decrement).
Tokenizer.prototype._stateInTagName = function(c){
    if(c === "/" || c === ">" || whitespace(c)){
        this._emitToken("onopentagname");
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    }
};

//BEFORE_CLOSING_TAG_NAME state (after "</"). Whitespace is skipped and ">"
//abandons the tag. While a special tag is open only "s"/"S" can begin its end
//tag; any other character is re-read as text. Otherwise the closing tag name
//starts at the current index.
Tokenizer.prototype._stateBeforeCloseingTagName = function(c){
    if(whitespace(c)){
        //ignore whitespace between "</" and the name
    } else if(c === ">"){
        this._state = TEXT;
    } else if(this._special !== SPECIAL_NONE){
        if(c === "s" || c === "S"){
            this._state = BEFORE_SPECIAL_END;
        } else {
            this._state = TEXT;
            this._index--;
        }
    } else {
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index;
    }
};

//IN_CLOSING_TAG_NAME state: ">" or whitespace ends the name, which is emitted
//as `onclosetag`; the terminator is re-read in AFTER_CLOSING_TAG_NAME.
Tokenizer.prototype._stateInCloseingTagName = function(c){
    if(c === ">" || whitespace(c)){
        this._emitToken("onclosetag");
        this._state = AFTER_CLOSING_TAG_NAME;
        this._index--;
    }
};

//AFTER_CLOSING_TAG_NAME state: everything up to the next ">" is discarded;
//text collection resumes with the character after it.
Tokenizer.prototype._stateAfterCloseingTagName = function(c){
    //skip everything until ">"
    if(c === ">"){
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};

//BEFORE_ATTRIBUTE_NAME state: ">" closes the opening tag (`onopentagend`),
//"/" may start a self-closing tag, whitespace is skipped, and any other
//character is the first character of an attribute name.
Tokenizer.prototype._stateBeforeAttributeName = function(c){
    if(c === ">"){
        this._cbs.onopentagend();
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if(c === "/"){
        this._state = IN_SELF_CLOSING_TAG;
    } else if(!whitespace(c)){
        this._state = IN_ATTRIBUTE_NAME;
        this._sectionStart = this._index;
    }
};

//IN_SELF_CLOSING_TAG state (after "/" inside a tag): ">" completes the
//self-closing tag; whitespace is skipped; any other character means the "/"
//was stray, so the character is re-read as the start of an attribute.
Tokenizer.prototype._stateInSelfClosingTag = function(c){
    if(c === ">"){
        this._cbs.onselfclosingtag();
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if(!whitespace(c)){
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    }
};

//IN_ATTRIBUTE_NAME state: "=", "/", ">" or whitespace ends the name. The name
//is emitted through `onattribname`, the section is marked as consumed (-1)
//and the terminator is re-read in AFTER_ATTRIBUTE_NAME.
Tokenizer.prototype._stateInAttributeName = function(c){
    if(c === "=" || c === "/" || c === ">" || whitespace(c)){
        this._cbs.onattribname(this._getSection());
        this._sectionStart = -1;
        this._state = AFTER_ATTRIBUTE_NAME;
        this._index--;
    }
};

//AFTER_ATTRIBUTE_NAME state: "=" introduces a value. "/" or ">" ends a
//value-less attribute and is re-read in BEFORE_ATTRIBUTE_NAME. Any other
//non-whitespace character ends the value-less attribute and starts the next
//attribute name at the current index. Whitespace is skipped.
Tokenizer.prototype._stateAfterAttributeName = function(c){
    if(c === "="){
        this._state = BEFORE_ATTRIBUTE_VALUE;
    } else if(c === "/" || c === ">"){
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    } else if(!whitespace(c)){
        this._cbs.onattribend();
        this._state = IN_ATTRIBUTE_NAME;
        this._sectionStart = this._index;
    }
};

//BEFORE_ATTRIBUTE_VALUE state (after "="): a quote opens a quoted value that
//starts after the quote; whitespace is skipped; any other character is the
//first character of an unquoted value and is re-read in that state.
Tokenizer.prototype._stateBeforeAttributeValue = function(c){
    if(c === "\""){
        this._state = IN_ATTRIBUTE_VALUE_DQ;
        this._sectionStart = this._index + 1;
    } else if(c === "'"){
        this._state = IN_ATTRIBUTE_VALUE_SQ;
        this._sectionStart = this._index + 1;
    } else if(!whitespace(c)){
        this._state = IN_ATTRIBUTE_VALUE_NQ;
        this._sectionStart = this._index;
        this._index--; //reconsume token
    }
};

//IN_ATTRIBUTE_VALUE_DQ state: the closing double quote emits the value and
//ends the attribute. With entity decoding on, "&" emits the value collected
//so far and enters entity parsing, remembering this state in `_baseState`.
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c){
    if(c === "\""){
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
    } else if(this._decodeEntities && c === "&"){
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};

//IN_ATTRIBUTE_VALUE_SQ state: the closing single quote emits the value and
//ends the attribute. With entity decoding on, "&" emits the value collected
//so far and enters entity parsing, remembering this state in `_baseState`.
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c){
    if(c === "'"){
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
    } else if(this._decodeEntities && c === "&"){
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};

//IN_ATTRIBUTE_VALUE_NQ state: whitespace or ">" ends an unquoted value; the
//terminator is re-read in BEFORE_ATTRIBUTE_NAME. With entity decoding on, "&"
//emits the value collected so far and enters entity parsing.
Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c){
    if(whitespace(c) || c === ">"){
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    } else if(this._decodeEntities && c === "&"){
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};

//BEFORE_DECLARATION state (after "<!"): "[" may start a CDATA section, "-"
//may start a comment, anything else is treated as a plain declaration.
Tokenizer.prototype._stateBeforeDeclaration = function(c){
    this._state = c === "[" ? BEFORE_CDATA_1 :
                    c === "-" ? BEFORE_COMMENT :
                        IN_DECLARATION;
};

//IN_DECLARATION state: ">" ends the declaration, which is emitted through
//`ondeclaration`; text collection resumes after the ">".
Tokenizer.prototype._stateInDeclaration = function(c){
    if(c === ">"){
        this._cbs.ondeclaration(this._getSection());
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};

//IN_PROCESSING_INSTRUCTION state: ">" ends the instruction, which is emitted
//through `onprocessinginstruction`; text collection resumes after the ">".
Tokenizer.prototype._stateInProcessingInstruction = function(c){
    if(c === ">"){
        this._cbs.onprocessinginstruction(this._getSection());
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};

//BEFORE_COMMENT state (after "<!-"): a second "-" opens a comment whose
//content starts after it; anything else is treated as a declaration.
Tokenizer.prototype._stateBeforeComment = function(c){
    if(c === "-"){
        this._state = IN_COMMENT;
        this._sectionStart = this._index + 1;
    } else {
        this._state = IN_DECLARATION;
    }
};

//IN_COMMENT state: a "-" may be the first character of the closing "-->".
Tokenizer.prototype._stateInComment = function(c){
    if(c === "-") this._state = AFTER_COMMENT_1;
};

//AFTER_COMMENT_1 state (one "-" seen inside a comment): a second "-" moves
//on towards the terminator; anything else returns to the comment body.
Tokenizer.prototype._stateAfterComment1 = function(c){
    if(c === "-"){
        this._state = AFTER_COMMENT_2;
    } else {
        this._state = IN_COMMENT;
    }
};

//AFTER_COMMENT_2 state ("--" seen inside a comment): ">" ends the comment,
//which is emitted without the two trailing dashes. A further "-" keeps the
//state (so "--->" still terminates); anything else returns to the body.
Tokenizer.prototype._stateAfterComment2 = function(c){
    if(c === ">"){
        //remove 2 trailing chars
        this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if(c !== "-"){
        this._state = IN_COMMENT;
    }
    // else: stay in AFTER_COMMENT_2 (`--->`)
};

//Handlers matching the letters of "CDATA" after "<![", one per state. Each is
//case-insensitive; a mismatch falls back to IN_DECLARATION and re-reads the
//character there.
Tokenizer.prototype._stateBeforeCdata1 = ifElseState("C", BEFORE_CDATA_2, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata2 = ifElseState("D", BEFORE_CDATA_3, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata3 = ifElseState("A", BEFORE_CDATA_4, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata4 = ifElseState("T", BEFORE_CDATA_5, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata5 = ifElseState("A", BEFORE_CDATA_6, IN_DECLARATION);

//BEFORE_CDATA_6 state (after "<![CDATA"): "[" opens the CDATA content, which
//starts after it; anything else is re-read as part of a declaration.
Tokenizer.prototype._stateBeforeCdata6 = function(c){
    if(c === "["){
        this._state = IN_CDATA;
        this._sectionStart = this._index + 1;
    } else {
        this._state = IN_DECLARATION;
        this._index--;
    }
};

//IN_CDATA state: a "]" may be the first character of the closing "]]>".
Tokenizer.prototype._stateInCdata = function(c){
    if(c === "]") this._state = AFTER_CDATA_1;
};

//AFTER_CDATA_1 state: a second "]" advances to AFTER_CDATA_2; the generated
//handler leaves the state unchanged for any other character.
Tokenizer.prototype._stateAfterCdata1 = characterState("]", AFTER_CDATA_2);

//AFTER_CDATA_2 state ("]]" seen inside CDATA): ">" ends the section, which is
//emitted without the two trailing brackets. A further "]" keeps the state
//(so "]]]>" still terminates); anything else returns to the CDATA body.
Tokenizer.prototype._stateAfterCdata2 = function(c){
    if(c === ">"){
        //remove 2 trailing chars
        this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if (c !== "]") {
        this._state = IN_CDATA;
    }
    //else: stay in AFTER_CDATA_2 (`]]]>`)
};

//BEFORE_SPECIAL state (tag name started with "s"/"S"): "c" continues towards
//"script", "t" towards "style"; anything else is an ordinary tag name and the
//character is re-read in IN_TAG_NAME.
Tokenizer.prototype._stateBeforeSpecial = function(c){
    if(c === "c" || c === "C"){
        this._state = BEFORE_SCRIPT_1;
    } else if(c === "t" || c === "T"){
        this._state = BEFORE_STYLE_1;
    } else {
        this._state = IN_TAG_NAME;
        this._index--; //consume the token again
    }
};

//BEFORE_SPECIAL_END state ("</s" seen while a special tag is open): continue
//matching the end tag only if the next letter fits the tag that is actually
//open ("c" for script, "t" for style); otherwise the input is plain text.
Tokenizer.prototype._stateBeforeSpecialEnd = function(c){
    if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
        this._state = AFTER_SCRIPT_1;
    } else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
        this._state = AFTER_STYLE_1;
    }
    else this._state = TEXT;
};

//Handlers matching the letters "R", "I", "P", "T" of an opening "script" tag
//name (after "sc"). A mismatch falls back to IN_TAG_NAME and re-reads the
//character there.
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar("R", BEFORE_SCRIPT_2);
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar("I", BEFORE_SCRIPT_3);
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar("P", BEFORE_SCRIPT_4);
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCRIPT_5);

//BEFORE_SCRIPT_5 state ("script" fully matched): the tag only counts as a
//script tag if the name ends here ("/", ">" or whitespace). Either way the
//character is re-read in IN_TAG_NAME so the name is emitted normally.
Tokenizer.prototype._stateBeforeScript5 = function(c){
    if(c === "/" || c === ">" || whitespace(c)){
        this._special = SPECIAL_SCRIPT;
    }
    this._state = IN_TAG_NAME;
    this._index--; //consume the token again
};

//Handlers matching the letters "R", "I", "P", "T" of a closing "script" tag
//name (after "</sc"). A mismatch falls back to TEXT and re-reads the
//character there.
Tokenizer.prototype._stateAfterScript1 = ifElseState("R", AFTER_SCRIPT_2, TEXT);
Tokenizer.prototype._stateAfterScript2 = ifElseState("I", AFTER_SCRIPT_3, TEXT);
Tokenizer.prototype._stateAfterScript3 = ifElseState("P", AFTER_SCRIPT_4, TEXT);
Tokenizer.prototype._stateAfterScript4 = ifElseState("T", AFTER_SCRIPT_5, TEXT);

//AFTER_SCRIPT_5 state ("</script" matched): if the name ends here (">" or
//whitespace) the special mode is left and the tag is handed to
//IN_CLOSING_TAG_NAME, with the section rewound over the 6 letters of
//"script"; the terminator is re-read there. Otherwise it was plain text.
Tokenizer.prototype._stateAfterScript5 = function(c){
    if(c === ">" || whitespace(c)){
        this._special = SPECIAL_NONE;
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index - 6;
        this._index--; //reconsume the token
    }
    else this._state = TEXT;
};

//Handlers matching the letters "Y", "L", "E" of an opening "style" tag name
//(after "st"). A mismatch falls back to IN_TAG_NAME and re-reads the
//character there.
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar("Y", BEFORE_STYLE_2);
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar("L", BEFORE_STYLE_3);
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYLE_4);

//BEFORE_STYLE_4 state ("style" fully matched): the tag only counts as a style
//tag if the name ends here ("/", ">" or whitespace). Either way the character
//is re-read in IN_TAG_NAME so the name is emitted normally.
Tokenizer.prototype._stateBeforeStyle4 = function(c){
    if(c === "/" || c === ">" || whitespace(c)){
        this._special = SPECIAL_STYLE;
    }
    this._state = IN_TAG_NAME;
    this._index--; //consume the token again
};

//Handlers matching the letters "Y", "L", "E" of a closing "style" tag name
//(after "</st"). A mismatch falls back to TEXT and re-reads the character
//there.
Tokenizer.prototype._stateAfterStyle1 = ifElseState("Y", AFTER_STYLE_2, TEXT);
Tokenizer.prototype._stateAfterStyle2 = ifElseState("L", AFTER_STYLE_3, TEXT);
Tokenizer.prototype._stateAfterStyle3 = ifElseState("E", AFTER_STYLE_4, TEXT);

//AFTER_STYLE_4 state ("</style" matched): if the name ends here (">" or
//whitespace) the special mode is left and the tag is handed to
//IN_CLOSING_TAG_NAME, with the section rewound over the 5 letters of "style";
//the terminator is re-read there. Otherwise it was plain text.
Tokenizer.prototype._stateAfterStyle4 = function(c){
    if(c === ">" || whitespace(c)){
        this._special = SPECIAL_NONE;
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index - 5;
        this._index--; //reconsume the token
    }
    else this._state = TEXT;
};

//After "&": "#" starts a numeric entity, anything else is re-read as the
//first character of a named entity.
Tokenizer.prototype._stateBeforeEntity = ifElseState("#", BEFORE_NUMERIC_ENTITY, IN_NAMED_ENTITY);
//After "&#": "x"/"X" starts a hexadecimal entity, anything else is re-read
//as the first character of a decimal entity.
Tokenizer.prototype._stateBeforeNumericEntity = ifElseState("X", IN_HEX_ENTITY, IN_NUMERIC_ENTITY);

//for entities terminated with a semicolon
//Looks up the text between the "&" at `_sectionStart` and the current index
//in the xml map (xml mode) or the full entity map. On a hit the decoded value
//is emitted and the section restarts after the current character; on a miss,
//or when the name is empty, nothing changes.
Tokenizer.prototype._parseNamedEntityStrict = function(){
    //offset = 1
    if(this._sectionStart + 1 < this._index){
        var entity = this._buffer.substring(this._sectionStart + 1, this._index),
            map = this._xmlMode ? xmlMap : entityMap;

        if(map.hasOwnProperty(entity)){
            this._emitPartial(map[entity]);
            this._sectionStart = this._index + 1;
        }
    }
};


//parses legacy entities (without trailing semicolon)
//Tries the longest candidate first (at most 6 characters after the "&") and
//shortens it one character at a time down to 2. On the first hit the decoded
//value is emitted and `_sectionStart` is moved past the "&" and the matched
//name; if nothing matches, nothing changes.
Tokenizer.prototype._parseLegacyEntity = function(){
    var start = this._sectionStart + 1,
        limit = this._index - start;

    if(limit > 6) limit = 6; //the max length of legacy entities is 6

    while(limit >= 2){ //the min length of legacy entities is 2
        var entity = this._buffer.substr(start, limit);

        if(legacyMap.hasOwnProperty(entity)){
            this._emitPartial(legacyMap[entity]);
            this._sectionStart += limit + 1;
            return;
        } else {
            limit--;
        }
    }
};

//IN_NAMED_ENTITY state.
//  ";"  - strict lookup; if that consumed nothing and we are not in xml mode,
//         fall back to the legacy (semicolon-less) lookup. Then return to the
//         base state.
//  any character outside [a-zA-Z0-9] - the name ended without ";":
//         xml mode and empty names decode nothing; inside an attribute value
//         a strict lookup is tried unless the terminator is "="; in text the
//         legacy lookup is used. The terminator is then re-read in the base
//         state.
//  alphanumerics are accumulated.
Tokenizer.prototype._stateInNamedEntity = function(c){
    if(c === ";"){
        this._parseNamedEntityStrict();
        if(this._sectionStart + 1 < this._index && !this._xmlMode){
            this._parseLegacyEntity();
        }
        this._state = this._baseState;
    } else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){
        if(this._xmlMode){
            //xml has no semicolon-less entities
        } else if(this._sectionStart + 1 === this._index){
            //nothing between "&" and the terminator
        } else if(this._baseState !== TEXT){
            if(c !== "="){
                this._parseNamedEntityStrict();
            }
        } else {
            this._parseLegacyEntity();
        }

        this._state = this._baseState;
        this._index--;
    }
};

//Decodes the numeric entity whose digits lie between `_sectionStart + offset`
//and the current index and emits the resulting character.
//  offset - number of prefix characters to skip (callers pass 2 for "&#" and
//           3 for "&#x")
//  base   - radix handed to parseInt (callers pass 10 or 16)
//When there are no digits, `_sectionStart` is decremented instead and nothing
//is emitted. The tokenizer always returns to the base state.
Tokenizer.prototype._decodeNumericEntity = function(offset, base){
    var sectionStart = this._sectionStart + offset;

    if(sectionStart !== this._index){
        //parse entity
        var entity = this._buffer.substring(sectionStart, this._index);
        var parsed = parseInt(entity, base);

        this._emitPartial(decodeCodePoint(parsed));
        this._sectionStart = this._index;
    } else {
        this._sectionStart--;
    }

    this._state = this._baseState;
};

//IN_NUMERIC_ENTITY state (decimal digits after "&#"). ";" decodes the entity
//and moves the section past the semicolon. A non-digit ends the entity
//without ";": outside xml mode it is decoded anyway, in xml mode it is left
//undecoded; the terminator is re-read in the base state. Digits accumulate.
Tokenizer.prototype._stateInNumericEntity = function(c){
    if(c === ";"){
        this._decodeNumericEntity(2, 10);
        this._sectionStart++;
    } else if(c < "0" || c > "9"){
        if(!this._xmlMode){
            this._decodeNumericEntity(2, 10);
        } else {
            this._state = this._baseState;
        }
        this._index--;
    }
};

//IN_HEX_ENTITY state (hex digits after "&#x"). ";" decodes the entity and
//moves the section past the semicolon. A character outside [a-fA-F0-9] ends
//the entity without ";": outside xml mode it is decoded anyway, in xml mode
//it is left undecoded; the terminator is re-read in the base state.
Tokenizer.prototype._stateInHexEntity = function(c){
    if(c === ";"){
        this._decodeNumericEntity(3, 16);
        this._sectionStart++;
    } else if((c < "a" || c > "f") && (c < "A" || c > "F") && (c < "0" || c > "9")){
        if(!this._xmlMode){
            this._decodeNumericEntity(3, 16);
        } else {
            this._state = this._baseState;
        }
        this._index--;
    }
};

//Drops the part of the buffer that is no longer needed after a parse pass.
//  - negative `_sectionStart` (token already emitted): clear the buffer
//  - otherwise, only while running:
//      TEXT state: emit the pending text, then clear the buffer
//      section just started: clear the buffer
//      else: keep only the unfinished section and rebase `_index`
//    and reset `_sectionStart` to 0.
//While paused with a pending section nothing is touched.
Tokenizer.prototype._cleanup = function () {
    if(this._sectionStart < 0){
        this._buffer = "";
        this._index = 0;
    } else if(this._running){
        if(this._state === TEXT){
            if(this._sectionStart !== this._index){
                this._cbs.ontext(this._buffer.substr(this._sectionStart));
            }
            this._buffer = "";
            this._index = 0;
        } else if(this._sectionStart === this._index){
            //the section just started
            this._buffer = "";
            this._index = 0;
        } else {
            //remove everything unnecessary
            this._buffer = this._buffer.substr(this._sectionStart);
            this._index -= this._sectionStart;
        }

        this._sectionStart = 0;
    }
};

//TODO make events conditional
//Appends `chunk` to the buffer and runs the parser. Writing after end() is
//reported through `onerror`, but the chunk is still appended and parsed.
Tokenizer.prototype.write = function(chunk){
    if(this._ended) this._cbs.onerror(Error(".write() after done!"));

    this._buffer += chunk;
    this._parse();
};

//Main loop: feeds the buffer to the handler of the current state one
//character at a time while the tokenizer is running, then trims the buffer.
//Handlers that want a character re-read decrement `_index`, which the
//increment at the end of each iteration undoes. An unrecognised state value
//is reported through `onerror` and the character is skipped.
Tokenizer.prototype._parse = function(){
    while(this._index < this._buffer.length && this._running){
        var c = this._buffer.charAt(this._index);

        switch(this._state){
            case TEXT:                      this._stateText(c); break;
            case BEFORE_TAG_NAME:           this._stateBeforeTagName(c); break;
            case IN_TAG_NAME:               this._stateInTagName(c); break;
            case BEFORE_CLOSING_TAG_NAME:   this._stateBeforeCloseingTagName(c); break;
            case IN_CLOSING_TAG_NAME:       this._stateInCloseingTagName(c); break;
            case AFTER_CLOSING_TAG_NAME:    this._stateAfterCloseingTagName(c); break;
            case IN_SELF_CLOSING_TAG:       this._stateInSelfClosingTag(c); break;

            //attributes
            case BEFORE_ATTRIBUTE_NAME:     this._stateBeforeAttributeName(c); break;
            case IN_ATTRIBUTE_NAME:         this._stateInAttributeName(c); break;
            case AFTER_ATTRIBUTE_NAME:      this._stateAfterAttributeName(c); break;
            case BEFORE_ATTRIBUTE_VALUE:    this._stateBeforeAttributeValue(c); break;
            case IN_ATTRIBUTE_VALUE_DQ:     this._stateInAttributeValueDoubleQuotes(c); break;
            case IN_ATTRIBUTE_VALUE_SQ:     this._stateInAttributeValueSingleQuotes(c); break;
            case IN_ATTRIBUTE_VALUE_NQ:     this._stateInAttributeValueNoQuotes(c); break;

            //declarations
            case BEFORE_DECLARATION:        this._stateBeforeDeclaration(c); break;
            case IN_DECLARATION:            this._stateInDeclaration(c); break;

            //processing instructions
            case IN_PROCESSING_INSTRUCTION: this._stateInProcessingInstruction(c); break;

            //comments
            case BEFORE_COMMENT:            this._stateBeforeComment(c); break;
            case IN_COMMENT:                this._stateInComment(c); break;
            case AFTER_COMMENT_1:           this._stateAfterComment1(c); break;
            case AFTER_COMMENT_2:           this._stateAfterComment2(c); break;

            //cdata
            case BEFORE_CDATA_1:            this._stateBeforeCdata1(c); break;
            case BEFORE_CDATA_2:            this._stateBeforeCdata2(c); break;
            case BEFORE_CDATA_3:            this._stateBeforeCdata3(c); break;
            case BEFORE_CDATA_4:            this._stateBeforeCdata4(c); break;
            case BEFORE_CDATA_5:            this._stateBeforeCdata5(c); break;
            case BEFORE_CDATA_6:            this._stateBeforeCdata6(c); break;
            case IN_CDATA:                  this._stateInCdata(c); break;
            case AFTER_CDATA_1:             this._stateAfterCdata1(c); break;
            case AFTER_CDATA_2:             this._stateAfterCdata2(c); break;

            //special tags
            case BEFORE_SPECIAL:            this._stateBeforeSpecial(c); break;
            case BEFORE_SPECIAL_END:        this._stateBeforeSpecialEnd(c); break;

            //script
            case BEFORE_SCRIPT_1:           this._stateBeforeScript1(c); break;
            case BEFORE_SCRIPT_2:           this._stateBeforeScript2(c); break;
            case BEFORE_SCRIPT_3:           this._stateBeforeScript3(c); break;
            case BEFORE_SCRIPT_4:           this._stateBeforeScript4(c); break;
            case BEFORE_SCRIPT_5:           this._stateBeforeScript5(c); break;
            case AFTER_SCRIPT_1:            this._stateAfterScript1(c); break;
            case AFTER_SCRIPT_2:            this._stateAfterScript2(c); break;
            case AFTER_SCRIPT_3:            this._stateAfterScript3(c); break;
            case AFTER_SCRIPT_4:            this._stateAfterScript4(c); break;
            case AFTER_SCRIPT_5:            this._stateAfterScript5(c); break;

            //style
            case BEFORE_STYLE_1:            this._stateBeforeStyle1(c); break;
            case BEFORE_STYLE_2:            this._stateBeforeStyle2(c); break;
            case BEFORE_STYLE_3:            this._stateBeforeStyle3(c); break;
            case BEFORE_STYLE_4:            this._stateBeforeStyle4(c); break;
            case AFTER_STYLE_1:             this._stateAfterStyle1(c); break;
            case AFTER_STYLE_2:             this._stateAfterStyle2(c); break;
            case AFTER_STYLE_3:             this._stateAfterStyle3(c); break;
            case AFTER_STYLE_4:             this._stateAfterStyle4(c); break;

            //entities
            case BEFORE_ENTITY:             this._stateBeforeEntity(c); break;
            case BEFORE_NUMERIC_ENTITY:     this._stateBeforeNumericEntity(c); break;
            case IN_NAMED_ENTITY:           this._stateInNamedEntity(c); break;
            case IN_NUMERIC_ENTITY:         this._stateInNumericEntity(c); break;
            case IN_HEX_ENTITY:             this._stateInHexEntity(c); break;

            default:
                this._cbs.onerror(Error("unknown _state"), this._state);
        }

        this._index++;
    }

    this._cleanup();
};

//Stops the parse loop after the character currently being processed.
Tokenizer.prototype.pause = function(){
    this._running = false;
};
//Resumes after pause(): parses whatever is still buffered and, if end() was
//called in the meantime, finishes the stream.
Tokenizer.prototype.resume = function(){
    this._running = true;

    if(this._index < this._buffer.length){
        this._parse();
    }
    if(this._ended){
        this._finish();
    }
};

//Marks the input as complete, optionally writing a final `chunk` first.
//A repeated end() is reported through `onerror` but still processed. When the
//tokenizer is paused, finishing is deferred (resume() performs it).
Tokenizer.prototype.end = function(chunk){
    if(this._ended) this._cbs.onerror(Error(".end() after done!"));
    if(chunk) this.write(chunk);

    this._ended = true;

    if(this._running) this._finish();
};

//Flushes any unfinished section and signals the end of input via `onend`.
Tokenizer.prototype._finish = function(){
    //if there is remaining data, emit it in a reasonable way
    if(this._sectionStart < this._index){
        this._handleTrailingData();
    }

    this._cbs.onend();
};

//Emits whatever is left in the buffer when the input ends mid-token.
//  - unterminated CDATA / comment: emitted as cdata / comment
//  - unterminated entity (outside xml mode): decoded as far as possible; if
//    data is still left afterwards it is handled again in the base state
//  - inside a tag (name, attributes, closing name): the data is dropped
//  - everything else: emitted as text
Tokenizer.prototype._handleTrailingData = function(){
    var data = this._buffer.substr(this._sectionStart);

    if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
        this._cbs.oncdata(data);
    } else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
        this._cbs.oncomment(data);
    } else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
        this._parseLegacyEntity();
        if(this._sectionStart < this._index){
            this._state = this._baseState;
            this._handleTrailingData();
        }
    } else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){
        this._decodeNumericEntity(2, 10);
        if(this._sectionStart < this._index){
            this._state = this._baseState;
            this._handleTrailingData();
        }
    } else if(this._state === IN_HEX_ENTITY && !this._xmlMode){
        this._decodeNumericEntity(3, 16);
        if(this._sectionStart < this._index){
            this._state = this._baseState;
            this._handleTrailingData();
        }
    } else if(
        this._state !== IN_TAG_NAME &&
        this._state !== BEFORE_ATTRIBUTE_NAME &&
        this._state !== BEFORE_ATTRIBUTE_VALUE &&
        this._state !== AFTER_ATTRIBUTE_NAME &&
        this._state !== IN_ATTRIBUTE_NAME &&
        this._state !== IN_ATTRIBUTE_VALUE_SQ &&
        this._state !== IN_ATTRIBUTE_VALUE_DQ &&
        this._state !== IN_ATTRIBUTE_VALUE_NQ &&
        this._state !== IN_CLOSING_TAG_NAME
    ){
        this._cbs.ontext(data);
    }
    //else, ignore remaining data
    //TODO add a way to remove current tag
};

//Re-runs the constructor on this instance, keeping the current xmlMode and
//decodeEntities settings and the same callbacks object.
Tokenizer.prototype.reset = function(){
    Tokenizer.call(this, {xmlMode: this._xmlMode, decodeEntities: this._decodeEntities}, this._cbs);
};

//Returns the buffered text of the current section: from `_sectionStart`
//(inclusive) up to the current index (exclusive).
Tokenizer.prototype._getSection = function(){
    return this._buffer.substring(this._sectionStart, this._index);
};

//Passes the current section to the callback called `name` and marks the
//section as consumed by setting `_sectionStart` to -1.
Tokenizer.prototype._emitToken = function(name){
    this._cbs[name](this._getSection());
    this._sectionStart = -1;
};

//Emits a decoded entity value: as attribute data when the entity was found
//inside an attribute value (base state is not TEXT), otherwise as text.
Tokenizer.prototype._emitPartial = function(value){
    if(this._baseState !== TEXT){
        this._cbs.onattribdata(value); //TODO implement the new event
    } else {
        this._cbs.ontext(value);
    }
};
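// (the viewer's "?>" wrapper tag was here)
// [source-viewer page chrome] Online: 1
// [source-viewer page chrome] Advertisement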