// Convenience factory exposed on `sax`: forwards `strict` and `opt` unchanged
// to the SAXParser constructor and returns the new instance, so callers do not
// have to use `new` themselves.
2 sax.parser =
function (strict, opt) {
return new SAXParser(strict, opt) }
// Publish the parser constructor, the stream constructor and the stream
// factory on the `sax` export object.
3 sax.SAXParser = SAXParser
4 sax.SAXStream = SAXStream
5 sax.createStream = createStream
// Buffer size limit (64 * 1024 = 65536). It is compared against the `.length`
// of the parser's string buffers in checkBufferLength, and it seeds
// parser.bufferCheckPosition in the SAXParser constructor. Because it lives on
// `sax`, it is read each time those functions run rather than captured once.
16 sax.MAX_BUFFER_LENGTH = 64 * 1024
19 'comment',
'sgmlDecl',
'textNode',
'tagName',
'doctype',
20 'procInstName',
'procInstBody',
'entity',
'attribName',
21 'attribValue',
'cdata',
'script'
26 'processinginstruction',
45 function SAXParser (strict, opt) {
46 if (!(
this instanceof SAXParser)) {
47 return new SAXParser(strict, opt)
52 parser.q = parser.c = ''
53 parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
54 parser.opt = opt || {}
55 parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
56 parser.looseCase = parser.opt.lowercase ?
'toLowerCase' :
'toUpperCase'
58 parser.closed = parser.closedRoot = parser.sawRoot =
false
59 parser.tag = parser.error = null
60 parser.strict = !!strict
61 parser.noscript = !!(strict || parser.opt.noscript)
62 parser.state = S.BEGIN
63 parser.strictEntities = parser.opt.strictEntities
64 parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES)
65 parser.attribList = []
70 if (parser.opt.xmlns) {
71 parser.ns = Object.create(rootNS)
75 parser.trackPosition = parser.opt.position !==
false
76 if (parser.trackPosition) {
77 parser.position = parser.line = parser.column = 0
79 emit(parser,
'onready')
83 Object.create =
function (o) {
92 Object.keys =
function (o) {
94 for (var i in o)
if (o.hasOwnProperty(i)) a.push(i)
99 function checkBufferLength (parser) {
100 var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
102 for (var i = 0, l = buffers.length; i < l; i++) {
103 var len = parser[buffers[i]].length
104 if (len > maxAllowed) {
109 switch (buffers[i]) {
115 emitNode(parser, 'oncdata', parser.cdata)
120 emitNode(parser, 'onscript', parser.script)
125 error(parser, 'Max buffer length exceeded: ' + buffers[i])
128 maxActual = Math.max(maxActual, len)
131 var m = sax.MAX_BUFFER_LENGTH - maxActual
132 parser.bufferCheckPosition = m + parser.position
135 function clearBuffers (parser) {
136 for (var i = 0, l = buffers.length; i < l; i++) {
137 parser[buffers[i]] =
''
141 function flushBuffers (parser) {
143 if (parser.cdata !== '') {
144 emitNode(parser,
'oncdata', parser.cdata)
147 if (parser.script !== '') {
148 emitNode(parser,
'onscript', parser.script)
153 SAXParser.prototype = {
154 end:
function () { end(
this) },
156 resume:
function () { this.error = null;
return this },
157 close:
function () {
return this.write(null) },
158 flush:
function () { flushBuffers(
this) }
163 Stream = require(
'stream').Stream
165 Stream =
function () {}
168 var streamWraps = sax.EVENTS.filter(
function (ev) {
169 return ev !==
'error' && ev !==
'end'
172 function createStream (strict, opt) {
173 return new SAXStream(strict, opt)
176 function SAXStream (strict, opt) {
177 if (!(
this instanceof SAXStream)) {
178 return new SAXStream(strict, opt)
183 this._parser =
new SAXParser(strict, opt)
189 this._parser.onend = function () {
193 this._parser.onerror =
function (er) {
198 me._parser.error = null
203 streamWraps.forEach(
function (ev) {
204 Object.defineProperty(me,
'on' + ev, {
206 return me._parser[
'on' + ev]
210 me.removeAllListeners(ev)
211 me._parser[
'on' + ev] = h
222 SAXStream.prototype = Object.create(Stream.prototype, {
228 SAXStream.prototype.write = function (data) {
229 if (typeof Buffer ===
'function' &&
230 typeof Buffer.isBuffer ===
'function' &&
231 Buffer.isBuffer(data)) {
232 if (!this._decoder) {
233 var SD = require(
'string_decoder').StringDecoder
234 this._decoder =
new SD(
'utf8')
236 data = this._decoder.write(data)
239 this._parser.write(data.toString())
240 this.emit(
'data', data)
244 SAXStream.prototype.end =
function (chunk) {
245 if (chunk && chunk.length) {
252 SAXStream.prototype.on =
function (ev, handler) {
254 if (!me._parser[
'on' + ev] && streamWraps.indexOf(ev) !== -1) {
255 me._parser[
'on' + ev] =
function () {
256 var args = arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments)
257 args.splice(0, 0, ev)
258 me.emit.apply(me, args)
262 return Stream.prototype.on.call(me, ev, handler)
// Characters treated as whitespace: CR, LF, tab and space. This starts out as
// a plain string and is later replaced by the result of charClass(whitespace).
266 var whitespace =
'\r\n\t '
// Characters that terminate an unquoted attribute value: any whitespace
// character or '>'. This must be built while `whitespace` is still a string,
// i.e. before the charClass() reassignment further down.
273 var attribEnd = whitespace + '>'
// Keywords matched (after upper-casing the accumulated text) while reading an
// SGML declaration: '<![CDATA[' opens a CDATA section, '<!DOCTYPE' a doctype.
274 var CDATA = '[CDATA['
275 var DOCTYPE = 'DOCTYPE'
// Namespace URIs that the `xml` and `xmlns` prefixes are required to be bound
// to; attrib() reports a failure when an xmlns:xml / xmlns:xmlns declaration
// carries a different value.
276 var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
277 var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
// Prefix -> URI bindings used as the prototype of parser.ns (via
// Object.create) when the `xmlns` option is enabled.
278 var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
280 // Turn the string character sets into character class objects.
// From here on `whitespace` holds whatever charClass() returns and is meant to
// be tested with is()/not() rather than used as a string.
// NOTE(review): charClass's body is not visible here - presumably it returns a
// lookup object keyed by character; confirm.
281 whitespace = charClass(whitespace)
283 // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
284 // This implementation works on strings, a single character at a time;
285 // as such, it cannot ever support astral-plane characters (10000-EFFFF)
286 // without a significant breaking change to either this parser, or the
287 // JavaScript language. Implementation of an emoji-capable xml parser
288 // is left as an exercise for the reader.
// nameStart: characters allowed as the first character of a tag or attribute
// name. Tested one character at a time through isMatch()/notMatch().
289 var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
// nameBody: everything in nameStart plus U+00B7, U+0300-U+036F, U+203F-U+2040,
// '.', ASCII digits and '-' - the characters allowed after the first one.
291 var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
// entityStart / entityBody: the same two sets with '#' added, so numeric
// character references such as '#38' or '#x26' are accepted while an entity
// name is being accumulated. entityStart is applied while parser.entity is
// still empty, entityBody afterwards.
293 var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
294 var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
// Convert the remaining string sets the same way as `whitespace` above.
// `quote` is declared on a line that is not part of this view.
296 quote = charClass(quote)
297 attribEnd = charClass(attribEnd)
299 function charClass (str) {
300 return str.split('').reduce(function (s, c) {
306 function isMatch (regex, c) {
310 function is (charclass, c) {
314 function notMatch (regex, c) {
315 return !isMatch(regex, c)
318 function not (charclass, c) {
319 return !is(charclass, c)
324 BEGIN: S++, // leading byte order mark or whitespace
325 BEGIN_WHITESPACE: S++, // leading whitespace
326 TEXT: S++, // general stuff
327 TEXT_ENTITY: S++, // & and such.
329 SGML_DECL: S++, // <!BLARG
330 SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
334 DOCTYPE_DTD_QUOTED: S++,
335 COMMENT_STARTING: S++,
344 PROC_INST_ENDING: S++,
349 ATTRIB_NAME_SAW_WHITE: S++,
351 ATTRIB_VALUE_QUOTED: S++,
352 ATTRIB_VALUE_CLOSED: S++,
353 ATTRIB_VALUE_UNQUOTED: S++,
354 ATTRIB_VALUE_ENTITY_Q: S++,
355 ATTRIB_VALUE_ENTITY_U: S++,
357 CLOSE_TAG_SAW_WHITE: S++,
626 Object.keys(sax.ENTITIES).forEach(
function (key) {
627 var e = sax.ENTITIES[key]
628 var s = typeof e ===
'number' ? String.fromCharCode(e) : e
629 sax.ENTITIES[key] = s
632 for (var s in sax.STATE) {
633 sax.STATE[sax.STATE[s]] = s
639 function emit (parser, event, data) {
640 parser[event] && parser[event](data)
643 function emitNode (parser, nodeType, data) {
644 if (parser.textNode) closeText(parser)
645 emit(parser, nodeType, data)
648 function closeText (parser) {
649 parser.textNode = textopts(parser.opt, parser.textNode)
650 if (parser.textNode) emit(parser, 'ontext', parser.textNode)
654 function textopts (opt, text) {
655 if (opt.trim) text = text.trim()
656 if (opt.normalize) text = text.replace(/\s+/g,
' ')
660 function error (parser, er) {
662 if (parser.trackPosition) {
663 er +=
'\nLine: ' + parser.line +
664 '\nColumn: ' + parser.column +
665 '\nChar: ' + parser.c
669 emit(parser, 'onerror', er)
673 function end (parser) {
674 if (parser.sawRoot && !parser.closedRoot) strictFail(parser,
'Unclosed root tag')
675 if ((parser.state !== S.BEGIN) &&
676 (parser.state !== S.BEGIN_WHITESPACE) &&
677 (parser.state !== S.TEXT)) {
678 error(parser,
'Unexpected end')
683 emit(parser, 'onend')
684 SAXParser.call(parser, parser.strict, parser.opt)
688 function strictFail (parser, message) {
689 if (typeof parser !==
'object' || !(parser instanceof SAXParser)) {
690 throw new Error(
'bad call to strictFail')
693 error(parser, message)
697 function newTag (parser) {
698 if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
699 var parent = parser.tags[parser.tags.length - 1] || parser
700 var tag = parser.tag = { name: parser.tagName, attributes: {} }
703 if (parser.opt.xmlns) {
706 parser.attribList.length = 0
707 emitNode(parser,
'onopentagstart', tag)
710 function qname (name, attribute) {
711 var i = name.indexOf(
':')
712 var qualName = i < 0 ? [
'', name ] : name.split(
':')
713 var prefix = qualName[0]
714 var local = qualName[1]
717 if (attribute && name ===
'xmlns') {
722 return { prefix: prefix, local: local }
725 function attrib (parser) {
726 if (!parser.strict) {
727 parser.attribName = parser.attribName[parser.looseCase]()
730 if (parser.attribList.indexOf(parser.attribName) !== -1 ||
731 parser.tag.attributes.hasOwnProperty(parser.attribName)) {
732 parser.attribName = parser.attribValue =
''
736 if (parser.opt.xmlns) {
737 var qn = qname(parser.attribName,
true)
738 var prefix = qn.prefix
741 if (prefix === 'xmlns') {
743 if (local ===
'xml' && parser.attribValue !== XML_NAMESPACE) {
745 'xml: prefix must be bound to ' + XML_NAMESPACE +
'\n' +
746 'Actual: ' + parser.attribValue)
747 }
else if (local ===
'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
749 'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE +
'\n' +
750 'Actual: ' + parser.attribValue)
753 var parent = parser.tags[parser.tags.length - 1] || parser
754 if (tag.ns === parent.ns) {
755 tag.ns = Object.create(parent.ns)
757 tag.ns[local] = parser.attribValue
764 parser.attribList.push([parser.attribName, parser.attribValue])
767 parser.tag.attributes[parser.attribName] = parser.attribValue
768 emitNode(parser,
'onattribute', {
769 name: parser.attribName,
770 value: parser.attribValue
774 parser.attribName = parser.attribValue =
''
777 function openTag (parser, selfClosing) {
778 if (parser.opt.xmlns) {
783 var qn = qname(parser.tagName)
784 tag.prefix = qn.prefix
786 tag.uri = tag.ns[qn.prefix] || ''
788 if (tag.prefix && !tag.uri) {
789 strictFail(parser,
'Unbound namespace prefix: ' +
790 JSON.stringify(parser.tagName))
794 var parent = parser.tags[parser.tags.length - 1] || parser
795 if (tag.ns && parent.ns !== tag.ns) {
796 Object.keys(tag.ns).forEach(
function (p) {
797 emitNode(parser,
'onopennamespace', {
807 for (var i = 0, l = parser.attribList.length; i < l; i++) {
808 var nv = parser.attribList[i]
811 var qualName = qname(name,
true)
812 var prefix = qualName.prefix
813 var local = qualName.local
814 var uri = prefix === '' ? '' : (tag.ns[prefix] || '')
825 if (prefix && prefix !==
'xmlns' && !uri) {
826 strictFail(parser,
'Unbound namespace prefix: ' +
827 JSON.stringify(prefix))
830 parser.tag.attributes[name] = a
831 emitNode(parser,
'onattribute', a)
833 parser.attribList.length = 0
836 parser.tag.isSelfClosing = !!selfClosing
839 parser.sawRoot =
true
840 parser.tags.push(parser.tag)
841 emitNode(parser,
'onopentag', parser.tag)
844 if (!parser.noscript && parser.tagName.toLowerCase() ===
'script') {
845 parser.state = S.SCRIPT
847 parser.state = S.TEXT
852 parser.attribName = parser.attribValue =
''
853 parser.attribList.length = 0
856 function closeTag (parser) {
857 if (!parser.tagName) {
858 strictFail(parser,
'Weird empty close tag.')
859 parser.textNode += '</>'
860 parser.state = S.TEXT
865 if (parser.tagName !==
'script') {
866 parser.script +=
'</' + parser.tagName +
'>'
868 parser.state = S.SCRIPT
871 emitNode(parser,
'onscript', parser.script)
877 var t = parser.tags.length
878 var tagName = parser.tagName
879 if (!parser.strict) {
880 tagName = tagName[parser.looseCase]()
882 var closeTo = tagName
884 var close = parser.tags[t]
885 if (close.name !== closeTo) {
887 strictFail(parser,
'Unexpected close tag')
895 strictFail(parser,
'Unmatched closing tag: ' + parser.tagName)
896 parser.textNode += '</' + parser.tagName + '>'
897 parser.state = S.TEXT
900 parser.tagName = tagName
901 var s = parser.tags.length
903 var tag = parser.tag = parser.tags.pop()
904 parser.tagName = parser.tag.name
905 emitNode(parser,
'onclosetag', parser.tagName)
908 for (var i in tag.ns) {
912 var parent = parser.tags[parser.tags.length - 1] || parser
913 if (parser.opt.xmlns && tag.ns !== parent.ns) {
915 Object.keys(tag.ns).forEach(
function (p) {
917 emitNode(parser,
'onclosenamespace', { prefix: p, uri: n })
921 if (t === 0) parser.closedRoot =
true
922 parser.tagName = parser.attribValue = parser.attribName =
''
923 parser.attribList.length = 0
924 parser.state = S.TEXT
927 function parseEntity (parser) {
928 var entity = parser.entity
929 var entityLC = entity.toLowerCase()
933 if (parser.ENTITIES[entity]) {
934 return parser.ENTITIES[entity]
936 if (parser.ENTITIES[entityLC]) {
937 return parser.ENTITIES[entityLC]
940 if (entity.charAt(0) ===
'#') {
941 if (entity.charAt(1) ===
'x') {
942 entity = entity.slice(2)
943 num = parseInt(entity, 16)
944 numStr = num.toString(16)
946 entity = entity.slice(1)
947 num = parseInt(entity, 10)
948 numStr = num.toString(10)
951 entity = entity.replace(/^0+/, '')
952 if (numStr.toLowerCase() !== entity) {
953 strictFail(parser,
'Invalid character entity')
954 return '&' + parser.entity + ';'
957 return String.fromCodePoint(num)
960 function beginWhiteSpace (parser, c) {
962 parser.state = S.OPEN_WAKA
963 parser.startTagPosition = parser.position
964 }
else if (not(whitespace, c)) {
967 strictFail(parser,
'Non-whitespace before first tag.')
969 parser.state = S.TEXT
973 function charAt (chunk, i) {
975 if (i < chunk.length) {
976 result = chunk.charAt(i)
981 function write (chunk) {
988 'Cannot write after close. Assign an onready handler.')
990 if (chunk === null) {
993 if (typeof chunk ===
'object') {
994 chunk = chunk.toString()
999 c = charAt(chunk, i++)
1006 if (parser.trackPosition) {
1016 switch (parser.state) {
1018 parser.state = S.BEGIN_WHITESPACE
1019 if (c ===
'\uFEFF') {
1022 beginWhiteSpace(parser, c)
1025 case S.BEGIN_WHITESPACE:
1026 beginWhiteSpace(parser, c)
1030 if (parser.sawRoot && !parser.closedRoot) {
1032 while (c && c !==
'<' && c !==
'&') {
1033 c = charAt(chunk, i++)
1034 if (c && parser.trackPosition) {
1044 parser.textNode += chunk.substring(starti, i - 1)
1046 if (c ===
'<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
1047 parser.state = S.OPEN_WAKA
1048 parser.startTagPosition = parser.position
1050 if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) {
1051 strictFail(parser,
'Text data outside of root node.')
1054 parser.state = S.TEXT_ENTITY
1056 parser.textNode += c
1064 parser.state = S.SCRIPT_ENDING
1070 case S.SCRIPT_ENDING:
1072 parser.state = S.CLOSE_TAG
1074 parser.script +=
'<' + c
1075 parser.state = S.SCRIPT
1082 parser.state = S.SGML_DECL
1083 parser.sgmlDecl =
''
1084 }
else if (is(whitespace, c)) {
1086 }
else if (isMatch(nameStart, c)) {
1087 parser.state = S.OPEN_TAG
1089 }
else if (c ===
'/') {
1090 parser.state = S.CLOSE_TAG
1092 }
else if (c ===
'?') {
1093 parser.state = S.PROC_INST
1094 parser.procInstName = parser.procInstBody =
''
1096 strictFail(parser,
'Unencoded <')
1098 if (parser.startTagPosition + 1 < parser.position) {
1099 var pad = parser.position - parser.startTagPosition
1100 c =
new Array(pad).join(
' ') + c
1102 parser.textNode +=
'<' + c
1103 parser.state = S.TEXT
1108 if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
1109 emitNode(parser,
'onopencdata')
1110 parser.state = S.CDATA
1111 parser.sgmlDecl = ''
1113 } else if (parser.sgmlDecl + c === '--') {
1114 parser.state = S.COMMENT
1116 parser.sgmlDecl =
''
1117 }
else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
1118 parser.state = S.DOCTYPE
1119 if (parser.doctype || parser.sawRoot) {
1121 'Inappropriately located doctype declaration')
1124 parser.sgmlDecl =
''
1125 }
else if (c ===
'>') {
1126 emitNode(parser,
'onsgmldeclaration', parser.sgmlDecl)
1127 parser.sgmlDecl = ''
1128 parser.state = S.TEXT
1129 } else if (is(quote, c)) {
1130 parser.state = S.SGML_DECL_QUOTED
1131 parser.sgmlDecl += c
1133 parser.sgmlDecl += c
1137 case S.SGML_DECL_QUOTED:
1138 if (c === parser.q) {
1139 parser.state = S.SGML_DECL
1142 parser.sgmlDecl += c
1147 parser.state = S.TEXT
1148 emitNode(parser,
'ondoctype', parser.doctype)
1149 parser.doctype = true
1153 parser.state = S.DOCTYPE_DTD
1154 }
else if (is(quote, c)) {
1155 parser.state = S.DOCTYPE_QUOTED
1161 case S.DOCTYPE_QUOTED:
1163 if (c === parser.q) {
1165 parser.state = S.DOCTYPE
1172 parser.state = S.DOCTYPE
1173 }
else if (is(quote, c)) {
1174 parser.state = S.DOCTYPE_DTD_QUOTED
1179 case S.DOCTYPE_DTD_QUOTED:
1181 if (c === parser.q) {
1182 parser.state = S.DOCTYPE_DTD
1189 parser.state = S.COMMENT_ENDING
1195 case S.COMMENT_ENDING:
1197 parser.state = S.COMMENT_ENDED
1198 parser.comment = textopts(parser.opt, parser.comment)
1199 if (parser.comment) {
1200 emitNode(parser,
'oncomment', parser.comment)
1204 parser.comment +=
'-' + c
1205 parser.state = S.COMMENT
1209 case S.COMMENT_ENDED:
1211 strictFail(parser,
'Malformed comment')
1214 parser.comment += '--' + c
1215 parser.state = S.COMMENT
1217 parser.state = S.TEXT
1223 parser.state = S.CDATA_ENDING
1229 case S.CDATA_ENDING:
1231 parser.state = S.CDATA_ENDING_2
1233 parser.cdata +=
']' + c
1234 parser.state = S.CDATA
1238 case S.CDATA_ENDING_2:
1241 emitNode(parser,
'oncdata', parser.cdata)
1243 emitNode(parser,
'onclosecdata')
1245 parser.state = S.TEXT
1246 } else if (c === ']') {
1249 parser.cdata +=
']]' + c
1250 parser.state = S.CDATA
1256 parser.state = S.PROC_INST_ENDING
1257 }
else if (is(whitespace, c)) {
1258 parser.state = S.PROC_INST_BODY
1260 parser.procInstName += c
1264 case S.PROC_INST_BODY:
1265 if (!parser.procInstBody && is(whitespace, c)) {
1267 }
else if (c ===
'?') {
1268 parser.state = S.PROC_INST_ENDING
1270 parser.procInstBody += c
1274 case S.PROC_INST_ENDING:
1276 emitNode(parser,
'onprocessinginstruction', {
1277 name: parser.procInstName,
1278 body: parser.procInstBody
1280 parser.procInstName = parser.procInstBody =
''
1281 parser.state = S.TEXT
1283 parser.procInstBody +=
'?' + c
1284 parser.state = S.PROC_INST_BODY
1289 if (isMatch(nameBody, c)) {
1295 }
else if (c ===
'/') {
1296 parser.state = S.OPEN_TAG_SLASH
1298 if (not(whitespace, c)) {
1299 strictFail(parser,
'Invalid character in tag name')
1301 parser.state = S.ATTRIB
1306 case S.OPEN_TAG_SLASH:
1308 openTag(parser,
true)
1311 strictFail(parser,
'Forward-slash in opening tag not followed by >')
1312 parser.state = S.ATTRIB
1318 if (is(whitespace, c)) {
1320 }
else if (c ===
'>') {
1322 }
else if (c ===
'/') {
1323 parser.state = S.OPEN_TAG_SLASH
1324 }
else if (isMatch(nameStart, c)) {
1325 parser.attribName = c
1326 parser.attribValue =
''
1327 parser.state = S.ATTRIB_NAME
1329 strictFail(parser,
'Invalid attribute name')
1335 parser.state = S.ATTRIB_VALUE
1336 }
else if (c ===
'>') {
1337 strictFail(parser,
'Attribute without value')
1338 parser.attribValue = parser.attribName
1341 } else if (is(whitespace, c)) {
1342 parser.state = S.ATTRIB_NAME_SAW_WHITE
1343 }
else if (isMatch(nameBody, c)) {
1344 parser.attribName += c
1346 strictFail(parser,
'Invalid attribute name')
1350 case S.ATTRIB_NAME_SAW_WHITE:
1352 parser.state = S.ATTRIB_VALUE
1353 }
else if (is(whitespace, c)) {
1356 strictFail(parser,
'Attribute without value')
1357 parser.tag.attributes[parser.attribName] = ''
1358 parser.attribValue = ''
1359 emitNode(parser, 'onattribute', {
1360 name: parser.attribName,
1363 parser.attribName =
''
1366 }
else if (isMatch(nameStart, c)) {
1367 parser.attribName = c
1368 parser.state = S.ATTRIB_NAME
1370 strictFail(parser,
'Invalid attribute name')
1371 parser.state = S.ATTRIB
1376 case S.ATTRIB_VALUE:
1377 if (is(whitespace, c)) {
1379 }
else if (is(quote, c)) {
1381 parser.state = S.ATTRIB_VALUE_QUOTED
1383 strictFail(parser,
'Unquoted attribute value')
1384 parser.state = S.ATTRIB_VALUE_UNQUOTED
1385 parser.attribValue = c
1389 case S.ATTRIB_VALUE_QUOTED:
1390 if (c !== parser.q) {
1392 parser.state = S.ATTRIB_VALUE_ENTITY_Q
1394 parser.attribValue += c
1400 parser.state = S.ATTRIB_VALUE_CLOSED
1403 case S.ATTRIB_VALUE_CLOSED:
1404 if (is(whitespace, c)) {
1405 parser.state = S.ATTRIB
1406 }
else if (c ===
'>') {
1408 }
else if (c ===
'/') {
1409 parser.state = S.OPEN_TAG_SLASH
1410 }
else if (isMatch(nameStart, c)) {
1411 strictFail(parser,
'No whitespace between attributes')
1412 parser.attribName = c
1413 parser.attribValue = ''
1414 parser.state = S.ATTRIB_NAME
1416 strictFail(parser,
'Invalid attribute name')
1420 case S.ATTRIB_VALUE_UNQUOTED:
1421 if (not(attribEnd, c)) {
1423 parser.state = S.ATTRIB_VALUE_ENTITY_U
1425 parser.attribValue += c
1433 parser.state = S.ATTRIB
1438 if (!parser.tagName) {
1439 if (is(whitespace, c)) {
1441 }
else if (notMatch(nameStart, c)) {
1442 if (parser.script) {
1443 parser.script +=
'</' + c
1444 parser.state = S.SCRIPT
1446 strictFail(parser,
'Invalid tagname in closing tag.')
1451 }
else if (c ===
'>') {
1453 }
else if (isMatch(nameBody, c)) {
1455 }
else if (parser.script) {
1456 parser.script +=
'</' + parser.tagName
1458 parser.state = S.SCRIPT
1460 if (not(whitespace, c)) {
1461 strictFail(parser,
'Invalid tagname in closing tag')
1463 parser.state = S.CLOSE_TAG_SAW_WHITE
1467 case S.CLOSE_TAG_SAW_WHITE:
1468 if (is(whitespace, c)) {
1474 strictFail(parser,
'Invalid characters in closing tag')
1479 case S.ATTRIB_VALUE_ENTITY_Q:
1480 case S.ATTRIB_VALUE_ENTITY_U:
1483 switch (parser.state) {
1485 returnState = S.TEXT
1489 case S.ATTRIB_VALUE_ENTITY_Q:
1490 returnState = S.ATTRIB_VALUE_QUOTED
1491 buffer =
'attribValue'
1494 case S.ATTRIB_VALUE_ENTITY_U:
1495 returnState = S.ATTRIB_VALUE_UNQUOTED
1496 buffer =
'attribValue'
1501 parser[buffer] += parseEntity(parser)
1503 parser.state = returnState
1504 } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
1507 strictFail(parser,
'Invalid character in entity name')
1508 parser[buffer] += '&' + parser.entity + c
1510 parser.state = returnState
1516 throw new Error(parser, 'Unknown state: ' + parser.state)
1520 if (parser.position >= parser.bufferCheckPosition) {
1521 checkBufferLength(parser)
1528 if (!String.fromCodePoint) {
1530 var stringFromCharCode = String.fromCharCode
1531 var floor = Math.floor
1532 var fromCodePoint =
function () {
1533 var MAX_SIZE = 0x4000
1538 var length = arguments.length
1543 while (++index < length) {
1544 var codePoint = Number(arguments[index])
1546 !isFinite(codePoint) ||
1548 codePoint > 0x10FFFF ||
1549 floor(codePoint) !== codePoint
1551 throw RangeError(
'Invalid code point: ' + codePoint)
1553 if (codePoint <= 0xFFFF) {
1554 codeUnits.push(codePoint)
1557 codePoint -= 0x10000
1558 highSurrogate = (codePoint >> 10) + 0xD800
1559 lowSurrogate = (codePoint % 0x400) + 0xDC00
1560 codeUnits.push(highSurrogate, lowSurrogate)
1562 if (index + 1 === length || codeUnits.length > MAX_SIZE) {
1563 result += stringFromCharCode.apply(null, codeUnits)
1564 codeUnits.length = 0
1570 if (Object.defineProperty) {
1571 Object.defineProperty(String,
'fromCodePoint', {
1572 value: fromCodePoint,
1577 String.fromCodePoint = fromCodePoint
1581 })(typeof exports ===
'undefined' ? this.sax = {} : exports)