Commit 97fd77de authored by David Sveningsson
Browse files

fix(lexer): handle CRLF after xml declaration

parent 6c1b8308
Pipeline #255890381 passed with stages
in 11 minutes and 4 seconds
......@@ -172,6 +172,13 @@ describe("lexer", () => {
expect(token.next().done).toBeTruthy();
});
// Regression test for this commit: an XML declaration followed by a space and
// a CRLF line ending must not produce any token of its own, so the first
// token yielded is EOF and the iterator is done right after it.
it("xml declaration trailing whitespace", () => {
expect.assertions(2);
// input ends with " \r\n" (space + CRLF) directly after the closing "?>"
const token = lexer.tokenize(inlineSource('<?xml version="1.0" encoding="utf-8"?> \r\n'));
expect(token.next()).toBeToken({ type: TokenType.EOF });
expect(token.next().done).toBeTruthy();
});
it("uppercase doctype", () => {
expect.assertions(5);
const token = lexer.tokenize(inlineSource("<!DOCTYPE html>"));
......
......@@ -11,7 +11,7 @@ const MATCH_WHITESPACE = /^(?:\r\n|\r|\n|[ \t]+(?:\r\n|\r|\n)?)/;
// Doctype handling: "<!DOCTYPE" (case-insensitive) followed by one whitespace
// character, then everything up to (not including) ">", then the ">" itself.
const MATCH_DOCTYPE_OPEN = /^<!(DOCTYPE)\s/i;
const MATCH_DOCTYPE_VALUE = /^[^>]+/;
const MATCH_DOCTYPE_CLOSE = /^>/;
// REMOVED by this commit (old version): required a bare "\n" immediately after
// "?>", so a declaration terminated by "\r\n" or followed by a space did not match.
const MATCH_XML_TAG = /^<\?xml.*?\?>\n/;
// ADDED by this commit (new version): accepts one or more whitespace characters
// after "?>", which covers "\n", "\r\n" and spaces/tabs before the line break.
// "." does not match line terminators, so the declaration itself must sit on one line.
// NOTE(review): "\s+" still requires at least one whitespace character, so a
// declaration immediately followed by markup would not match this pattern —
// confirm that this is intended.
const MATCH_XML_TAG = /^<\?xml.*?\?>\s+/;
const MATCH_TAG_OPEN = /^<(\/?)([a-zA-Z0-9\-:]+)/; // https://www.w3.org/TR/html/syntax.html#start-tags
const MATCH_TAG_CLOSE = /^\/?>/;
// Lazily matches text up to (but not including) optional trailing spaces/tabs
// plus a line break, a "<" followed by a non-space character, or end of input.
const MATCH_TEXT = /^[^]*?(?=(?:[ \t]*(?:\r\n|\r|\n)|<[^ ]|$))/;
......@@ -179,6 +179,7 @@ export class Lexer {
[MATCH_DOCTYPE_OPEN, State.DOCTYPE, TokenType.DOCTYPE_OPEN],
[MATCH_WHITESPACE, State.INITIAL, TokenType.WHITESPACE],
[MATCH_DIRECTIVE, State.INITIAL, TokenType.DIRECTIVE],
[MATCH_CONDITIONAL, State.INITIAL, TokenType.CONDITIONAL],
[MATCH_COMMENT, State.INITIAL, TokenType.COMMENT],
[false, State.TEXT, false],
],
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment