Commit a0f61904 authored by David Sveningsson

feat(lexer): handle rudimentary template tags such as `<% .. %>`

parent ab984bf1
Pipeline #102117570 passed with stages
in 9 minutes and 18 seconds
......@@ -674,4 +674,20 @@ describe("lexer", () => {
expect(token.next()).toBeToken({ type: TokenType.EOF });
});
});
describe("should not choke on templating", () => {
	/* every supported delimiter pair must come out of the lexer as exactly
	 * one TEMPLATING token carrying the raw input, followed by EOF */
	it.each`
		input
		${"<% ... %>"}
		${"<? ... ?>"}
		${"<$ ... $>"}
	`("$input", ({ input }) => {
		const stream = lexer.tokenize(inlineSource(input));
		const templating = { type: TokenType.TEMPLATING, data: [input] };
		const eof = { type: TokenType.EOF };
		expect(stream.next()).toBeToken(templating);
		expect(stream.next()).toBeToken(eof);
	});
});
});
......@@ -24,6 +24,7 @@ const MATCH_XML_TAG = /^<\?xml.*?\?>\n/;
/* Opening or closing tag prefix: group 1 is the optional "/" and group 2 the
 * tag name. See https://www.w3.org/TR/html/syntax.html#start-tags */
const MATCH_TAG_OPEN = /^<(\/?)([a-zA-Z0-9\-:]+)/; // https://www.w3.org/TR/html/syntax.html#start-tags

/* End of a tag: either ">" or the self-closing "/>". */
const MATCH_TAG_CLOSE = /^\/?>/;

/* Shortest run of text up to (but not including) trailing blanks followed by
 * a line break, a "<" followed by a non-space character, or end of input. */
const MATCH_TEXT = /^[^]*?(?=(?:[ \t]*(?:\r\n|\r|\n)|<[^ ]|$))/;

/* Rudimentary template tags: `<% .. %>`, `<? .. ?>` and `<$ .. $>`.
 *
 * `[^]` (any character including line terminators) is used instead of `.` so
 * that template blocks spanning multiple lines are matched as a single unit;
 * the lazy quantifier still stops at the first closing delimiter. */
const MATCH_TEMPLATING = /^(?:<%[^]*?%>|<\?[^]*?\?>|<\$[^]*?\$>)/;

/* Shortest run of anything up to the next "<" or end of input. */
const MATCH_TAG_LOOKAHEAD = /^[^]*?(?=<|$)/;

/* Attribute name: one or more characters that are not whitespace, "/", ">",
 * "<", quotes or "=". See
 * https://www.w3.org/TR/html/syntax.html#elements-attributes */
const MATCH_ATTR_START = /^([^\t\r\n\f \/><"'=]+)/; // https://www.w3.org/TR/html/syntax.html#elements-attributes

/* Single-quoted attribute value: group 1 is the value, group 2 the closing
 * quote. */
const MATCH_ATTR_SINGLE = /^\s*=\s*'([^']*?)(')/;
......@@ -266,6 +267,7 @@ export class Lexer {
[MATCH_DIRECTIVE, State.TEXT, TokenType.DIRECTIVE],
[MATCH_CONDITIONAL, State.TEXT, TokenType.CONDITIONAL],
[MATCH_COMMENT, State.TEXT, TokenType.COMMENT],
[MATCH_TEMPLATING, State.TEXT, TokenType.TEMPLATING],
[MATCH_TAG_OPEN, State.TAG, TokenType.TAG_OPEN],
[MATCH_TEXT, State.TEXT, TokenType.TEXT],
[MATCH_TAG_LOOKAHEAD, State.TEXT, TokenType.TEXT],
......
......@@ -11,6 +11,7 @@ export enum TokenType {
ATTR_NAME,
ATTR_VALUE,
TEXT,
TEMPLATING,
SCRIPT,
COMMENT,
CONDITIONAL,
......
......@@ -281,6 +281,21 @@ describe("parser", () => {
});
expect(events.shift()).toBeUndefined();
});
describe("templating as text", () => {
	/* a template tag embedded in a paragraph must survive parsing verbatim
	 * as part of the element's text content */
	it.each`
		input
		${"<% ... %>"}
		${"<? ... ?>"}
		${"<$ ... $>"}
	`("$input", ({ input }) => {
		expect.assertions(1);
		const expected = `lorem ${input} ipsum`;
		const markup = `<p>${expected}</p>`;
		const paragraph = parser.parseHtml(markup).querySelector("p");
		expect(paragraph.textContent).toEqual(expected);
	});
});
});
describe("should fail on", () => {
......
......@@ -114,6 +114,7 @@ export class Parser {
break;
case TokenType.TEXT:
case TokenType.TEMPLATING:
this.appendText(token.data, token.location);
break;
......
......@@ -118,6 +118,21 @@ describe("rule no-raw-characters", () => {
});
});
describe("should not report templating", () => {
	/* the raw "<", ">" and similar characters inside a template tag must not
	 * be flagged by the rule */
	it.each`
		input
		${"<% ... %>"}
		${"<? ... ?>"}
		${"<$ ... $>"}
	`("$input", ({ input }) => {
		expect.assertions(1);
		const markup = `<p>lorem ${input} ipsum</p>`;
		const result = htmlvalidate.validateString(markup);
		expect(result).toBeValid();
	});
});
it("smoketest", () => {
const report = htmlvalidate.validateFile(
"test-files/rules/no-raw-characters.html"
......
......@@ -9,6 +9,7 @@ const defaults = {
/* Raw characters disallowed in text content: "<", ">" and any "&" which does
 * not start a character reference such as "&amp;" or "&#160;". */
const textRegexp = /([<>]|&(?![a-zA-Z0-9#]+;))/g;

/* Same as above but for unquoted attribute values, where quotes, "=" and
 * backtick are disallowed as well. */
const unquotedAttrRegexp = /([<>"'=`]|&(?![a-zA-Z0-9#]+;))/g;

/* Text consisting solely of a template tag (`<% .. %>`, `<? .. ?>` or
 * `<$ .. $>`).
 *
 * `[^]` (any character including line terminators) is used instead of `.` so
 * that template blocks spanning multiple lines are recognised too. */
const matchTemplate = /^(<%[^]*?%>|<\?[^]*?\?>|<\$[^]*?\$>)$/;
const replacementTable: Map<string, string> = new Map([
['"', "&quot;"],
......@@ -44,6 +45,12 @@ class NoRawCharacters extends Rule {
if (child.nodeType !== NodeType.TEXT_NODE) {
continue;
}
/* workaround for templating <% ... %> etc */
if (child.textContent.match(matchTemplate)) {
continue;
}
this.findRawChars(child.textContent, child.location, textRegexp);
}
});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment