Commit 28015ba0 authored by Roy Revelt

feat: recognise unclosed/terminated ESP tags within tag attributes

parent 13740c17
......@@ -35,7 +35,7 @@ Copyright (c) 2015-2020 Roy Revelt and other contributors
[gitlab-img]: https://img.shields.io/badge/repo-on%20GitLab-brightgreen.svg?style=flat-square
[gitlab-url]: https://gitlab.com/codsen/codsen/tree/master/packages/codsen-tokenizer
[cov-img]: https://img.shields.io/badge/coverage-90.39%25-brightgreen.svg?style=flat-square
[cov-img]: https://img.shields.io/badge/coverage-90.49%25-brightgreen.svg?style=flat-square
[cov-url]: https://gitlab.com/codsen/codsen/tree/master/packages/codsen-tokenizer
[deps2d-img]: https://img.shields.io/badge/deps%20in%202D-see_here-08f0fd.svg?style=flat-square
[deps2d-url]: http://npm.anvaka.com/#/view/2d/codsen-tokenizer
......
......@@ -1149,6 +1149,24 @@ function tokenizer(str, originalOpts) {
value: null
});
}
} else if (token.type === "esp" && attribToBackup && parentTokenToBackup && attribToBackup.attribOpeningQuoteAt && "'\"".includes(str[_i]) && str[attribToBackup.attribOpeningQuoteAt] === str[_i] && attributeEnds(str, attribToBackup.attribOpeningQuoteAt, _i)) {
token.end = _i;
token.value = str.slice(token.start, _i);
if (attribToBackup && !Array.isArray(attribToBackup.attribValue)) {
attribToBackup.attribValue = [];
}
attribToBackup.attribValue.push(token);
attribToBackup.attribValueEndsAt = _i;
attribToBackup.attribValueRaw = str.slice(attribToBackup.attribValueStartsAt, _i);
attribToBackup.attribClosingQuoteAt = _i;
attribToBackup.attribEnd = _i + 1;
token = clone(parentTokenToBackup);
token.attribs.push(attribToBackup);
attribToBackup = undefined;
parentTokenToBackup = undefined;
layers.pop();
layers.pop();
layers.pop();
}
if (!doNothing && token.type === "tag" && !Number.isInteger(attrib.attribValueStartsAt) && Number.isInteger(attrib.attribNameEndsAt) && attrib.attribNameEndsAt <= _i && str[_i] && str[_i].trim()) {
if (str[_i] === "=" && !"'\"=".includes(str[stringLeftRight.right(str, _i)]) && !espChars.includes(str[stringLeftRight.right(str, _i)])
......
......@@ -2961,11 +2961,18 @@
return;
}
var whichLayerToMatch = matchFirstInstead ? layers[0] : layers[layers.length - 1];
var whichLayerToMatch = matchFirstInstead ? layers[0] : layers[layers.length - 1]; // console.log(
// `023 matchLayer(): ${`\u001b[${33}m${`whichLayerToMatch`}\u001b[${39}m`} = ${JSON.stringify(
// whichLayerToMatch,
// null,
// 4
// )}`
// );
if (whichLayerToMatch.type !== "esp") {
// we only match against ESP tag layers, so the result is immediately falsy
// when the layer we are matching against is not an ESP tag layer
// console.log(`033 matchLayer(): early return undefined`);
return;
}
......@@ -2975,8 +2982,14 @@
Array.from(wholeEspTagLump).every(function (char) {
return whichLayerToMatch.guessedClosingLump.includes(char);
})) {
// console.log(
// `047 matchLayer(): ${`\u001b[${32}m${`RETURN`}\u001b[${39}m`} ${
// wholeEspTagLump.length
// }`
// );
return wholeEspTagLump.length;
}
} // console.log(`054 matchLayer(): finally, return undefined`);
}
// starts. Previously it sat within if() clauses but became unwieldy and
......@@ -4805,6 +4818,56 @@
value: null
});
}
} else if (token.type === "esp" && attribToBackup && parentTokenToBackup && attribToBackup.attribOpeningQuoteAt && "'\"".includes(str[_i]) && str[attribToBackup.attribOpeningQuoteAt] === str[_i] && isAttrClosing(str, attribToBackup.attribOpeningQuoteAt, _i)) {
// imagine unclosed ESP tag inside attr value:
// <tr class="{% x">
// ^
// we're here
// we need to still proactively look for closing attribute quotes,
// even inside ESP tags, if we're inside tag attributes
// 1. patch up missing token (which is type="esp" currently) values
token.end = _i;
token.value = str.slice(token.start, _i); // 2. push token into attribToBackup.attribValue
if (attribToBackup && !Array.isArray(attribToBackup.attribValue)) {
attribToBackup.attribValue = [];
}
attribToBackup.attribValue.push(token); // 3. patch up missing values in attribToBackup
attribToBackup.attribValueEndsAt = _i;
attribToBackup.attribValueRaw = str.slice(attribToBackup.attribValueStartsAt, _i);
attribToBackup.attribClosingQuoteAt = _i;
attribToBackup.attribEnd = _i + 1; // 4. restore parent token
token = lodash_clonedeep(parentTokenToBackup);
token.attribs.push(attribToBackup); // 5. reset all
attribToBackup = undefined;
parentTokenToBackup = undefined; // 6. pop the last 3 layers
// currently layers array should be like:
// [
// {
// "type": "simple",
// "value": '"',
// "position": 10
// },
// {
// "type": "esp",
// "openingLump": "{%",
// "guessedClosingLump": "%}",
// "position": 11
// }
// {
// "type": "simple",
// "value": '"',
// "position": 15
// },
// ]
layers.pop();
layers.pop();
layers.pop();
} // Catch the start of a tag attribute's value:
// -------------------------------------------------------------------------
......
......@@ -1745,6 +1745,35 @@ function tokenizer(str, originalOpts) {
value: null,
});
}
} else if (
token.type === "esp" &&
attribToBackup &&
parentTokenToBackup &&
attribToBackup.attribOpeningQuoteAt &&
`'"`.includes(str[i]) &&
str[attribToBackup.attribOpeningQuoteAt] === str[i] &&
attributeEnds(str, attribToBackup.attribOpeningQuoteAt, i)
) {
token.end = i;
token.value = str.slice(token.start, i);
if (attribToBackup && !Array.isArray(attribToBackup.attribValue)) {
attribToBackup.attribValue = [];
}
attribToBackup.attribValue.push(token);
attribToBackup.attribValueEndsAt = i;
attribToBackup.attribValueRaw = str.slice(
attribToBackup.attribValueStartsAt,
i
);
attribToBackup.attribClosingQuoteAt = i;
attribToBackup.attribEnd = i + 1;
token = clone(parentTokenToBackup);
token.attribs.push(attribToBackup);
attribToBackup = undefined;
parentTokenToBackup = undefined;
layers.pop();
layers.pop();
layers.pop();
}
if (
!doNothing &&
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -19,18 +19,18 @@ function matchLayerLast(wholeEspTagLump, layers, matchFirstInstead) {
? layers[0]
: layers[layers.length - 1];
console.log(
`023 matchLayer(): ${`\u001b[${33}m${`whichLayerToMatch`}\u001b[${39}m`} = ${JSON.stringify(
whichLayerToMatch,
null,
4
)}`
);
// console.log(
// `023 matchLayer(): ${`\u001b[${33}m${`whichLayerToMatch`}\u001b[${39}m`} = ${JSON.stringify(
// whichLayerToMatch,
// null,
// 4
// )}`
// );
if (whichLayerToMatch.type !== "esp") {
// we only match against ESP tag layers, so the result is immediately falsy
// when the layer we are matching against is not an ESP tag layer
console.log(`033 matchLayer(): early return undefined`);
// console.log(`033 matchLayer(): early return undefined`);
return;
}
......@@ -43,15 +43,15 @@ function matchLayerLast(wholeEspTagLump, layers, matchFirstInstead) {
whichLayerToMatch.guessedClosingLump.includes(char)
)
) {
console.log(
`047 matchLayer(): ${`\u001b[${32}m${`RETURN`}\u001b[${39}m`} ${
wholeEspTagLump.length
}`
);
// console.log(
// `047 matchLayer(): ${`\u001b[${32}m${`RETURN`}\u001b[${39}m`} ${
// wholeEspTagLump.length
// }`
// );
return wholeEspTagLump.length;
}
console.log(`054 matchLayer(): finally, return undefined`);
// console.log(`054 matchLayer(): finally, return undefined`);
}
export default matchLayerLast;
......@@ -280,22 +280,200 @@ tap.test(
}
);
tap.todo(
tap.test(
`05 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - tails missing completely - attr end follows`,
(t) => {
const gathered = [];
ct(`<a b="{% x">`, {
const value = `<tr class="{% x">`;
ct(value, {
tagCb: (obj) => {
gathered.push(obj);
},
});
t.match(gathered, [], "05");
t.same(
gathered,
[
{
type: "tag",
start: 0,
end: value.length,
value,
tagNameStartsAt: 1,
tagNameEndsAt: 3,
tagName: "tr",
recognised: true,
closing: false,
void: false,
pureHTML: false,
kind: null,
attribs: [
{
attribName: "class",
attribNameRecognised: true,
attribNameStartsAt: 4,
attribNameEndsAt: 9,
attribOpeningQuoteAt: 10,
attribClosingQuoteAt: 15,
attribValueRaw: "{% x",
attribValue: [
{
type: "esp",
start: 11,
end: 15,
value: "{% x",
head: "{%",
headStartsAt: 11,
headEndsAt: 13,
tail: null,
tailStartsAt: null,
tailEndsAt: null,
},
],
attribValueStartsAt: 11,
attribValueEndsAt: 15,
attribStart: 4,
attribEnd: 16,
},
],
},
],
"05"
);
t.end();
}
);
// Scenario: the ESP tag `{% x` inside the first tag's "class" attribute is
// never closed (it has heads but no tails). The attribute's closing quote
// follows, then more well-formed tags. The assertion expects every tag to be
// reported separately through tagCb, with the unclosed ESP token nested inside
// the first tag's attribute value.
tap.test(
  `06 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - tails missing completely - attr end follows`,
  (t) => {
    const source = `<tr class="{% x"><td style="z"></td></tr>`;
    const reported = [];
    const opts = {
      tagCb: (tokenObj) => {
        reported.push(tokenObj);
      },
    };
    ct(source, opts);

    // The ESP token that has heads only; all tail-related keys stay null.
    const unclosedEspToken = {
      type: "esp",
      start: 11,
      end: 15,
      value: "{% x",
      head: "{%",
      headStartsAt: 11,
      headEndsAt: 13,
      tail: null,
      tailStartsAt: null,
      tailEndsAt: null,
    };

    // <tr class="{% x"> - the attribute ends at the quote at index 15.
    const trOpening = {
      type: "tag",
      start: 0,
      end: 17,
      value: '<tr class="{% x">',
      tagNameStartsAt: 1,
      tagNameEndsAt: 3,
      tagName: "tr",
      recognised: true,
      closing: false,
      void: false,
      pureHTML: false,
      kind: null,
      attribs: [
        {
          attribName: "class",
          attribNameRecognised: true,
          attribNameStartsAt: 4,
          attribNameEndsAt: 9,
          attribOpeningQuoteAt: 10,
          attribClosingQuoteAt: 15,
          attribValueRaw: "{% x",
          attribValue: [unclosedEspToken],
          attribValueStartsAt: 11,
          attribValueEndsAt: 15,
          attribStart: 4,
          attribEnd: 16,
        },
      ],
    };

    // <td style="z"> - an ordinary tag with a plain text attribute value.
    const tdOpening = {
      type: "tag",
      start: 17,
      end: 31,
      value: '<td style="z">',
      tagNameStartsAt: 18,
      tagNameEndsAt: 20,
      tagName: "td",
      recognised: true,
      closing: false,
      void: false,
      pureHTML: true,
      kind: null,
      attribs: [
        {
          attribName: "style",
          attribNameRecognised: true,
          attribNameStartsAt: 21,
          attribNameEndsAt: 26,
          attribOpeningQuoteAt: 27,
          attribClosingQuoteAt: 29,
          attribValueRaw: "z",
          attribValue: [
            {
              type: "text",
              start: 28,
              end: 29,
              value: "z",
            },
          ],
          attribValueStartsAt: 28,
          attribValueEndsAt: 29,
          attribStart: 21,
          attribEnd: 30,
        },
      ],
    };

    // </td>
    const tdClosing = {
      type: "tag",
      start: 31,
      end: 36,
      value: "</td>",
      tagNameStartsAt: 33,
      tagNameEndsAt: 35,
      tagName: "td",
      recognised: true,
      closing: true,
      void: false,
      pureHTML: true,
      kind: null,
      attribs: [],
    };

    // </tr>
    const trClosing = {
      type: "tag",
      start: 36,
      end: 41,
      value: "</tr>",
      tagNameStartsAt: 38,
      tagNameEndsAt: 40,
      tagName: "tr",
      recognised: true,
      closing: true,
      void: false,
      pureHTML: true,
      kind: null,
      attribs: [],
    };

    t.same(reported, [trOpening, tdOpening, tdClosing, trClosing], "06");
    t.end();
  }
);
tap.todo(
`06 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - tails missing completely - attr end follows + another tag`,
`07 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - tails missing completely - attr end follows + another tag`,
(t) => {
const gathered = [];
ct(`<a b="{% x"><c d="y %}">`, {
......@@ -303,13 +481,13 @@ tap.todo(
gathered.push(obj);
},
});
t.match(gathered, [], "06");
t.match(gathered, [], "07");
t.end();
}
);
tap.todo(
`07 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - heads missing character`,
`08 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - heads missing character`,
(t) => {
const gathered = [];
ct(`<a b="{ x %}1{% y %}2">`, {
......@@ -317,13 +495,13 @@ tap.todo(
gathered.push(obj);
},
});
t.match(gathered, [], "07");
t.match(gathered, [], "08");
t.end();
}
);
tap.todo(
`08 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - heads missing completely`,
`09 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - heads missing completely`,
(t) => {
const gathered = [];
ct(`<a b="x %}1{% y %}2">`, {
......@@ -331,13 +509,13 @@ tap.todo(
gathered.push(obj);
},
});
t.match(gathered, [], "08");
t.match(gathered, [], "09");
t.end();
}
);
tap.todo(
`09 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - Venn`,
`10 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - Venn`,
(t) => {
const gathered = [];
ct(`<a b="{% x"><b c="y %}">`, {
......@@ -345,13 +523,13 @@ tap.todo(
gathered.push(obj);
},
});
t.match(gathered, [], "09");
t.match(gathered, [], "10");
t.end();
}
);
tap.todo(
`10 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - two heads, one tail only`,
`11 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - two heads, one tail only`,
(t) => {
const gathered = [];
ct(`<a b="{% {% %}">`, {
......@@ -359,13 +537,13 @@ tap.todo(
gathered.push(obj);
},
});
t.match(gathered, [], "10");
t.match(gathered, [], "11");
t.end();
}
);
tap.todo(
`11 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - two tails`,
`12 - ${`\u001b[${35}m${`broken ESP tags`}\u001b[${39}m`} - two tails`,
(t) => {
const gathered = [];
ct(`<a b="%} %}">`, {
......@@ -373,7 +551,7 @@ tap.todo(
gathered.push(obj);
},
});
t.match(gathered, [], "11");
t.match(gathered, [], "12");
t.end();
}
);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment