Commit 21174667 authored by Roy Revelt

feat: improvements to esp tag recognition + some rebasing around esp tag extraction

parent 543214ab
......@@ -35,7 +35,7 @@ Copyright (c) 2015-2020 Roy Revelt and other contributors
[gitlab-img]: https://img.shields.io/badge/repo-on%20GitLab-brightgreen.svg?style=flat-square
[gitlab-url]: https://gitlab.com/codsen/codsen/tree/master/packages/codsen-tokenizer
[cov-img]: https://img.shields.io/badge/coverage-92%25-brightgreen.svg?style=flat-square
[cov-img]: https://img.shields.io/badge/coverage-92.1%25-brightgreen.svg?style=flat-square
[cov-url]: https://gitlab.com/codsen/codsen/tree/master/packages/codsen-tokenizer
[deps2d-img]: https://img.shields.io/badge/deps%20in%202D-see_here-08f0fd.svg?style=flat-square
[deps2d-url]: http://npm.anvaka.com/#/view/2d/codsen-tokenizer
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
import { espChars } from "./util";
/**
 * Collects the unbroken run of ESP-tag characters (the "lump") that begins at
 * index `i` and extends to the right. When the lump turns out to be the tails
 * of the currently open ESP layer glued to the heads of a following tag, only
 * the tails portion is returned.
 *
 * @param {string} str - source string being scanned
 * @param {number} i - index of the first character of the lump
 * @param {Array} [layers] - stack of open layers; only its last entry is read
 *   (its `type`, `openingLump` and `guessedClosingLump` fields)
 * @returns {string} the whole lump, or just its leading tails part; when no
 *   further character qualifies this is simply `str[i]`
 */
function getWholeEspTagLumpOnTheRight(str, i, layers) {
  let lump = str[i];
  const strLength = str.length;

  // the innermost layer is consulted in several places, so resolve it once
  const hasLayers = Array.isArray(layers) && layers.length > 0;
  const lastLayer = hasLayers ? layers[layers.length - 1] : undefined;

  // true if the lump gathered so far holds any opening-polarity bracket
  const lumpHasOpeningBracket = () =>
    [`{`, `[`, `(`].some((bracket) => lump.includes(bracket));

  // true if `char` is the ">" finishing an RPL comment's tails, as in:
  // <#-- z -->
  const closesRplComment = (char) =>
    char === ">" &&
    lump === "--" &&
    hasLayers &&
    lastLayer.type === "esp" &&
    lastLayer.openingLump[0] === "<" &&
    lastLayer.openingLump[2] === "-" &&
    lastLayer.openingLump[3] === "-";

  console.log(
    `008 getWholeEspTagLumpOnTheRight(): ${`\u001b[${32}m${`START`}\u001b[${39}m`}`
  );

  for (let pos = i + 1; pos < strLength; pos++) {
    const char = str[pos];
    console.log(
      `013 getWholeEspTagLumpOnTheRight(): ${`\u001b[${36}m${`str[${pos}]=${char}`}\u001b[${39}m`}`
    );

    // In something like
    //   ${(y/4)?int}
    //     ^
    // the round bracket is not part of the heads: once the lump is at least
    // two characters long and already carries an opening bracket, a "("
    // ends the lump.
    if (lump.length > 1 && lumpHasOpeningBracket() && char === "(") {
      console.log(
        `031 getWholeEspTagLumpOnTheRight(): ${`\u001b[${31}m${`BREAK`}\u001b[${39}m`}`
      );
      break;
    }

    const belongsToLump =
      espChars.includes(char) ||
      // slash right after "<", as in closing tags like </#if>
      (str[i] === "<" && char === "/") ||
      closesRplComment(char);

    if (!belongsToLump) {
      console.log(`051 ${`\u001b[${31}m${`BREAK`}\u001b[${39}m`}`);
      break;
    }
    lump += char;
  }

  // A lump longer than the guessed tails may be tails + heads stuck together:
  //   {%- a -%}{%- b -%}
  //         ^^^^^^
  // in which case only the tails are reported.
  const lumpOutgrowsTails =
    lump &&
    hasLayers &&
    lastLayer.type === "esp" &&
    lastLayer.guessedClosingLump &&
    lump.length > lastLayer.guessedClosingLump.length;

  if (lumpOutgrowsTails) {
    const { openingLump, guessedClosingLump } = lastLayer;

    // Case I: the lump ends with the very same heads that opened the layer,
    // so everything in front of those heads is the tails.
    if (lump.endsWith(openingLump)) {
      return lump.slice(0, lump.length - openingLump.length);
    }

    // Case II: heads and tails are not mirror images, for example
    //   {%- aa %}{% bb %}
    // where heads were "{%-", guessed tails "-%}", yet the lump is "%}{%".
    // Walk the lump, ticking off each distinct character of the guessed
    // tails once; cut at the first character that can't be ticked off after
    // at least two have been.
    const unmatched = new Set(guessedClosingLump);
    let matchedCount = 0;
    for (let idx = 0; idx < lump.length; idx++) {
      const char = lump[idx];
      if (unmatched.has(char)) {
        matchedCount += 1;
        unmatched.delete(char);
      } else if (matchedCount > 1) {
        return lump.slice(0, idx);
      }
    }
  }

  console.log(`130 getWholeEspTagLumpOnTheRight(): final return`);
  return lump;
}
export default getWholeEspTagLumpOnTheRight;
import matchLayerLast from "./matchLayerLast";
/**
 * Convenience wrapper around matchLayerLast(): forwards all three arguments
 * unchanged and passes `true` as the fourth one, which the four-argument
 * matchLayerLast() names `matchFirstInstead`.
 *
 * @param {string} str2 - forwarded as the first argument
 * @param {number} i - forwarded as the second argument
 * @param {Array} layers - forwarded as the third argument
 * @returns {*} whatever matchLayerLast() returns
 */
function matchLayerFirst(str2, i, layers) {
  const matchFirstInstead = true;
  return matchLayerLast(str2, i, layers, matchFirstInstead);
}
export default matchLayerFirst;
import { flipEspTag, espChars } from "./util";
// RETURNS: bool false or integer, length of a matched ESP lump.
function matchLayerLast(str, i, layers, matchFirstInstead) {
// RETURNS: undefined or integer, length of a matched ESP lump.
function matchLayerLast(wholeEspTagLump, layers, matchFirstInstead) {
if (!layers.length) {
return false;
return;
}
const whichLayerToMatch = matchFirstInstead
? layers[0]
: layers[layers.length - 1];
console.log(
`012 matchLayerLast(): ${`\u001b[${33}m${`whichLayerToMatch`}\u001b[${39}m`} = ${JSON.stringify(
`011 matchLayer(): ${`\u001b[${33}m${`whichLayerToMatch`}\u001b[${39}m`} = ${JSON.stringify(
whichLayerToMatch,
null,
4
)}`
);
if (whichLayerToMatch.type === "simple") {
return (
!whichLayerToMatch.value || str[i] === flipEspTag(whichLayerToMatch.value)
);
}
if (whichLayerToMatch.type === "esp") {
console.log(`024 matchLayerLast(): matching esp tag`);
if (
!espChars.includes(str[i]) &&
!(
str[i] === ">" &&
Array.isArray(layers) &&
layers.length &&
layers[layers.length - 1].type === "esp" &&
layers[layers.length - 1].openingLump[0] === "<"
)
) {
console.log(
`036 matchLayerLast(): return false because ${str[i]} is not ESP char`
);
return false;
}
let wholeEspTagLump = "";
if (str[i] === ">") {
wholeEspTagLump = ">";
} else {
console.log(`045 matchLayerLast(): extracting esp lump`);
// so the first character is from ESP tags list
// 1. extract esp tag lump
console.log(
`049 matchLayerLast(): ${`\u001b[${36}m${`LOOP`}\u001b[${39}m`}`
);
for (let y = i; y < str.length; y++) {
console.log(
`053 ${`\u001b[${36}m${`---- str[${y}]=${str[y]}`}\u001b[${39}m`}`
);
if (espChars.includes(str[y])) {
wholeEspTagLump += str[y];
} else {
console.log(`058 ${`\u001b[${36}m${`---- BREAK`}\u001b[${39}m`}`);
break;
}
}
}
if (whichLayerToMatch.type !== "esp") {
// we aim to match ESP tag layers, so instantly it's falsey result
// because layer we match against is not ESP tag layer
console.log(`021 matchLayer(): early return undefined`);
return;
}
if (
// match every character from the last "layers" complex-type entry must be
// present in the extracted lump
Array.from(wholeEspTagLump).every((char) =>
whichLayerToMatch.guessedClosingLump.includes(char)
)
) {
console.log(
`065 matchLayerLast(): ${`\u001b[${33}m${`wholeEspTagLump`}\u001b[${39}m`} = ${JSON.stringify(
wholeEspTagLump,
null,
4
)}`
);
console.log(
`072 matchLayerLast(): ${`\u001b[${33}m${`whichLayerToMatch.openingLump`}\u001b[${39}m`} = ${JSON.stringify(
whichLayerToMatch.openingLump,
null,
4
)}`
`033 matchLayer(): ${`\u001b[${32}m${`RETURN`}\u001b[${39}m`} ${
wholeEspTagLump.length
}`
);
if (wholeEspTagLump.length === 1) {
console.log(
`081 matchLayerLast(): ${`\u001b[${32}m${`RETURN`}\u001b[${39}m`} 1`
);
return 1;
}
// if lump is tails+heads, report the length of tails only:
// {%- a -%}{%- b -%}
// ^
// we're talking about this lump of tails and heads
if (
wholeEspTagLump &&
whichLayerToMatch.openingLump &&
wholeEspTagLump.length > whichLayerToMatch.guessedClosingLump.length
) {
if (wholeEspTagLump.endsWith(whichLayerToMatch.openingLump)) {
// no need to extract tails, heads "{%-" were confirmed in example:
// {%- a -%}{%- b -%}
// ^
// here
console.log(
`101 matchLayerLast(): ${`\u001b[${32}m${`RETURN`}\u001b[${39}m`} ${
wholeEspTagLump.length - whichLayerToMatch.openingLump.length
}`
);
return wholeEspTagLump.length - whichLayerToMatch.openingLump.length;
}
// else {
// imagine case like:
// {%- aa %}{% bb %}
// opening heads were {%-, flipped were -%}. Now when we take lump %}{%
// and match, the dash will be missing.
// What we're going to do is we'll split the lump where last matched
// continuous chunk ends (%} in example above) with condition that
// at least one character from ESP-list follows, which is not part of
// guessed closing lump.
let uniqueCharsListFromGuessedClosingLumpArr = new Set(
whichLayerToMatch.guessedClosingLump
);
console.log(
`121 ${`\u001b[${33}m${`uniqueCharsListFromGuessedClosingLumpArr`}\u001b[${39}m`} = ${JSON.stringify(
uniqueCharsListFromGuessedClosingLumpArr,
null,
0
)}`
);
let found = 0;
for (let y = 0, len2 = wholeEspTagLump.length; y < len2; y++) {
console.log(`130 char = ${wholeEspTagLump[y]}`);
if (
!uniqueCharsListFromGuessedClosingLumpArr.has(wholeEspTagLump[y]) &&
found > 1
) {
console.log(`136 RETURN ${y}`);
return y;
}
if (uniqueCharsListFromGuessedClosingLumpArr.has(wholeEspTagLump[y])) {
found += 1;
uniqueCharsListFromGuessedClosingLumpArr = new Set(
[...uniqueCharsListFromGuessedClosingLumpArr].filter(
(el) => el !== wholeEspTagLump[y]
)
);
console.log(
`148 SET found = ${found}; uniqueCharsListFromGuessedClosingLumpArr = ${JSON.stringify(
uniqueCharsListFromGuessedClosingLumpArr,
null,
0
)}`
);
}
}
} else if (
// match every character from the last "layers" complex-type entry must be
// present in the extracted lump
whichLayerToMatch.guessedClosingLump
.split("")
.every((char) => wholeEspTagLump.includes(char))
) {
console.log(`163 RETURN ${wholeEspTagLump.length}`);
return wholeEspTagLump.length;
}
return wholeEspTagLump.length;
}
console.log(`040 matchLayer(): finally, return undefined`);
}
export default matchLayerLast;
......@@ -4,7 +4,7 @@ import { matchLeft, matchRight } from "string-match-left-right";
// starts. Previously it sat within if() clauses but became unwieldy and
// so we extracted into a function.
function startsComment(str, i, token) {
function startsComment(str, i, token, layers) {
// console.log(
// `R1: ${!!matchRight(str, i, ["!--"], {
// maxMismatches: 1,
......@@ -34,7 +34,7 @@ function startsComment(str, i, token) {
// );
return (
// the opening is deliberately loose, with one dash missing, "!-" instead of "!--"
((str[i] === "<" &&
(str[i] === "<" &&
(matchRight(str, i, ["!--"], {
maxMismatches: 1,
firstMustMatch: true, // <--- FUZZY MATCH, BUT EXCL. MARK IS OBLIGATORY
......@@ -51,17 +51,24 @@ function startsComment(str, i, token) {
trimBeforeMatching: true,
}) &&
(token.type !== "comment" || token.kind !== "not")) ||
(str[i] === "-" &&
matchRight(str, i, ["->"], {
trimBeforeMatching: true,
}) &&
(token.type !== "comment" ||
(!token.closing && token.kind !== "not")) &&
!matchLeft(str, i, "<", {
trimBeforeMatching: true,
trimCharsBeforeMatching: ["-", "!"],
}))) &&
(token.type !== "esp" || !token.tail || token.tail.includes(str[i]))
(str[i] === "-" &&
matchRight(str, i, ["->"], {
trimBeforeMatching: true,
}) &&
(token.type !== "comment" || (!token.closing && token.kind !== "not")) &&
!matchLeft(str, i, "<", {
trimBeforeMatching: true,
trimCharsBeforeMatching: ["-", "!"],
}) &&
// insurance against ESP tag, RPL comments: <#-- z -->
(!Array.isArray(layers) ||
!layers.length ||
layers[layers.length - 1].type !== "esp" ||
!(
layers[layers.length - 1].openingLump[0] === "<" &&
layers[layers.length - 1].openingLump[2] === "-" &&
layers[layers.length - 1].openingLump[3] === "-"
)))
);
}
......
......@@ -8,8 +8,6 @@ import { flipEspTag, espChars, xBeforeYOnTheRight } from "./util";
function startsEsp(str, i, token, layers, styleStarts) {
console.log(
`010 startsEsp(): RETURNS ${
// 1. two consecutive esp characters - Liquid, Mailchimp etc.
// {{ or |* and so on
(espChars.includes(str[i]) &&
str[i + 1] &&
espChars.includes(str[i + 1]) &&
......@@ -18,43 +16,34 @@ function startsEsp(str, i, token, layers, styleStarts) {
!(str[i] === "-" && "-{(".includes(str[i + 1])) &&
!("})".includes(str[i]) && "-".includes(str[i + 1])) &&
!(
// insurance against repeated percentages
(
str[i] === "%" &&
"0123456789".includes(str[left(str, i)]) &&
(!str[i + 2] ||
[`"`, `'`, ";"].includes(str[i + 2]) ||
!str[i + 2].trim().length)
)
str[i] === "%" &&
"0123456789".includes(str[left(str, i)]) &&
(!str[i + 2] ||
[`"`, `'`, ";"].includes(str[i + 2]) ||
!str[i + 2].trim().length)
) &&
!(
styleStarts &&
("{}".includes(str[i]) || "{}".includes(str[right(str, i)]))
)) ||
//
// 2. html-like syntax - Responsys RPL and similar
// <#if z> or </#if> and so on
// normal opening tag
(str[i] === "<" &&
// and
// either it's closing tag and what follows is ESP-char
((str[i + 1] === "/" && espChars.includes(str[i + 2])) ||
// or
// it's not closing and esp char follows right away
espChars.includes(str[i + 1]))) ||
//
// 3. single character tails, for example RPL's closing curlies: ${zzz}
// it's specifically a closing-kind character
(espChars.includes(str[i + 1]) && !["-"].includes(str[i + 1])))) ||
(`>})`.includes(str[i]) &&
// heads include the opposite of it
Array.isArray(layers) &&
layers.length &&
layers[layers.length - 1].type === "esp" &&
layers[layers.length - 1].openingLump.includes(flipEspTag(str[i])) &&
// insurance against "greater than", as in:
// <#if product.weight > 100>
(str[i] !== ">" || !xBeforeYOnTheRight(str, i + 1, ">", "<")))
(str[i] !== ">" || !xBeforeYOnTheRight(str, i + 1, ">", "<"))) ||
(str[i] === "-" &&
str[i + 1] === "-" &&
str[i + 2] === ">" &&
Array.isArray(layers) &&
layers.length &&
layers[layers.length - 1].type === "esp" &&
layers[layers.length - 1].openingLump[0] === "<" &&
layers[layers.length - 1].openingLump[2] === "-" &&
layers[layers.length - 1].openingLump[3] === "-")
}`
);
return (
......@@ -106,7 +95,19 @@ function startsEsp(str, i, token, layers, styleStarts) {
layers[layers.length - 1].openingLump.includes(flipEspTag(str[i])) &&
// insurance against "greater than", as in:
// <#if product.weight > 100>
(str[i] !== ">" || !xBeforeYOnTheRight(str, i + 1, ">", "<")))
(str[i] !== ">" || !xBeforeYOnTheRight(str, i + 1, ">", "<"))) ||
//
// 4. comment closing in RPL-like templating languages, for example:
// <#-- z -->
(str[i] === "-" &&
str[i + 1] === "-" &&
str[i + 2] === ">" &&
Array.isArray(layers) &&
layers.length &&
layers[layers.length - 1].type === "esp" &&
layers[layers.length - 1].openingLump[0] === "<" &&
layers[layers.length - 1].openingLump[2] === "-" &&
layers[layers.length - 1].openingLump[3] === "-")
);
}
......
......@@ -142,7 +142,7 @@ tap.test(
);
tap.test(
`05 - ${`\u001b[${33}m${`no overlap`}\u001b[${39}m`} - Responsys-style ESP tag`,
`05 - ${`\u001b[${33}m${`no overlap`}\u001b[${39}m`} - dollar + round brackets`,
(t) => {
const gathered = [];
ct(`<a>$(something)<b>`, {
......@@ -157,6 +157,7 @@ tap.test(
type: "tag",
start: 0,
end: 3,
value: "<a>",
},
{
type: "esp",
......@@ -164,11 +165,13 @@ tap.test(
end: 15,
head: "$(",
tail: ")",
value: "$(something)",
},
{
type: "tag",
start: 15,
end: 18,
value: "<b>",
},
],
"05.01"
......
......@@ -397,7 +397,7 @@ tap.test(
`06 - ${`\u001b[${35}m${`ESP tags within attr values`}\u001b[${39}m`} - otherwise a sensitive characters inside ESP tag`,
(t) => {
const gathered = [];
ct(`<a>{% if a<b and c>d '"'''' ><>< %}<b>`, {
ct(`<a>{% if a<b and c>d '"' ><>< %}<b>`, {
tagCb: (obj) => {
gathered.push(obj);
},
......@@ -413,13 +413,14 @@ tap.test(
{
type: "esp",
start: 3,
end: 35,
end: 32,
head: "{%",
tail: "%}",
},
{
type: "tag",
start: 35,
end: 38,
start: 32,
end: 35,
},
],
"06.01"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment