Commit 7a2d264f authored by David Sveningsson's avatar David Sveningsson
Browse files

feat(rules): new rule `no-utf8-bom` disallowing usage of UTF-8 BOM

parent 8b1a6bc3
Pipeline #255884669 passed with stages
in 11 minutes and 23 seconds
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`docs/rules/no-utf8-bom.md inline validation: correct 1`] = `Array []`;
exports[`docs/rules/no-utf8-bom.md inline validation: incorrect 1`] = `Array []`;
import HtmlValidate from "../../../src/htmlvalidate";
/* Markup samples used by the inline validation tests below, keyed by example name. */
const markup: { [key: string]: string } = {
  incorrect: `<!DOCTYPE html>`,
  correct: `<!DOCTYPE html>`,
};
describe("docs/rules/no-utf8-bom.md", () => {
  /**
   * Validates the named markup sample with a fresh validator that has only
   * `no-utf8-bom` enabled (as an error) and returns the report's results.
   */
  const resultsFor = (example: string) => {
    const validator = new HtmlValidate({ rules: { "no-utf8-bom": "error" } });
    return validator.validateString(markup[example]).results;
  };

  it("inline validation: incorrect", () => {
    expect.assertions(1);
    expect(resultsFor("incorrect")).toMatchSnapshot();
  });

  it("inline validation: correct", () => {
    expect.assertions(1);
    expect(resultsFor("correct")).toMatchSnapshot();
  });
});
---
docType: rule
name: no-utf8-bom
category: document
summary: Disallow documents from having UTF-8 BOM
---
# Disallow documents from having UTF-8 BOM (`no-utf8-bom`)
A Unicode byte order mark (BOM) is needed in UTF-16 and UTF-32 to determine endianness, but for UTF-8 it has no meaning.
Browsers are required to handle the BOM under all circumstances, but tooling might not handle it properly, so it is best left out.
Instead the document should be served with the `Content-Type: text/html; charset=utf-8` header and/or the `<meta charset="utf-8">` meta-tag.
......@@ -40,6 +40,7 @@ const config: ConfigData = {
"no-redundant-role": "error",
"no-self-closing": "error",
"no-trailing-whitespace": "error",
"no-utf8-bom": "error",
"prefer-button": "error",
"prefer-native-element": "error",
"prefer-tbody": "error",
......
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`rule no-utf8-bom should contain documentation 1`] = `
Object {
"description": "This file is saved with the UTF-8 byte order mark (BOM) present. It is neither required or recommended to use.
Instead the document should be served with the \`Content-Type: application/javascript; charset=utf-8\` header.",
"url": "https://html-validate.org/rules/no-utf8-bom.html",
}
`;
......@@ -46,6 +46,7 @@ import NoSelfClosing from "./no-self-closing";
import NoStyleTag from "./no-style-tag";
import NoTrailingWhitespace from "./no-trailing-whitespace";
import NoUnknownElements from "./no-unknown-elements";
import NoUtf8Bom from "./no-utf8-bom";
import PreferButton from "./prefer-button";
import PreferNativeElement from "./prefer-native-element";
import PreferTbody from "./prefer-tbody";
......@@ -108,6 +109,7 @@ const bundledRules: Record<string, RuleConstructor<any, any>> = {
"no-style-tag": NoStyleTag,
"no-trailing-whitespace": NoTrailingWhitespace,
"no-unknown-elements": NoUnknownElements,
"no-utf8-bom": NoUtf8Bom,
"prefer-button": PreferButton,
"prefer-native-element": PreferNativeElement,
"prefer-tbody": PreferTbody,
......
import HtmlValidate from "../htmlvalidate";
import "../matchers";
describe("rule no-utf8-bom", () => {
  /* U+FEFF, the code point used as byte order mark in the cases below */
  const BOM = "\uFEFF";

  /* validator shared by all cases; only the rule under test is enabled */
  let validator: HtmlValidate;

  beforeAll(() => {
    validator = new HtmlValidate({
      rules: { "no-utf8-bom": ["error"] },
    });
  });

  it("should not report when no BOM is included", () => {
    expect.assertions(1);
    const report = validator.validateString("<p>lorem ipsum</p>");
    expect(report).toBeValid();
  });

  it("should not report when BOM is present elsewhere", () => {
    expect.assertions(1);
    /* the code point sits in the middle of the text, not at the start */
    const report = validator.validateString(`<p>lorem${BOM}ipsum</p>`);
    expect(report).toBeValid();
  });

  it("should report error file starts with UTF-8 BOM", () => {
    expect.assertions(2);
    const report = validator.validateString(`${BOM}<p>lorem ipsum</p>`);
    expect(report).toBeInvalid();
    expect(report).toHaveError("no-utf8-bom", "File should be saved without UTF-8 BOM");
  });

  it("should contain documentation", () => {
    expect.assertions(1);
    const documentation = validator.getRuleDocumentation("no-utf8-bom");
    expect(documentation).toMatchSnapshot();
  });
});
import { TokenEvent } from "../event";
import { TokenType } from "../lexer";
import { Rule, RuleDocumentation, ruleDocumentationUrl } from "../rule";
/**
 * Rule `no-utf8-bom`.
 *
 * Inspects the first `token` event it receives and reports an error when that
 * token is a `UNICODE_BOM` token.
 */
export default class NoUtf8Bom extends Rule {
  /**
   * Returns the contextual description for this rule together with the
   * documentation URL derived from this file's name.
   */
  public documentation(): RuleDocumentation {
    /* NOTE(review): the header example names `application/javascript`; for HTML
     * documents `text/html` would be expected. The text is left unchanged here
     * because the documentation snapshot pins it; confirm and update both
     * together. */
    const paragraphs = [
      "This file is saved with the UTF-8 byte order mark (BOM) present. It is neither required or recommended to use.",
      "Instead the document should be served with the `Content-Type: application/javascript; charset=utf-8` header.",
    ];
    return {
      description: paragraphs.join("\n\n"),
      url: ruleDocumentationUrl(__filename),
    };
  }

  /**
   * Subscribes to `token` events. The handler reports when the token is a
   * BOM, then disables the rule and calls the function returned by `this.on`
   * so later tokens are not inspected.
   */
  public setup(): void {
    const stopListening = this.on("token", (event: TokenEvent) => {
      const isBom = event.type === TokenType.UNICODE_BOM;
      if (isBom) {
        this.report(null, "File should be saved without UTF-8 BOM", event.location);
      }

      /* a BOM can only be the very first thing in the file, so whatever this
       * token was there is nothing left to check for the rest of the run */
      this.setEnabled(false);
      stopListening();
    });
  }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment