Parallel processing [note]
Hello, just sharing a note. We use html-validate in CI, and it accounts for most of our CI run time.
Our html-validate setup also caches external-link checks, which use cURL. You can see that here: https://github.com/fulldecent/github-pages-template
So we want to run it in parallel, with one worker per processor. Here is how we do that:
Main file
// build-html-validate.mjs
import { Worker } from "worker_threads";
import { glob } from "glob";
import os from "os";
// Path to the worker file, resolved relative to this module
const workerPath = new URL("./build-html-validate-worker.mjs", import.meta.url);
// Find and sort all HTML files in the 'build' directory
const targetQueue = glob.sync("build/**/*.html").sort();
if (targetQueue.length === 0) {
  console.log("No HTML files found for validation.");
  process.exit(0);
}
// Degree of parallelism to aim for. os.cpus().length is not a reliable
// measure of usable parallelism and may be 0 in restricted environments;
// a pool size of 0 would start no workers and let the script end without
// validating anything. Prefer os.availableParallelism() where the runtime
// provides it, and never go below 1.
const cpuCount =
  typeof os.availableParallelism === "function"
    ? os.availableParallelism()
    : Math.max(1, os.cpus().length);
// Never start more workers than there are files to validate
const maxWorkers = Math.min(cpuCount, targetQueue.length);
// Flipped to false by any failed validation, worker error, or abnormal exit
let allTestsPassed = true;
/**
 * Starts one validation worker, wires up its event handlers, and registers
 * it in the shared `workers` pool.
 *
 * - "message": prints the outcome for the reported file, records failures in
 *   `allTestsPassed`, then hands the worker its next task.
 * - "error": logs the error and marks the run as failed.
 * - "exit": marks the run as failed on a non-zero code, drops the worker from
 *   the pool, and calls `finalize()` once the pool is empty.
 *
 * @returns {Worker} The newly created worker (no task assigned yet).
 */
function createWorker() {
  const worker = new Worker(workerPath, { type: "module" });

  // Prints the outcome of one validation message and records any failure
  const reportOutcome = (message) => {
    if (!message.success) {
      console.error(`Error validating ${message.target}: ${message.error}`);
      allTestsPassed = false;
      return;
    }
    const { target, result } = message;
    if (result.valid) {
      console.log(`✅ ${target}`);
      return;
    }
    console.log(`❌ ${target}`);
    console.log(result.results);
    allTestsPassed = false;
  };

  worker.on("message", (message) => {
    reportOutcome(message);
    // The worker is idle again: give it the next file (or the exit signal)
    assignTask(worker);
  });

  worker.on("error", (error) => {
    console.error("Worker error:", error);
    allTestsPassed = false;
  });

  worker.on("exit", (code) => {
    const exitedCleanly = code === 0;
    if (!exitedCleanly) {
      console.error(`Worker stopped with exit code ${code}`);
      allTestsPassed = false;
    }
    // Drop this worker from the pool
    const position = workers.indexOf(worker);
    if (position >= 0) {
      workers.splice(position, 1);
    }
    // Other workers are still running; the last one to exit finalizes
    if (workers.length > 0) {
      return;
    }
    finalize();
  });

  workers.push(worker);
  return worker;
}
/**
 * Sends the given worker its next message: the next queued file path if any
 * remain, otherwise the "exit" signal telling it to shut down.
 *
 * @param {Worker} worker - Worker that is ready for more work.
 */
function assignTask(worker) {
  const hasPendingTargets = targetQueue.length > 0;
  // Taking from the front keeps files in their sorted order
  const payload = hasPendingTargets ? targetQueue.shift() : "exit";
  worker.postMessage(payload);
}
// Announce the run before any worker output appears
console.log("🧪 Validating HTML files 🧪");
// Pool of live workers; each worker is removed again when it exits
const workers = [];
// Start the pool: every new worker immediately receives its first task
for (let remaining = maxWorkers; remaining > 0; remaining -= 1) {
  assignTask(createWorker());
}
/**
 * Prints the overall result of the run and, on failure, marks the process
 * as failed.
 *
 * Reads the module-level `allTestsPassed` flag. On failure it sets
 * `process.exitCode` to 1 instead of calling `process.exit(1)`, so the
 * process ends naturally and pending stdout/stderr writes are not cut off.
 */
function finalize() {
  if (allTestsPassed) {
    console.log("✨ Passed ✨");
    return;
  }
  console.log("❌ Some HTML files failed validation.");
  // Let the event loop drain instead of forcing termination
  process.exitCode = 1;
}
Worker mjs
// build-html-validate-worker.mjs
import { parentPort } from "worker_threads";
import { HtmlValidate, formatterFactory } from "html-validate";
import plugin from "./plugin.html-validate.mjs";
// Global error handlers: any uncaught exception or unhandled promise
// rejection is logged with a label and then ends the worker with exit code 1.
for (const [eventName, label] of [
  ["uncaughtException", "Uncaught exception in worker:"],
  ["unhandledRejection", "Unhandled rejection in worker:"],
]) {
  process.on(eventName, (problem) => {
    console.error(label, problem);
    process.exit(1);
  });
}
// Shared HtmlValidate instance for this worker. Every rule named in the list
// below is switched on at "error" severity, on top of the
// "html-validate:prettier" preset and the local plugin.
const htmlValidate = new HtmlValidate({
  extends: ["html-validate:prettier"],
  plugins: [plugin],
  rules: Object.fromEntries(
    [
      "mailto-awesome",
      "external-links",
      "no-jquery",
      "canonical-link",
      "latest-packages",
      "https-links",
      "internal-links",
    ].map((ruleName) => [ruleName, "error"]),
  ),
});
// Formatter that turns validation results into "stylish" report text
const formatter = formatterFactory("stylish");
/**
 * Validates one HTML file with the shared `htmlValidate` instance.
 *
 * @param {string} target - Path of the HTML file to validate.
 * @returns {Promise<{valid: boolean, results: *}>} The report's `valid` flag
 *   and the report's results passed through the shared `formatter`.
 */
async function buildHtmlValidate(target) {
  const { valid, results } = await htmlValidate.validateFile(target);
  return { valid, results: formatter(results) };
}
// Listen for tasks from the main thread. Each message is either the string
// "exit" (shut this worker down) or the path of one HTML file to validate.
// Every validation is answered with exactly one message:
//   { success: true, target, result }  or  { success: false, target, error }
parentPort.on("message", async (message) => {
  if (message === "exit") {
    // Exit signal received; inside a worker this stops only this thread
    process.exit(0);
    // Never fall through and treat "exit" as a file path
    return;
  }
  const target = message;
  try {
    const result = await buildHtmlValidate(target);
    parentPort.postMessage({ success: true, target, result });
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; fall back to its
    // string form so the main thread never reports "undefined"
    const description = error instanceof Error ? error.message : String(error);
    parentPort.postMessage({ success: false, target, error: description });
  }
});
Anybody is welcome to steal this for yourself.
I'm not sure whether a future version of html-validate will do some of this by itself in its main bin/ file, but I'm sharing what we have now in case it helps anyone.