Skip to content

Parallel processing [note]

Hello, just sharing a note. We use html-validate in CI, and it accounts for most of our CI run time.

Our html-validate setup also does external link caching, which uses cURL. You can see that here: https://github.com/fulldecent/github-pages-template

So we want to run it in parallel, with one worker per processor. Here is how we do that:

Main file

// build-html-validate.mjs
import { Worker } from "worker_threads";
import { glob } from "glob";
import os from "os";

// Path to the worker file, resolved relative to this module
const workerPath = new URL("./build-html-validate-worker.mjs", import.meta.url);

// Find and sort all HTML files in the 'build' directory; this array is consumed as a work queue
const targetQueue = glob.sync("build/**/*.html").sort();

if (targetQueue.length === 0) {
  console.log("No HTML files found for validation.");
  process.exit(0);
}

// Number of CPU cores
const cpuCount = os.cpus().length;

// Never start more workers than there are files, but always start at least one:
// os.cpus() may return an empty array when CPU information is unavailable, and with
// zero workers nothing would be validated while the script still exited successfully.
const maxWorkers = Math.max(1, Math.min(cpuCount, targetQueue.length));

// Flipped to false as soon as any file fails validation or any worker misbehaves
let allTestsPassed = true;

/**
 * Spawn one validation worker, attach its lifecycle handlers, and add it to the pool.
 *
 * @returns {Worker} The newly created worker; no task has been sent to it yet.
 */
function createWorker() {
  const worker = new Worker(workerPath, { type: "module" });

  // Report the outcome of one finished task, then hand the worker its next message.
  const handleResult = (message) => {
    const { success, target } = message;

    if (!success) {
      console.error(`Error validating ${target}: ${message.error}`);
      allTestsPassed = false;
    } else if (message.result.valid) {
      console.log(`✅ ${target}`);
    } else {
      console.log(`❌ ${target}`);
      console.log(message.result.results);
      allTestsPassed = false;
    }

    assignTask(worker);
  };

  // An error surfaced by the worker counts as a failed run.
  const handleFailure = (error) => {
    console.error("Worker error:", error);
    allTestsPassed = false;
  };

  // Drop the worker from the pool; the last worker to leave triggers the final report.
  const handleExit = (code) => {
    if (code !== 0) {
      console.error(`Worker stopped with exit code ${code}`);
      allTestsPassed = false;
    }

    const position = workers.indexOf(worker);
    if (position !== -1) {
      workers.splice(position, 1);
    }

    if (workers.length === 0) {
      finalize();
    }
  };

  worker.on("message", handleResult);
  worker.on("error", handleFailure);
  worker.on("exit", handleExit);

  workers.push(worker);
  return worker;
}

/**
 * Send the given worker its next message: the next queued file, or the
 * "exit" signal once the queue is empty.
 *
 * @param {Worker} worker - Worker that should receive the message.
 */
function assignTask(worker) {
  const hasPendingTargets = targetQueue.length !== 0;
  // Take from the front of the queue; fall back to the exit signal when nothing is left.
  const payload = hasPendingTargets ? targetQueue.shift() : "exit";
  worker.postMessage(payload);
}

console.log("🧪 Validating HTML files 🧪");

// Pool of live workers; createWorker() adds to it and each worker's exit handler removes from it
const workers = [];

// Spawn the workers one at a time, handing each its first task as soon as it exists
let spawned = 0;
while (spawned < maxWorkers) {
  assignTask(createWorker());
  spawned += 1;
}

/**
 * Print the overall verdict and record the process exit status.
 *
 * Called once the worker pool is empty. On failure the exit status is set to 1.
 */
function finalize() {
  if (allTestsPassed) {
    console.log("✨ Passed ✨");
    return;
  }

  console.log("❌ Some HTML files failed validation.");
  // Set exitCode instead of calling process.exit(): a forced exit can cut off
  // stdout/stderr writes that are still pending (e.g. when output is piped in CI),
  // which would drop the validation reports printed just before this point.
  process.exitCode = 1;
}

Worker mjs

// build-html-validate-worker.mjs

import { parentPort } from "worker_threads";
import { HtmlValidate, formatterFactory } from "html-validate";
import plugin from "./plugin.html-validate.mjs";

// Global error handlers
// For each event below, log the label together with the reported value, then call process.exit(1).
const fatalEventLabels = {
  uncaughtException: "Uncaught exception in worker:",
  unhandledRejection: "Unhandled rejection in worker:",
};

for (const [eventName, label] of Object.entries(fatalEventLabels)) {
  process.on(eventName, (detail) => {
    console.error(label, detail);
    process.exit(1);
  });
}

// Rules switched on at "error" severity, in addition to the extended preset
const enforcedRules = [
  "mailto-awesome",
  "external-links",
  "no-jquery",
  "canonical-link",
  "latest-packages",
  "https-links",
  "internal-links",
];

// Single HtmlValidate instance, created once when the worker module loads
const htmlValidate = new HtmlValidate({
  extends: ["html-validate:prettier"],
  plugins: [plugin],
  rules: Object.fromEntries(enforcedRules.map((ruleName) => [ruleName, "error"])),
});

// Formatter used to turn report results into the value sent back to the main thread
const formatter = formatterFactory("stylish");

/**
 * Validate one HTML file and summarise the report.
 *
 * @param {string} target - Path of the file to validate.
 * @returns {Promise<{valid: boolean, results: *}>} The report's `valid` flag and its
 *   results passed through the formatter.
 */
async function buildHtmlValidate(target) {
  const { valid, results } = await htmlValidate.validateFile(target);
  return { valid, results: formatter(results) };
}

// Listen for tasks from the main thread.
// A message is either the literal string "exit" or the path of a file to validate.
// Every validated file is answered with exactly one message:
//   { success: true, target, result } or { success: false, target, error }.
parentPort.on("message", async (message) => {
  if (message === "exit") {
    // Exit signal received
    process.exit(0);
    // Make sure the sentinel can never fall through and be treated as a file path.
    return;
  }

  const target = message;
  try {
    const result = await buildHtmlValidate(target);
    parentPort.postMessage({ success: true, target, result });
  } catch (error) {
    // Anything can be thrown, not only Error objects; fall back to the string form
    // so the main thread never reports "undefined" as the failure reason.
    const reason = error?.message ?? String(error);
    parentPort.postMessage({ success: false, target, error: reason });
  }
});

Anybody is welcome to steal this for yourself.

I'm not sure if a future version of html-validate will do some of this by itself with the main bin/ file. But just sharing what we have now in case it could help anyone.