Commit ca8895f1 authored by gustawdaniel
Browse files

Parallel scraping of profile pages

parent 79624db3
import fs from "fs";
import axios from 'axios';
import {getConfig} from "./helpers";
import {getConfig, Output} from "./helpers";
// ANSI escape sequence that resets terminal text attributes to their defaults.
const Reset = "\x1b[0m"
// ANSI escape sequence that switches the terminal foreground colour to red.
const FgRed = "\x1b[31m"
......@@ -8,18 +8,31 @@ const FgGreen = "\x1b[32m"
// Timestamp (ms since epoch) captured once at module load; log lines report `now - init` as total elapsed time.
const init = new Date().getTime();
// Timestamp (ms since epoch) of the most recent log line; reassigned after each log so `now - last` is the gap between responses.
let last = new Date().getTime();
// Number of append() calls currently in flight: incremented inside append(), decremented by the caller once the promise settles.
let queueLength = 0;
// Upper bound on queueLength: main() starts a new download only while fewer than this many are in flight.
const MAX_QUEUE_LENGTH = 500;
/**
 * Takes the last link off `links`, downloads it and stores the response body
 * in `<cwd>/raw/<name>.html`, where `<name>` is the final path segment of the link.
 *
 * One tab-separated row is logged per request: status, links still waiting,
 * requests in flight, ms since the previous row, ms since start, name.
 * The row is green for HTTP 200 and red otherwise.
 *
 * A failed request or write is logged as a red `ERR` row (with the error
 * message appended) instead of rejecting, so a single bad link cannot surface
 * as an unhandled promise rejection in a caller that only attaches `.finally`.
 *
 * @param links Shared work list; mutated in place (one element is popped).
 */
const append = async (links: string[]) => {
    // Incremented synchronously, before the first await, so the caller's
    // throttle sees this request as in flight immediately. The caller is
    // responsible for decrementing once the returned promise settles.
    queueLength++;

    const link = links.pop();
    // Empty work list (or an empty entry): nothing to download.
    if (!link) return;

    const name = link.split('/').reverse()[0];
    try {
        const {data, status} = await axios.get(link);
        fs.writeFileSync(process.cwd() + `/raw/${name}.html`, data);
        const now = new Date().getTime();
        console.log(status === 200 ? `${FgGreen}%s\t%s\t%s\t%s\t%s\t%s${Reset}` : `${FgRed}%s\t%s\t%s\t%s\t%s\t%s${Reset}`,
            status, links.length, queueLength, now - last, now - init, name
        );
    } catch (e) {
        const reason = e instanceof Error ? e.message : String(e);
        const now = new Date().getTime();
        console.error(`${FgRed}%s\t%s\t%s\t%s\t%s\t%s\t%s${Reset}`,
            'ERR', links.length, queueLength, now - last, now - init, name, reason
        );
    }
    last = new Date().getTime();
}
/**
 * Returns a promise that resolves (with no value) once `time` milliseconds
 * have passed, as scheduled by `setTimeout`.
 */
const sleep = (time: number): Promise<unknown> => {
    return new Promise((done) => {
        setTimeout(done, time);
    });
}
/**
 * Downloads every link returned by `getConfig()` with bounded parallelism.
 *
 * The loop wakes up every 9 ms and, while fewer than MAX_QUEUE_LENGTH
 * downloads are in flight, starts one more via `append`, which pops its link
 * from the shared `links` array. The loop ends when the array is empty; the
 * function then waits for the downloads that are still running, so the
 * returned promise resolves only after all of them have settled.
 */
const main = async () => {
    const links = getConfig().map((a: { link: string }): string => a.link);
    const inFlight: Promise<void>[] = [];

    while (links.length) {
        // Short pause between launches; also yields to the event loop so
        // settled downloads can release their slot.
        await sleep(9);
        if (queueLength < MAX_QUEUE_LENGTH) {
            inFlight.push(
                append(links)
                    // A failed download must not become an unhandled rejection.
                    .catch((e: unknown) => {
                        console.error(`${FgRed}%s${Reset}`, e instanceof Error ? e.message : String(e));
                    })
                    // Release the slot whether the download succeeded or failed.
                    .finally(() => {
                        queueLength--;
                    })
            );
        }
    }

    // Every entry has its own catch handler, so this never rejects.
    await Promise.all(inFlight);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment