import fs from "fs-extra";
import path from "path";
import { Worker } from "worker_threads";
import { fileURLToPath } from "url";

// Recreate __filename/__dirname, which are not available in ES modules.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Spawn a worker thread for one chunk of URLs and resolve once it exits cleanly.
const createWorker = (urlsChunk, workerId) => {
  return new Promise((resolve, reject) => {
    const worker = new Worker(path.join(__dirname, "worker.js"), {
      workerData: { urlsChunk, workerId },
    });
    // Log progress messages the worker posts back via parentPort.
    worker.on("message", (msg) => console.log(`Worker ${workerId}: ${msg}.`));
    worker.on("error", reject);
    worker.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`Worker ${workerId} stopped with exit code ${code}.`));
      } else {
        resolve();
      }
    });
  });
};

export const downloader = async () => {
  try {
    // Load the list of URLs produced by the scraper.
    const linksPath = path.join(__dirname, "..", "data", "scraped.json");
    const links = JSON.parse(await fs.readFile(linksPath, "utf-8"));

    // Split the links into evenly sized chunks, one per worker.
    const numWorkers = 4;
    const chunkSize = Math.ceil(links.length / numWorkers);
    const workers = [];
    for (let i = 0; i < numWorkers; i++) {
      const chunk = links.slice(i * chunkSize, (i + 1) * chunkSize);
      workers.push(createWorker(chunk, i));
    }

    // Wait for every worker to finish before reporting success.
    await Promise.all(workers);
    console.log("Downloaded files successfully.");
  } catch (error) {
    if (error instanceof Error) {
      console.error(`Error: ${error.message}.`);
    } else {
      console.error("Unknown error.");
    }
  }
};
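
The module above spawns path.join(__dirname, "worker.js"), which is not shown here. For context, below is a minimal sketch of what that worker could look like; the output directory, file-naming scheme, and use of the global fetch API (Node 18+) are assumptions, not part of the original code. Top-level await also assumes the project runs as an ES module, which the import.meta.url usage above implies.

// worker.js — hypothetical sketch; the real worker is not included above.
import fs from "fs-extra";
import path from "path";
import { parentPort, workerData } from "worker_threads";
import { fileURLToPath } from "url";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const { urlsChunk, workerId } = workerData;

// Assumed output location; the original project may save files elsewhere.
const outDir = path.join(__dirname, "..", "data", "downloads");
await fs.ensureDir(outDir);

for (const [index, url] of urlsChunk.entries()) {
  // Global fetch requires Node 18+; older versions would need a library such as node-fetch.
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`GET ${url} failed with status ${response.status}`);
  }
  // Derive a unique file name from the worker id, chunk index, and the URL's extension.
  const extension = path.extname(new URL(url).pathname) || ".bin";
  const filePath = path.join(outDir, `${workerId}-${index}${extension}`);
  await fs.writeFile(filePath, Buffer.from(await response.arrayBuffer()));
  // Report progress to the parent thread, which logs it per worker.
  parentPort.postMessage(`saved ${url}`);
}

Note the design this implies: an uncaught throw in the worker makes it exit with a non-zero code, which createWorker maps to a rejected promise, so one failing chunk fails the whole downloader() run via Promise.all. An entry script would simply import and call it, e.g. await downloader();.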