Upload of project source code files.
This commit is contained in:
15
dist/data/config.js
vendored
Normal file
15
dist/data/config.js
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
// Discord user token used for API authentication.
// SECURITY: a real token was previously committed here in plain text — it is
// compromised and must be rotated immediately. Supply the replacement via the
// DISCORD_TOKEN environment variable instead of committing it to source.
export const token = process.env.DISCORD_TOKEN ?? "";

// Channel IDs whose message attachments will be scraped.
export const channels_to_fetch = [
  "1206451476147478599",
  "1206451493771677716",
  "1278448478409855019",
  "1207109205966852137",
  "1258637884211204227",
  "1206515542643712090",
  "1259641201208856648",
  "1215979442099454103",
  "1206451550155968574",
  "1206451560087818272",
  "1206515354583568405",
  "1258874570606383225",
];
|
||||
4578
dist/data/scraped.json
vendored
Normal file
4578
dist/data/scraped.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
46
dist/src/downloader.js
vendored
Normal file
46
dist/src/downloader.js
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
import fs from "fs-extra";
|
||||
import path from "path";
|
||||
import { Worker } from "worker_threads";
|
||||
import { fileURLToPath } from "url";
|
||||
// ESM has no built-in __filename/__dirname; derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Spawn one worker thread running worker.js over a chunk of URLs.
 * Resolves when the worker exits with code 0; rejects on a worker error
 * or a non-zero exit code.
 * @param {string[]} urlsChunk - URLs this worker should download.
 * @param {number} workerId - Index used for log/error messages.
 * @returns {Promise<void>}
 */
const createWorker = (urlsChunk, workerId) =>
  new Promise((resolve, reject) => {
    const worker = new Worker(path.join(__dirname, "worker.js"), {
      workerData: { urlsChunk, workerId },
    });
    // Relay progress messages from the worker to this thread's console.
    worker.on("message", (msg) => console.log(`Worker ${workerId}: ${msg}.`));
    worker.on("error", reject);
    worker.on("exit", (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`Worker ${workerId} stopped with exit code ${code}.`));
      }
    });
  });
|
||||
/**
 * Download every URL listed in ../data/scraped.json by splitting the list
 * across a pool of worker threads.
 * Errors are logged, not rethrown, so a failed run does not crash the caller.
 * @param {number} [numWorkers=4] - How many worker threads to spawn.
 * @returns {Promise<void>}
 */
export const downloader = async (numWorkers = 4) => {
  try {
    const linksPath = path.join(__dirname, "..", "data", "scraped.json");
    const links = JSON.parse(await fs.readFile(linksPath, "utf-8"));
    const chunkSize = Math.ceil(links.length / numWorkers);
    const workers = [];
    for (let i = 0; i < numWorkers; i++) {
      const chunk = links.slice(i * chunkSize, (i + 1) * chunkSize);
      // Skip idle workers — possible when links.length < numWorkers.
      if (chunk.length > 0) {
        workers.push(createWorker(chunk, i));
      }
    }
    await Promise.all(workers);
    console.log("Scraped files successfully.");
  } catch (error) {
    if (error instanceof Error) {
      console.error(`Error: ${error.message}.`);
    } else {
      console.error(`Unknown error.`);
    }
  }
};
|
||||
60
dist/src/index.js
vendored
Normal file
60
dist/src/index.js
vendored
Normal file
@@ -0,0 +1,60 @@
|
||||
import fetch from "node-fetch";
|
||||
import fs from "fs-extra";
|
||||
import { token, channels_to_fetch } from "../data/config.js";
|
||||
import { isMainThread } from "worker_threads";
|
||||
import { downloader } from "./downloader.js";
|
||||
// Ensure the output directory exists before any file is written.
// NOTE(review): "../data" is resolved against the process cwd, not this
// module's location — confirm the scraper is always launched from dist/src.
const createDataFolder = () => {
  if (fs.existsSync("../data")) return;
  fs.mkdirSync("../data");
};
|
||||
/**
 * Fetch a single page (up to 100 messages) from a Discord channel.
 * @param {string} channel_id - Channel to read from.
 * @param {string} [before] - Message ID to paginate before (omit for page 1).
 * @returns {Promise<any>} Parsed JSON body of the API response.
 */
const request = async (channel_id, before) => {
  const cursor = before ? `&before=${before}` : "";
  const url = `https://discord.com/api/channels/${channel_id}/messages?limit=100${cursor}`;
  const response = await fetch(url, {
    method: "GET",
    headers: {
      Authorization: token,
      Accept: "application/json",
    },
  });
  return response.json();
};
|
||||
/**
 * Page through a channel's entire message history, 100 messages at a time.
 * Pagination stops as soon as a page comes back with fewer than 100 entries.
 * @param {string} channel_id - Channel to drain.
 * @returns {Promise<any[]>} All messages, newest first.
 */
const getAllMessages = async (channel_id) => {
  // First page: no cursor yet.
  let page = await request(channel_id);
  let collected = page;
  // A full page means older messages may remain; continue before the
  // oldest message seen so far.
  while (page.length >= 100) {
    page = await request(channel_id, page.at(-1).id);
    collected = collected.concat(page);
  }
  return collected;
};
|
||||
/**
 * Collect the proxy URLs of every attachment posted in a channel.
 * Messages without an `attachments` array are tolerated (the old
 * `.map(...).flat()` pipeline kept `undefined` entries and then threw on
 * `.proxy_url`); falsy URLs are filtered out.
 * @param {string} channel_id - Channel to scan.
 * @returns {Promise<string[]>} Attachment proxy URLs.
 */
const getAllAttachments = async (channel_id) => {
  const messages = await getAllMessages(channel_id);
  return messages
    .flatMap((msg) => msg.attachments ?? [])
    .map((attachment) => attachment.proxy_url)
    .filter((attachment) => attachment);
};
|
||||
// Persist `data` as pretty-printed JSON under ../data/<fileName>
// (path is relative to the process working directory).
const saveToFile = (fileName, data) => {
  const serialized = JSON.stringify(data, null, 2);
  fs.writeFileSync(`../data/${fileName}`, serialized);
};
|
||||
/**
 * Entry point: scrape attachment links from every configured channel,
 * persist the deduplicated list, then hand off to the downloader.
 */
const main = async () => {
  console.log("Scraping links...");
  let allAttachments = [];
  // Channels are fetched sequentially, one API page chain at a time.
  for (const channel_id of channels_to_fetch) {
    const channelAttachments = await getAllAttachments(channel_id);
    allAttachments = allAttachments.concat(channelAttachments);
    console.log(`Scraped ${channelAttachments.length} attachment links from channel ${channel_id}.`);
  }
  // The same attachment can be posted in several channels; dedupe by URL.
  const uniqueAttachments = [...new Set(allAttachments)];
  createDataFolder();
  saveToFile("scraped.json", uniqueAttachments);
  console.log(`Scraped and saved ${uniqueAttachments.length} attachment links.`);
  console.log("Scraping files...");
  if (isMainThread) {
    // Await the downloader so failures surface here instead of leaking as
    // an unhandled floating promise.
    await downloader();
  }
};

// Attach a rejection handler so a scraping failure is reported, not an
// unhandled promise rejection.
main().catch((error) =>
  console.error(`Fatal: ${error instanceof Error ? error.message : error}.`)
);
|
||||
38
dist/src/worker.js
vendored
Normal file
38
dist/src/worker.js
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
import { workerData, parentPort } from "worker_threads";
|
||||
import fetch from "node-fetch";
|
||||
import fs from "fs-extra";
|
||||
import path from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { randomBytes } from "crypto";
|
||||
/** Generate a collision-resistant filename prefix. */
const generateRandomFilename = () => {
  // 8 random bytes → 16 lowercase hex characters.
  return randomBytes(8).toString("hex");
};
|
||||
// ESM has no built-in __filename/__dirname; derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Downloaded files land two levels above this module, in <root>/files
// (assumes this file lives at dist/src/worker.js).
const imgsDir = path.join(__dirname, "..", "..", "files");
// Module-level side effect: create the output directory (and any missing
// parents) as soon as the worker loads.
fs.ensureDirSync(imgsDir);
|
||||
/**
 * Download one URL into imgsDir, prefixing the stored name with random hex
 * to avoid collisions. Errors are reported to the parent thread instead of
 * thrown, so one bad URL cannot kill the whole worker.
 * @param {string} url - Attachment proxy URL to fetch.
 * @returns {Promise<void>}
 */
const downloadFile = async (url) => {
  try {
    const response = await fetch(url);
    if (!response.ok)
      throw new Error(`Failed to fetch ${url}.`);
    // response.buffer() is deprecated in node-fetch; arrayBuffer() is the
    // portable replacement and yields the same bytes.
    const buffer = Buffer.from(await response.arrayBuffer());
    // Drop the query string before taking the base name.
    const fileName = path.basename(url.split("?")[0]);
    const filePath = path.join(imgsDir, `${generateRandomFilename()}-${fileName}`);
    await fs.outputFile(filePath, buffer);
    parentPort?.postMessage(`Scraped ${url} to ${filePath}.`);
  }
  catch (error) {
    if (error instanceof Error) {
      parentPort?.postMessage(`Error scraping ${url}: ${error.message}.`);
    }
    else {
      parentPort?.postMessage(`Unknown error scraping ${url}.`);
    }
  }
};
|
||||
/** Worker entry point: drain this worker's URL chunk one file at a time. */
const main = async () => {
  // Sequential downloads keep per-worker memory and connection use bounded.
  for (const url of workerData.urlsChunk) {
    await downloadFile(url);
  }
};

main();
|
||||
Reference in New Issue
Block a user