Upload of project source code files.

This commit is contained in:
2026-02-17 04:07:37 +01:00
parent b380f42356
commit 723f474228
12 changed files with 5193 additions and 0 deletions

16
data/config.ts Normal file
View File

@@ -0,0 +1,16 @@
export const token =
"MTMwNDc4MzM0MzQ5MzMxNjY2OQ.GE12WJ.7biOE8UPFGqoxU2L4xp6s0RWMmhyLgWod1vqME";
export const channels_to_fetch = [
"1206451476147478599",
"1206451493771677716",
"1278448478409855019",
"1207109205966852137",
"1258637884211204227",
"1206515542643712090",
"1259641201208856648",
"1215979442099454103",
"1206451550155968574",
"1206451560087818272",
"1206515354583568405",
"1258874570606383225",
];

15
dist/data/config.js vendored Normal file
View File

@@ -0,0 +1,15 @@
export const token = "MTMwNDc4MzM0MzQ5MzMxNjY2OQ.GE12WJ.7biOE8UPFGqoxU2L4xp6s0RWMmhyLgWod1vqME";
export const channels_to_fetch = [
"1206451476147478599",
"1206451493771677716",
"1278448478409855019",
"1207109205966852137",
"1258637884211204227",
"1206515542643712090",
"1259641201208856648",
"1215979442099454103",
"1206451550155968574",
"1206451560087818272",
"1206515354583568405",
"1258874570606383225",
];

4578
dist/data/scraped.json vendored Normal file

File diff suppressed because it is too large Load Diff

46
dist/src/downloader.js vendored Normal file
View File

@@ -0,0 +1,46 @@
// GENERATED FILE — compiled by tsc from src/downloader.ts.
// Fix issues in the TypeScript source and rebuild; do not hand-edit.
import fs from "fs-extra";
import path from "path";
import { Worker } from "worker_threads";
import { fileURLToPath } from "url";
// ES-module replacements for CommonJS __filename/__dirname, used to locate
// the sibling worker.js relative to this file rather than the process cwd.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Spawns one worker thread to download `urlsChunk`; resolves when the worker
// exits with code 0, rejects on a worker error or a non-zero exit code.
const createWorker = (urlsChunk, workerId) => {
return new Promise((resolve, reject) => {
const worker = new Worker(path.join(__dirname, "worker.js"), {
workerData: { urlsChunk, workerId },
});
// Progress messages posted by the worker are echoed to the console.
worker.on("message", (msg) => console.log(`Worker ${workerId}: ${msg}.`));
worker.on("error", reject);
worker.on("exit", (code) => {
if (code !== 0) {
reject(new Error(`Worker ${workerId} stopped with exit code ${code}.`));
}
else {
resolve();
}
});
});
};
// Reads data/scraped.json and downloads all listed URLs in parallel across
// 4 worker threads. Errors are logged, never thrown, so callers don't crash.
export const downloader = async () => {
try {
const linksPath = path.join(__dirname, "..", "data", "scraped.json");
// NOTE(review): JSON.parse output is trusted to be an array of strings —
// a malformed scraped.json surfaces later as a confusing error.
const links = JSON.parse(await fs.readFile(linksPath, "utf-8"));
const numWorkers = 4;
const chunkSize = Math.ceil(links.length / numWorkers);
const workers = [];
for (let i = 0; i < numWorkers; i++) {
const chunk = links.slice(i * chunkSize, (i + 1) * chunkSize);
workers.push(createWorker(chunk, i));
}
await Promise.all(workers);
console.log("Scraped files successfully.");
}
catch (error) {
if (error instanceof Error) {
console.error(`Error: ${error.message}.`);
}
else {
console.error(`Unknown error.`);
}
}
};

60
dist/src/index.js vendored Normal file
View File

@@ -0,0 +1,60 @@
// GENERATED FILE — compiled by tsc from src/index.ts.
// Fix issues in the TypeScript source and rebuild; do not hand-edit.
import fetch from "node-fetch";
import fs from "fs-extra";
import { token, channels_to_fetch } from "../data/config.js";
import { isMainThread } from "worker_threads";
import { downloader } from "./downloader.js";
// Creates ../data relative to the process cwd (dist/data when launched via
// `npm start`, which cd's into dist/src first).
const createDataFolder = () => {
if (!fs.existsSync("../data")) {
fs.mkdirSync("../data");
}
};
// Fetches up to 100 messages from a channel; `before` pages backwards.
// NOTE(review): non-2xx responses (bad token, rate limit) are not checked,
// so Discord's error payload is returned as if it were a message array.
const request = async (channel_id, before) => {
const options = {
method: "GET",
headers: {
Authorization: token,
Accept: "application/json",
},
};
const url = `https://discord.com/api/channels/${channel_id}/messages?limit=100${before ? `&before=${before}` : ""}`;
const response = await fetch(url, options);
return (await response.json());
};
// Pages through the full message history: a page shorter than 100 messages
// marks the end of the channel.
const getAllMessages = async (channel_id) => {
let page = await request(channel_id);
let result = page;
while (page.length >= 100) {
page = await request(channel_id, page[page.length - 1].id);
result = result.concat(page);
}
return result;
};
// Collects the proxy_url of every attachment posted in the channel,
// dropping falsy (empty) URL values.
const getAllAttachments = async (channel_id) => {
const messages = await getAllMessages(channel_id);
const attachments = messages
.map((msg) => msg.attachments)
.flat()
.map((attachment) => attachment.proxy_url);
return attachments.filter((attachment) => attachment);
};
// Writes pretty-printed JSON to ../data/<fileName> (cwd-relative path).
const saveToFile = (fileName, data) => {
fs.writeFileSync(`../data/${fileName}`, JSON.stringify(data, null, 2));
};
// Entry point: scrape links from all configured channels, dedupe, persist,
// then hand off to the downloader.
const main = async () => {
console.log("Scraping links...");
let allAttachments = [];
for (const channel_id of channels_to_fetch) {
const channelAttachments = await getAllAttachments(channel_id);
allAttachments = allAttachments.concat(channelAttachments);
console.log(`Scraped ${channelAttachments.length} attachment links from channel ${channel_id}.`);
}
const uniqueAttachments = [...new Set(allAttachments)];
createDataFolder();
saveToFile("scraped.json", uniqueAttachments);
console.log(`Scraped and saved ${uniqueAttachments.length} attachment links.`);
console.log("Scraping files...");
if (isMainThread) {
// NOTE(review): downloader() is fire-and-forget — a rejection here would
// be an unhandled promise rejection. Consider awaiting it.
downloader();
}
};
main();

38
dist/src/worker.js vendored Normal file
View File

@@ -0,0 +1,38 @@
// GENERATED FILE — compiled by tsc from src/worker.ts.
// Fix issues in the TypeScript source and rebuild; do not hand-edit.
import { workerData, parentPort } from "worker_threads";
import fetch from "node-fetch";
import fs from "fs-extra";
import path from "path";
import { fileURLToPath } from "url";
import { randomBytes } from "crypto";
// 8 random bytes → 16 hex chars, prefixed to filenames to avoid collisions
// between different URLs that share the same basename.
const generateRandomFilename = () => randomBytes(8).toString("hex");
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// All downloads are written to <project-root>/files.
const imgsDir = path.join(__dirname, "..", "..", "files");
fs.ensureDirSync(imgsDir);
// Downloads one URL into imgsDir. Errors are reported to the parent thread
// via postMessage instead of being thrown, so one bad URL cannot abort the
// rest of this worker's chunk.
const downloadFile = async (url) => {
try {
const response = await fetch(url);
if (!response.ok)
throw new Error(`Failed to fetch ${url}.`);
// NOTE(review): response.buffer() is deprecated in node-fetch v3;
// Buffer.from(await response.arrayBuffer()) is the supported form.
const buffer = await response.buffer();
// Query string is stripped so URL parameters don't end up in the name.
const fileName = path.basename(url.split("?")[0]);
const filePath = path.join(imgsDir, `${generateRandomFilename()}-${fileName}`);
await fs.outputFile(filePath, buffer);
parentPort?.postMessage(`Scraped ${url} to ${filePath}.`);
}
catch (error) {
if (error instanceof Error) {
parentPort?.postMessage(`Error scraping ${url}: ${error.message}.`);
}
else {
parentPort?.postMessage(`Unknown error scraping ${url}.`);
}
}
};
// Worker entry: sequentially download every URL in this thread's chunk.
const main = async () => {
const { urlsChunk } = workerData;
for (const url of urlsChunk) {
await downloadFile(url);
}
};
main();

213
package-lock.json generated Normal file
View File

@@ -0,0 +1,213 @@
{
"name": "scraper",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "scraper",
"version": "1.0.0",
"license": "MIT",
"dependencies": {
"crypto": "^1.0.1",
"fs-extra": "^11.2.0",
"node-fetch": "^3.3.2"
},
"devDependencies": {
"@types/fs-extra": "^11.0.4",
"@types/node": "^22.5.4",
"typescript": "^5.5.4"
}
},
"node_modules/@types/fs-extra": {
"version": "11.0.4",
"resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.4.tgz",
"integrity": "sha512-yTbItCNreRooED33qjunPthRcSjERP1r4MqCZc7wv0u2sUkzTFp45tgUfS5+r7FrZPdmCCNflLhVSP/o+SemsQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/jsonfile": "*",
"@types/node": "*"
}
},
"node_modules/@types/jsonfile": {
"version": "6.1.4",
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.4.tgz",
"integrity": "sha512-D5qGUYwjvnNNextdU59/+fI+spnwtTFmyQP0h+PfIOSkNfpU6AOICUOkm4i0OnSk+NyjdPJrxCDro0sJsWlRpQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/node": {
"version": "22.9.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.9.0.tgz",
"integrity": "sha512-vuyHg81vvWA1Z1ELfvLko2c8f34gyA0zaic0+Rllc5lbCnbSyuvb2Oxpm6TAUAC/2xZN3QGqxBNggD1nNR2AfQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~6.19.8"
}
},
"node_modules/crypto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/crypto/-/crypto-1.0.1.tgz",
"integrity": "sha512-VxBKmeNcqQdiUQUW2Tzq0t377b54N2bMtXO/qiLa+6eRRmmC4qT3D4OnTGoT/U6O9aklQ/jTwbOtRMTTY8G0Ig==",
"deprecated": "This package is no longer supported. It's now a built-in Node module. If you've depended on crypto, you should switch to the one that's built-in.",
"license": "ISC"
},
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
"license": "MIT",
"engines": {
"node": ">= 12"
}
},
"node_modules/fetch-blob": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "paypal",
"url": "https://paypal.me/jimmywarting"
}
],
"license": "MIT",
"dependencies": {
"node-domexception": "^1.0.0",
"web-streams-polyfill": "^3.0.3"
},
"engines": {
"node": "^12.20 || >= 14.13"
}
},
"node_modules/formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
"license": "MIT",
"dependencies": {
"fetch-blob": "^3.1.2"
},
"engines": {
"node": ">=12.20.0"
}
},
"node_modules/fs-extra": {
"version": "11.2.0",
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz",
"integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==",
"license": "MIT",
"dependencies": {
"graceful-fs": "^4.2.0",
"jsonfile": "^6.0.1",
"universalify": "^2.0.0"
},
"engines": {
"node": ">=14.14"
}
},
"node_modules/graceful-fs": {
"version": "4.2.11",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
"integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
"license": "ISC"
},
"node_modules/jsonfile": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz",
"integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==",
"license": "MIT",
"dependencies": {
"universalify": "^2.0.0"
},
"optionalDependencies": {
"graceful-fs": "^4.1.6"
}
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"license": "MIT",
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
"integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
"license": "MIT",
"dependencies": {
"data-uri-to-buffer": "^4.0.0",
"fetch-blob": "^3.1.4",
"formdata-polyfill": "^4.0.10"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/node-fetch"
}
},
"node_modules/typescript": {
"version": "5.6.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz",
"integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"dev": true,
"license": "MIT"
},
"node_modules/universalify": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
"integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
"license": "MIT",
"engines": {
"node": ">= 10.0.0"
}
},
"node_modules/web-streams-polyfill": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
"license": "MIT",
"engines": {
"node": ">= 8"
}
}
}
}

24
package.json Normal file
View File

@@ -0,0 +1,24 @@
{
"name": "scraper",
"version": "1.0.0",
"main": "dist/src/index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "tsc && cd dist/src && node index.js"
},
"keywords": [],
"author": "syscallwzrd [https://github.com/syscallwizard]",
"license": "MIT",
"description": "Discord attachment scraping utility.",
"dependencies": {
"crypto": "^1.0.1",
"fs-extra": "^11.2.0",
"node-fetch": "^3.3.2"
},
"devDependencies": {
"@types/fs-extra": "^11.0.4",
"@types/node": "^22.5.4",
"typescript": "^5.5.4"
}
}

49
src/downloader.ts Normal file
View File

@@ -0,0 +1,49 @@
import fs from "fs-extra";
import path from "path";
import { Worker } from "worker_threads";
import { fileURLToPath } from "url";
// ES-module replacements for CommonJS __filename/__dirname so the sibling
// worker.js is located relative to this file, not the process cwd.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Spawns one worker thread that downloads the given chunk of URLs.
 * Resolves when the worker exits with code 0; rejects on a worker error
 * or a non-zero exit code.
 */
const createWorker = (urlsChunk: string[], workerId: number) =>
  new Promise<void>((resolve, reject) => {
    const workerScript = path.join(__dirname, "worker.js");
    const worker = new Worker(workerScript, {
      workerData: { urlsChunk, workerId },
    });
    // Echo progress messages posted by the worker.
    worker.on("message", (msg) => {
      console.log(`Worker ${workerId}: ${msg}.`);
    });
    worker.on("error", reject);
    worker.on("exit", (exitCode) => {
      if (exitCode === 0) {
        resolve();
      } else {
        reject(
          new Error(`Worker ${workerId} stopped with exit code ${exitCode}.`)
        );
      }
    });
  });
/**
 * Reads the scraped attachment URLs from data/scraped.json and downloads
 * them in parallel across a small pool of worker threads.
 *
 * Errors (missing file, invalid JSON, worker failures) are logged to the
 * console rather than thrown, so a failed run never crashes the caller.
 */
export const downloader = async () => {
  try {
    const linksPath = path.join(__dirname, "..", "data", "scraped.json");
    // Validate the file contents instead of trusting JSON.parse's `any`.
    const parsed: unknown = JSON.parse(await fs.readFile(linksPath, "utf-8"));
    if (!Array.isArray(parsed) || !parsed.every((l) => typeof l === "string")) {
      throw new Error(
        `Expected ${linksPath} to contain a JSON array of URL strings.`
      );
    }
    const links: string[] = parsed;
    const numWorkers = 4;
    const chunkSize = Math.ceil(links.length / numWorkers);
    const workers: Promise<void>[] = [];
    for (let i = 0; i < numWorkers; i++) {
      const chunk = links.slice(i * chunkSize, (i + 1) * chunkSize);
      // Skip empty chunks (fewer links than workers) — no point spawning
      // a thread with nothing to do.
      if (chunk.length > 0) {
        workers.push(createWorker(chunk, i));
      }
    }
    await Promise.all(workers);
    console.log("Scraped files successfully.");
  } catch (error) {
    if (error instanceof Error) {
      console.error(`Error: ${error.message}.`);
    } else {
      console.error(`Unknown error.`);
    }
  }
};

94
src/index.ts Normal file
View File

@@ -0,0 +1,94 @@
import fetch from "node-fetch";
import fs from "fs-extra";
import { token, channels_to_fetch } from "../data/config.js";
import { isMainThread } from "worker_threads";
import { downloader } from "./downloader.js";
/** Subset of a Discord attachment object that this scraper uses. */
type Attachment = {
  id: string;
  // CDN proxy URL for the attachment; this is what gets downloaded.
  proxy_url: string;
};
/** Subset of a Discord message object returned by the channel messages API. */
type DiscordMessage = {
  id: string;
  attachments: Attachment[];
};
/**
 * Ensures the data output directory exists.
 *
 * NOTE(review): the path is relative to the process working directory —
 * the `npm start` script cd's into dist/src, so "../data" resolves to
 * dist/data. Running from another cwd puts the output elsewhere.
 */
const createDataFolder = () => {
  // fs-extra's ensureDirSync is idempotent and creates missing parents,
  // matching the ensureDirSync usage in worker.ts; the manual
  // existsSync + mkdirSync pair it replaces was also racy.
  fs.ensureDirSync("../data");
};
/**
 * Fetches up to 100 messages from a channel via the Discord REST API.
 *
 * @param channel_id Channel to read.
 * @param before Optional message ID; when given, only messages older than
 *   it are returned (used for backwards pagination).
 * @throws Error when Discord responds with a non-2xx status (bad token,
 *   missing permissions, rate limit). Previously the error payload was
 *   blindly cast to DiscordMessage[], corrupting the pipeline downstream.
 */
const request = async (channel_id: string, before?: string) => {
  const options = {
    method: "GET",
    headers: {
      Authorization: token,
      Accept: "application/json",
    },
  };
  const url = `https://discord.com/api/channels/${channel_id}/messages?limit=100${
    before ? `&before=${before}` : ""
  }`;
  const response = await fetch(url, options);
  if (!response.ok) {
    throw new Error(
      `Discord API request for channel ${channel_id} failed: ${response.status} ${response.statusText}.`
    );
  }
  return (await response.json()) as DiscordMessage[];
};
/**
 * Retrieves the full message history of a channel by paging backwards
 * 100 messages at a time; a page shorter than 100 marks the end.
 */
const getAllMessages = async (channel_id: string) => {
  const history: DiscordMessage[] = [];
  let page: DiscordMessage[] = await request(channel_id);
  history.push(...page);
  while (page.length >= 100) {
    const oldest = page[page.length - 1];
    page = await request(channel_id, oldest.id);
    history.push(...page);
  }
  return history;
};
/**
 * Collects the proxy URL of every attachment posted in a channel,
 * dropping empty/falsy URL values.
 */
const getAllAttachments = async (channel_id: string) => {
  const messages = await getAllMessages(channel_id);
  return messages
    .flatMap((msg) => msg.attachments)
    .map((attachment) => attachment.proxy_url)
    .filter((proxyUrl) => Boolean(proxyUrl));
};
/**
 * Serialises `data` as pretty-printed JSON into ../data/<fileName>.
 * The path is relative to the process cwd (dist/src under `npm start`).
 *
 * `data` is typed `unknown` rather than `any` — JSON.stringify accepts
 * anything, and `unknown` keeps the call sites type-checked.
 */
const saveToFile = (fileName: string, data: unknown) => {
  fs.writeFileSync(`../data/${fileName}`, JSON.stringify(data, null, 2));
};
/**
 * Entry point: scrapes attachment links from every configured channel,
 * deduplicates them, saves them to data/scraped.json, then hands off to
 * the downloader to fetch the files.
 */
const main = async () => {
  console.log("Scraping links...");
  let allAttachments: string[] = [];
  for (const channel_id of channels_to_fetch) {
    const channelAttachments = await getAllAttachments(channel_id);
    allAttachments = allAttachments.concat(channelAttachments);
    console.log(
      `Scraped ${channelAttachments.length} attachment links from channel ${channel_id}.`
    );
  }
  // Deduplicate — the same attachment URL can appear in multiple messages.
  const uniqueAttachments = [...new Set(allAttachments)];
  createDataFolder();
  saveToFile("scraped.json", uniqueAttachments);
  console.log(
    `Scraped and saved ${uniqueAttachments.length} attachment links.`
  );
  console.log("Scraping files...");
  if (isMainThread) {
    // Await so the process only exits after the downloads finish and a
    // failure inside downloader() is not a dangling rejected promise.
    await downloader();
  }
};
// Surface failures instead of dying with an unhandled promise rejection.
main().catch((error: unknown) => {
  console.error(
    error instanceof Error ? `Error: ${error.message}.` : `Unknown error.`
  );
});

47
src/worker.ts Normal file
View File

@@ -0,0 +1,47 @@
import { workerData, parentPort } from "worker_threads";
import fetch from "node-fetch";
import fs from "fs-extra";
import path from "path";
import { fileURLToPath } from "url";
import { randomBytes } from "crypto";
// 8 random bytes → 16 hex characters, prefixed to stored filenames so two
// different URLs sharing the same basename never collide on disk.
const generateRandomFilename = () => randomBytes(8).toString("hex");
// ES-module replacements for CommonJS __filename/__dirname so the output
// directory is resolved relative to this file, not the process cwd.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// This compiles to dist/src/worker.js (tsconfig outDir "dist"), so "../.."
// points at the project root; downloads land in <project-root>/files.
const imgsDir = path.join(__dirname, "..", "..", "files");
fs.ensureDirSync(imgsDir);
/**
 * Downloads a single URL into imgsDir, prefixing the stored name with a
 * random hex string so distinct URLs with the same basename don't collide.
 * Errors are reported to the parent thread via postMessage rather than
 * thrown, so one bad URL never aborts the rest of the chunk.
 */
const downloadFile = async (url: string) => {
  try {
    const response = await fetch(url);
    if (!response.ok) throw new Error(`Failed to fetch ${url}.`);
    // node-fetch v3 deprecates response.buffer(); arrayBuffer() is the
    // supported path and Buffer.from() yields the same Buffer contents.
    const buffer = Buffer.from(await response.arrayBuffer());
    // Strip the query string before taking the basename so URL parameters
    // (e.g. CDN auth tokens) don't end up in the stored filename.
    const fileName = path.basename(url.split("?")[0]);
    const filePath = path.join(
      imgsDir,
      `${generateRandomFilename()}-${fileName}`
    );
    await fs.outputFile(filePath, buffer);
    parentPort?.postMessage(`Scraped ${url} to ${filePath}.`);
  } catch (error) {
    if (error instanceof Error) {
      parentPort?.postMessage(`Error scraping ${url}: ${error.message}.`);
    } else {
      parentPort?.postMessage(`Unknown error scraping ${url}.`);
    }
  }
};
/**
 * Worker entry point: sequentially downloads every URL in the chunk this
 * thread was handed via workerData.
 */
const main = async () => {
  const data = workerData as { urlsChunk: string[] };
  for (let i = 0; i < data.urlsChunk.length; i++) {
    await downloadFile(data.urlsChunk[i]);
  }
};
main();

13
tsconfig.json Normal file
View File

@@ -0,0 +1,13 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "ES2020",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"skipLibCheck": true,
"strict": true
},
"include": ["src/**/*.ts"],
"exclude": ["node_modules"]
}