Upload of project source code files.

This commit is contained in:
2026-02-17 04:07:37 +01:00
parent b380f42356
commit 723f474228
12 changed files with 5193 additions and 0 deletions

16
data/config.ts Normal file
View File

@@ -0,0 +1,16 @@
export const token =
"MTMwNDc4MzM0MzQ5MzMxNjY2OQ.GE12WJ.7biOE8UPFGqoxU2L4xp6s0RWMmhyLgWod1vqME";
export const channels_to_fetch = [
"1206451476147478599",
"1206451493771677716",
"1278448478409855019",
"1207109205966852137",
"1258637884211204227",
"1206515542643712090",
"1259641201208856648",
"1215979442099454103",
"1206451550155968574",
"1206451560087818272",
"1206515354583568405",
"1258874570606383225",
];

15
dist/data/config.js vendored Normal file
View File

@@ -0,0 +1,15 @@
export const token = "MTMwNDc4MzM0MzQ5MzMxNjY2OQ.GE12WJ.7biOE8UPFGqoxU2L4xp6s0RWMmhyLgWod1vqME";
export const channels_to_fetch = [
"1206451476147478599",
"1206451493771677716",
"1278448478409855019",
"1207109205966852137",
"1258637884211204227",
"1206515542643712090",
"1259641201208856648",
"1215979442099454103",
"1206451550155968574",
"1206451560087818272",
"1206515354583568405",
"1258874570606383225",
];

4578
dist/data/scraped.json vendored Normal file

File diff suppressed because it is too large Load Diff

46
dist/src/downloader.js vendored Normal file
View File

@@ -0,0 +1,46 @@
// GENERATED FILE — compiled by tsc from src/downloader.ts.
// Fix issues in the TypeScript source and rebuild; do not hand-edit.
import fs from "fs-extra";
import path from "path";
import { Worker } from "worker_threads";
import { fileURLToPath } from "url";
// ES-module replacements for CommonJS __filename/__dirname, used to locate
// the sibling worker.js relative to this file rather than the process cwd.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Spawns one worker thread to download `urlsChunk`; resolves when the worker
// exits with code 0, rejects on a worker error or a non-zero exit code.
const createWorker = (urlsChunk, workerId) => {
return new Promise((resolve, reject) => {
const worker = new Worker(path.join(__dirname, "worker.js"), {
workerData: { urlsChunk, workerId },
});
// Progress messages posted by the worker are echoed to the console.
worker.on("message", (msg) => console.log(`Worker ${workerId}: ${msg}.`));
worker.on("error", reject);
worker.on("exit", (code) => {
if (code !== 0) {
reject(new Error(`Worker ${workerId} stopped with exit code ${code}.`));
}
else {
resolve();
}
});
});
};
// Reads data/scraped.json and downloads all listed URLs in parallel across
// 4 worker threads. Errors are logged, never thrown, so callers don't crash.
export const downloader = async () => {
try {
const linksPath = path.join(__dirname, "..", "data", "scraped.json");
// NOTE(review): JSON.parse output is trusted to be an array of strings —
// a malformed scraped.json surfaces later as a confusing error.
const links = JSON.parse(await fs.readFile(linksPath, "utf-8"));
const numWorkers = 4;
const chunkSize = Math.ceil(links.length / numWorkers);
const workers = [];
for (let i = 0; i < numWorkers; i++) {
const chunk = links.slice(i * chunkSize, (i + 1) * chunkSize);
workers.push(createWorker(chunk, i));
}
await Promise.all(workers);
console.log("Scraped files successfully.");
}
catch (error) {
if (error instanceof Error) {
console.error(`Error: ${error.message}.`);
}
else {
console.error(`Unknown error.`);
}
}
};

60
dist/src/index.js vendored Normal file
View File

@@ -0,0 +1,60 @@
// GENERATED FILE — compiled by tsc from src/index.ts.
// Fix issues in the TypeScript source and rebuild; do not hand-edit.
import fetch from "node-fetch";
import fs from "fs-extra";
import { token, channels_to_fetch } from "../data/config.js";
import { isMainThread } from "worker_threads";
import { downloader } from "./downloader.js";
// Creates ../data relative to the process cwd (dist/data when launched via
// `npm start`, which cd's into dist/src first).
const createDataFolder = () => {
if (!fs.existsSync("../data")) {
fs.mkdirSync("../data");
}
};
// Fetches up to 100 messages from a channel; `before` pages backwards.
// NOTE(review): non-2xx responses (bad token, rate limit) are not checked,
// so Discord's error payload is returned as if it were a message array.
const request = async (channel_id, before) => {
const options = {
method: "GET",
headers: {
Authorization: token,
Accept: "application/json",
},
};
const url = `https://discord.com/api/channels/${channel_id}/messages?limit=100${before ? `&before=${before}` : ""}`;
const response = await fetch(url, options);
return (await response.json());
};
// Pages through the full message history: a page shorter than 100 messages
// marks the end of the channel.
const getAllMessages = async (channel_id) => {
let page = await request(channel_id);
let result = page;
while (page.length >= 100) {
page = await request(channel_id, page[page.length - 1].id);
result = result.concat(page);
}
return result;
};
// Collects the proxy_url of every attachment posted in the channel,
// dropping falsy (empty) URL values.
const getAllAttachments = async (channel_id) => {
const messages = await getAllMessages(channel_id);
const attachments = messages
.map((msg) => msg.attachments)
.flat()
.map((attachment) => attachment.proxy_url);
return attachments.filter((attachment) => attachment);
};
// Writes pretty-printed JSON to ../data/<fileName> (cwd-relative path).
const saveToFile = (fileName, data) => {
fs.writeFileSync(`../data/${fileName}`, JSON.stringify(data, null, 2));
};
// Entry point: scrape links from all configured channels, dedupe, persist,
// then hand off to the downloader.
const main = async () => {
console.log("Scraping links...");
let allAttachments = [];
for (const channel_id of channels_to_fetch) {
const channelAttachments = await getAllAttachments(channel_id);
allAttachments = allAttachments.concat(channelAttachments);
console.log(`Scraped ${channelAttachments.length} attachment links from channel ${channel_id}.`);
}
const uniqueAttachments = [...new Set(allAttachments)];
createDataFolder();
saveToFile("scraped.json", uniqueAttachments);
console.log(`Scraped and saved ${uniqueAttachments.length} attachment links.`);
console.log("Scraping files...");
if (isMainThread) {
// NOTE(review): downloader() is fire-and-forget — a rejection here would
// be an unhandled promise rejection. Consider awaiting it.
downloader();
}
};
main();

38
dist/src/worker.js vendored Normal file
View File

@@ -0,0 +1,38 @@
// GENERATED FILE — compiled by tsc from src/worker.ts.
// Fix issues in the TypeScript source and rebuild; do not hand-edit.
import { workerData, parentPort } from "worker_threads";
import fetch from "node-fetch";
import fs from "fs-extra";
import path from "path";
import { fileURLToPath } from "url";
import { randomBytes } from "crypto";
// 8 random bytes → 16 hex chars, prefixed to filenames to avoid collisions
// between different URLs that share the same basename.
const generateRandomFilename = () => randomBytes(8).toString("hex");
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// All downloads are written to <project-root>/files.
const imgsDir = path.join(__dirname, "..", "..", "files");
fs.ensureDirSync(imgsDir);
// Downloads one URL into imgsDir. Errors are reported to the parent thread
// via postMessage instead of being thrown, so one bad URL cannot abort the
// rest of this worker's chunk.
const downloadFile = async (url) => {
try {
const response = await fetch(url);
if (!response.ok)
throw new Error(`Failed to fetch ${url}.`);
// NOTE(review): response.buffer() is deprecated in node-fetch v3;
// Buffer.from(await response.arrayBuffer()) is the supported form.
const buffer = await response.buffer();
// Query string is stripped so URL parameters don't end up in the name.
const fileName = path.basename(url.split("?")[0]);
const filePath = path.join(imgsDir, `${generateRandomFilename()}-${fileName}`);
await fs.outputFile(filePath, buffer);
parentPort?.postMessage(`Scraped ${url} to ${filePath}.`);
}
catch (error) {
if (error instanceof Error) {
parentPort?.postMessage(`Error scraping ${url}: ${error.message}.`);
}
else {
parentPort?.postMessage(`Unknown error scraping ${url}.`);
}
}
};
// Worker entry: sequentially download every URL in this thread's chunk.
const main = async () => {
const { urlsChunk } = workerData;
for (const url of urlsChunk) {
await downloadFile(url);
}
};
main();

213
package-lock.json generated Normal file
View File

@@ -0,0 +1,213 @@
{
"name": "scraper",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "scraper",
"version": "1.0.0",
"license": "MIT",
"dependencies": {
"crypto": "^1.0.1",
"fs-extra": "^11.2.0",
"node-fetch": "^3.3.2"
},
"devDependencies": {
"@types/fs-extra": "^11.0.4",
"@types/node": "^22.5.4",
"typescript": "^5.5.4"
}
},
"node_modules/@types/fs-extra": {
"version": "11.0.4",
"resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.4.tgz",
"integrity": "sha512-yTbItCNreRooED33qjunPthRcSjERP1r4MqCZc7wv0u2sUkzTFp45tgUfS5+r7FrZPdmCCNflLhVSP/o+SemsQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/jsonfile": "*",
"@types/node": "*"
}
},
"node_modules/@types/jsonfile": {
"version": "6.1.4",
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.4.tgz",
"integrity": "sha512-D5qGUYwjvnNNextdU59/+fI+spnwtTFmyQP0h+PfIOSkNfpU6AOICUOkm4i0OnSk+NyjdPJrxCDro0sJsWlRpQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/node": {
"version": "22.9.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.9.0.tgz",
"integrity": "sha512-vuyHg81vvWA1Z1ELfvLko2c8f34gyA0zaic0+Rllc5lbCnbSyuvb2Oxpm6TAUAC/2xZN3QGqxBNggD1nNR2AfQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~6.19.8"
}
},
"node_modules/crypto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/crypto/-/crypto-1.0.1.tgz",
"integrity": "sha512-VxBKmeNcqQdiUQUW2Tzq0t377b54N2bMtXO/qiLa+6eRRmmC4qT3D4OnTGoT/U6O9aklQ/jTwbOtRMTTY8G0Ig==",
"deprecated": "This package is no longer supported. It's now a built-in Node module. If you've depended on crypto, you should switch to the one that's built-in.",
"license": "ISC"
},
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
"license": "MIT",
"engines": {
"node": ">= 12"
}
},
"node_modules/fetch-blob": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "paypal",
"url": "https://paypal.me/jimmywarting"
}
],
"license": "MIT",
"dependencies": {
"node-domexception": "^1.0.0",
"web-streams-polyfill": "^3.0.3"
},
"engines": {
"node": "^12.20 || >= 14.13"
}
},
"node_modules/formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
"license": "MIT",
"dependencies": {
"fetch-blob": "^3.1.2"
},
"engines": {
"node": ">=12.20.0"
}
},
"node_modules/fs-extra": {
"version": "11.2.0",
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz",
"integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==",
"license": "MIT",
"dependencies": {
"graceful-fs": "^4.2.0",
"jsonfile": "^6.0.1",
"universalify": "^2.0.0"
},
"engines": {
"node": ">=14.14"
}
},
"node_modules/graceful-fs": {
"version": "4.2.11",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
"integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
"license": "ISC"
},
"node_modules/jsonfile": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz",
"integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==",
"license": "MIT",
"dependencies": {
"universalify": "^2.0.0"
},
"optionalDependencies": {
"graceful-fs": "^4.1.6"
}
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"license": "MIT",
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
"integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
"license": "MIT",
"dependencies": {
"data-uri-to-buffer": "^4.0.0",
"fetch-blob": "^3.1.4",
"formdata-polyfill": "^4.0.10"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/node-fetch"
}
},
"node_modules/typescript": {
"version": "5.6.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz",
"integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"dev": true,
"license": "MIT"
},
"node_modules/universalify": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
"integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
"license": "MIT",
"engines": {
"node": ">= 10.0.0"
}
},
"node_modules/web-streams-polyfill": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
"license": "MIT",
"engines": {
"node": ">= 8"
}
}
}
}

24
package.json Normal file
View File

@@ -0,0 +1,24 @@
{
"name": "scraper",
"version": "1.0.0",
"main": "dist/src/index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "tsc && cd dist/src && node index.js"
},
"keywords": [],
"author": "syscallwzrd [https://github.com/syscallwizard]",
"license": "MIT",
"description": "Discord attachment scraping utility.",
"dependencies": {
"crypto": "^1.0.1",
"fs-extra": "^11.2.0",
"node-fetch": "^3.3.2"
},
"devDependencies": {
"@types/fs-extra": "^11.0.4",
"@types/node": "^22.5.4",
"typescript": "^5.5.4"
}
}

49
src/downloader.ts Normal file
View File

@@ -0,0 +1,49 @@
import fs from "fs-extra";
import path from "path";
import { Worker } from "worker_threads";
import { fileURLToPath } from "url";
// ES-module replacements for CommonJS __filename/__dirname so the sibling
// worker.js is located relative to this file, not the process cwd.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Spawns one worker thread that downloads the given chunk of URLs.
 * Resolves when the worker exits with code 0; rejects on a worker error
 * or a non-zero exit code.
 */
const createWorker = (urlsChunk: string[], workerId: number) =>
  new Promise<void>((resolve, reject) => {
    const workerScript = path.join(__dirname, "worker.js");
    const worker = new Worker(workerScript, {
      workerData: { urlsChunk, workerId },
    });
    // Echo progress messages posted by the worker.
    worker.on("message", (msg) => {
      console.log(`Worker ${workerId}: ${msg}.`);
    });
    worker.on("error", reject);
    worker.on("exit", (exitCode) => {
      if (exitCode === 0) {
        resolve();
      } else {
        reject(
          new Error(`Worker ${workerId} stopped with exit code ${exitCode}.`)
        );
      }
    });
  });
/**
 * Reads the scraped attachment URLs from data/scraped.json and downloads
 * them in parallel across a small pool of worker threads.
 *
 * Errors (missing file, invalid JSON, worker failures) are logged to the
 * console rather than thrown, so a failed run never crashes the caller.
 */
export const downloader = async () => {
  try {
    const linksPath = path.join(__dirname, "..", "data", "scraped.json");
    // Validate the file contents instead of trusting JSON.parse's `any`.
    const parsed: unknown = JSON.parse(await fs.readFile(linksPath, "utf-8"));
    if (!Array.isArray(parsed) || !parsed.every((l) => typeof l === "string")) {
      throw new Error(
        `Expected ${linksPath} to contain a JSON array of URL strings.`
      );
    }
    const links: string[] = parsed;
    const numWorkers = 4;
    const chunkSize = Math.ceil(links.length / numWorkers);
    const workers: Promise<void>[] = [];
    for (let i = 0; i < numWorkers; i++) {
      const chunk = links.slice(i * chunkSize, (i + 1) * chunkSize);
      // Skip empty chunks (fewer links than workers) — no point spawning
      // a thread with nothing to do.
      if (chunk.length > 0) {
        workers.push(createWorker(chunk, i));
      }
    }
    await Promise.all(workers);
    console.log("Scraped files successfully.");
  } catch (error) {
    if (error instanceof Error) {
      console.error(`Error: ${error.message}.`);
    } else {
      console.error(`Unknown error.`);
    }
  }
};

94
src/index.ts Normal file
View File

@@ -0,0 +1,94 @@
import fetch from "node-fetch";
import fs from "fs-extra";
import { token, channels_to_fetch } from "../data/config.js";
import { isMainThread } from "worker_threads";
import { downloader } from "./downloader.js";
/** Subset of a Discord attachment object that this scraper uses. */
type Attachment = {
  id: string;
  // CDN proxy URL for the attachment; this is what gets downloaded.
  proxy_url: string;
};
/** Subset of a Discord message object returned by the channel messages API. */
type DiscordMessage = {
  id: string;
  attachments: Attachment[];
};
/**
 * Ensures the data output directory exists.
 *
 * NOTE(review): the path is relative to the process working directory —
 * the `npm start` script cd's into dist/src, so "../data" resolves to
 * dist/data. Running from another cwd puts the output elsewhere.
 */
const createDataFolder = () => {
  // fs-extra's ensureDirSync is idempotent and creates missing parents,
  // matching the ensureDirSync usage in worker.ts; the manual
  // existsSync + mkdirSync pair it replaces was also racy.
  fs.ensureDirSync("../data");
};
/**
 * Fetches up to 100 messages from a channel via the Discord REST API.
 *
 * @param channel_id Channel to read.
 * @param before Optional message ID; when given, only messages older than
 *   it are returned (used for backwards pagination).
 * @throws Error when Discord responds with a non-2xx status (bad token,
 *   missing permissions, rate limit). Previously the error payload was
 *   blindly cast to DiscordMessage[], corrupting the pipeline downstream.
 */
const request = async (channel_id: string, before?: string) => {
  const options = {
    method: "GET",
    headers: {
      Authorization: token,
      Accept: "application/json",
    },
  };
  const url = `https://discord.com/api/channels/${channel_id}/messages?limit=100${
    before ? `&before=${before}` : ""
  }`;
  const response = await fetch(url, options);
  if (!response.ok) {
    throw new Error(
      `Discord API request for channel ${channel_id} failed: ${response.status} ${response.statusText}.`
    );
  }
  return (await response.json()) as DiscordMessage[];
};
/**
 * Retrieves the full message history of a channel by paging backwards
 * 100 messages at a time; a page shorter than 100 marks the end.
 */
const getAllMessages = async (channel_id: string) => {
  const history: DiscordMessage[] = [];
  let page: DiscordMessage[] = await request(channel_id);
  history.push(...page);
  while (page.length >= 100) {
    const oldest = page[page.length - 1];
    page = await request(channel_id, oldest.id);
    history.push(...page);
  }
  return history;
};
/**
 * Collects the proxy URL of every attachment posted in a channel,
 * dropping empty/falsy URL values.
 */
const getAllAttachments = async (channel_id: string) => {
  const messages = await getAllMessages(channel_id);
  return messages
    .flatMap((msg) => msg.attachments)
    .map((attachment) => attachment.proxy_url)
    .filter((proxyUrl) => Boolean(proxyUrl));
};
/**
 * Serialises `data` as pretty-printed JSON into ../data/<fileName>.
 * The path is relative to the process cwd (dist/src under `npm start`).
 *
 * `data` is typed `unknown` rather than `any` — JSON.stringify accepts
 * anything, and `unknown` keeps the call sites type-checked.
 */
const saveToFile = (fileName: string, data: unknown) => {
  fs.writeFileSync(`../data/${fileName}`, JSON.stringify(data, null, 2));
};
/**
 * Entry point: scrapes attachment links from every configured channel,
 * deduplicates them, saves them to data/scraped.json, then hands off to
 * the downloader to fetch the files.
 */
const main = async () => {
  console.log("Scraping links...");
  let allAttachments: string[] = [];
  for (const channel_id of channels_to_fetch) {
    const channelAttachments = await getAllAttachments(channel_id);
    allAttachments = allAttachments.concat(channelAttachments);
    console.log(
      `Scraped ${channelAttachments.length} attachment links from channel ${channel_id}.`
    );
  }
  // Deduplicate — the same attachment URL can appear in multiple messages.
  const uniqueAttachments = [...new Set(allAttachments)];
  createDataFolder();
  saveToFile("scraped.json", uniqueAttachments);
  console.log(
    `Scraped and saved ${uniqueAttachments.length} attachment links.`
  );
  console.log("Scraping files...");
  if (isMainThread) {
    // Await so the process only exits after the downloads finish and a
    // failure inside downloader() is not a dangling rejected promise.
    await downloader();
  }
};
// Surface failures instead of dying with an unhandled promise rejection.
main().catch((error: unknown) => {
  console.error(
    error instanceof Error ? `Error: ${error.message}.` : `Unknown error.`
  );
});

47
src/worker.ts Normal file
View File

@@ -0,0 +1,47 @@
import { workerData, parentPort } from "worker_threads";
import fetch from "node-fetch";
import fs from "fs-extra";
import path from "path";
import { fileURLToPath } from "url";
import { randomBytes } from "crypto";
// 8 random bytes → 16 hex characters, prefixed to stored filenames so two
// different URLs sharing the same basename never collide on disk.
const generateRandomFilename = () => randomBytes(8).toString("hex");
// ES-module replacements for CommonJS __filename/__dirname so the output
// directory is resolved relative to this file, not the process cwd.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// This compiles to dist/src/worker.js (tsconfig outDir "dist"), so "../.."
// points at the project root; downloads land in <project-root>/files.
const imgsDir = path.join(__dirname, "..", "..", "files");
fs.ensureDirSync(imgsDir);
/**
 * Downloads a single URL into imgsDir, prefixing the stored name with a
 * random hex string so distinct URLs with the same basename don't collide.
 * Errors are reported to the parent thread via postMessage rather than
 * thrown, so one bad URL never aborts the rest of the chunk.
 */
const downloadFile = async (url: string) => {
  try {
    const response = await fetch(url);
    if (!response.ok) throw new Error(`Failed to fetch ${url}.`);
    // node-fetch v3 deprecates response.buffer(); arrayBuffer() is the
    // supported path and Buffer.from() yields the same Buffer contents.
    const buffer = Buffer.from(await response.arrayBuffer());
    // Strip the query string before taking the basename so URL parameters
    // (e.g. CDN auth tokens) don't end up in the stored filename.
    const fileName = path.basename(url.split("?")[0]);
    const filePath = path.join(
      imgsDir,
      `${generateRandomFilename()}-${fileName}`
    );
    await fs.outputFile(filePath, buffer);
    parentPort?.postMessage(`Scraped ${url} to ${filePath}.`);
  } catch (error) {
    if (error instanceof Error) {
      parentPort?.postMessage(`Error scraping ${url}: ${error.message}.`);
    } else {
      parentPort?.postMessage(`Unknown error scraping ${url}.`);
    }
  }
};
/**
 * Worker entry point: sequentially downloads every URL in the chunk this
 * thread was handed via workerData.
 */
const main = async () => {
  const data = workerData as { urlsChunk: string[] };
  for (let i = 0; i < data.urlsChunk.length; i++) {
    await downloadFile(data.urlsChunk[i]);
  }
};
main();

13
tsconfig.json Normal file
View File

@@ -0,0 +1,13 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "ES2020",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"skipLibCheck": true,
"strict": true
},
"include": ["src/**/*.ts"],
"exclude": ["node_modules"]
}