Upload of project source code files.
This commit is contained in:
16
data/config.ts
Normal file
16
data/config.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
export const token =
|
||||
"MTMwNDc4MzM0MzQ5MzMxNjY2OQ.GE12WJ.7biOE8UPFGqoxU2L4xp6s0RWMmhyLgWod1vqME";
|
||||
export const channels_to_fetch = [
|
||||
"1206451476147478599",
|
||||
"1206451493771677716",
|
||||
"1278448478409855019",
|
||||
"1207109205966852137",
|
||||
"1258637884211204227",
|
||||
"1206515542643712090",
|
||||
"1259641201208856648",
|
||||
"1215979442099454103",
|
||||
"1206451550155968574",
|
||||
"1206451560087818272",
|
||||
"1206515354583568405",
|
||||
"1258874570606383225",
|
||||
];
|
||||
15
dist/data/config.js
vendored
Normal file
15
dist/data/config.js
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
// NOTE: generated from data/config.ts — prefer editing the TypeScript
// source and rebuilding with `tsc`.
//
// SECURITY (review): the previous revision hard-coded a live Discord token
// here. Treat that token as compromised (revoke it) and supply a
// replacement via the DISCORD_TOKEN environment variable.
const env = (globalThis["process"] && globalThis["process"].env) || {};

// Token sent as the Authorization header for Discord API requests.
export const token = env.DISCORD_TOKEN || "";

// Discord channel IDs whose message attachments will be scraped.
export const channels_to_fetch = [
    "1206451476147478599",
    "1206451493771677716",
    "1278448478409855019",
    "1207109205966852137",
    "1258637884211204227",
    "1206515542643712090",
    "1259641201208856648",
    "1215979442099454103",
    "1206451550155968574",
    "1206451560087818272",
    "1206515354583568405",
    "1258874570606383225",
];
|
||||
4578
dist/data/scraped.json
vendored
Normal file
4578
dist/data/scraped.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
46
dist/src/downloader.js
vendored
Normal file
46
dist/src/downloader.js
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
import fs from "fs-extra";
import path from "path";
import { Worker } from "worker_threads";
import { fileURLToPath } from "url";

// ESM has no __dirname; derive this module's directory from its URL.
const moduleDir = path.dirname(fileURLToPath(import.meta.url));

// Spawn one worker thread over a slice of URLs. The returned promise
// resolves on a clean exit and rejects on a worker error or non-zero exit.
const createWorker = (urlsChunk, workerId) =>
    new Promise((resolve, reject) => {
        const worker = new Worker(path.join(moduleDir, "worker.js"), {
            workerData: { urlsChunk, workerId },
        });
        worker.on("message", (msg) => console.log(`Worker ${workerId}: ${msg}.`));
        worker.on("error", reject);
        worker.on("exit", (code) =>
            code === 0
                ? resolve()
                : reject(new Error(`Worker ${workerId} stopped with exit code ${code}.`)));
    });

// Read the scraped link list and fan the downloads out across four worker
// threads. Failures are logged to the console rather than rethrown.
export const downloader = async () => {
    try {
        const linksPath = path.join(moduleDir, "..", "data", "scraped.json");
        const links = JSON.parse(await fs.readFile(linksPath, "utf-8"));

        const numWorkers = 4;
        const chunkSize = Math.ceil(links.length / numWorkers);

        const tasks = [];
        for (let i = 0; i < numWorkers; i += 1) {
            tasks.push(createWorker(links.slice(i * chunkSize, (i + 1) * chunkSize), i));
        }

        await Promise.all(tasks);
        console.log("Scraped files successfully.");
    }
    catch (error) {
        if (error instanceof Error) {
            console.error(`Error: ${error.message}.`);
        }
        else {
            console.error(`Unknown error.`);
        }
    }
};
|
||||
60
dist/src/index.js
vendored
Normal file
60
dist/src/index.js
vendored
Normal file
@@ -0,0 +1,60 @@
|
||||
import fetch from "node-fetch";
import fs from "fs-extra";
import { token, channels_to_fetch } from "../data/config.js";
import { isMainThread } from "worker_threads";
import { downloader } from "./downloader.js";

// NOTE: generated from src/index.ts — prefer editing the TypeScript source
// and rebuilding with `tsc`.

// Creates ../data relative to the process CWD. NOTE(review): `npm start`
// cds into dist/src first, so this resolves to dist/data — confirm if the
// working directory ever changes.
const createDataFolder = () => {
    if (!fs.existsSync("../data")) {
        fs.mkdirSync("../data");
    }
};

// Fetch one page (up to 100 messages) of a channel's history, optionally
// paging backwards from the message id `before`.
const request = async (channel_id, before) => {
    const options = {
        method: "GET",
        headers: {
            Authorization: token,
            Accept: "application/json",
        },
    };
    const url = `https://discord.com/api/channels/${channel_id}/messages?limit=100${before ? `&before=${before}` : ""}`;
    const response = await fetch(url, options);
    // Discord returns an error object (not a message array) on failure;
    // fail fast here instead of letting callers choke on the wrong shape.
    if (!response.ok) {
        throw new Error(`Request for channel ${channel_id} failed with status ${response.status}.`);
    }
    return (await response.json());
};

// Page backwards through a channel's full history. A full page (100
// messages) means there may be older messages left to fetch.
const getAllMessages = async (channel_id) => {
    let page = await request(channel_id);
    let result = page;
    while (page.length >= 100) {
        page = await request(channel_id, page[page.length - 1].id);
        result = result.concat(page);
    }
    return result;
};

// Collect the proxy URL of every attachment in a channel, dropping
// empty/missing values.
const getAllAttachments = async (channel_id) => {
    const messages = await getAllMessages(channel_id);
    const attachments = messages
        .map((msg) => msg.attachments)
        .flat()
        .map((attachment) => attachment.proxy_url);
    return attachments.filter((attachment) => attachment);
};

// Serialize `data` as pretty-printed JSON under ../data.
const saveToFile = (fileName, data) => {
    fs.writeFileSync(`../data/${fileName}`, JSON.stringify(data, null, 2));
};

const main = async () => {
    console.log("Scraping links...");
    let allAttachments = [];
    // Channels are fetched sequentially to stay friendly to the rate limit.
    for (const channel_id of channels_to_fetch) {
        const channelAttachments = await getAllAttachments(channel_id);
        allAttachments = allAttachments.concat(channelAttachments);
        console.log(`Scraped ${channelAttachments.length} attachment links from channel ${channel_id}.`);
    }
    const uniqueAttachments = [...new Set(allAttachments)];
    createDataFolder();
    saveToFile("scraped.json", uniqueAttachments);
    console.log(`Scraped and saved ${uniqueAttachments.length} attachment links.`);
    console.log("Scraping files...");
    if (isMainThread) {
        // Await so failures surface here instead of as a floating promise.
        await downloader();
    }
};

// Entry point: report failures instead of dying with an unhandled rejection.
main().catch((error) => {
    console.error(error instanceof Error ? `Error: ${error.message}.` : "Unknown error.");
});
|
||||
38
dist/src/worker.js
vendored
Normal file
38
dist/src/worker.js
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
import { workerData, parentPort } from "worker_threads";
import fetch from "node-fetch";
import fs from "fs-extra";
import path from "path";
import { fileURLToPath } from "url";
import { randomBytes } from "crypto";

// NOTE: generated from src/worker.ts — prefer editing the TypeScript source
// and rebuilding with `tsc`.

// 8 random bytes -> 16 hex chars; prefixes filenames to avoid collisions.
const generateRandomFilename = () => randomBytes(8).toString("hex");

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Downloads land in <repo>/files, two levels up from dist/src at runtime.
const imgsDir = path.join(__dirname, "..", "..", "files");
fs.ensureDirSync(imgsDir);

// Download one URL into imgsDir under a randomized filename. Errors are
// reported to the parent thread, never thrown.
const downloadFile = async (url) => {
    try {
        const response = await fetch(url);
        if (!response.ok)
            throw new Error(`Failed to fetch ${url}.`);
        // node-fetch v3 deprecates response.buffer(); read the body as an
        // ArrayBuffer instead.
        const bytes = new Uint8Array(await response.arrayBuffer());
        const fileName = path.basename(url.split("?")[0]);
        const filePath = path.join(imgsDir, `${generateRandomFilename()}-${fileName}`);
        await fs.outputFile(filePath, bytes);
        parentPort?.postMessage(`Scraped ${url} to ${filePath}.`);
    }
    catch (error) {
        if (error instanceof Error) {
            parentPort?.postMessage(`Error scraping ${url}: ${error.message}.`);
        }
        else {
            parentPort?.postMessage(`Unknown error scraping ${url}.`);
        }
    }
};

// Sequentially download every URL in this worker's chunk.
const main = async () => {
    const { urlsChunk } = workerData;
    for (const url of urlsChunk) {
        await downloadFile(url);
    }
};

// Report a failure to the parent instead of dying with an unhandled
// rejection (downloadFile already swallows per-URL errors).
main().catch((error) => {
    parentPort?.postMessage(`Worker failed: ${error instanceof Error ? error.message : "unknown error"}.`);
});
|
||||
213
package-lock.json
generated
Normal file
213
package-lock.json
generated
Normal file
@@ -0,0 +1,213 @@
|
||||
{
|
||||
"name": "scraper",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "scraper",
|
||||
"version": "1.0.0",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"crypto": "^1.0.1",
|
||||
"fs-extra": "^11.2.0",
|
||||
"node-fetch": "^3.3.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/fs-extra": "^11.0.4",
|
||||
"@types/node": "^22.5.4",
|
||||
"typescript": "^5.5.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/fs-extra": {
|
||||
"version": "11.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.4.tgz",
|
||||
"integrity": "sha512-yTbItCNreRooED33qjunPthRcSjERP1r4MqCZc7wv0u2sUkzTFp45tgUfS5+r7FrZPdmCCNflLhVSP/o+SemsQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/jsonfile": "*",
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/jsonfile": {
|
||||
"version": "6.1.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.4.tgz",
|
||||
"integrity": "sha512-D5qGUYwjvnNNextdU59/+fI+spnwtTFmyQP0h+PfIOSkNfpU6AOICUOkm4i0OnSk+NyjdPJrxCDro0sJsWlRpQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "22.9.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.9.0.tgz",
|
||||
"integrity": "sha512-vuyHg81vvWA1Z1ELfvLko2c8f34gyA0zaic0+Rllc5lbCnbSyuvb2Oxpm6TAUAC/2xZN3QGqxBNggD1nNR2AfQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.19.8"
|
||||
}
|
||||
},
|
||||
"node_modules/crypto": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/crypto/-/crypto-1.0.1.tgz",
|
||||
"integrity": "sha512-VxBKmeNcqQdiUQUW2Tzq0t377b54N2bMtXO/qiLa+6eRRmmC4qT3D4OnTGoT/U6O9aklQ/jTwbOtRMTTY8G0Ig==",
|
||||
"deprecated": "This package is no longer supported. It's now a built-in Node module. If you've depended on crypto, you should switch to the one that's built-in.",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/data-uri-to-buffer": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
|
||||
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 12"
|
||||
}
|
||||
},
|
||||
"node_modules/fetch-blob": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
|
||||
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/jimmywarting"
|
||||
},
|
||||
{
|
||||
"type": "paypal",
|
||||
"url": "https://paypal.me/jimmywarting"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"node-domexception": "^1.0.0",
|
||||
"web-streams-polyfill": "^3.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^12.20 || >= 14.13"
|
||||
}
|
||||
},
|
||||
"node_modules/formdata-polyfill": {
|
||||
"version": "4.0.10",
|
||||
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
|
||||
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fetch-blob": "^3.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.20.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fs-extra": {
|
||||
"version": "11.2.0",
|
||||
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz",
|
||||
"integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"graceful-fs": "^4.2.0",
|
||||
"jsonfile": "^6.0.1",
|
||||
"universalify": "^2.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.14"
|
||||
}
|
||||
},
|
||||
"node_modules/graceful-fs": {
|
||||
"version": "4.2.11",
|
||||
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
|
||||
"integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/jsonfile": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz",
|
||||
"integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"universalify": "^2.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"graceful-fs": "^4.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/node-domexception": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
|
||||
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/jimmywarting"
|
||||
},
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://paypal.me/jimmywarting"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/node-fetch": {
|
||||
"version": "3.3.2",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
|
||||
"integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"data-uri-to-buffer": "^4.0.0",
|
||||
"fetch-blob": "^3.1.4",
|
||||
"formdata-polyfill": "^4.0.10"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/node-fetch"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.6.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz",
|
||||
"integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "6.19.8",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
|
||||
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/universalify": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
|
||||
"integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 10.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/web-streams-polyfill": {
|
||||
"version": "3.3.3",
|
||||
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
|
||||
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 8"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
24
package.json
Normal file
24
package.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"name": "scraper",
|
||||
"version": "1.0.0",
|
||||
"main": "dist/src/index.js",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1",
|
||||
"start": "tsc && cd dist/src && node index.js"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "syscallwzrd [https://github.com/syscallwizard]",
|
||||
"license": "MIT",
|
||||
"description": "Discord attachment scraping utility.",
|
||||
"dependencies": {
|
||||
"crypto": "^1.0.1",
|
||||
"fs-extra": "^11.2.0",
|
||||
"node-fetch": "^3.3.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/fs-extra": "^11.0.4",
|
||||
"@types/node": "^22.5.4",
|
||||
"typescript": "^5.5.4"
|
||||
}
|
||||
}
|
||||
49
src/downloader.ts
Normal file
49
src/downloader.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import fs from "fs-extra";
|
||||
import path from "path";
|
||||
import { Worker } from "worker_threads";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
const createWorker = (urlsChunk: string[], workerId: number) => {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
const worker = new Worker(path.join(__dirname, "worker.js"), {
|
||||
workerData: { urlsChunk, workerId },
|
||||
});
|
||||
worker.on("message", (msg) => console.log(`Worker ${workerId}: ${msg}.`));
|
||||
worker.on("error", reject);
|
||||
worker.on("exit", (code) => {
|
||||
if (code !== 0) {
|
||||
reject(new Error(`Worker ${workerId} stopped with exit code ${code}.`));
|
||||
} else {
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
export const downloader = async () => {
|
||||
try {
|
||||
const linksPath = path.join(__dirname, "..", "data", "scraped.json");
|
||||
const links = JSON.parse(await fs.readFile(linksPath, "utf-8"));
|
||||
|
||||
const numWorkers = 4;
|
||||
const chunkSize = Math.ceil(links.length / numWorkers);
|
||||
const workers = [];
|
||||
|
||||
for (let i = 0; i < numWorkers; i++) {
|
||||
const chunk = links.slice(i * chunkSize, (i + 1) * chunkSize);
|
||||
workers.push(createWorker(chunk, i));
|
||||
}
|
||||
|
||||
await Promise.all(workers);
|
||||
console.log("Scraped files successfully.");
|
||||
} catch (error) {
|
||||
if (error instanceof Error) {
|
||||
console.error(`Error: ${error.message}.`);
|
||||
} else {
|
||||
console.error(`Unknown error.`);
|
||||
}
|
||||
}
|
||||
};
|
||||
94
src/index.ts
Normal file
94
src/index.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import fetch from "node-fetch";
|
||||
import fs from "fs-extra";
|
||||
import { token, channels_to_fetch } from "../data/config.js";
|
||||
import { isMainThread } from "worker_threads";
|
||||
import { downloader } from "./downloader.js";
|
||||
|
||||
type Attachment = {
|
||||
id: string;
|
||||
proxy_url: string;
|
||||
};
|
||||
|
||||
type DiscordMessage = {
|
||||
id: string;
|
||||
attachments: Attachment[];
|
||||
};
|
||||
|
||||
const createDataFolder = () => {
|
||||
if (!fs.existsSync("../data")) {
|
||||
fs.mkdirSync("../data");
|
||||
}
|
||||
};
|
||||
|
||||
const request = async (channel_id: string, before?: string) => {
|
||||
const options = {
|
||||
method: "GET",
|
||||
headers: {
|
||||
Authorization: token,
|
||||
Accept: "application/json",
|
||||
},
|
||||
};
|
||||
|
||||
const url = `https://discord.com/api/channels/${channel_id}/messages?limit=100${
|
||||
before ? `&before=${before}` : ""
|
||||
}`;
|
||||
const response = await fetch(url, options);
|
||||
return (await response.json()) as DiscordMessage[];
|
||||
};
|
||||
|
||||
const getAllMessages = async (channel_id: string) => {
|
||||
let page: DiscordMessage[] = await request(channel_id);
|
||||
let result: DiscordMessage[] = page;
|
||||
|
||||
while (page.length >= 100) {
|
||||
page = await request(channel_id, page[page.length - 1].id);
|
||||
result = result.concat(page);
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
const getAllAttachments = async (channel_id: string) => {
|
||||
const messages = await getAllMessages(channel_id);
|
||||
const attachments = messages
|
||||
.map((msg) => msg.attachments)
|
||||
.flat()
|
||||
.map((attachment) => attachment.proxy_url);
|
||||
|
||||
return attachments.filter((attachment: string) => attachment);
|
||||
};
|
||||
|
||||
const saveToFile = (fileName: string, data: any) => {
|
||||
fs.writeFileSync(`../data/${fileName}`, JSON.stringify(data, null, 2));
|
||||
};
|
||||
|
||||
const main = async () => {
|
||||
console.log("Scraping links...");
|
||||
|
||||
let allAttachments: string[] = [];
|
||||
|
||||
for (const channel_id of channels_to_fetch) {
|
||||
const channelAttachments = await getAllAttachments(channel_id);
|
||||
allAttachments = allAttachments.concat(channelAttachments);
|
||||
console.log(
|
||||
`Scraped ${channelAttachments.length} attachment links from channel ${channel_id}.`
|
||||
);
|
||||
}
|
||||
|
||||
const uniqueAttachments = [...new Set(allAttachments)];
|
||||
|
||||
createDataFolder();
|
||||
saveToFile("scraped.json", uniqueAttachments);
|
||||
|
||||
console.log(
|
||||
`Scraped and saved ${uniqueAttachments.length} attachment links.`
|
||||
);
|
||||
|
||||
console.log("Scraping files...");
|
||||
|
||||
if (isMainThread) {
|
||||
downloader();
|
||||
}
|
||||
};
|
||||
|
||||
main();
|
||||
47
src/worker.ts
Normal file
47
src/worker.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import { workerData, parentPort } from "worker_threads";
|
||||
import fetch from "node-fetch";
|
||||
import fs from "fs-extra";
|
||||
import path from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { randomBytes } from "crypto";
|
||||
|
||||
const generateRandomFilename = () => randomBytes(8).toString("hex");
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
const imgsDir = path.join(__dirname, "..", "..", "files");
|
||||
fs.ensureDirSync(imgsDir);
|
||||
|
||||
const downloadFile = async (url: string) => {
|
||||
try {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) throw new Error(`Failed to fetch ${url}.`);
|
||||
const buffer = await response.buffer();
|
||||
const fileName = path.basename(url.split("?")[0]);
|
||||
const filePath = path.join(
|
||||
imgsDir,
|
||||
`${generateRandomFilename()}-${fileName}`
|
||||
);
|
||||
await fs.outputFile(filePath, buffer);
|
||||
parentPort?.postMessage(`Scraped ${url} to ${filePath}.`);
|
||||
} catch (error) {
|
||||
if (error instanceof Error) {
|
||||
parentPort?.postMessage(`Error scraping ${url}: ${error.message}.`);
|
||||
} else {
|
||||
parentPort?.postMessage(`Unknown error scraping ${url}.`);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const main = async () => {
|
||||
const { urlsChunk } = workerData as {
|
||||
urlsChunk: string[];
|
||||
};
|
||||
|
||||
for (const url of urlsChunk) {
|
||||
await downloadFile(url);
|
||||
}
|
||||
};
|
||||
|
||||
main();
|
||||
13
tsconfig.json
Normal file
13
tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"module": "ES2020",
|
||||
"moduleResolution": "node",
|
||||
"outDir": "./dist",
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true
|
||||
},
|
||||
"include": ["src/**/*.ts"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
Reference in New Issue
Block a user