95 lines
2.3 KiB
TypeScript
95 lines
2.3 KiB
TypeScript
import fetch from "node-fetch";
|
|
import fs from "fs-extra";
|
|
import { token, channels_to_fetch } from "../data/config.js";
|
|
import { isMainThread } from "worker_threads";
|
|
import { downloader } from "./downloader.js";
|
|
|
|
/**
 * Subset of a message attachment object, limited to the fields this
 * script declares. Only `proxy_url` is read by the scraper.
 */
type Attachment = {
  /** Identifier of the attachment. */
  id: string;
  /** Link to the attachment; this is the value the scraper collects and saves. */
  proxy_url: string;
};
|
|
|
|
/**
 * Subset of a channel message as returned by the messages endpoint,
 * limited to the fields this script reads.
 */
type DiscordMessage = {
  /** Message identifier; the last id of a page is used as the `before` cursor for the next page. */
  id: string;
  /** Attachments carried by the message. */
  attachments: Attachment[];
};
|
|
|
|
/**
 * Ensures the `../data` output directory exists.
 *
 * The path is relative, so it is resolved against the process's current
 * working directory, not against this module's location.
 */
const createDataFolder = (): void => {
  // `recursive: true` turns the call into a no-op when the directory already
  // exists, which removes the check-then-create race of the previous
  // `existsSync` + `mkdirSync` pair.
  fs.mkdirSync("../data", { recursive: true });
};
|
|
|
|
/**
 * Fetches a single page (at most 100 messages) of a channel's messages.
 *
 * @param channel_id - Channel whose messages are requested.
 * @param before - Optional message id, sent as the `before` query parameter
 *   to page backwards from that message.
 * @returns The parsed response body as an array of messages.
 * @throws Error when the response status is not successful, or when the
 *   body is not a JSON array (e.g. an error object from the API).
 */
const request = async (
  channel_id: string,
  before?: string
): Promise<DiscordMessage[]> => {
  // Build the query with URLSearchParams so values are always encoded.
  const query = new URLSearchParams({ limit: "100" });
  if (before) {
    query.set("before", before);
  }

  const url = `https://discord.com/api/channels/${encodeURIComponent(
    channel_id
  )}/messages?${query.toString()}`;

  const response = await fetch(url, {
    method: "GET",
    headers: {
      Authorization: token,
      Accept: "application/json",
    },
  });

  // Without this check an error payload (bad token, rate limit, ...) would be
  // returned as if it were a message list and fail later with an unrelated error.
  if (!response.ok) {
    throw new Error(
      `Fetching messages for channel ${channel_id} failed: ${response.status} ${response.statusText}`
    );
  }

  const body: unknown = await response.json();
  if (!Array.isArray(body)) {
    throw new Error(
      `Unexpected response for channel ${channel_id}: expected a JSON array of messages`
    );
  }

  // Element shape is not validated; callers rely on `id` and `attachments`.
  return body as DiscordMessage[];
};
|
|
|
|
/**
 * Collects every message of a channel by requesting pages until a page
 * comes back with fewer than 100 entries.
 *
 * @param channel_id - Channel to read.
 * @returns All fetched messages, in the order the pages were received.
 */
const getAllMessages = async (
  channel_id: string
): Promise<DiscordMessage[]> => {
  // Must match the `limit` that `request` asks for; a full page means
  // there may be more messages to fetch.
  const pageSize = 100;
  const result: DiscordMessage[] = [];

  let page = await request(channel_id);
  // Append in place instead of re-copying the whole accumulator per page.
  result.push(...page);

  while (page.length >= pageSize) {
    const last = page[page.length - 1];
    if (last === undefined) {
      // Unreachable while pageSize > 0; keeps the index access type-safe.
      break;
    }

    // The id of the last message in the page is the cursor for the next one.
    page = await request(channel_id, last.id);
    result.push(...page);
  }

  return result;
};
|
|
|
|
/**
 * Gathers the `proxy_url` of every attachment found in a channel.
 *
 * @param channel_id - Channel to scan.
 * @returns Attachment links in message order; empty or missing links are dropped.
 */
const getAllAttachments = async (channel_id: string): Promise<string[]> => {
  const messages = await getAllMessages(channel_id);

  return (
    messages
      // The payload is unvalidated JSON, so tolerate a message without `attachments`.
      .flatMap((msg) => msg.attachments ?? [])
      .map((attachment) => attachment.proxy_url)
      .filter((link) => Boolean(link))
  );
};
|
|
|
|
/**
 * Writes `data` as pretty-printed JSON (2-space indent) to `../data/<fileName>`.
 *
 * The path is relative to the current working directory, and the `../data`
 * directory must already exist.
 *
 * @param fileName - File name inside `../data`.
 * @param data - Any JSON-serializable value.
 * @throws TypeError when `data` has no JSON representation (e.g. `undefined`).
 */
const saveToFile = (fileName: string, data: unknown): void => {
  // JSON.stringify returns undefined (not a string) for values such as
  // `undefined` or functions; fail with a clear message instead of passing
  // that on to writeFileSync.
  const serialized: string | undefined = JSON.stringify(data, null, 2);
  if (serialized === undefined) {
    throw new TypeError(`Cannot serialize data for ${fileName} as JSON`);
  }

  fs.writeFileSync(`../data/${fileName}`, serialized);
};
|
|
|
|
/**
 * Runs the scraper end to end:
 *  1. collects attachment links from every configured channel,
 *  2. de-duplicates them and saves the list to `../data/scraped.json`,
 *  3. starts the downloader when running on the main thread.
 */
const main = async (): Promise<void> => {
  console.log("Scraping links...");

  // A Set de-duplicates while keeping first-seen order, and avoids
  // re-copying a growing array for every channel.
  const uniqueLinks = new Set<string>();

  // Channels are processed one after another, not in parallel.
  for (const channel_id of channels_to_fetch) {
    const channelAttachments = await getAllAttachments(channel_id);
    for (const link of channelAttachments) {
      uniqueLinks.add(link);
    }
    console.log(
      `Scraped ${channelAttachments.length} attachment links from channel ${channel_id}.`
    );
  }

  const uniqueAttachments = [...uniqueLinks];

  createDataFolder();
  saveToFile("scraped.json", uniqueAttachments);

  console.log(
    `Scraped and saved ${uniqueAttachments.length} attachment links.`
  );

  console.log("Scraping files...");

  // NOTE(review): presumably the downloader spawns worker threads that load
  // this module again, hence the main-thread guard — confirm in downloader.
  if (isMainThread) {
    // Awaiting is harmless if `downloader` is synchronous and lets a
    // rejection propagate to the caller if it returns a promise.
    await downloader();
  }
};
|
|
|
|
// Entry point: run the scraper and report a failure explicitly instead of
// leaving the returned promise's rejection unhandled.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|