starting message scraping

This commit is contained in:
Oliver Atkinson 2024-07-23 15:07:56 -06:00
parent 67ee89bad9
commit 87267fb708
4 changed files with 105 additions and 51 deletions

1
Cargo.lock generated
View File

@ -352,6 +352,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"dotenv", "dotenv",
"once_cell",
"poise", "poise",
"tokio", "tokio",
] ]

View File

@ -14,3 +14,4 @@ tokio = { version = "1.21.2", features = ["macros", "rt-multi-thread"] }
poise = { version = "0.6", features = ["cache"] } poise = { version = "0.6", features = ["cache"] }
dotenv = "0.15.0" dotenv = "0.15.0"
anyhow = "1.0.75" anyhow = "1.0.75"
once_cell = "1.19.0"

View File

@ -1,8 +1,8 @@
use std::fmt::Display; use std::{collections::HashMap, fmt::Display, sync::Arc};
use crate::Context; use crate::Context;
use anyhow::Error; use anyhow::Error;
use poise::serenity_prelude::{ChannelId, ChannelType, GuildChannel}; use poise::serenity_prelude::{Cache, CacheHttp, ChannelId, ChannelType, GetMessages, GuildChannel, Http, Message};
struct Server { struct Server {
channels: Vec<Channel>, channels: Vec<Channel>,
@ -12,7 +12,19 @@ struct Server {
struct Channel { struct Channel {
this: GuildChannel, this: GuildChannel,
children: Vec<Channel> children: Vec<Channel>,
messages: Vec<Message>,
}
impl Channel {
/// Wraps `this` in a `Channel` that has no children and no stored messages yet.
fn new(this: GuildChannel) -> Self {
    // `Vec::new()` does not allocate until the first push, so empty lists are free.
    let children = Vec::new();
    let messages = Vec::new();
    Self { this, children, messages }
}
} }
impl Display for Server { impl Display for Server {
@ -69,12 +81,12 @@ impl Server {
} }
// TODO this might be broken // TODO this might be broken
fn search<'a>(target: &'a mut Vec<Channel>, find: &ChannelId) -> Option<&'a mut Channel> { fn search_by_id<'a>(target: &'a mut Vec<Channel>, find: &ChannelId) -> Option<&'a mut Channel> {
for child in target { for child in target {
if child.this.id == *find { if child.this.id == *find {
return Some(child); return Some(child);
} }
match Self::search(&mut child.children, find) { match Self::search_by_id(&mut child.children, find) {
Some(x) => return Some(x), Some(x) => return Some(x),
None => {}, None => {},
} }
@ -87,9 +99,9 @@ impl Server {
if let Some(parent_id) = &insert.parent_id { if let Some(parent_id) = &insert.parent_id {
// find the parent (needs to go thru all nodes) // find the parent (needs to go thru all nodes)
match Self::search(&mut self.channels, &parent_id) { match Self::search_by_id(&mut self.channels, &parent_id) {
Some(parent_node) => { Some(parent_node) => {
parent_node.children.push(Channel { this: insert, children: Vec::new() }); parent_node.children.push(Channel::new(insert));
}, },
None => { None => {
// couldn't find parent, store somewhere else until it's parent is added... // couldn't find parent, store somewhere else until it's parent is added...
@ -98,18 +110,19 @@ impl Server {
}, },
} }
} else { } else {
self.channels.push(Channel { this: insert, children: Vec::new() }) self.channels.push(Channel::new(insert));
} }
} }
/// Cleans out the orphan channels, finding them parents. You'll want to use this before displaying anything.
fn clean(&mut self) { fn clean(&mut self) {
if !self.needs_clean {return;} if !self.needs_clean {return;}
// Look thru the orphanage and try to find parents // Look thru the orphanage and try to find parents
for orphan in &self.orphanage { for orphan in &self.orphanage {
if let Some(parent_id) = orphan.parent_id { if let Some(parent_id) = orphan.parent_id {
if let Some(found) = Self::search(&mut self.channels, &parent_id) { if let Some(found) = Self::search_by_id(&mut self.channels, &parent_id) {
found.children.push(Channel { this: orphan.clone(), children: Vec::new() }); found.children.push(Channel::new(orphan.clone()));
} else { } else {
panic!("⚠️ Couldn't find parent for orphan!"); panic!("⚠️ Couldn't find parent for orphan!");
} }
@ -120,50 +133,83 @@ impl Server {
self.orphanage.clear(); self.orphanage.clear();
self.needs_clean = false; self.needs_clean = false;
} }
/// Scrapes messages for all the channels in `self`.
async fn scrape_all(&mut self) {
let settings = GetMessages::default().limit(5);
let cache: (&Arc<Cache>, &Http) = (&Arc::new(Cache::new()), &Http::new(&crate::ENV.token));
walk_channels(&mut self.channels, cache, settings).await;
// recursive walk thru the channels
async fn walk_channels(all: &mut Vec<Channel>, cache: impl CacheHttp + Clone, settings: GetMessages) {
for channel in all {
// get the messages
match channel.this.messages(cache.clone(), settings).await {
Ok(mesgs) => {
// store messages in our server object
channel.messages = mesgs;
if channel.messages.is_empty() {
eprintln!("{} was empty - (Or incorrect permissions)", channel.this.name);
} }
},
Err(_) => todo!(),
}
// Clone *should* be cheap - it's Arc under the hood
walk_channels(&mut channel.children, cache.clone(), settings).await;
}
}
}
/// Counts the messages stored on every channel in `self`, nested children included.
///
/// Only gives relevant data after `scrape_all()` has run; channels start out with an
/// empty message list, so before that the result is 0.
fn message_count(&self) -> usize {
    // A channel contributes its own stored messages plus everything beneath it.
    fn walk(channels: &[Channel]) -> usize {
        channels
            .iter()
            .map(|channel| channel.messages.len() + walk(&channel.children))
            .sum()
    }
    walk(&self.channels)
}
}
#[poise::command(slash_command, rename = "scrape_all", guild_only)]
pub async fn scrape_all(ctx: Context<'_>) -> Result<(), Error> {
let guild = ctx.guild_id().unwrap().to_partial_guild(ctx.serenity_context()).await.unwrap();
if let Ok(map) = guild.channels(ctx.http()).await {
let mut server = index(map).await;
server.scrape_all().await;
let _ = ctx.reply(&format!("Scraped {} messages", server.message_count())).await;
}
Ok(())
}
/// Builds the server's topology from a guild's channel map, then runs `clean()` on it.
async fn index(map: HashMap<ChannelId, GuildChannel>) -> Server {
    let mut server = Server::new();
    // The map keys are ignored; only the channel values are added.
    for channel in map.into_values() {
        server.add(channel);
        // TODO take note of position
        // TODO take note of vc user limit
    }
    server.clean();
    server
}
// NOTE!!! Make sure these names in quotes are lowercase! // NOTE!!! Make sure these names in quotes are lowercase!
#[poise::command(slash_command, rename = "index", guild_only)] #[poise::command(slash_command, rename = "index", guild_only)]
pub async fn index(ctx: Context<'_>) -> Result<(), Error> { pub async fn index_cmd(ctx: Context<'_>) -> Result<(), Error> {
let guild = ctx.guild_id().unwrap().to_partial_guild(ctx.serenity_context()).await.unwrap(); let guild = ctx.guild_id().unwrap().to_partial_guild(ctx.serenity_context()).await.unwrap();
match guild.channels(ctx.http()).await { match guild.channels(ctx.http()).await {
Ok(ok) => { Ok(ok) => {
let server = index(ok).await;
let mut server = Server::new(); let _ = ctx.reply(server.to_string()).await;
// iterate thru all channels
ok.into_iter().for_each(|(_id, current)| {
match current.kind {
poise::serenity_prelude::ChannelType::Text => {
server.add(current);
// current.position,
},
poise::serenity_prelude::ChannelType::Private => todo!(),
poise::serenity_prelude::ChannelType::Voice => {
server.add(current);
// current.user_limit,
// current.parent_id,
},
poise::serenity_prelude::ChannelType::GroupDm => todo!(),
poise::serenity_prelude::ChannelType::Category => {
server.add(current);
},
poise::serenity_prelude::ChannelType::News => todo!(),
poise::serenity_prelude::ChannelType::NewsThread => todo!(),
poise::serenity_prelude::ChannelType::PublicThread => todo!(),
poise::serenity_prelude::ChannelType::PrivateThread => todo!(),
poise::serenity_prelude::ChannelType::Stage => todo!(),
poise::serenity_prelude::ChannelType::Directory => todo!(),
poise::serenity_prelude::ChannelType::Forum => todo!(),
poise::serenity_prelude::ChannelType::Unknown(_) => todo!(),
_ => todo!(),
}
});
server.clean();
println!("{}", server);
}, },
Err(_) => todo!(), Err(_) => todo!(),
} }

View File

@ -1,20 +1,25 @@
use once_cell::sync::Lazy;
use poise::serenity_prelude::{self as serenity, GatewayIntents}; use poise::serenity_prelude::{self as serenity, GatewayIntents};
mod command; mod command;
pub struct Data {} // User data, which is stored and accessible in all command invocations pub struct Data {} // User data, which is stored and accessible in all command invocations
type Context<'a> = poise::Context<'a, Data, anyhow::Error>; type Context<'a> = poise::Context<'a, Data, anyhow::Error>;
static ENV: Lazy<BotEnv> = Lazy::new(|| {
read_env()
});
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let env = read_env();
// Generate sick text like this: // Generate sick text like this:
// http://www.patorjk.com/software/taag/#p=testall&f=Graffiti&t=hello%20world // http://www.patorjk.com/software/taag/#p=testall&f=Graffiti&t=hello%20world
println!(r#" println!(r#"
Invite this bot with: Invite this bot with:
"#); "#);
println!("https://discord.com/api/oauth2/authorize?client_id={}&permissions={}&scope=bot", println!("https://discord.com/api/oauth2/authorize?client_id={}&permissions={}&scope=bot",
env.id, ENV.id,
env.intents.bits(), ENV.intents.bits(),
); );
print!("\n"); print!("\n");
@ -22,7 +27,8 @@ async fn main() {
let framework = poise::Framework::builder() let framework = poise::Framework::builder()
.options(poise::FrameworkOptions { .options(poise::FrameworkOptions {
commands: vec![ commands: vec![
command::index(), command::index_cmd(),
command::scrape_all(),
], ],
..Default::default() ..Default::default()
}) })
@ -38,7 +44,7 @@ async fn main() {
.build(); .build();
// Start the Bot. // Start the Bot.
let client = serenity::ClientBuilder::new(env.token, env.intents) let client = serenity::ClientBuilder::new(&ENV.token, ENV.intents)
.framework(framework) .framework(framework)
.await; .await;