Rewrite scrape code so it actually works

This commit is contained in:
Oliver Atkinson 2024-07-25 13:16:46 -06:00
parent b7c9a9b0e3
commit 54c533abff

View File

@ -2,8 +2,8 @@ use std::{collections::HashMap, fmt::Display, sync::Arc};
use crate::Context;
use anyhow::Error;
use poise::serenity_prelude::{Cache, CacheHttp, ChannelId, ChannelType, GetMessages, GuildChannel, Http, Message};
use tracing::error;
use poise::{serenity_prelude::{Cache, CacheHttp, ChannelId, ChannelType, GetMessages, GuildChannel, Http, Message}, CreateReply};
use tracing::{debug, error, trace};
struct Server {
channels: Vec<Channel>,
@ -81,7 +81,6 @@ impl Server {
}
}
// TODO this might be broken
fn search_by_id<'a>(target: &'a mut Vec<Channel>, find: &ChannelId) -> Option<&'a mut Channel> {
for child in target {
if child.this.id == *find {
@ -137,28 +136,44 @@ impl Server {
/// Scrapes messages for all the channels in `self`.
async fn scrape_all(&mut self) {
let settings = GetMessages::default().limit(5);
let cache: (&Arc<Cache>, &Http) = (&Arc::new(Cache::new()), &Http::new(&crate::ENV.token));
walk_channels(&mut self.channels, cache, settings).await;
walk_channels(&mut self.channels, cache).await;
// recursive walk thru the channels
async fn walk_channels(all: &mut Vec<Channel>, cache: impl CacheHttp + Clone, settings: GetMessages) {
/// Recursively walks through the channel tree
async fn walk_channels(all: &mut Vec<Channel>, cache: impl CacheHttp + Clone) {
let settings = GetMessages::default().limit(5);
for channel in all {
// get the messages
match Box::pin(channel.this.messages(cache.clone(), settings)).await {
Ok(mesgs) => {
// store messages in our server object
channel.messages = mesgs;
if channel.messages.is_empty() {
error!("{} was empty - (Or incorrect permissions)", channel.this.name);
}
},
Err(e) => {
error!("{}", e);
},
}
// Clone *should* be cheap - it's Arc under the hood
Box::pin(walk_channels(&mut channel.children, cache.clone(), settings)).await;
// get the messages
get_messages(channel, cache.clone(), settings).await;
}
/// Get all messages for one channel and its children
async fn get_messages(channel: &mut Channel, cache: impl CacheHttp + Clone, settings: GetMessages) {
// Loop through all the messages in the channel in batches.
// Adding each batch to the current channel's messages the whole time.
let mut last_id = channel.this.last_message_id;
while let Some(last) = last_id {
match channel.this.messages(cache.clone(), settings.before(last)).await {
Ok(mut ok) => {
if ok.is_empty() {
debug!("Reached the beginning of {}", channel.this.name);
// Stop the loop if there are no more messages.
last_id = None;
} else {
trace!("Adding {} messages to \"{}\"", ok.len(), channel.this.name);
channel.messages.append(&mut ok);
// extract message id
if let Some(l) = channel.messages.last() {
last_id = Some(l.id);
}
}
},
Err(e) => error!("Error while trying to get messages - {e}"),
}
}
// Then recurse into children channels
Box::pin(walk_channels(&mut channel.children, cache.clone())).await;
}
}
}
@ -183,10 +198,14 @@ pub async fn scrape_all(ctx: Context<'_>) -> Result<(), Error> {
let guild = ctx.guild_id().unwrap().to_partial_guild(ctx.serenity_context()).await.unwrap();
if let Ok(map) = guild.channels(ctx.http()).await {
let mut server = index(map).await;
server.scrape_all().await;
let _ = ctx.reply(&format!("Scraped {} messages", server.message_count())).await;
}
match ctx.reply("Starting scrape...").await {
Ok(ok) => {
server.scrape_all().await;
let _ = ok.edit(ctx, CreateReply::default().content(&format!("Scraped {} messages", server.message_count()))).await;
},
Err(e) => error!("{e} - While trying to reply to scrape command"),
}
}
Ok(())
}