Compare commits

...

4 Commits

Author SHA1 Message Date
Oliver Atkinson
4d3a41db43 fix copied code to be applicable to this codebase 2024-07-25 13:17:03 -06:00
Oliver Atkinson
54c533abff re-write scrape code so it actually works 2024-07-25 13:16:46 -06:00
Oliver Atkinson
b7c9a9b0e3 Fix message counting logic 2024-07-25 13:15:31 -06:00
Oliver Atkinson
79638ed324 add tracing 2024-07-25 10:56:45 -06:00
4 changed files with 202 additions and 39 deletions

131
Cargo.lock generated
View File

@ -355,6 +355,8 @@ dependencies = [
"once_cell",
"poise",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]]
@ -731,6 +733,12 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.155"
@ -759,6 +767,15 @@ version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "memchr"
version = "2.7.4"
@ -816,6 +833,16 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num-conv"
version = "0.1.0"
@ -856,6 +883,12 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.3"
@ -1014,8 +1047,17 @@ checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
"regex-automata 0.4.7",
"regex-syntax 0.8.4",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
@ -1026,9 +1068,15 @@ checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"regex-syntax 0.8.4",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.4"
@ -1321,6 +1369,15 @@ dependencies = [
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "skeptic"
version = "0.13.7"
@ -1466,6 +1523,16 @@ dependencies = [
"syn 2.0.72",
]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "time"
version = "0.3.36"
@ -1626,6 +1693,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
@ -1756,6 +1853,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "version_check"
version = "0.9.4"
@ -1891,6 +1994,22 @@ dependencies = [
"rustls-pki-types",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.8"
@ -1900,6 +2019,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.52.0"

View File

@ -15,3 +15,5 @@ poise = { version = "0.6", features = ["cache"] }
dotenv = "0.15.0"
anyhow = "1.0.75"
once_cell = "1.19.0"
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }

View File

@ -2,7 +2,8 @@ use std::{collections::HashMap, fmt::Display, sync::Arc};
use crate::Context;
use anyhow::Error;
use poise::serenity_prelude::{Cache, CacheHttp, ChannelId, ChannelType, GetMessages, GuildChannel, Http, Message};
use poise::{serenity_prelude::{Cache, CacheHttp, ChannelId, ChannelType, GetMessages, GuildChannel, Http, Message}, CreateReply};
use tracing::{debug, error, trace};
struct Server {
channels: Vec<Channel>,
@ -80,7 +81,6 @@ impl Server {
}
}
// TODO this might be broken
fn search_by_id<'a>(target: &'a mut Vec<Channel>, find: &ChannelId) -> Option<&'a mut Channel> {
for child in target {
if child.this.id == *find {
@ -136,31 +136,47 @@ impl Server {
/// Scrapes messages for all the channels in `self`.
async fn scrape_all(&mut self) {
let settings = GetMessages::default().limit(5);
let cache: (&Arc<Cache>, &Http) = (&Arc::new(Cache::new()), &Http::new(&crate::ENV.token));
walk_channels(&mut self.channels, cache, settings).await;
walk_channels(&mut self.channels, cache).await;
// recursive walk thru the channels
async fn walk_channels(all: &mut Vec<Channel>, cache: impl CacheHttp + Clone, settings: GetMessages) {
/// Recursive walk thru the channels
async fn walk_channels(all: &mut Vec<Channel>, cache: impl CacheHttp + Clone) {
let settings = GetMessages::default().limit(5);
for channel in all {
// get the messages
match Box::pin(channel.this.messages(cache.clone(), settings)).await {
Ok(mesgs) => {
// store messages in our server object
channel.messages = mesgs;
if channel.messages.is_empty() {
eprintln!("{} was empty - (Or incorrect permissions)", channel.this.name);
}
},
Err(e) => {
eprintln!("{}", e);
},
}
// Clone *should* be cheap - it's Arc under the hood
Box::pin(walk_channels(&mut channel.children, cache.clone(), settings)).await;
// get the messages
get_messages(channel, cache.clone(), settings).await;
}
/// Get all messages for 1 channel and children
async fn get_messages(channel: &mut Channel, cache: impl CacheHttp + Clone, settings: GetMessages) {
// Loop thru all the messages in the channel in batches.
// Adding each batch to the current channel's messages the whole time.
let mut last_id = channel.this.last_message_id;
while let Some(last) = last_id {
match channel.this.messages(cache.clone(), settings.before(last)).await {
Ok(mut ok) => {
if ok.is_empty() {
debug!("Reached the beginning of {}", channel.this.name);
// Stop the loop if there are no more messages.
last_id = None;
} else {
trace!("Adding {} messages to \"{}\"", ok.len(), channel.this.name);
channel.messages.append(&mut ok);
// extract message id
if let Some(l) = channel.messages.last() {
last_id = Some(l.id);
}
}
},
Err(e) => error!("Error while trying to get messages - {e}"),
}
}
// Then recurse into children channels
Box::pin(walk_channels(&mut channel.children, cache.clone())).await;
}
}
}
}
/// Walk thru all the channels and count the saved messages. Will only give relevant data if
/// done after `scrape_all()`.
@ -168,6 +184,7 @@ impl Server {
fn walk(this: &Vec<Channel>) -> usize {
let mut total = 0;
for channel in this {
total += channel.messages.len();
total += walk(&channel.children);
};
total
@ -181,10 +198,14 @@ pub async fn scrape_all(ctx: Context<'_>) -> Result<(), Error> {
let guild = ctx.guild_id().unwrap().to_partial_guild(ctx.serenity_context()).await.unwrap();
if let Ok(map) = guild.channels(ctx.http()).await {
let mut server = index(map).await;
server.scrape_all().await;
let _ = ctx.reply(&format!("Scraped {} messages", server.message_count())).await;
}
match ctx.reply("Starting scrape...").await {
Ok(ok) => {
server.scrape_all().await;
let _ = ok.edit(ctx, CreateReply::default().content(&format!("Scraped {} messages", server.message_count()))).await;
},
Err(e) => error!("{e} - While trying to reply to scrape command"),
}
}
Ok(())
}
@ -195,8 +216,8 @@ async fn index(map: HashMap<ChannelId, GuildChannel>) -> Server {
map.into_iter().for_each(|(_id, current)| {
// println!("{} {} {:?}", current.name, current.id, current.parent_id);
server.add(current);
// TODO take note of position
// Take node of vc user limit
// TODO Take note of position
// TODO Take note of vc user limit
});
server.clean();
server

View File

@ -1,5 +1,7 @@
use once_cell::sync::Lazy;
use poise::serenity_prelude::{self as serenity, GatewayIntents};
use tracing::{debug, error, info, warn, Level};
use tracing_subscriber::EnvFilter;
mod command;
pub struct Data {} // User data, which is stored and accessible in all command invocations
@ -12,16 +14,31 @@ static ENV: Lazy<BotEnv> = Lazy::new(|| {
#[tokio::main]
async fn main() {
// Start the tracing subscriber
let filter = EnvFilter::builder()
.parse("discord_egress=trace,tokio=warn")
.expect("Could not create env filter.")
;
tracing_subscriber::fmt::fmt()
.with_max_level(Level::TRACE)
.with_target(true)
.with_env_filter(filter)
.with_thread_ids(false)
.with_file(false)
.without_time()
.init();
// Generate sick text like this:
// http://www.patorjk.com/software/taag/#p=testall&f=Graffiti&t=hello%20world
println!(r#"
info!(r#"
Invite this bot with:
"#);
println!("https://discord.com/api/oauth2/authorize?client_id={}&permissions={}&scope=bot",
info!("https://discord.com/api/oauth2/authorize?client_id={}&permissions={}&scope=bot",
ENV.id,
ENV.intents.bits(),
);
print!("\n");
info!("\n");
// Setup framework
let framework = poise::Framework::builder()
@ -65,9 +82,7 @@ fn read_env() -> BotEnv {
// ==================== ID ===========================
let id: String = std::env::var(DISCORD_ID)
.unwrap_or_else(|_| {
println!(r#"
WARN: Missing {DISCORD_ID}
> This isn't really that problematic, just that the generated invite link won't work."#);
warn!("Missing {DISCORD_ID} This isn't really that problematic, just that the generated invite link won't work.");
String::from("")
});
// ==================== Token ========================
@ -76,8 +91,8 @@ WARN: Missing {DISCORD_ID}
// ==================== Intents ======================
let intents_env: String = std::env::var(DISCORD_INTENTS)
.unwrap_or_else(|msg| {
println!("DEBUG: what is: {msg}");
println!("ERROR: Missing {DISCORD_INTENTS}");
debug!("What is: {msg}");
error!("Missing {DISCORD_INTENTS}");
"0".to_string()
});
// ==================== Parse Intents =================
@ -85,7 +100,7 @@ WARN: Missing {DISCORD_ID}
let intents_u64 = intents_env.parse::<u64>().unwrap_or(0u64);
let intents_truncated: GatewayIntents = GatewayIntents::from_bits_truncate(intents_u64);
if intents_truncated.bits() != intents_u64 {
println!("WARN: Intents integer got truncated from {} to {}!", intents_u64, intents_truncated.bits())
warn!("Intents integer got truncated from {} to {}!", intents_u64, intents_truncated.bits())
};
BotEnv {
intents: intents_truncated,