Compare commits

...

24 Commits

Author SHA1 Message Date
66e72aeb54 surreal testing
All checks were successful
Test Rust project / test (ubuntu-latest, stable) (push) Successful in 4m29s
2024-07-29 16:08:04 -06:00
a701716958 surreal v1 2024-07-29 15:00:38 -06:00
8e53f2a234 add deserializing 2024-07-29 15:00:10 -06:00
9492f378d3 re-arrange 2024-07-29 13:38:51 -06:00
c5cb4eb524 don't serialize empty vecs
All checks were successful
Test Rust project / test (ubuntu-latest, stable) (push) Successful in 2m47s
2024-07-29 12:27:22 -06:00
620b855f1d update deps 2024-07-29 12:27:10 -06:00
da7de4a19e not going to dockerize this 2024-07-29 12:09:39 -06:00
3f3159ba23 git settings updates 2024-07-29 12:09:24 -06:00
c4b68a130e add clippy
All checks were successful
Test Rust project / test (ubuntu-latest, nightly) (push) Successful in 2m47s
Test Rust project / test (ubuntu-latest, stable) (push) Successful in 2m44s
2024-07-29 08:57:47 -06:00
f74cca0083 Merge branch 'main' of https://git.oliveratkinson.net/Oliver/discord-egress 2024-07-29 08:56:28 -06:00
88dff426b1 use custom poise fork until it gets updated 2024-07-29 08:55:56 -06:00
f54b60b50b Update .gitea/workflows/clippy.yaml
Some checks failed
Test Rust project / test (ubuntu-latest, nightly) (push) Failing after 26s
Test Rust project / test (ubuntu-latest, stable) (push) Failing after 26s
2024-07-29 14:43:42 +00:00
a58d0d3969 Update .gitea/workflows/clippy.yaml
Some checks failed
Test Rust project / test (ubuntu-latest, nightly) (push) Failing after 4s
Test Rust project / test (ubuntu-latest, stable) (push) Failing after 4s
2024-07-29 14:35:49 +00:00
6df1e9fa7c Update .gitea/workflows/clippy.yaml 2024-07-29 14:34:56 +00:00
43ca1e6089 maybe?
Some checks failed
Test Rust project / test (ubuntu-latest, nightly) (push) Failing after 37s
Test Rust project / test (ubuntu-latest, stable) (push) Failing after 24s
2024-07-29 08:31:33 -06:00
Oliver Atkinson
09df72de7b trying new git workflow
Some checks failed
Test Rust project / test (ubuntu-latest, nightly) (push) Failing after 15s
Test Rust project / test (ubuntu-latest, stable) (push) Failing after 4s
2024-07-29 08:07:26 -06:00
460e832cbf pull env every time
Some checks failed
Cargo Build & Test / Rust project - latest (nightly) (push) Failing after 54s
2024-07-27 15:06:42 -06:00
369553f7c6 env not working :(
Some checks failed
Cargo Build & Test / Rust project - latest (nightly) (push) Failing after 27s
2024-07-27 15:02:43 -06:00
86da8a032c pull in env
Some checks failed
Cargo Build & Test / Rust project - latest (nightly) (push) Failing after 29s
2024-07-27 15:00:14 -06:00
d597b90fc2 manually install rust
Some checks failed
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 5s
Cargo Build & Test / Rust project - latest (nightly) (push) Failing after 36s
2024-07-27 14:58:38 -06:00
cb79724a48 rust support?
Some checks failed
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 5s
Cargo Build & Test / Rust project - latest (beta) (push) Failing after 3s
Cargo Build & Test / Rust project - latest (nightly) (push) Failing after 3s
Cargo Build & Test / Rust project - latest (stable) (push) Failing after 3s
2024-07-27 14:52:36 -06:00
65dd0d5c1d testing actions
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 59s
2024-07-27 14:46:30 -06:00
Oliver Atkinson
e005918cc3 add serde 2024-07-25 15:07:09 -06:00
Oliver Atkinson
8dbc7ee863 Put scrape behind owner privileges
I don't want normies trying to scrape the whole server
2024-07-25 14:42:15 -06:00
8 changed files with 2395 additions and 129 deletions

View File

@@ -0,0 +1,20 @@
# CI workflow: on every push, build the crate, run its tests, then lint it.
name: Test Rust project
on: [push]
jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      # Single-entry matrix for now; add more OSes / toolchains here to fan out.
      matrix:
        os: [ubuntu-latest]
        rust: [stable]
    steps:
      # Install the toolchain selected by the matrix.
      - uses: https://git.oliveratkinson.net/Oliver/setup-rust-action@master
        with:
          rust-version: ${{ matrix.rust }}
      # NOTE(review): both actions track `master`; consider pinning a tag or commit
      # so a change upstream cannot silently alter the build.
      - uses: actions/checkout@master
      - name: Run tests
        run: cargo test --verbose
      # Lints are reported but do not fail the job unless they are errors.
      - name: Clippy
        run: cargo clippy

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
/target
/.vscode
.env
server.json

2055
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -10,10 +10,12 @@ edition = "2021"
[dependencies]
tokio = { version = "1.21.2", features = ["macros", "rt-multi-thread"] }
# songbird = { version = "0.3.2", features = ["yt-dlp"] }
poise = { version = "0.6", features = ["cache"], path="../poise" }
poise = { features = ["cache"], git="https://github.com/Rushmore75/poise.git", rev="6afaf44a791cd3ff590c89fda26c94b4411b3266" }
dotenv = "0.15.0"
anyhow = "1.0.75"
once_cell = "1.19.0"
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
serde_json = "1.0.120"
serde = { version = "1.0.204", features = ["derive"] }
surrealdb = "1.5.4"

View File

@@ -1,15 +0,0 @@
# Build the executable
FROM rustlang/rust:nightly as builder
ENV RUSTFLAGS=""
WORKDIR /bot
COPY . .
RUN cargo build --release
# Now make the runtime container
FROM debian:bookworm-slim
COPY --from=builder /bot/target/release/discord_egress /usr/local/bin/discord_egress
COPY Cargo.lock /
CMD ["/usr/local/bin/discord_egress"]

View File

@@ -1,30 +0,0 @@
version: '3.1'
name: discord-egress
services:
bot:
container_name: discord-egress_bot
build: https://git.oliveratkinson.net/Oliver/discord-egress.git
restart: always
environment:
# These will read from the .env file
DISCORD_INTENTS: ${DISCORD_INTENTS}
DISCORD_TOKEN: ${DISCORD_TOKEN}
DISCORD_ID: ${DISCORD_ID}
networks:
- external
- internal
redis:
container_name: discord-egress_redis
image: redis
restart: always
ports:
- 6379:6379
networks:
- internal
networks:
internal:
driver: bridge
internal: true
external:
driver: bridge

View File

@@ -1,23 +1,26 @@
use std::{collections::HashMap, fmt::Display, fs, sync::Arc};
use std::{collections::HashMap, fmt::Display, fs, hint::black_box, sync::Arc};
use crate::Context;
use anyhow::Error;
use poise::{serenity_prelude::{Cache, CacheHttp, ChannelId, ChannelType, GetMessages, GuildChannel, Http, Message}, CreateReply};
use serde::Serialize;
use poise::{
serenity_prelude::{
Cache, CacheHttp, ChannelId, ChannelType, GetMessages, GuildChannel, Http, Message,
},
CreateReply,
};
use serde::{Deserialize, Serialize};
use surrealdb::{engine::remote::ws::Ws, opt::auth::Root, sql::Thing, Surreal};
use tokio::time::Instant;
use tracing::{debug, error, info, trace};
#[derive(Serialize)]
struct Server {
channels: Vec<Channel>,
orphanage: Vec<GuildChannel>,
needs_clean: bool,
}
#[derive(Serialize)]
#[derive(Serialize, Deserialize)]
struct Channel {
this: GuildChannel,
#[serde(skip_serializing_if = "Vec::is_empty")]
#[serde(default)]
children: Vec<Channel>,
#[serde(skip_serializing_if = "Vec::is_empty")]
#[serde(default)]
messages: Vec<Message>,
}
@@ -30,16 +33,34 @@ impl Channel {
messages: Vec::new(),
}
}
/// Heuristic category check: the node holds child channels but carries no
/// messages of its own. The Discord channel kind is not consulted.
fn is_category(&self) -> bool {
    let has_children = !self.children.is_empty();
    let has_messages = !self.messages.is_empty();
    has_children && !has_messages
}
}
/// A scraped guild: its name plus the channel tree that was built from it.
#[derive(Serialize, Deserialize)]
struct Server {
    /// Guild name; doubles as the SurrealDB database name on import.
    /// Deserializes to an empty string when the field is absent.
    #[serde(default)]
    name: String,
    /// Root nodes of the channel tree; each node nests its own children.
    channels: Vec<Channel>,
    /// Channels whose parent was not in the tree yet when they were inserted.
    /// Left out of the serialized output while empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    orphanage: Vec<GuildChannel>,
    /// Raised when a channel is parked in `orphanage`. Never serialized, so a
    /// deserialized `Server` always starts with `false`.
    #[serde(default, skip_serializing)]
    needs_clean: bool,
}
impl Display for Server {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn print(f: &mut std::fmt::Formatter<'_>, tab: usize, channel: &Vec<Channel>) {
for i in channel {
for _ in 0..tab { let _ = write!(f, "\t"); }
let _ = writeln!(f, "{} {}", prefix(i.this.kind),i.this.name);
print(f, tab+1, &i.children);
for _ in 0..tab {
let _ = write!(f, "\t");
}
let _ = writeln!(f, "{} {}", prefix(i.this.kind), i.this.name);
print(f, tab + 1, &i.children);
}
}
@@ -63,11 +84,10 @@ impl Display for Server {
print(f, 0, &self.channels);
if self.needs_clean {
let _ = writeln!(f, "Orphans: (please clean() before displaying...)");
for i in &self.orphanage {
let _ = write!(f, "{} {},", prefix(i.kind),i.name);
let _ = write!(f, "{} {},", prefix(i.kind), i.name);
}
}
@@ -76,12 +96,12 @@ impl Display for Server {
}
impl Server {
fn new() -> Self {
fn new(name: impl Into<String>) -> Self {
Self {
name: name.into(),
channels: Vec::new(),
orphanage: Vec::new(),
needs_clean: false
needs_clean: false,
}
}
@@ -105,12 +125,12 @@ impl Server {
match Self::search_by_id(&mut self.channels, parent_id) {
Some(parent_node) => {
parent_node.children.push(Channel::new(insert));
},
}
None => {
// couldn't find parent, store somewhere else until it's parent is added...
self.orphanage.push(insert);
self.needs_clean = true;
},
}
}
} else {
self.channels.push(Channel::new(insert));
@@ -119,7 +139,9 @@ impl Server {
/// Cleans out the orphan channels, finding them parents. You'll want to use this before displaying anything.
fn clean(&mut self) {
if !self.needs_clean {return;}
if !self.needs_clean {
return;
}
// Look thru the orphanage and try to find parents
for orphan in &self.orphanage {
@@ -156,12 +178,20 @@ impl Server {
}
/// Get all messages for 1 channel and children
async fn get_messages(channel: &mut Channel, cache: impl CacheHttp + Clone, settings: GetMessages) {
async fn get_messages(
channel: &mut Channel,
cache: impl CacheHttp + Clone,
settings: GetMessages,
) {
// Loop thru all the messages in the channel in batches.
// Adding each batch to the current channel's messages the whole time.
let mut last_id = channel.this.last_message_id;
while let Some(last) = last_id {
match channel.this.messages(cache.clone(), settings.before(last)).await {
match channel
.this
.messages(cache.clone(), settings.before(last))
.await
{
Ok(mut ok) => {
if ok.is_empty() {
debug!("Reached the beginning of {}", channel.this.name);
@@ -175,12 +205,15 @@ impl Server {
last_id = Some(l.id);
}
}
},
}
Err(e) => {
error!("While reading messages in \"{}\" before `{}` - {e}", channel.this.name, last);
error!(
"While reading messages in \"{}\" before `{}` - {e}",
channel.this.name, last
);
// Stop reading this channel on an error.
last_id = None;
},
}
}
}
// Then recurse into children channels
@@ -197,17 +230,228 @@ impl Server {
for channel in this {
total += channel.messages.len();
total += walk(&channel.children);
};
}
total
}
walk(&self.channels)
}
async fn to_surreal(&self) -> surrealdb::Result<()> {
trace!("Connecting to surrealdb...");
// Connect to the server
let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
db.signin(Root {
username: "root",
password: "root",
})
.await?;
db.use_ns("egress").use_db(self.name.clone()).await?;
// =========================================================
// Ingress data
// Data will only be in three layers
// Layer 1: Categories (no parent)
// Layer 2: Channels (might have parent)
// Layer 3: Messages (has parent)
trace!("Starting ingress...");
for cat in self.channels.iter() {
match cat.this.kind {
ChannelType::Text => {
// This is a text channel
#[derive(Serialize)]
struct ChannelWrapper {
name: String,
nsfw: bool,
discord_id: u64,
discord_parent_id: Option<u64>,
topic: String,
}
let chan = &cat.this;
let dpi = if let Some(val) = chan.parent_id {Some(val.get())} else { None };
let new_channel: Vec<Thing> = db
.create("channel")
.content(ChannelWrapper {
name: chan.name.to_owned(),
nsfw: chan.nsfw,
discord_id: chan.id.get(),
discord_parent_id: dpi,
topic: chan.topic.to_owned().unwrap_or(String::new()),
})
.await?;
for msg in cat.messages.iter() {
#[derive(Serialize)]
struct Author {
nickname: String,
username: String,
/// B64 encoded string of image (for now)
avatar: String,
id: u64,
}
#[derive(Serialize)]
struct Attachment {
content_type: String,
filename: String,
url: String,
}
#[derive(Serialize)]
struct Reaction {
count: u64,
emoji: String,
}
#[derive(Serialize)]
struct MessageWrapper {
// FIXME learn how to do references
parent: Thing,
author: Author,
content: String,
utc_timestamp: String,
mentions: Vec<u64>,
attachments: Vec<Attachment>,
reactions: Vec<Reaction>,
pinned: bool,
}
let _: Vec<Thing> = db
.create("message")
.content(MessageWrapper {
parent: new_channel[0].clone(),
author: Author {
id: msg.author.id.get(),
nickname: msg.author.name.to_owned(),
username: msg.author.global_name.clone().unwrap_or(String::new()),
avatar: {
match msg.author.avatar {
Some(hash) => {
format!(
"https://cdn.discordapp.com/avatars/{}/{}.webp",
msg.author.id,
hash,
)
},
None => String::new(),
}
}
},
content: msg.content.clone(),
utc_timestamp: msg.timestamp.to_utc().to_string(),
mentions: msg.mentions.iter().map(|f| f.id.get()).collect(),
attachments: msg.attachments.iter().map(|f| Attachment {
content_type: f.content_type.clone().unwrap_or(String::new()),
filename: f.filename.to_owned(),
url: f.url.to_owned(),
}).collect(),
reactions: msg.reactions.iter().map(|f| Reaction {
count: f.count,
emoji: f.reaction_type.as_data(),
}).collect(),
pinned: msg.pinned,
})
.await?;
}
}
ChannelType::Private => todo!(),
ChannelType::Voice => todo!(),
ChannelType::GroupDm => todo!(),
ChannelType::Category => todo!(),
ChannelType::News => todo!(),
ChannelType::NewsThread => todo!(),
ChannelType::PublicThread => todo!(),
ChannelType::PrivateThread => todo!(),
ChannelType::Stage => todo!(),
ChannelType::Directory => todo!(),
ChannelType::Forum => todo!(),
ChannelType::Unknown(_) => todo!(),
_ => todo!(),
}
// TODO learn why this is a vec
// Do the first iteration of channels a bit different, so as to name it "category".
let new_category: Vec<Thing> = db.create("category").content(&cat.this).await?;
import_messages(&cat.messages, &new_category[0], &db).await?;
// Ok, now automatically recurse the rest of the structure and auto import as channels
// and messages.
import_channel(&cat.children, &new_category[0], &db).await?;
}
async fn import_channel(
channels: &Vec<Channel>,
parent: &Thing,
db: &Surreal<surrealdb::engine::remote::ws::Client>,
) -> surrealdb::Result<()> {
for channel in channels.iter() {
trace!("Importing channel \"{}\"", channel.this.name);
#[derive(Serialize)]
struct ChannelWrapper<'a, 'b> {
channel: &'a GuildChannel,
surreal_parent: &'b Thing,
}
let new_channel: Vec<Thing> = db
.create("channel")
.content(ChannelWrapper {
channel: &channel.this,
surreal_parent: &parent,
})
.await?;
import_messages(&channel.messages, &new_channel[0], &db).await?;
// async recursion - thus box
Box::pin(import_channel(&channel.children, &new_channel[0], &db)).await?;
}
Ok(())
}
async fn import_messages(
msgs: &Vec<Message>,
parent: &Thing,
db: &Surreal<surrealdb::engine::remote::ws::Client>,
) -> surrealdb::Result<()> {
trace!("Importing {} messages...", msgs.len());
for msg in msgs.iter() {
#[derive(Serialize)]
struct MessageWrapper<'a, 'b> {
message: &'a Message,
surreal_parent: &'b Thing,
}
let created: Vec<Thing> = db
.create("message")
.content(MessageWrapper {
message: &msg,
surreal_parent: &parent,
})
.await?;
trace!("Imported message {:?}", created);
}
Ok(())
}
// Data is all in
// =========================================================
Ok(())
}
}
#[poise::command(slash_command, rename = "scrape_all", guild_only)]
pub async fn scrape_all(ctx: Context<'_>) -> Result<(), Error> {
let guild = ctx.guild_id().unwrap().to_partial_guild(ctx.serenity_context()).await.unwrap();
#[poise::command(slash_command, rename = "scrape_all", guild_only, owners_only)]
pub async fn scrape_all(ctx: Context<'_>, pretty_print: bool) -> Result<(), Error> {
let guild = ctx
.guild_id()
.unwrap()
.to_partial_guild(ctx.serenity_context())
.await
.unwrap();
let invoker = ctx.author().name.clone();
if let Some(nickname) = ctx.author().nick_in(ctx.http(), guild.id).await {
@@ -217,7 +461,7 @@ pub async fn scrape_all(ctx: Context<'_>) -> Result<(), Error> {
}
if let Ok(map) = guild.channels(ctx.http()).await {
let mut server = index(map).await;
let mut server = index(map, guild.name).await;
match ctx.reply("Starting scrape...").await {
Ok(ok) => {
let start = Instant::now();
@@ -225,32 +469,48 @@ pub async fn scrape_all(ctx: Context<'_>) -> Result<(), Error> {
let end = start.elapsed().as_millis();
let msg_count = server.message_count();
match serde_json::to_string(&server) {
Ok(ok) => {
if let Err(e) = fs::write("server.json", ok) {
error!("Problem writing server to disk: {e}");
}
},
Err(err) => {
error!("Trying to serialize server: {err}");
},
}
if let Err(e) = server.to_surreal().await {
error!("{e}");
};
// let print = if pretty_print {
// serde_json::to_string_pretty(&server)
// } else {
// serde_json::to_string(&server)
// };
// match print {
// Ok(ok) => {
// if let Err(e) = fs::write("server.json", ok) {
// error!("Problem writing server to disk: {e}");
// }
// },
// Err(err) => {
// error!("Trying to serialize server: {err}");
// },
// }
// Done. Print stats.
let _ = ok.edit(ctx, CreateReply::default().content(
&format!("Done. Stats: \n```toml\nMessages saved: {msg_count}\nElapsed time: {end}ms\n```")
)).await;
debug!("Scraped server in {}ms", end);
},
}
Err(e) => error!("{e} - While trying to reply to scrape command"),
}
}
Ok(())
}
/// Loads a previously scraped server from `server.json` in the working
/// directory and imports it into SurrealDB.
///
/// # Panics
/// Panics when the file cannot be read, does not deserialize into a `Server`,
/// or the import fails. Panicking (rather than logging) is deliberate: `main`
/// calls this before the tracing subscriber is installed, so a logged error
/// would never be seen.
pub async fn from_json() {
    let data = fs::read_to_string("server.json").expect("could not read server.json");
    let server: Server =
        serde_json::from_str(&data).expect("server.json is not a valid serialized Server");
    server
        .to_surreal()
        .await
        .expect("importing server.json into SurrealDB failed");
}
/// Get server's topology (and runs clean)
async fn index(map: HashMap<ChannelId, GuildChannel>) -> Server {
let mut server = Server::new();
async fn index(map: HashMap<ChannelId, GuildChannel>, name: impl Into<String>) -> Server {
let mut server = Server::new(name);
// iterate thru all channels
map.into_iter().for_each(|(_id, current)| {
// println!("{} {} {:?}", current.name, current.id, current.parent_id);
@@ -265,7 +525,12 @@ async fn index(map: HashMap<ChannelId, GuildChannel>) -> Server {
// NOTE!!! Make sure these names in quotes are lowercase!
#[poise::command(slash_command, rename = "index", guild_only)]
pub async fn index_cmd(ctx: Context<'_>) -> Result<(), Error> {
let guild = ctx.guild_id().unwrap().to_partial_guild(ctx.serenity_context()).await.unwrap();
let guild = ctx
.guild_id()
.unwrap()
.to_partial_guild(ctx.serenity_context())
.await
.unwrap();
let invoker = ctx.author().name.clone();
if let Some(nickname) = ctx.author().nick_in(ctx.http(), guild.id).await {
info!("{invoker} ({nickname}) is indexing {}", guild.name);
@@ -275,9 +540,9 @@ pub async fn index_cmd(ctx: Context<'_>) -> Result<(), Error> {
match guild.channels(ctx.http()).await {
Ok(ok) => {
let server = index(ok).await;
let server = index(ok, guild.name).await;
let _ = ctx.reply(server.to_string()).await;
},
}
Err(_) => todo!(),
}
Ok(())

View File

@@ -1,5 +1,7 @@
use std::collections::HashSet;
use once_cell::sync::Lazy;
use poise::serenity_prelude::{self as serenity, GatewayIntents};
use poise::serenity_prelude::{self as serenity, GatewayIntents, UserId};
use tracing::{debug, error, info, warn, Level};
use tracing_subscriber::EnvFilter;
mod command;
@@ -13,6 +15,8 @@ static ENV: Lazy<BotEnv> = Lazy::new(|| {
#[tokio::main]
async fn main() {
command::from_json().await;
return;
// Start the tracing subscriber
let filter = EnvFilter::builder()
@@ -30,14 +34,13 @@ async fn main() {
// Generate sick text like this:
// http://www.patorjk.com/software/taag/#p=testall&f=Graffiti&t=hello%20world
info!(r#"
Invite this bot with:
"#);
info!("https://discord.com/api/oauth2/authorize?client_id={}&permissions={}&scope=bot",
info!("Invite with: https://discord.com/api/oauth2/authorize?client_id={}&permissions={}&scope=bot",
ENV.id,
ENV.intents.bits(),
);
info!("\n");
let mut owners = HashSet::new();
owners.insert(UserId::new(423970006334832650));
// Setup framework
let framework = poise::Framework::builder()
@@ -46,6 +49,7 @@ async fn main() {
command::index_cmd(),
command::scrape_all(),
],
owners,
..Default::default()
})
.setup(|ctx, _ready, framework| {