improve csv parsing

This commit is contained in:
2025-11-12 13:23:06 -07:00
parent ed01b2ff70
commit 5c57c87775

View File

@@ -1,4 +1,9 @@
use std::{fmt::Display, fs, io::{Read, Write}, path::PathBuf}; use std::{
fmt::Display,
fs,
io::{Read, Write},
path::PathBuf,
};
use evalexpr::*; use evalexpr::*;
@@ -6,7 +11,6 @@ use crate::app::logic::ctx;
pub const LEN: usize = 1000; pub const LEN: usize = 1000;
pub struct Grid { pub struct Grid {
// a b c ... // a b c ...
// 0 // 0
@@ -22,9 +26,7 @@ pub struct Grid {
impl std::fmt::Debug for Grid { impl std::fmt::Debug for Grid {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Grid") f.debug_struct("Grid").field("cells", &"Too many to print").finish()
.field("cells", &"Too many to print")
.finish()
} }
} }
@@ -45,8 +47,10 @@ impl Grid {
if ext != "csv" { if ext != "csv" {
path.add_extension("csv"); path.add_extension("csv");
} }
}, }
None => {path.add_extension("csv");}, None => {
path.add_extension("csv");
}
} }
let mut f = fs::OpenOptions::new().write(true).append(false).truncate(true).create(true).open(path)?; let mut f = fs::OpenOptions::new().write(true).append(false).truncate(true).create(true).open(path)?;
@@ -57,7 +61,8 @@ impl Grid {
let mut display = cell.as_ref().map(|f| f.to_string()).unwrap_or(String::new()); let mut display = cell.as_ref().map(|f| f.to_string()).unwrap_or(String::new());
// escape quotes " -> "" // escape quotes " -> ""
let needs_escaping = display.char_indices().filter(|f| f.1==CSV_ESCAPE).map(|f| f.0).collect::<Vec<usize>>(); let needs_escaping =
display.char_indices().filter(|f| f.1 == CSV_ESCAPE).map(|f| f.0).collect::<Vec<usize>>();
for idx in needs_escaping.iter().rev() { for idx in needs_escaping.iter().rev() {
display.insert(*idx, CSV_ESCAPE); display.insert(*idx, CSV_ESCAPE);
} }
@@ -107,51 +112,7 @@ impl Grid {
let mut buf = String::new(); let mut buf = String::new();
file.read_to_string(&mut buf)?; file.read_to_string(&mut buf)?;
for (yi, line) in buf.lines().enumerate() { for (yi, line) in buf.lines().enumerate() {
let cells = Self::parse_csv_line(line);
// 1, 2, "=avg(A0,B0)", she said: """wow""",
let mut cells = Vec::new();
let mut inside_quotes = false;
let mut token = Vec::new();
let mut iter = line.as_bytes().iter().map(|f| *f as char).peekable();
while let Some(c) = iter.next() {
// we just finished
if c == CSV_DELIMITER && !inside_quotes {
if !token.is_empty() {
cells.push(Some(token.iter().collect::<String>()));
} else {
cells.push(None);
}
token.clear();
continue;
}
// start reading an escaped cell
if c == '"' {
if inside_quotes {
// we might be escaping a quote
if let Some(next) = iter.peek() {
// check if the next cell is a quote, if it is, that's because it's being escaped by the current quote
if *next == '"' {
// don't save the escape char
continue;
} else {
// escaped cell over
inside_quotes = false;
continue;
}
} else {
// we are at the end of the row, so idk if it matters anymore, as there won't be a next()
}
} else {
inside_quotes = true;
// don't save the scape char
continue;
}
}
token.push(c)
}
for (xi, cell) in cells.into_iter().enumerate() { for (xi, cell) in cells.into_iter().enumerate() {
// This gets automatically duck-typed // This gets automatically duck-typed
@@ -162,6 +123,75 @@ impl Grid {
Ok(grid) Ok(grid)
} }
/// Splits one line of CSV text into its cells.
///
/// Parsing rules:
/// - `CSV_DELIMITER` ends the current cell unless it appears inside a quoted
///   section, in which case it is kept as part of the cell.
/// - A lone `"` toggles the quoted section on or off and is not stored.
/// - A doubled quote (`""`) produces a single literal `"`, both inside and
///   outside a quoted section.
/// - An empty cell that is followed by a delimiter is returned as `None`.
/// - An empty cell at the very end of the line is not emitted at all, so an
///   empty line yields an empty `Vec`.
///
/// This function never panics: a quote that is the last character of the line
/// is handled with the same rules as any other quote.
fn parse_csv_line(line: &str) -> Vec<Option<String>> {
    // Walk Unicode scalar values. Casting raw bytes to `char` would split every
    // multi-byte UTF-8 character into several unrelated characters.
    let mut iter = line.chars().peekable();
    let mut cells = Vec::new();
    let mut token = String::new();
    let mut inside_quotes = false;
    // True right after the first quote of a doubled pair was dropped; the quote
    // that follows must then be stored literally.
    let mut is_escaped = false;

    while let Some(c) = iter.next() {
        // An unquoted delimiter finishes the current cell.
        if c == CSV_DELIMITER && !inside_quotes {
            cells.push(if token.is_empty() {
                None
            } else {
                // Moves the text out and leaves `token` empty for the next cell.
                Some(std::mem::take(&mut token))
            });
            continue;
        }

        if c == '"' {
            if is_escaped {
                // Second quote of a doubled pair: fall through and store it.
                is_escaped = false;
            } else if iter.peek() == Some(&'"') {
                // First quote of a doubled pair: drop it, keep the next one.
                is_escaped = true;
                continue;
            } else {
                // A lone quote opens or closes a quoted section. This also
                // covers a quote at the end of the line, where there is no
                // following character to inspect.
                inside_quotes = !inside_quotes;
                continue;
            }
        }

        token.push(c);
    }

    // The last cell has no trailing delimiter; keep it only if it has content.
    if !token.is_empty() {
        cells.push(Some(token));
    }

    cells
}
pub fn new() -> Self { pub fn new() -> Self {
let mut a = Vec::with_capacity(LEN); let mut a = Vec::with_capacity(LEN);
for _ in 0..LEN { for _ in 0..LEN {
@@ -199,19 +229,22 @@ impl Grid {
return Ok(val); return Ok(val);
} else if e.is_int() { } else if e.is_int() {
let i = e.as_int().expect("Value lied about being an int"); let i = e.as_int().expect("Value lied about being an int");
return Ok(i as f64) return Ok(i as f64);
} }
} }
return Err("Result is NaN".to_string()) return Err("Result is NaN".to_string());
} }
Err(e) => match e { Err(e) => match e {
EvalexprError::VariableIdentifierNotFound(e) => { EvalexprError::VariableIdentifierNotFound(e) => {
// panic!("Will not be able to parse this equation, cell {e} not found") // panic!("Will not be able to parse this equation, cell {e} not found")
return Err(format!("{e} is not a variable")) return Err(format!("{e} is not a variable"));
} }
EvalexprError::TypeError { expected: e, actual: a } => { EvalexprError::TypeError {
expected: e,
actual: a,
} => {
// IE: You put a string into a function that wants a float // IE: You put a string into a function that wants a float
return Err(format!("Wanted {e:?}, got {a}")) return Err(format!("Wanted {e:?}, got {a}"));
} }
_ => return Err(e.to_string()), _ => return Err(e.to_string()),
}, },
@@ -221,15 +254,8 @@ impl Grid {
/// Parse values in the format of A0, C10 ZZ99, etc, and /// Parse values in the format of A0, C10 ZZ99, etc, and
/// turn them into an X,Y index. /// turn them into an X,Y index.
fn parse_to_idx(i: &str) -> Option<(usize, usize)> { fn parse_to_idx(i: &str) -> Option<(usize, usize)> {
let chars = i let chars = i.chars().take_while(|c| c.is_alphabetic()).collect::<Vec<char>>();
.chars() let nums = i.chars().skip(chars.len()).take_while(|c| c.is_numeric()).collect::<String>();
.take_while(|c| c.is_alphabetic())
.collect::<Vec<char>>();
let nums = i
.chars()
.skip(chars.len())
.take_while(|c| c.is_numeric())
.collect::<String>();
// get the x index from the chars // get the x index from the chars
let x_idx = chars let x_idx = chars
@@ -245,9 +271,8 @@ impl Grid {
if let Ok(y_idx) = nums.parse::<usize>() { if let Ok(y_idx) = nums.parse::<usize>() {
return Some((x_idx, y_idx)); return Some((x_idx, y_idx));
} else { } else {
return None return None;
} }
} }
/// Helper for tests /// Helper for tests
@@ -258,7 +283,7 @@ impl Grid {
} }
} }
pub fn set_cell_raw<T: Into<CellType>>(&mut self, (x,y): (usize, usize), val: Option<T>) { pub fn set_cell_raw<T: Into<CellType>>(&mut self, (x, y): (usize, usize), val: Option<T>) {
// TODO check oob // TODO check oob
self.cells[x][y] = val.map(|v| v.into()); self.cells[x][y] = val.map(|v| v.into());
self.dirty = true; self.dirty = true;
@@ -270,35 +295,34 @@ impl Grid {
/// etc /// etc
pub fn get_cell(&self, cell_id: &str) -> &Option<CellType> { pub fn get_cell(&self, cell_id: &str) -> &Option<CellType> {
if let Some((x, y)) = Self::parse_to_idx(cell_id) { if let Some((x, y)) = Self::parse_to_idx(cell_id) {
return self.get_cell_raw(x, y) return self.get_cell_raw(x, y);
} }
&None &None
} }
pub fn get_cell_raw(&self, x: usize, y: usize) -> &Option<CellType> { pub fn get_cell_raw(&self, x: usize, y: usize) -> &Option<CellType> {
if x >= LEN || y >= LEN { if x >= LEN || y >= LEN {
return &None return &None;
} }
&self.cells[x][y] &self.cells[x][y]
} }
pub fn num_to_char(idx: usize) -> String { pub fn num_to_char(idx: usize) -> String {
/* /*
A = 0 A = 0
AA = 26 AA = 26
AAA = Not going to worry about it yet AAA = Not going to worry about it yet
*/ */
let mut word: [char; 2] = [' '; 2]; let mut word: [char; 2] = [' '; 2];
if idx >= 26 { if idx >= 26 {
word[0]= ((idx/26) + 65 -1) as u8 as char; word[0] = ((idx / 26) + 65 - 1) as u8 as char;
} }
word[1]= ((idx%26) + 65) as u8 as char; word[1] = ((idx % 26) + 65) as u8 as char;
word.iter().collect() word.iter().collect()
} }
} }
impl Default for Grid { impl Default for Grid {
@@ -332,11 +356,7 @@ impl CellType {
if let Ok(parse) = value.parse::<f64>() { if let Ok(parse) = value.parse::<f64>() {
Self::Number(parse) Self::Number(parse)
} else { } else {
if value.starts_with('=') { if value.starts_with('=') { Self::Equation(value) } else { Self::String(value) }
Self::Equation(value)
} else {
Self::String(value)
}
} }
} }
} }
@@ -361,10 +381,7 @@ fn cell_strings() {
grid.set_cell("A0", "Hello".to_string()); grid.set_cell("A0", "Hello".to_string());
assert!(grid.get_cell("A0").is_some()); assert!(grid.get_cell("A0").is_some());
assert_eq!( assert_eq!(grid.get_cell("A0").as_ref().unwrap().to_string(), String::from("Hello"));
grid.get_cell("A0").as_ref().unwrap().to_string(),
String::from("Hello")
);
} }
// Testing if A0 -> 0,0 and if 0,0 -> A0 // Testing if A0 -> 0,0 and if 0,0 -> A0
@@ -376,7 +393,7 @@ fn alphanumeric_indexing() {
assert_eq!(Grid::parse_to_idx("A10"), Some((0, 10))); assert_eq!(Grid::parse_to_idx("A10"), Some((0, 10)));
assert_eq!(Grid::parse_to_idx("Aa10"), Some((26, 10))); assert_eq!(Grid::parse_to_idx("Aa10"), Some((26, 10)));
assert_eq!(Grid::parse_to_idx("invalid"), None); assert_eq!(Grid::parse_to_idx("invalid"), None);
assert_eq!(Grid::num_to_char(0).trim(), "A"); assert_eq!(Grid::num_to_char(0).trim(), "A");
assert_eq!(Grid::num_to_char(25).trim(), "Z"); assert_eq!(Grid::num_to_char(25).trim(), "Z");
assert_eq!(Grid::num_to_char(26), "AA"); assert_eq!(Grid::num_to_char(26), "AA");
@@ -402,13 +419,13 @@ fn valid_equations() {
grid.set_cell("D0", "=5./2.".to_string()); grid.set_cell("D0", "=5./2.".to_string());
let cell = grid.get_cell("D0").as_ref().expect("I just set this"); let cell = grid.get_cell("D0").as_ref().expect("I just set this");
let res = grid.evaluate(&cell.to_string()).expect("Should be ok"); let res = grid.evaluate(&cell.to_string()).expect("Should be ok");
assert_eq!(res, 2.5); assert_eq!(res, 2.5);
// Float / Int mix // Float / Int mix
grid.set_cell("D0", "=5./2".to_string()); grid.set_cell("D0", "=5./2".to_string());
let cell = grid.get_cell("D0").as_ref().expect("I just set this"); let cell = grid.get_cell("D0").as_ref().expect("I just set this");
let res = grid.evaluate(&cell.to_string()).expect("Should be ok"); let res = grid.evaluate(&cell.to_string()).expect("Should be ok");
assert_eq!(res, 2.5); assert_eq!(res, 2.5);
// divide "ints" (should become floats) // divide "ints" (should become floats)
grid.set_cell("D0", "=5/2".to_string()); grid.set_cell("D0", "=5/2".to_string());
@@ -474,7 +491,6 @@ fn invalid_equations() {
let res = grid.evaluate(&cell.to_string()); let res = grid.evaluate(&cell.to_string());
assert!(res.is_ok()); assert!(res.is_ok());
assert!(res.is_ok_and(|v| v == 10.)); assert!(res.is_ok_and(|v| v == 10.));
} }
#[test] #[test]
@@ -567,3 +583,15 @@ fn sum_function() {
assert!(res.is_err()); assert!(res.is_err());
} }
#[test]
fn parse_csv() {
    // Each entry pairs an input line with the text of the three cells it must
    // produce. Covered: plain values, a quoted delimiter, doubled quotes in an
    // unquoted cell, doubled quotes in a quoted cell, and back-to-back doubled
    // quotes at the end of a cell.
    let cases: [(&str, [&str; 3]); 5] = [
        ("1,2,3", ["1", "2", "3"]),
        ("1,\",\",3", ["1", ",", "3"]),
        ("1,she said \"\"wow\"\",3", ["1", "she said \"wow\"", "3"]),
        (
            "1,\"she said \"\"hello, world\"\"\",3",
            ["1", "she said \"hello, world\"", "3"],
        ),
        (
            "1,she said \"\"hello world\"\"\"\",3",
            ["1", "she said \"hello world\"\"", "3"],
        ),
    ];

    for (line, cells) in cases {
        // Every expected cell is non-empty, so each one is wrapped in `Some`.
        let expected: Vec<Option<String>> =
            cells.iter().map(|cell| Some(String::from(*cell))).collect();
        assert_eq!(Grid::parse_csv_line(line), expected);
    }
}