New parser #2

Merged
ThePerfectComputer merged 51 commits from new_parser into main 2022-08-01 21:00:00 +00:00
4 changed files with 109 additions and 62 deletions
Showing only changes of commit 4c7417c729 - Show all commits

View file

@ -9,4 +9,5 @@ edition = "2021"
num = "0.4" num = "0.4"
clap = { version = "3.1.8", features = ["derive"] } clap = { version = "3.1.8", features = ["derive"] }
chrono = "0.4" chrono = "0.4"
function_name = "0.3.0" function_name = "0.3.0"
itertools = "0.10.3"

View file

@ -14,7 +14,7 @@ fn main() -> std::io::Result<()> {
let args = Cli::parse(); let args = Cli::parse();
let file = File::open(&args.path)?; let file = File::open(&args.path)?;
dbg!(["hello", "goodbye", "myworld"].contains(&"myworlde")); // dbg!(["hello", "goodbye", "myworld"].contains(&"myworlde"));
// let mut word_gen = WordReader::new(file); // let mut word_gen = WordReader::new(file);
// let mut word_count = 0; // let mut word_count = 0;
@ -26,8 +26,8 @@ fn main() -> std::io::Result<()> {
// let word1 = "hello world"; // let word1 = "hello world";
// let word2 = "hello planet"; // let word2 = "hello planet";
// dbg!(&word1[0..6].len()); // dbg!(&word1[0..6].len());
dbg!(take_until("tea time now: and later", b':')); // dbg!(take_until("tea time now: and later", b':'));
// parse_vcd(file); parse_vcd(file);
// tag("my oh my"); // tag("my oh my");

View file

@ -1,4 +1,6 @@
use super::*; use super::*;
use chrono::prelude::*;
use itertools::Itertools;
use std::fs::File; use std::fs::File;
use ::function_name::named; use ::function_name::named;
@ -10,7 +12,7 @@ pub fn take_until<'a>(word : &'a str, pattern : u8) -> Option<(&'a str, Residual
for chr in word.as_bytes() { for chr in word.as_bytes() {
if (*chr == pattern) { if (*chr == pattern) {
return Some((&word[0..new_start], Residual(&word[new_start..]))); return Some((&word[0..new_start], Residual(&word[new_start+1..])));
} }
else { else {
new_start += 1; new_start += 1;
@ -37,21 +39,17 @@ fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> {
} }
#[named] #[named]
fn parse_date(word_reader : &mut WordReader) -> Result<(), String> { fn parse_date(
let mut parsed_day = false; word_and_ctx1 : (&str, Cursor),
let mut parsed_month = false; word_and_ctx2 : (&str, Cursor),
let mut parsed_date = false; word_and_ctx3 : (&str, Cursor),
let mut parsed_hh = false; word_and_ctx4 : (&str, Cursor),
let mut parsed_mm = false; word_and_ctx5 : (&str, Cursor),
let mut parsed_ss = false; ) -> Result<DateTime<Utc>, String> {
let mut parsed_year = false;
let mut parsed_end = false;
let day = { let day = {
// check for another word in the file // check for another word in the file
let (word, cursor) = word_reader.next_word().expect( let (word, cursor) = word_and_ctx1;
format!("reached end of file without parser leaving {}", function_name!()).as_str()
);
let days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]; let days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"];
if !days.contains(&word) { if !days.contains(&word) {
@ -66,9 +64,7 @@ fn parse_date(word_reader : &mut WordReader) -> Result<(), String> {
let month = { let month = {
// check for another word in the file // check for another word in the file
let (word, cursor) = word_reader.next_word().expect( let (word, cursor) = word_and_ctx2;
format!("reached end of file without parser leaving {}", function_name!()).as_str()
);
let months = [ let months = [
"Jan", "Feb", "Mar", "Apr", "Jan", "Feb", "Mar", "Apr",
@ -88,9 +84,7 @@ fn parse_date(word_reader : &mut WordReader) -> Result<(), String> {
let date = { let date = {
// check for another word in the file // check for another word in the file
let (word, cursor) = word_reader.next_word().expect( let (word, cursor) = word_and_ctx3;
format!("reached end of file without parser leaving {}", function_name!()).as_str()
);
let date : u8 = word.to_string().parse().unwrap(); let date : u8 = word.to_string().parse().unwrap();
@ -106,48 +100,63 @@ fn parse_date(word_reader : &mut WordReader) -> Result<(), String> {
}; };
let (hh, mm, ss) = { let (hh, mm, ss) = {
// check for another word in the file // get hour
let (word, cursor) = word_reader.next_word().expect( let (word, cursor) = word_and_ctx4;
format!("reached end of file without parser leaving {}", function_name!()).as_str()
);
let date : u8 = word.to_string().parse().unwrap(); let (hh, Residual(remainder)) = take_until(word, b':').unwrap();
// let hh = take_until(word, b':').unwrap(); let hh : u8 = hh.to_string().parse().unwrap();
if date > 31 { if hh > 23 {
let msg = format!("reached end of file without parser leaving {}\n", function_name!()); let msg = format!("reached end of file without parser leaving {}\n", function_name!());
let msg2 = format!("{word} is not a valid date : must be between 0 and 31\n"); let msg2 = format!("{hh} is not a valid hour : must be between 0 and 23\n");
let msg3 = format!("failure location: {cursor:?}"); let msg3 = format!("failure location: {cursor:?}");
return Err(format!("{}{}{}", msg, msg2, msg3)) return Err(format!("{}{}{}", msg, msg2, msg3))
} }
("", "", "")
// get minute
let (mm, Residual(remainder)) = take_until(remainder, b':').unwrap();
let mm : u8 = mm.to_string().parse().unwrap();
if mm > 60 {
let msg = format!("reached end of file without parser leaving {}\n", function_name!());
let msg2 = format!("{mm} is not a valid minute : must be between 0 and 60\n");
let msg3 = format!("failure location: {cursor:?}");
return Err(format!("{}{}{}", msg, msg2, msg3))
}
// get second
let ss : u8 = remainder.to_string().parse().unwrap();
if ss > 60 {
let msg = format!("reached end of file without parser leaving {}\n", function_name!());
let msg2 = format!("{ss} is not a valid second : must be between 0 and 60\n");
let msg3 = format!("failure location: {cursor:?}");
return Err(format!("{}{}{}", msg, msg2, msg3))
}
(hh.to_string(), mm.to_string(), ss.to_string())
}; };
// else if !parsed_date { let year = {
// check for another word in the file
let (word, cursor) = word_and_ctx5;
word.to_string()
};
// } let date = Utc.datetime_from_str(
// else if !parsed_hh { format!("{day} {month} {date} {mm}:{hh}:{ss} {year}").as_str(),
"%a %b %e %T %Y").unwrap();
// } Ok(date)
// else if !parsed_mm {
// }
// else if !parsed_ss {
// }
// else if !parsed_year {
// }
// else if !parsed_end {
// }
Ok(())
} }
#[named] #[named]
fn parse_header(word_reader : &mut WordReader) -> Result<(), String> { fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
let mut header = Metadata {
date : None,
version : None,
timescale : (None, Timescale::unit)
};
loop { loop {
// check for another word in the file // check for another word in the file
let word = word_reader.next_word(); let word = word_reader.next_word();
@ -165,10 +174,47 @@ fn parse_header(word_reader : &mut WordReader) -> Result<(), String> {
// we hope that this word starts with a `$` // we hope that this word starts with a `$`
Some(ident) => { Some(ident) => {
match ident { match ident {
"date" => {println!("got date")} "date" => {
let err_msg = format!("reached end of file without parser leaving {}", function_name!());
// a date is typically composed of the 5 following words which can
// occur in any order:
// {Day, Month, Date(number in month), hh:mm:ss, year}.
// Thus, we must lookahead read the 5 next words, and try our date
// parser on 5! = 120 permutations of the 5 words.
//
// While looking ahead, if one of the 5 words is `$end`, we have to
// immediately stop trying to get more words.
let mut found_end = false;
let mut lookahead_5_words : Vec<(String, Cursor)> = Vec::new();
for word in 0..5 {
let (word, cursor) = word_reader.next_word().expect(err_msg.as_str());
let word = word.to_string();
match word.as_str() {
"$end" => {
found_end = true;
break;
}
_ => {
lookahead_5_words.push((word, cursor));
}
};
}
// we no longer attempt to parse date if we weren't able to lookahead 5
// words
if found_end {continue}
let iter = lookahead_5_words
.iter()
.permutations(lookahead_5_words.len());
// let parsed_date = parse_date(word_reader).unwrap();
// header.date = Some(parsed_date);
}
"version" => {println!("got version")} "version" => {println!("got version")}
"timescale" => {println!("got timescale")} "timescale" => {println!("got timescale")}
"scope" => {return Ok(())} "scope" => {break}
_ => {} _ => {}
} }
} }
@ -176,13 +222,13 @@ fn parse_header(word_reader : &mut WordReader) -> Result<(), String> {
None => {} None => {}
} }
} }
// Ok() return Ok(header)
} }
pub fn parse_vcd(file : File) { pub fn parse_vcd(file : File) {
let mut word_gen = WordReader::new(file); let mut word_gen = WordReader::new(file);
parse_header(&mut word_gen); let header = parse_header(&mut word_gen).unwrap();
dbg!(header);
} }

View file

@ -3,16 +3,16 @@ use chrono::prelude::*;
use num::BigInt; use num::BigInt;
#[derive(Debug)] #[derive(Debug)]
struct Version(String); pub(super) struct Version(String);
#[derive(Debug)] #[derive(Debug)]
enum Timescale {ps, ns, us, ms, s, unit} pub(super) enum Timescale {ps, ns, us, ms, s, unit}
#[derive(Debug)] #[derive(Debug)]
pub(super) struct Metadata { pub(super) struct Metadata {
date : Option<DateTime<Utc>>, pub(super) date : Option<DateTime<Utc>>,
version : Option<Version>, pub(super) version : Option<Version>,
timescale : (Option<u32>, Timescale)} pub(super) timescale : (Option<u32>, Timescale)}
#[derive(Debug)] #[derive(Debug)]
struct Scope_Idx(usize); struct Scope_Idx(usize);