From 14af6e94e386a46f31523160337dc4b1913b8732 Mon Sep 17 00:00:00 2001
From: Yehowshua Immanuel <programmed4jesus@gmail.com>
Date: Sat, 11 Jun 2022 00:01:53 -0400
Subject: [PATCH] now parsing date more robustly

---
 README.md        |   1 +
 src/vcd/parse.rs | 134 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 107 insertions(+), 28 deletions(-)
diff --git a/README.md b/README.md
index 76499fc..c8055ef 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ The first build of the program may take some time.
  - [ ] Consider what to do with don't care values
       will probably just convert them to strings for now.
  - [ ] Test for speed and see if stream of bytes is helpful
+ - [ ] Split ``parse.rs``. It's getting too large.
  - [ ] Include line and possible column numbers
  - [ ] Change states to lowercase
  - [ ] We need to start regression testing the parser over all files
diff --git a/src/vcd/parse.rs b/src/vcd/parse.rs
index 74ffb4c..ea9072a 100644
--- a/src/vcd/parse.rs
+++ b/src/vcd/parse.rs
@@ -40,11 +40,11 @@ fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> {
 
 #[named]
 fn parse_date(
-    word_and_ctx1 : (&str, Cursor),
-    word_and_ctx2 : (&str, Cursor),
-    word_and_ctx3 : (&str, Cursor),
-    word_and_ctx4 : (&str, Cursor),
-    word_and_ctx5 : (&str, Cursor),
+    word_and_ctx1 : (&str, &Cursor),
+    word_and_ctx2 : (&str, &Cursor),
+    word_and_ctx3 : (&str, &Cursor),
+    word_and_ctx4 : (&str, &Cursor),
+    word_and_ctx5 : (&str, &Cursor),
 ) -> Result<DateTime<Utc>, String> {
 
     let day = {
@@ -86,7 +86,11 @@ fn parse_date(
         // check for another word in the file
         let (word, cursor) = word_and_ctx3;
 
-        let date : u8 = word.to_string().parse().unwrap();
+        // let date : u8 = word.to_string().parse().unwrap();
+        let date : u8 = match word.to_string().parse() {
+            Ok(date) => date,
+            Err(_) => {return Err("".to_string())}
+        };
 
         if date > 31 {
             let msg  = format!("reached end of file without parser leaving {}\n", function_name!());
@@ -96,15 +100,17 @@ fn parse_date(
 
         }
 
-        word.to_string()
+        date.to_string()
     };
 
     let (hh, mm, ss) = {
         // get hour
         let (word, cursor) = word_and_ctx4;
 
-        let (hh, Residual(remainder)) = take_until(word, b':').unwrap();
-        let hh : u8 = hh.to_string().parse().unwrap();
+        let (hh, Residual(remainder)) = take_until(word, b':').ok_or("did not find colon")?;
+        let hh : u8 = hh.to_string()
+                        .parse()
+                        .map_err(|_| "failed to parse".to_string())?;
 
         if hh > 23 {
             let msg  = format!("reached end of file without parser leaving {}\n", function_name!());
@@ -114,8 +120,10 @@ fn parse_date(
         }
 
         // get minute
-        let (mm, Residual(remainder)) = take_until(remainder, b':').unwrap();
-        let mm : u8 = mm.to_string().parse().unwrap();
+        let (mm, Residual(remainder)) = take_until(remainder, b':').ok_or("did not find colon")?;
+        let mm : u8 = mm.to_string()
+                        .parse()
+                        .map_err(|_| "failed to parse".to_string())?;
 
         if mm > 60 {
             let msg  = format!("reached end of file without parser leaving {}\n", function_name!());
@@ -125,7 +133,10 @@ fn parse_date(
         }
 
         // get second
-        let ss : u8 = remainder.to_string().parse().unwrap();
+        // let ss : u8 = remainder.to_string().parse().unwrap();
+        let ss : u8 = remainder.to_string()
+                        .parse()
+                        .map_err(|_| "failed to parse".to_string())?;
 
         if ss > 60 {
             let msg  = format!("reached end of file without parser leaving {}\n", function_name!());
@@ -142,16 +153,51 @@ fn parse_date(
         word.to_string()
     };
 
-    let date = Utc.datetime_from_str(
-        format!("{day} {month} {date} {mm}:{hh}:{ss} {year}").as_str(),
-        "%a %b %e %T %Y").unwrap();
+    // Unfortunately, the hour, minute, and second fields may appear in an
+    // unexpected order, so each possible ordering is tried in turn below.
+    let full_date = format!("{day} {month} {date} {mm}:{hh}:{ss} {year}");
+    let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
+    if full_date.is_ok() {
+        return Ok(full_date.unwrap())
+    }
+
+    let full_date = format!("{day} {month} {date} {mm}:{ss}:{hh} {year}");
+    let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
+    if full_date.is_ok() {
+        return Ok(full_date.unwrap())
+    }
+
+    let full_date = format!("{day} {month} {date} {ss}:{mm}:{hh} {year}");
+    let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
+    if full_date.is_ok() {
+        return Ok(full_date.unwrap())
+    }
+
+    let full_date = format!("{day} {month} {date} {ss}:{hh}:{mm} {year}");
+    let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
+    if full_date.is_ok() {
+        return Ok(full_date.unwrap())
+    }
+
+    let full_date = format!("{day} {month} {date} {hh}:{ss}:{mm} {year}");
+    let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
+    if full_date.is_ok() {
+        return Ok(full_date.unwrap())
+    }
+
+    let full_date = format!("{day} {month} {date} {hh}:{mm}:{ss} {year}");
+    let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
+    if full_date.is_ok() {
+        return Ok(full_date.unwrap())
+    }
+
+    Err("failed to parse dat".to_string())
 
-    Ok(date)
 }
 
 #[named]
-fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
-    let mut header = Metadata {
+fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
+    let mut metadata = Metadata {
         date : None,
         version : None,
         timescale : (None, Timescale::unit)
@@ -168,7 +214,6 @@ fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
 
         // destructure
         let (word, cursor) = word.unwrap();
-        let ident = tag(word, "$");
 
         match tag(word, "$") {
             // we hope that this word stars with a `$`
@@ -182,6 +227,11 @@ fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
                         // Thus, we must lookahead read the 5 next words, and try our date
                         // parser on 5! = 120 permutations of the 5 words.
                         //
+                        // It is also possible that within each permutation, the hours,
+                        // minutes, and seconds could be in an unusual order, which means
+                        // that we may search up to 6 different permutations of hh:mm:ss,
+                        // for an upper bound total of 720 permutations
+                        //
                         // While looking ahead, if one of the 5 words in `$end`, we have to 
                         // immediately stop trying to get more words.
 
@@ -206,15 +256,43 @@ fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
                         // words
                         if found_end {continue}
 
-                        let iter =  lookahead_5_words
-                                    .iter()
-                                    .permutations(lookahead_5_words.len());
-                        // let parsed_date = parse_date(word_reader).unwrap();
-                        // header.date     = Some(parsed_date);
+                        let permutations =  lookahead_5_words
+                                            .iter()
+                                            .permutations(lookahead_5_words.len());
+                        
+                        // go ahead and search for a match amongst permuted date text
+                        for mut permutations in permutations {
+                            let (w1, s1) = permutations.pop().unwrap();
+                            let arg_1 = (&w1[..], s1);
+
+                            let (w2, s2) = permutations.pop().unwrap();
+                            let arg_2 = (&w2[..], s2);
+
+                            let (w3, s3) = permutations.pop().unwrap();
+                            let arg_3 = (&w3[..], s3);
+
+                            let (w4, s4) = permutations.pop().unwrap();
+                            let arg_4 = (&w4[..], s4);
+
+                            let (w5, s5) = permutations.pop().unwrap();
+                            let arg_5 = (&w5[..], s5);
+
+                            let parsed_date = parse_date(arg_1, arg_2, arg_3, arg_4, arg_5);
+
+                            // store date and exit loop if a match is found
+                            if parsed_date.is_ok() {
+                                metadata.date = Some(parsed_date.unwrap());
+                                break
+                            }
+
+                        }
                     }
-                    "version"   => {println!("got version")}
-                    "timescale" => {println!("got timescale")}
+                    "version"   => {println!("found version")}
+                    "timescale" => {println!("found timescale")}
+                    // in VCDs, the scope keyword indicates the end of the metadata section
                     "scope"     => {break}
+                    // we keep searching for words until we've found one of the following
+                    // keywords, ["version", "timescale", "scope"]
                     _ => {}
                 }
             }
@@ -223,12 +301,12 @@ fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
         }
 
     }
-    return Ok(header)
+    return Ok(metadata)
 }
 
 pub fn parse_vcd(file : File) {
     let mut word_gen = WordReader::new(file);
 
-    let header = parse_header(&mut word_gen).unwrap();
+    let header = parse_metadata(&mut word_gen).unwrap();
     dbg!(header);
 }
\ No newline at end of file