FastWaveBackend/src/main.rs

use std::io::prelude::*;
use std::io;
use std::fs::File;
use std::collections::BTreeMap;
use chrono::prelude::*;
use ::function_name::named;

use num::*;
use clap::Parser;

use std::slice;
use std::str;

use std::collections::VecDeque;

// Command-line arguments, parsed through clap's derive macro.
// NOTE: clap turns `///` doc comments in this struct into user-visible help
// text, so explanatory notes here deliberately use plain `//` comments.
#[derive(Parser)]
struct Cli {
    /// The path to the file to read
    #[clap(parse(from_os_str))]
    path: std::path::PathBuf,
}

/// Newtype around a `String`; stored in `Metadata::version`.
// NOTE(review): presumably holds the version text read from the input file — confirm.
#[derive(Debug)]
struct Version(String);

/// Unit half of the `Metadata::timescale` pair.
///
/// `unit` is the value `VCD::new` starts with.
// NOTE(review): the other variants presumably abbreviate picoseconds through
// seconds — confirm against the parser once it lands.
#[derive(Debug)]
enum Timescale {
    ps,
    ns,
    us,
    ms,
    s,
    unit,
}

/// Descriptive data stored in `VCD::metadata`: an optional date, an optional
/// version, and a timescale pair.
#[derive(Debug)]
struct Metadata {
    date: Option<DateTime<Utc>>,
    version: Option<Version>,
    // (optional magnitude, unit).
    // NOTE(review): presumably something like `(Some(1), Timescale::ns)` for
    // a "1 ns" timescale — confirm.
    timescale: (Option<u32>, Timescale),
}

/// Newtype around a `usize` that identifies a scope; used by
/// `Scope::child_scopes` and `SignalGeneric::Signal::scope_parent`.
// NOTE(review): presumably an index into `VCD::all_scopes` — confirm.
#[derive(Debug)]
struct Scope_Idx(usize);

/// Newtype around a `usize` that identifies a signal; used by
/// `Scope::child_signals` and `SignalGeneric::SignalAlias::signal_alias`.
// NOTE(review): presumably an index into `VCD::all_signals` — confirm.
#[derive(Debug)]
struct Signal_Idx(usize);

/// Element type of `VCD::all_signals`: either a signal carrying its own
/// timeline, or an alias that points at another signal by index.
#[derive(Debug)]
enum SignalGeneric {
    /// A named signal with its own timeline and a reference to its parent scope.
    Signal {
        name: String,
        // NOTE(review): presumably maps a timestamp to the signal's value at
        // that time — confirm.
        timeline: BTreeMap<BigInt, BigInt>,
        scope_parent: Scope_Idx,
    },
    /// A named entry that refers to another signal through `signal_alias`.
    SignalAlias {
        name: String,
        signal_alias: Signal_Idx,
    },
}

/// A named scope that refers to its child signals and child scopes by index.
#[derive(Debug)]
struct Scope {
    name: String,
    // NOTE(review): presumably indices into `VCD::all_signals` — confirm.
    child_signals: Vec<Signal_Idx>,
    // NOTE(review): presumably indices into `VCD::all_scopes` — confirm.
    child_scopes: Vec<Scope_Idx>,
}


/// Top-level container: metadata plus flat lists of every signal and every
/// scope.
#[derive(Debug)]
struct VCD {
    metadata: Metadata,
    all_signals: Vec<SignalGeneric>,
    // the root scope should always be placed at index 0
    all_scopes: Vec<Scope>,
}

impl VCD {
    /// Creates an empty `VCD`: no date, no version, a timescale of
    /// `(None, Timescale::unit)`, and empty signal and scope lists.
    pub fn new() -> Self {
        VCD {
            metadata: Metadata {
                date: None,
                version: None,
                timescale: (None, Timescale::unit),
            },
            all_signals: Vec::new(),
            all_scopes: Vec::new(),
        }
    }
}


/// 1-based number of the line a word was found on.
#[derive(Debug)]
struct Line(usize);

/// 1-based position of a word within its line.
#[derive(Debug)]
struct Word(usize);

/// Location of a word in the input: its line and its position on that line.
#[derive(Debug)]
struct Cursor(Line, Word);

/// Streams the ASCII-whitespace-separated words of a file, one per call to
/// [`YieldByWord::next_word`], together with each word's [`Cursor`].
///
/// Lines are read in batches of [`YieldByWord::LINES_PER_BATCH`] into a
/// single reused buffer; words are remembered as byte ranges into that buffer
/// and handed out as borrowed `&str` slices, so no per-word allocation or
/// `unsafe` code is needed.
struct YieldByWord {
    reader: io::BufReader<File>,
    /// Set once `read_line` has reported end of file.
    eof: bool,
    /// Text of the current batch of lines. It is only cleared when `pending`
    /// is empty, so every stored byte range stays valid until it is popped.
    buffer: String,
    /// Number of lines read from the file so far.
    curr_line: usize,
    /// Words of the current batch that have not been yielded yet, as
    /// `(byte offset into buffer, byte length, position)`.
    pending: VecDeque<(usize, usize, Cursor)>,
}

impl YieldByWord {
    /// Number of lines read from the file per refill.
    const LINES_PER_BATCH: usize = 10;

    /// Wraps `file` in a buffered reader; nothing is read until the first
    /// call to [`YieldByWord::next_word`].
    fn new(file: File) -> YieldByWord {
        YieldByWord {
            reader: io::BufReader::new(file),
            eof: false,
            buffer: String::new(),
            curr_line: 0,
            pending: VecDeque::new(),
        }
    }

    /// Returns the next word and its position, or `None` once the whole file
    /// has been consumed.
    ///
    /// The returned `&str` borrows from `self`, so it must be dropped (or
    /// copied) before the next call.
    ///
    /// # Panics
    /// Panics if reading from the file fails or the file is not valid UTF-8.
    fn next_word(&mut self) -> Option<(&str, Cursor)> {
        // Keep refilling until a word shows up. A single batch can consist
        // solely of blank lines, and that must not be mistaken for the end
        // of the file.
        while self.pending.is_empty() {
            if self.eof {
                return None;
            }
            self.refill();
        }

        let (start, len, position) = self.pending.pop_front()?;
        Some((&self.buffer[start..start + len], position))
    }

    /// Reads up to `LINES_PER_BATCH` lines into `buffer` and queues the byte
    /// range and position of every word found in them.
    fn refill(&mut self) {
        self.buffer.clear();

        for _ in 0..Self::LINES_PER_BATCH {
            // `read_line` appends, so the new line starts at the current end.
            let line_start = self.buffer.len();
            let bytes_read = self
                .reader
                .read_line(&mut self.buffer)
                .expect("failed to read a line from the input (I/O error or invalid UTF-8)");

            // Zero bytes means end of file: no further reads are necessary.
            if bytes_read == 0 {
                self.eof = true;
                break;
            }
            self.curr_line += 1;

            // Offsets are relative to the start of the buffer, so they stay
            // valid even if a later `read_line` reallocates it.
            let base = self.buffer.as_ptr() as usize;
            let line = &self.buffer[line_start..];
            for (word_idx, word) in line.split_ascii_whitespace().enumerate() {
                let start = word.as_ptr() as usize - base;
                let position = Cursor(Line(self.curr_line), Word(word_idx + 1));
                self.pending.push_back((start, word.len(), position));
            }
        }
    }
}

/// Entry point: counts the whitespace-separated words of the file named on
/// the command line and prints the total with `dbg!`.
///
/// # Errors
/// Returns the `io::Error` from `File::open` if the file cannot be opened.
fn main() -> std::io::Result<()> {
    let args = Cli::parse();

    let file = File::open(&args.path)?;
    let mut word_gen = YieldByWord::new(file);

    // Explicitly `usize`: an unannotated counter is inferred as `i32`, which
    // overflows once the input holds more than `i32::MAX` words.
    let mut word_count: usize = 0;
    while word_gen.next_word().is_some() {
        word_count += 1;
    }
    dbg!(word_count);

    Ok(())
}
first commit 2022-04-14 04:50:37 +00:00			`use std::io::prelude::*;`
			`use std::io;`
			`use std::fs::File;`
now parsing by space 2022-05-18 02:04:32 +00:00			`use std::collections::BTreeMap;`
preliminary parser progress 2022-05-19 07:44:24 +00:00			`use chrono::prelude::*;`
notable refactoring and simplification; now able to parse version 2022-05-24 03:45:14 +00:00			`use ::function_name::named;`
first commit 2022-04-14 04:50:37 +00:00
			`use num::*;`
			`use clap::Parser;`

now using pointer and string slices 2022-06-02 20:51:56 +00:00			`use std::slice;`
			`use std::str;`

nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`use std::collections::VecDeque;`

first commit 2022-04-14 04:50:37 +00:00			`#[derive(Parser)]`
			`struct Cli {`
			`/// The path to the file to read`
			`#[clap(parse(from_os_str))]`
Some changes including: - modify data structures to support arenas - preliminary work on parser 2022-05-21 02:52:26 +00:00			`path: std::path::PathBuf}`
first commit 2022-04-14 04:50:37 +00:00
state machine seems to be working 2022-05-23 03:00:03 +00:00			`#[derive(Debug)]`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`struct Version(String);`
Some changes including: - modify data structures to support arenas - preliminary work on parser 2022-05-21 02:52:26 +00:00
state machine seems to be working 2022-05-23 03:00:03 +00:00			`#[derive(Debug)]`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`enum Timescale {ps, ns, us, ms, s, unit}`
preliminary parser progress 2022-05-19 07:44:24 +00:00
state machine seems to be working 2022-05-23 03:00:03 +00:00			`#[derive(Debug)]`
			`struct Metadata {`
cleaner types 2022-05-23 23:19:17 +00:00			`date : Option<DateTime<Utc>>,`
			`version : Option<Version>,`
now parses timelines 2022-05-28 00:48:17 +00:00			`timescale : (Option<u32>, Timescale)}`
preliminary parser progress 2022-05-19 07:44:24 +00:00
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`#[derive(Debug)]`
			`struct Scope_Idx(usize);`

			`#[derive(Debug)]`
			`struct Signal_Idx(usize);`

state machine seems to be working 2022-05-23 03:00:03 +00:00			`#[derive(Debug)]`
preliminary parser progress 2022-05-19 07:44:24 +00:00			`enum SignalGeneric{`
state machine seems to be working 2022-05-23 03:00:03 +00:00			`Signal{`
			`name : String,`
			`timeline : BTreeMap<BigInt, BigInt>,`
			`scope_parent : Scope_Idx},`
			`SignalAlias{`
			`name : String,`
			`signal_alias : Signal_Idx}`
			`}`
now parsing by space 2022-05-18 02:04:32 +00:00
state machine seems to be working 2022-05-23 03:00:03 +00:00			`#[derive(Debug)]`
preliminary parser progress 2022-05-19 07:44:24 +00:00			`struct Scope {`
Some changes including: - modify data structures to support arenas - preliminary work on parser 2022-05-21 02:52:26 +00:00			`name : String,`
			`child_signals : Vec<Signal_Idx>,`
			`child_scopes : Vec<Scope_Idx>}`
preliminary parser progress 2022-05-19 07:44:24 +00:00
state machine seems to be working 2022-05-23 03:00:03 +00:00
			`#[derive(Debug)]`
preliminary parser progress 2022-05-19 07:44:24 +00:00			`struct VCD {`
Some changes including: - modify data structures to support arenas - preliminary work on parser 2022-05-21 02:52:26 +00:00			`metadata : Metadata,`
			`all_signals : Vec<SignalGeneric>,`
			`// the root scope should always be placed at index 0`
			`all_scopes : Vec<Scope>}`

			`impl VCD {`
			`pub fn new() -> Self {`
			`let metadata = Metadata {`
cleaner types 2022-05-23 23:19:17 +00:00			`date : None,`
			`version : None,`
now parses timelines 2022-05-28 00:48:17 +00:00			`timescale : (None, Timescale::unit)};`
Some changes including: - modify data structures to support arenas - preliminary work on parser 2022-05-21 02:52:26 +00:00			`VCD {`
			`metadata : metadata,`
			`all_signals : Vec::<SignalGeneric>::new(),`
shutting down for the night 2022-05-24 03:59:57 +00:00			`all_scopes : Vec::<Scope>::new()}`
			`}`
			`}`
Some changes including: - modify data structures to support arenas - preliminary work on parser 2022-05-21 02:52:26 +00:00
this loop is too slow - but may be useful for future reference 2022-05-19 00:47:55 +00:00
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`#[derive(Debug)]`
			`struct Line(usize);`
			`#[derive(Debug)]`
			`struct Word(usize);`
			`#[derive(Debug)]`
			`struct Cursor(Line, Word);`
now we have an iterator - albeit somewhat slow 2022-05-19 02:57:42 +00:00
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`struct YieldByWord {`
			`reader : io::BufReader<File>,`
			`EOF : bool,`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`buffers : Vec<String>,`
			`curr_line : usize,`
			`str_slices : VecDeque<(*const u8, usize, Cursor)>,`
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`}`

			`impl YieldByWord {`
			`fn new(file : File) -> YieldByWord {`
			`let mut reader = io::BufReader::new(file);`
			`YieldByWord {`
			`reader : reader,`
			`EOF : false,`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`buffers : vec![],`
			`curr_line : 0,`
			`str_slices : VecDeque::new()`
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`}`
			`}`

nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`fn next_word(&mut self) -> Option<(&str, Cursor)> {`
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`// if there are no more words, attempt to read more content`
			`// from the file`
			`if self.str_slices.is_empty() {`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`self.buffers.clear();`
now using pointer and string slices 2022-06-02 20:51:56 +00:00
			`if self.EOF {return None}`

nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`let num_buffers = 10;`

			`for buf_idx in 0..num_buffers {`
			`self.buffers.push(String::new());`
			`self.curr_line += 1;`
			`let bytes_read = self.reader.read_line(&mut self.buffers[buf_idx]).unwrap();`

			`// if we've reached the end of the file on the first attempt to read`
			`// a line in this for loop, no further attempts are necessary and we`
			`if bytes_read == 0 {`
			`self.EOF = true;`
			`break;`
			`}`

			`let mut words = self.buffers[buf_idx].split_ascii_whitespace();`

			`for word in words.enumerate() {`
			`let (word_idx, word) = word;`
			`let position = Cursor(Line(self.curr_line), Word(word_idx + 1));`
			`self.str_slices.push_back((word.as_ptr(), word.len(), position))`
			`}`
now using pointer and string slices 2022-06-02 20:51:56 +00:00
			`}`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`}`
now using pointer and string slices 2022-06-02 20:51:56 +00:00
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`// if after we've attempted to read in more content from the file,`
			`// there are still no words...`
			`if self.str_slices.is_empty() {`
			`return None`
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`}`

			`// if we make it here, we return the next word`
			`unsafe {`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`let (ptr, len, position) = self.str_slices.pop_front().unwrap();`
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`let slice = slice::from_raw_parts(ptr, len);`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`return Some((str::from_utf8(slice).unwrap(), position));`
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`};`
			`}`
			`}`

now we have an iterator - albeit somewhat slow 2022-05-19 02:57:42 +00:00			`fn main() -> std::io::Result<()> {`
			`let args = Cli::parse();`

preliminary parser progress 2022-05-19 07:44:24 +00:00			`let file = File::open(&args.path)?;`
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`let mut word_gen = YieldByWord::new(file);`
			`let mut word_count = 0;`

			`while word_gen.next_word().is_some() {`
			`word_count += 1;`
			`}`
			`dbg!(word_count);`

			`// loop {`
nearly as fast as wc and now yield words 2022-06-03 00:02:09 +00:00			`// let word = word_gen.next_word();`
			`// if word.is_none() {break};`

			`// dbg!(word.unwrap());`
now using pointer and string slices 2022-06-02 20:51:56 +00:00			`// }`

state machine seems to be working 2022-05-23 03:00:03 +00:00
first commit 2022-04-14 04:50:37 +00:00			`Ok(())`
This is starting to go somewhere and needs a re-factor 2022-05-21 19:22:05 +00:00			`}`