2022-06-03 16:06:20 +00:00
|
|
|
use std::fs::File;
|
|
|
|
use std::collections::VecDeque;
|
|
|
|
use std::slice;
|
|
|
|
use std::str;
|
|
|
|
use std::io::prelude::*;
|
|
|
|
use std::io;
|
|
|
|
|
2022-08-04 17:19:52 +00:00
|
|
|
use backtrace::{ Backtrace, BacktraceFrame, BacktraceSymbol };
|
|
|
|
|
2022-07-26 01:16:15 +00:00
|
|
|
#[derive(Debug, Clone)]
|
2022-07-20 02:05:00 +00:00
|
|
|
pub(super) struct Line(pub(super) usize);
|
2022-07-26 01:16:15 +00:00
|
|
|
#[derive(Debug, Clone)]
|
2022-07-20 02:05:00 +00:00
|
|
|
pub(super) struct Word(pub(super) usize);
|
2022-07-26 01:16:15 +00:00
|
|
|
#[derive(Debug, Clone)]
|
2022-07-20 02:05:00 +00:00
|
|
|
pub(super) struct Cursor(pub(super) Line, pub(super) Word);
|
2022-08-04 17:19:52 +00:00
|
|
|
#[derive(Debug)]
|
|
|
|
pub(super) enum FileStatus{Eof}
|
2022-06-03 16:06:20 +00:00
|
|
|
|
|
|
|
pub struct WordReader {
|
|
|
|
reader : io::BufReader<File>,
|
2022-08-02 23:31:35 +00:00
|
|
|
eof : bool,
|
2022-06-03 16:06:20 +00:00
|
|
|
buffers : Vec<String>,
|
|
|
|
curr_line : usize,
|
|
|
|
str_slices : VecDeque<(*const u8, usize, Cursor)>,
|
2022-07-26 01:16:15 +00:00
|
|
|
curr_slice : Option<(*const u8, usize, Cursor)>,
|
2022-06-03 16:06:20 +00:00
|
|
|
}
|
|
|
|
|
2022-08-04 17:19:52 +00:00
|
|
|
|
2022-06-03 16:06:20 +00:00
|
|
|
impl WordReader {
|
2022-07-13 00:02:45 +00:00
|
|
|
pub(super) fn new(file : File) -> WordReader {
|
2022-08-02 23:31:35 +00:00
|
|
|
let reader = io::BufReader::new(file);
|
2022-06-03 16:06:20 +00:00
|
|
|
WordReader {
|
|
|
|
reader : reader,
|
2022-08-02 23:31:35 +00:00
|
|
|
eof : false,
|
2022-06-03 16:06:20 +00:00
|
|
|
buffers : vec![],
|
|
|
|
curr_line : 0,
|
2022-07-26 01:16:15 +00:00
|
|
|
str_slices : VecDeque::new(),
|
|
|
|
curr_slice : None
|
2022-06-03 16:06:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-08-04 17:19:52 +00:00
|
|
|
|
|
|
|
pub(super) fn next_word(&mut self) -> Result<(&str, Cursor), FileStatus> {
|
|
|
|
|
|
|
|
// although reaching the eof is not technically an error, in most cases,
|
|
|
|
// we treat it like one in the rest of the codebase.
|
|
|
|
|
|
|
|
// if there are no more words in the buffer, attempt to read more content
|
2022-06-03 16:06:20 +00:00
|
|
|
// from the file
|
|
|
|
if self.str_slices.is_empty() {
|
|
|
|
self.buffers.clear();
|
|
|
|
|
2022-08-04 17:19:52 +00:00
|
|
|
if self.eof {return Err(FileStatus::Eof)}
|
2022-06-03 16:06:20 +00:00
|
|
|
|
|
|
|
let num_buffers = 10;
|
|
|
|
|
|
|
|
for buf_idx in 0..num_buffers {
|
|
|
|
self.buffers.push(String::new());
|
|
|
|
self.curr_line += 1;
|
|
|
|
let bytes_read = self.reader.read_line(&mut self.buffers[buf_idx]).unwrap();
|
|
|
|
|
|
|
|
// if we've reached the end of the file on the first attempt to read
|
|
|
|
// a line in this for loop, no further attempts are necessary and we
|
|
|
|
if bytes_read == 0 {
|
2022-08-02 23:31:35 +00:00
|
|
|
self.eof = true;
|
2022-06-03 16:06:20 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2022-08-02 23:31:35 +00:00
|
|
|
let words = self.buffers[buf_idx].split_ascii_whitespace();
|
2022-06-03 16:06:20 +00:00
|
|
|
|
|
|
|
for word in words.enumerate() {
|
|
|
|
let (word_idx, word) = word;
|
|
|
|
let position = Cursor(Line(self.curr_line), Word(word_idx + 1));
|
|
|
|
self.str_slices.push_back((word.as_ptr(), word.len(), position))
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// if after we've attempted to read in more content from the file,
|
|
|
|
// there are still no words...
|
|
|
|
if self.str_slices.is_empty() {
|
2022-08-04 17:19:52 +00:00
|
|
|
return Err(FileStatus::Eof)
|
2022-06-03 16:06:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// if we make it here, we return the next word
|
|
|
|
unsafe {
|
|
|
|
let (ptr, len, position) = self.str_slices.pop_front().unwrap();
|
|
|
|
let slice = slice::from_raw_parts(ptr, len);
|
2022-07-26 01:16:15 +00:00
|
|
|
self.curr_slice = Some((ptr, len, position.clone()));
|
2022-08-04 17:19:52 +00:00
|
|
|
return Ok((str::from_utf8(slice).unwrap(), position));
|
2022-06-03 16:06:20 +00:00
|
|
|
};
|
|
|
|
}
|
2022-07-26 01:16:15 +00:00
|
|
|
|
2022-08-04 17:19:52 +00:00
|
|
|
pub(super) fn curr_word(&mut self) -> Result<(&str, Cursor), FileStatus> {
|
2022-07-26 01:16:15 +00:00
|
|
|
match &self.curr_slice {
|
|
|
|
Some(slice) => {
|
|
|
|
unsafe {
|
|
|
|
let (ptr, len, position) = slice.clone();
|
|
|
|
let slice = slice::from_raw_parts(ptr, len);
|
2022-08-04 17:19:52 +00:00
|
|
|
Ok((str::from_utf8(slice).unwrap(), position))
|
2022-07-26 01:16:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
2022-08-04 17:19:52 +00:00
|
|
|
None => {Err(FileStatus::Eof)}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn previous_symbol(level: u32) -> Option<BacktraceSymbol> {
|
|
|
|
let (trace, curr_file, curr_line) = (Backtrace::new(), file!(), line!());
|
|
|
|
let frames = trace.frames();
|
|
|
|
frames.iter()
|
|
|
|
.flat_map(BacktraceFrame::symbols)
|
|
|
|
.skip_while(|s| s.filename().map(|p| !p.ends_with(curr_file)).unwrap_or(true)
|
|
|
|
|| s.lineno() != Some(curr_line))
|
|
|
|
.nth(1 + level as usize).cloned()
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<FileStatus> for String {
|
|
|
|
fn from(f: FileStatus) -> String {
|
2022-08-04 18:28:58 +00:00
|
|
|
let sym = previous_symbol(2);
|
2022-08-04 17:19:52 +00:00
|
|
|
let filename = sym
|
|
|
|
.as_ref()
|
|
|
|
.and_then(BacktraceSymbol::filename)
|
|
|
|
.map_or(None, |path| {path.to_str()})
|
|
|
|
.unwrap_or("(Couldn't determine filename)");
|
|
|
|
let lineno = sym
|
|
|
|
.as_ref()
|
|
|
|
.and_then(BacktraceSymbol::lineno)
|
|
|
|
.map_or(None, |path| {Some(path.to_string())})
|
|
|
|
.unwrap_or("(Couldn't determine line number)".to_string());
|
|
|
|
|
|
|
|
match f {
|
|
|
|
FileStatus::Eof => format!(
|
|
|
|
"Error near {filename}:{lineno} \
|
|
|
|
No more words left in vcd file."),
|
2022-07-26 01:16:15 +00:00
|
|
|
}
|
|
|
|
}
|
2022-06-03 16:06:20 +00:00
|
|
|
}
|