Skip to content

Commit

Permalink
Refactor: Rename patterns to dfas in CompiledScannerMode struct
Browse files (browse the repository at this point in the history)
  • Loading branch information
jsinger67 committed Sep 28, 2024
1 parent 9c2b2cb commit 85bc7bd
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 30 deletions.
2 changes: 1 addition & 1 deletion doc/scnr.puml
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ package internal {
}
struct CompiledScannerMode {
~ name: String
~ patterns: Vec<(Regex, TerminalID)>
~ dfas: Vec<(CompiledDfa, TerminalID)>
~ transitions: Vec<(TerminalID, ScannerModeID)>
}
struct CompiledDfa {
Expand Down
6 changes: 3 additions & 3 deletions src/internal/compiled_scanner_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub(crate) struct CompiledScannerMode {
/// type numbers.
/// The priorities of the patterns are determined by their order in the vector. Lower indices
/// have higher priority if multiple patterns match the input and have the same length.
pub(crate) patterns: Vec<(CompiledDfa, TerminalID)>,
pub(crate) dfas: Vec<(CompiledDfa, TerminalID)>,
pub(crate) transitions: Vec<(TerminalID, ScannerModeID)>,
}

Expand Down Expand Up @@ -61,7 +61,7 @@ impl CompiledScannerMode {
)?;
Ok(Self {
name,
patterns,
dfas: patterns,
transitions,
})
}
Expand Down Expand Up @@ -115,7 +115,7 @@ mod tests {
CompiledScannerMode::try_from_scanner_mode(scanner_mode, &mut character_class_registry)
.unwrap();
assert_eq!(compiled_scanner_mode.name, "test");
assert_eq!(compiled_scanner_mode.patterns.len(), 1);
assert_eq!(compiled_scanner_mode.dfas.len(), 1);
assert_eq!(compiled_scanner_mode.transitions.len(), 1);
}

Expand Down
20 changes: 9 additions & 11 deletions src/internal/scanner_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl ScannerImpl {
/// It starts the search at the position of the given CharIndices iterator.
/// During the search, all DFAs are advanced in parallel by one character at a time.
pub(crate) fn find_from(&mut self, char_indices: std::str::CharIndices) -> Option<Match> {
let patterns = &mut self.scanner_modes[self.current_mode].patterns;
let patterns = &mut self.scanner_modes[self.current_mode].dfas;
for (dfa, _) in patterns.iter_mut() {
dfa.reset();
}
Expand Down Expand Up @@ -112,23 +112,21 @@ impl ScannerImpl {
/// It is called by the `peek_n` method of the `FindMatches` iterator on a copy of the
/// `CharIndices` iterator. Thus, the original `CharIndices` iterator is not advanced.
pub(crate) fn peek_from(&mut self, char_indices: std::str::CharIndices) -> Option<Match> {
let patterns = &mut self.scanner_modes[self.current_mode].patterns;
for (dfa, _) in patterns.iter_mut() {
let dfas = &mut self.scanner_modes[self.current_mode].dfas;
for (dfa, _) in dfas.iter_mut() {
dfa.reset();
}

// All indices of the DFAs that are still active.
let mut active_dfas = (0..patterns.len()).collect::<Vec<_>>();
let mut active_dfas = (0..dfas.len()).collect::<Vec<_>>();

for (i, c) in char_indices {
for dfa_index in &active_dfas {
patterns[*dfa_index]
.0
.advance(i, c, &*self.match_char_class);
dfas[*dfa_index].0.advance(i, c, &*self.match_char_class);
}

// We remove all DFAs from `active_dfas` that finished or did not find a match so far.
active_dfas.retain(|&dfa_index| patterns[dfa_index].0.search_for_longer_match());
active_dfas.retain(|&dfa_index| dfas[dfa_index].0.search_for_longer_match());

// If all DFAs have finished, we can stop the search.
if active_dfas.is_empty() {
Expand All @@ -145,7 +143,7 @@ impl ScannerImpl {
fn find_first_longest_match(&mut self) -> Option<Match> {
let mut current_match: Option<Match> = None;
{
let patterns = &self.scanner_modes[self.current_mode].patterns;
let patterns = &self.scanner_modes[self.current_mode].dfas;
for (dfa, tok_type) in patterns.iter() {
if let Some(dfa_match) = dfa.current_match() {
if current_match.is_none()
Expand Down Expand Up @@ -197,7 +195,7 @@ impl ScannerImpl {
pub(crate) fn log_compiled_dfas_as_dot(&self, modes: &[ScannerMode]) -> Result<()> {
use std::io::Read;
for (i, scanner_mode) in self.scanner_modes.iter().enumerate() {
for (j, (dfa, t)) in scanner_mode.patterns.iter().enumerate() {
for (j, (dfa, t)) in scanner_mode.dfas.iter().enumerate() {
debug!("Compiled DFA: Mode {} Pattern {} Token {}\n{}", i, j, t, {
let mut cursor = std::io::Cursor::new(Vec::new());
let title = format!(
Expand Down Expand Up @@ -234,7 +232,7 @@ impl ScannerImpl {
{
use std::fs::File;
for (i, scanner_mode) in self.scanner_modes.iter().enumerate() {
for (j, (dfa, t)) in scanner_mode.patterns.iter().enumerate() {
for (j, (dfa, t)) in scanner_mode.dfas.iter().enumerate() {
let title = format!(
"Compiled DFA {} - {}",
modes[i].name,
Expand Down
2 changes: 1 addition & 1 deletion src/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ mod tests {
.build()
.unwrap();
assert_eq!("INITIAL", scanner.inner.scanner_modes[0].name);
let compiled_dfa = &scanner.inner.scanner_modes[0].patterns[1].0;
let compiled_dfa = &scanner.inner.scanner_modes[0].dfas[1].0;

compiled_dfa_render_to!(
&compiled_dfa,
Expand Down
54 changes: 40 additions & 14 deletions src/scanner_builder.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{ scanner::Scanner, scanner_mode::ScannerMode, Result };
use crate::{scanner::Scanner, scanner_mode::ScannerMode, Result};

/// A builder for creating a scanner.
#[derive(Debug, Clone, Default)]
Expand All @@ -20,7 +20,9 @@ impl ScannerBuilder {
/// Adding more scanner modes, as well as transitions between scanner modes, is not supported.
/// Note that all previously added scanner modes will be ignored after calling this method.
pub fn add_patterns<P, S>(self, patterns: P) -> SimpleScannerBuilder
where P: IntoIterator<Item = S>, S: AsRef<str>
where
P: IntoIterator<Item = S>,
S: AsRef<str>,
{
SimpleScannerBuilder::new(patterns)
}
Expand Down Expand Up @@ -56,7 +58,11 @@ pub struct SimpleScannerBuilder {

impl SimpleScannerBuilder {
/// Creates a new simple scanner builder.
fn new<P, S>(patterns: P) -> Self where P: IntoIterator<Item = S>, S: AsRef<str> {
fn new<P, S>(patterns: P) -> Self
where
P: IntoIterator<Item = S>,
S: AsRef<str>,
{
let patterns = patterns
.into_iter()
.enumerate()
Expand Down Expand Up @@ -98,13 +104,20 @@ mod tests {
let scanner_mode = ScannerMode::new(
"INITIAL",
vec![(r"\r\n|\r|\n", 1), (r"(//.*(\r\n|\r|\n))", 3)],
vec![(1, 1), (3, 1)]
vec![(1, 1), (3, 1)],
);
let scanner = ScannerBuilder::new().add_scanner_mode(scanner_mode).build().unwrap();
let scanner = ScannerBuilder::new()
.add_scanner_mode(scanner_mode)
.build()
.unwrap();
assert_eq!("INITIAL", scanner.inner.scanner_modes[0].name);
let compiled_dfa = &scanner.inner.scanner_modes[0].patterns[1].0;
let compiled_dfa = &scanner.inner.scanner_modes[0].dfas[1].0;

compiled_dfa_render_to!(&compiled_dfa, "LineComment", scanner.inner.character_classes);
compiled_dfa_render_to!(
&compiled_dfa,
"LineComment",
scanner.inner.character_classes
);
}

#[test]
Expand All @@ -114,15 +127,22 @@ mod tests {
ScannerMode::new(
"INITIAL",
vec![(r"\r\n|\r|\n", 1), (r"(//.*(\r\n|\r|\n))", 3)],
vec![(1, 1), (3, 1)]
vec![(1, 1), (3, 1)],
),
ScannerMode::new("STRING", vec![(r#""[^"]*""#, 2)], vec![(2, 0)])
ScannerMode::new("STRING", vec![(r#""[^"]*""#, 2)], vec![(2, 0)]),
];
let scanner = ScannerBuilder::new().add_scanner_modes(&scanner_modes).build().unwrap();
let scanner = ScannerBuilder::new()
.add_scanner_modes(&scanner_modes)
.build()
.unwrap();
assert_eq!("INITIAL", scanner.inner.scanner_modes[0].name);
let compiled_dfa = &scanner.inner.scanner_modes[0].patterns[1].0;
let compiled_dfa = &scanner.inner.scanner_modes[0].dfas[1].0;

compiled_dfa_render_to!(&compiled_dfa, "LineComment", scanner.inner.character_classes);
compiled_dfa_render_to!(
&compiled_dfa,
"LineComment",
scanner.inner.character_classes
);
}

#[test]
Expand All @@ -143,9 +163,15 @@ mod tests {
assert_eq!(matches.len(), 4);
assert_eq!(matches[0].token_type(), 0);
assert_eq!(matches[1].token_type(), 1);
assert_eq!(&input[matches[1].span().range()].to_string().trim(), &"// Line comment1");
assert_eq!(
&input[matches[1].span().range()].to_string().trim(),
&"// Line comment1"
);
assert_eq!(matches[2].token_type(), 0);
assert_eq!(matches[3].token_type(), 1);
assert_eq!(&input[matches[3].span().range()].to_string().trim(), &"// Line comment2");
assert_eq!(
&input[matches[3].span().range()].to_string().trim(),
&"// Line comment2"
);
}
}

0 comments on commit 85bc7bd

Please sign in to comment.