Skip to content

Commit 63b51c8

Browse files
committed
Add 'strip_patterns' function
1 parent e2d3211 commit 63b51c8

File tree

7 files changed

+31
-96
lines changed

7 files changed

+31
-96
lines changed

zspell/src/affix.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
mod node;
44
mod parse;
5-
mod rule;
5+
mod parse_rule;
66
mod types;
77

88
use std::collections::BTreeMap;

zspell/src/affix/parse.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ use std::sync::Arc;
88

99
use lazy_static::lazy_static;
1010
pub use node::AffixNode;
11+
pub use parse_rule::{ParsedRule, ParsedRuleGroup};
1112
use regex::Regex;
12-
pub use rule::{ParsedRule, ParsedRuleGroup};
1313

14-
use super::{node, rule};
14+
use super::{node, parse_rule};
1515
use crate::affix::{
1616
CompoundPattern, CompoundSyllable, Conversion, Encoding, FlagType, Phonetic, RuleType,
1717
};
File renamed without changes.

zspell/src/dict/meta.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ pub enum Source {
5555
/// The full rule that created this
5656
rule: Arc<AfxRule>,
5757
/// Index of the relevant pattern within the rule. This could potentially be a reference
58-
/// but that might require a RefCell, and I don't want to risk reference
58+
/// but that might require a RefCell, and I don't want to risk reference cycles.
5959
pat_idx: usize,
6060
},
6161
/// This meta came from a .dic file, only contains morphinfo

zspell/src/dict/rule.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,14 @@ impl AfxRule {
9292
}
9393

9494
/// Do the opposite of [`apply_patterns`]: try to strip each of this rule's patterns from a word
95-
pub fn strip_patterns(&self, word: &str) {
96-
todo!()
95+
pub fn strip_patterns<'a>(
96+
&'a self,
97+
word: &'a str,
98+
) -> impl Iterator<Item = (usize, Cow<'a, str>)> + 'a {
99+
self.patterns
100+
.iter()
101+
.enumerate()
102+
.filter_map(|(idx, pat)| pat.strip_pattern(word, self.kind).map(|s| (idx, s)))
97103
}
98104

99105
pub(crate) fn patterns(&self) -> &[AfxRulePattern] {
@@ -207,6 +213,8 @@ impl AfxRulePattern {
207213
};
208214

209215
if let Some(re) = &self.condition {
216+
// FIXME: we probably want to change this to return `None` if it turns out
217+
// these patterns come up
210218
debug_assert!(
211219
re.is_match(ret.as_ref()),
212220
"created word {ret} does not match {re:?}!"

zspell/src/dict/rules_apply.rs

-73
Original file line numberDiff line numberDiff line change
@@ -208,76 +208,3 @@ mod tests {
208208
// FIXME: do something with these
209209
}
210210
}
211-
212-
// TODO: evaluate this for hyphenation
213-
// mod peek_map {
214-
// use std::iter::Peekable;
215-
// use unicode_segmentation::UnicodeSegmentation;
216-
217-
// pub struct PeekMap<I: Iterator, F>(Peekable<I>, F);
218-
219-
// pub fn peek_map<R, I: Iterator, F: FnMut(I::Item, Option<&I::Item>) -> R>(
220-
// it: Peekable<I>,
221-
// f: F,
222-
// ) -> PeekMap<I, F> {
223-
// PeekMap(it, f)
224-
// }
225-
226-
// impl<R, I: Iterator, F: FnMut(I::Item, Option<&I::Item>) -> R> Iterator for PeekMap<I, F> {
227-
// type Item = R;
228-
// fn next(&mut self) -> Option<R> {
229-
// let x = self.0.next()?;
230-
// Some((self.1)(x, self.0.peek()))
231-
// }
232-
// }
233-
234-
// #[cfg(test)]
235-
// mod tests {
236-
// use super::*;
237-
238-
// #[test]
239-
// fn test_x() {
240-
// let s = "the quick brown. Fox Jum-ped -- where? 'over' (the) very-lazy dog";
241-
242-
// enum HyphenState {
243-
// None,
244-
// AwaitingHyphen(usize),
245-
// AwaitingWord(usize)
246-
// }
247-
248-
// let mut accum = HyphenState::None;
249-
250-
// let v: Vec<_> = peek_map(s.split_word_bound_indices().peekable(),
251-
// |(idx, w), next|{
252-
253-
// let c1 = w.chars().next().unwrap();
254-
// if !(c1.is_alphanumeric() || c1 == '-') {
255-
// accum = HyphenState::None;
256-
// return None;
257-
// }
258-
259-
// if let Some((nidx, nw)) = next {
260-
// // If our next item is a hyphen, start accumulating
261-
// if nw == "-" {
262-
// accum = HyphenState::AwaitingHyphen(idx);
263-
// return None;
264-
// }
265-
// }
266-
// match accum {
267-
// HyphenState::None => {
268-
// // No upcoming hyphen? Just return our value
269-
// Some((idx, w))
270-
// },
271-
// HyphenState::AwaitingHyphen(_) => {
272-
273-
// },
274-
// HyphenState::AwaitingWord(_) => todo!(),
275-
// }
276-
// }
277-
// ).collect();
278-
279-
// dbg!(v);
280-
281-
// }
282-
// }
283-
// }

zspell/src/dict/tests_rule.rs

+17-17
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,33 @@ use crate::affix::RuleType::{self, Prefix, Suffix};
44
#[test]
55
fn test_check_condition() {
66
let mut kind = RuleType::Suffix;
7-
let mut rule = AfxRulePattern::default();
8-
rule.set_pattern("[^aeiou]y", kind).unwrap();
7+
let mut rule_pat = AfxRulePattern::default();
8+
rule_pat.set_pattern("[^aeiou]y", kind).unwrap();
99

1010
// General tests, including with pattern in the middle
11-
assert!(rule.check_condition("xxxy"));
12-
assert!(!rule.check_condition("xxxay"));
13-
assert!(!rule.check_condition("xxxyxx"));
11+
assert!(rule_pat.check_condition("xxxy"));
12+
assert!(!rule_pat.check_condition("xxxay"));
13+
assert!(!rule_pat.check_condition("xxxyxx"));
1414

1515
// Test with prefix
1616
kind = RuleType::Prefix;
17-
rule.set_pattern("y[^aeiou]", kind).unwrap();
18-
assert!(rule.check_condition("yxxx"));
19-
assert!(!rule.check_condition("yaxxx"));
20-
assert!(!rule.check_condition("xxxyxxx"));
17+
rule_pat.set_pattern("y[^aeiou]", kind).unwrap();
18+
assert!(rule_pat.check_condition("yxxx"));
19+
assert!(!rule_pat.check_condition("yaxxx"));
20+
assert!(!rule_pat.check_condition("xxxyxxx"));
2121

2222
// Test other real rules
2323
kind = RuleType::Suffix;
24-
rule.set_pattern("[sxzh]", kind).unwrap();
25-
assert!(rule.check_condition("access"));
26-
assert!(rule.check_condition("abyss"));
27-
assert!(!rule.check_condition("accomplishment"));
28-
assert!(rule.check_condition("mmms"));
29-
assert!(!rule.check_condition("mmsmm"));
24+
rule_pat.set_pattern("[sxzh]", kind).unwrap();
25+
assert!(rule_pat.check_condition("access"));
26+
assert!(rule_pat.check_condition("abyss"));
27+
assert!(!rule_pat.check_condition("accomplishment"));
28+
assert!(rule_pat.check_condition("mmms"));
29+
assert!(!rule_pat.check_condition("mmsmm"));
3030

3131
// Check with default condition
32-
rule.set_pattern(".", kind).unwrap();
33-
assert!(rule.check_condition("xxx"));
32+
rule_pat.set_pattern(".", kind).unwrap();
33+
assert!(rule_pat.check_condition("xxx"));
3434
}
3535

3636
// affix, strip, condition, kind, input, output

0 commit comments

Comments
 (0)