Skip to content

Commit fc3fa0e

Browse files
committed
Add 'strip_patterns' function
1 parent e2d3211 commit fc3fa0e

File tree

7 files changed

+46
-105
lines changed

7 files changed

+46
-105
lines changed

zspell/src/affix.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
33
mod node;
44
mod parse;
5-
mod rule;
5+
mod parse_rule;
66
mod types;
77

88
use std::collections::BTreeMap;

zspell/src/affix/parse.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@ use std::sync::Arc;
88

99
use lazy_static::lazy_static;
1010
pub use node::AffixNode;
11+
pub use parse_rule::{ParsedRule, ParsedRuleGroup};
1112
use regex::Regex;
12-
pub use rule::{ParsedRule, ParsedRuleGroup};
1313

14-
use super::{node, rule};
14+
use super::{node, parse_rule};
1515
use crate::affix::{
1616
CompoundPattern, CompoundSyllable, Conversion, Encoding, FlagType, Phonetic, RuleType,
1717
};
File renamed without changes.

zspell/src/dict/meta.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ pub enum Source {
5555
/// The full rule that created this
5656
rule: Arc<AfxRule>,
5757
/// Index of the relevant pattern within the rule. This could potentially be a reference
58-
/// but that might require a RefCell, and I don't want to risk reference
58+
/// but that might require a RefCell, and I don't want to risk reference cycles.
5959
pat_idx: usize,
6060
},
6161
/// This meta came from a .dic file, only contains morphinfo

zspell/src/dict/rule.rs

+16-10
Original file line number | Diff line number | Diff line change
@@ -92,8 +92,14 @@ impl AfxRule {
9292
}
9393

9494
/// Do the opposite of [`apply_patterns`]: try to strip each of this rule's patterns from a word
95-
pub fn strip_patterns(&self, word: &str) {
96-
todo!()
95+
pub fn strip_patterns<'a>(
96+
&'a self,
97+
word: &'a str,
98+
) -> impl Iterator<Item = (usize, Cow<'a, str>)> + 'a {
99+
self.patterns
100+
.iter()
101+
.enumerate()
102+
.filter_map(|(idx, pat)| pat.strip_pattern(word, self.kind).map(|s| (idx, s)))
97103
}
98104

99105
pub(crate) fn patterns(&self) -> &[AfxRulePattern] {
@@ -190,23 +196,23 @@ impl AfxRulePattern {
190196
let Some(base) = dbg!(word.strip_prefix(self.affix.as_ref())) else {
191197
return None;
192198
};
193-
match &self.strip {
194-
Some(add_back) => Cow::Owned(format!("{add_back}{base}")),
195-
None => Cow::Borrowed(base),
196-
}
199+
self.strip.as_ref().map_or(Cow::Borrowed(base), |add_back| {
200+
Cow::Owned(format!("{add_back}{base}"))
201+
})
197202
}
198203
RuleType::Suffix => {
199204
let Some(base) = dbg!(word.strip_suffix(self.affix.as_ref())) else {
200205
return None;
201206
};
202-
match &self.strip {
203-
Some(add_back) => Cow::Owned(format!("{base}{add_back}")),
204-
None => Cow::Borrowed(base),
205-
}
207+
self.strip.as_ref().map_or(Cow::Borrowed(base), |add_back| {
208+
Cow::Owned(format!("{base}{add_back}"))
209+
})
206210
}
207211
};
208212

209213
if let Some(re) = &self.condition {
214+
// FIXME: we probably want to change this to return `None` if it turns out
215+
// these patterns come up
210216
debug_assert!(
211217
re.is_match(ret.as_ref()),
212218
"created word {ret} does not match {re:?}!"

zspell/src/dict/rules_apply.rs

-73
Original file line number | Diff line number | Diff line change
@@ -208,76 +208,3 @@ mod tests {
208208
// FIXME: do something with these
209209
}
210210
}
211-
212-
// TODO: evaluate this for hyphenation
213-
// mod peek_map {
214-
// use std::iter::Peekable;
215-
// use unicode_segmentation::UnicodeSegmentation;
216-
217-
// pub struct PeekMap<I: Iterator, F>(Peekable<I>, F);
218-
219-
// pub fn peek_map<R, I: Iterator, F: FnMut(I::Item, Option<&I::Item>) -> R>(
220-
// it: Peekable<I>,
221-
// f: F,
222-
// ) -> PeekMap<I, F> {
223-
// PeekMap(it, f)
224-
// }
225-
226-
// impl<R, I: Iterator, F: FnMut(I::Item, Option<&I::Item>) -> R> Iterator for PeekMap<I, F> {
227-
// type Item = R;
228-
// fn next(&mut self) -> Option<R> {
229-
// let x = self.0.next()?;
230-
// Some((self.1)(x, self.0.peek()))
231-
// }
232-
// }
233-
234-
// #[cfg(test)]
235-
// mod tests {
236-
// use super::*;
237-
238-
// #[test]
239-
// fn test_x() {
240-
// let s = "the quick brown. Fox Jum-ped -- where? 'over' (the) very-lazy dog";
241-
242-
// enum HyphenState {
243-
// None,
244-
// AwaitingHyphen(usize),
245-
// AwaitingWord(usize)
246-
// }
247-
248-
// let mut accum = HyphenState::None;
249-
250-
// let v: Vec<_> = peek_map(s.split_word_bound_indices().peekable(),
251-
// |(idx, w), next|{
252-
253-
// let c1 = w.chars().next().unwrap();
254-
// if !(c1.is_alphanumeric() || c1 == '-') {
255-
// accum = HyphenState::None;
256-
// return None;
257-
// }
258-
259-
// if let Some((nidx, nw)) = next {
260-
// // If our next item is a hyphen, start accumulating
261-
// if nw == "-" {
262-
// accum = HyphenState::AwaitingHyphen(idx);
263-
// return None;
264-
// }
265-
// }
266-
// match accum {
267-
// HyphenState::None => {
268-
// // No upcoming hyphen? Just return our value
269-
// Some((idx, w))
270-
// },
271-
// HyphenState::AwaitingHyphen(_) => {
272-
273-
// },
274-
// HyphenState::AwaitingWord(_) => todo!(),
275-
// }
276-
// }
277-
// ).collect();
278-
279-
// dbg!(v);
280-
281-
// }
282-
// }
283-
// }

zspell/src/dict/tests_rule.rs

+26-18
Original file line number | Diff line number | Diff line change
@@ -4,37 +4,45 @@ use crate::affix::RuleType::{self, Prefix, Suffix};
44
#[test]
55
fn test_check_condition() {
66
let mut kind = RuleType::Suffix;
7-
let mut rule = AfxRulePattern::default();
8-
rule.set_pattern("[^aeiou]y", kind).unwrap();
7+
let mut rule_pat = AfxRulePattern::default();
8+
rule_pat.set_pattern("[^aeiou]y", kind).unwrap();
99

1010
// General tests, including with pattern in the middle
11-
assert!(rule.check_condition("xxxy"));
12-
assert!(!rule.check_condition("xxxay"));
13-
assert!(!rule.check_condition("xxxyxx"));
11+
assert!(rule_pat.check_condition("xxxy"));
12+
assert!(!rule_pat.check_condition("xxxay"));
13+
assert!(!rule_pat.check_condition("xxxyxx"));
1414

1515
// Test with prefix
1616
kind = RuleType::Prefix;
17-
rule.set_pattern("y[^aeiou]", kind).unwrap();
18-
assert!(rule.check_condition("yxxx"));
19-
assert!(!rule.check_condition("yaxxx"));
20-
assert!(!rule.check_condition("xxxyxxx"));
17+
rule_pat.set_pattern("y[^aeiou]", kind).unwrap();
18+
assert!(rule_pat.check_condition("yxxx"));
19+
assert!(!rule_pat.check_condition("yaxxx"));
20+
assert!(!rule_pat.check_condition("xxxyxxx"));
2121

2222
// Test other real rules
2323
kind = RuleType::Suffix;
24-
rule.set_pattern("[sxzh]", kind).unwrap();
25-
assert!(rule.check_condition("access"));
26-
assert!(rule.check_condition("abyss"));
27-
assert!(!rule.check_condition("accomplishment"));
28-
assert!(rule.check_condition("mmms"));
29-
assert!(!rule.check_condition("mmsmm"));
24+
rule_pat.set_pattern("[sxzh]", kind).unwrap();
25+
assert!(rule_pat.check_condition("access"));
26+
assert!(rule_pat.check_condition("abyss"));
27+
assert!(!rule_pat.check_condition("accomplishment"));
28+
assert!(rule_pat.check_condition("mmms"));
29+
assert!(!rule_pat.check_condition("mmsmm"));
3030

3131
// Check with default condition
32-
rule.set_pattern(".", kind).unwrap();
33-
assert!(rule.check_condition("xxx"));
32+
rule_pat.set_pattern(".", kind).unwrap();
33+
assert!(rule_pat.check_condition("xxx"));
3434
}
3535

3636
// affix, strip, condition, kind, input, output
37-
const RULE_PATTERNS: &[(&str, Option<&str>, &str, RuleType, &str, &str)] = &[
37+
type TestRulePattern = (
38+
&'static str,
39+
Option<&'static str>,
40+
&'static str,
41+
RuleType,
42+
&'static str,
43+
&'static str,
44+
);
45+
const RULE_PATTERNS: &[TestRulePattern] = &[
3846
("zzz", Some("y"), "[^aeiou]y", Suffix, "xxxy", "xxxzzz"),
3947
("zzz", Some("y"), "y[^aeiou]", Prefix, "yxxx", "zzzxxx"),
4048
("zzz", None, ".", Suffix, "xxx", "xxxzzz"),

0 commit comments

Comments
 (0)