1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
use error::Error;
use nfa::{Nfa, NoLooks};
use runner::anchored::AnchoredEngine;
use runner::forward_backward::{ForwardBackwardEngine, Prefix};
use runner::Engine;
use std;
use std::fmt::Debug;
#[derive(Debug)]
pub struct Regex {
engine: Box<Engine<u8>>,
}
#[derive(Clone, Debug)]
struct EmptyEngine;
impl<Ret: Debug> Engine<Ret> for EmptyEngine {
fn find(&self, _: &str) -> Option<(usize, usize, Ret)> { None }
fn clone_box(&self) -> Box<Engine<Ret>> { Box::new(EmptyEngine) }
}
impl Clone for Regex {
fn clone(&self) -> Regex {
Regex {
engine: self.engine.clone_box(),
}
}
}
impl Regex {
pub fn new(re: &str) -> ::Result<Regex> {
Regex::new_bounded(re, std::usize::MAX)
}
pub fn new_bounded(re: &str, max_states: usize) -> ::Result<Regex> {
let nfa = try!(Nfa::from_regex(re));
let nfa = nfa.remove_looks();
let eng = if nfa.is_empty() {
Box::new(EmptyEngine) as Box<Engine<u8>>
} else if nfa.is_anchored() {
Box::new(try!(Regex::make_anchored(nfa, max_states))) as Box<Engine<u8>>
} else {
Box::new(try!(Regex::make_forward_backward(nfa, max_states))) as Box<Engine<u8>>
};
Ok(Regex { engine: eng })
}
fn make_anchored(nfa: Nfa<u32, NoLooks>, max_states: usize)
-> ::Result<AnchoredEngine<u8>> {
let nfa = try!(nfa.byte_me(max_states));
let dfa = try!(nfa.determinize(max_states))
.optimize()
.map_ret(|(_, bytes)| bytes);
let prog = dfa.compile();
Ok(AnchoredEngine::new(prog))
}
fn make_forward_backward(nfa: Nfa<u32, NoLooks>, max_states: usize)
-> ::Result<ForwardBackwardEngine<u8>> {
if nfa.is_anchored() {
return Err(Error::InvalidEngine("anchors rule out the forward-backward engine"));
}
let f_nfa = try!(try!(nfa.clone().byte_me(max_states)).anchor(max_states));
let b_nfa = try!(try!(nfa.byte_me(max_states)).reverse(max_states));
let f_dfa = try!(f_nfa.determinize(max_states)).optimize();
let b_dfa = try!(b_nfa.determinize_longest(max_states)).optimize();
let b_dfa = b_dfa.map_ret(|(_, bytes)| bytes);
let b_prog = b_dfa.compile();
let f_dfa = f_dfa.map_ret(|(look, bytes)| {
let b_dfa_state = b_dfa.init[look.as_usize()].expect("BUG: back dfa must have this init");
(b_dfa_state, bytes)
});
let mut f_prog = f_dfa.compile();
let prefix = Prefix::from_parts(f_dfa.prefix_strings());
match prefix {
Prefix::Empty => {},
_ => {
let f_dfa = f_dfa.cut_loop_to_init().optimize();
f_prog = f_dfa.compile();
},
}
Ok(ForwardBackwardEngine::new(f_prog, prefix, b_prog))
}
pub fn find(&self, s: &str) -> Option<(usize, usize)> {
if let Some((start, end, look_behind)) = self.engine.find(s) {
Some((start + look_behind as usize, end))
} else {
None
}
}
pub fn is_match(&self, s: &str) -> bool {
self.find(s).is_some()
}
}