| 20 |
22 |
| 21 /// Output file (default is stdout) |
23 /// Output file (default is stdout) |
| 22 #[arg(long, short = 'o')] |
24 #[arg(long, short = 'o')] |
| 23 output : Option<String>, |
25 output : Option<String>, |
| 24 |
26 |
| |
27 #[clap(flatten)] |
| |
28 config : Config |
| |
29 } |
| |
30 |
| |
31 #[derive(Parser, Debug)] |
| |
32 struct Config { |
| 25 #[arg(long, short = 'c')] |
33 #[arg(long, short = 'c')] |
| 26 /// Strip comments |
34 /// Strip comments |
| 27 strip_comments : bool, |
35 strip_comments : bool, |
| 28 |
36 |
| 29 #[arg(long, short = 'w')] |
37 #[arg(long, short = 'w')] |
| 30 /// Strip unnecessary whitespace |
38 /// Strip unnecessary whitespace |
| 31 strip_whitespace : bool, |
39 strip_whitespace : bool, |
| 32 } |
40 } |
| 33 |
41 |
| 34 #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
42 struct Context { |
| 35 enum Element { |
43 lineno : usize, |
| 36 Added, |
44 input_only_ws : bool, |
| 37 Deleted, |
45 cli : Config |
| 38 Replaced, |
46 } |
| 39 Other, |
47 |
| 40 Comment, |
48 type AnyChainRule = Box<dyn ChainRule>; |
| 41 } |
49 type AnyNestedRule = Box<dyn NestedRule>; |
| 42 |
50 |
| 43 #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
51 trait ChainRule { |
| 44 enum Status { |
52 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule; |
| 45 Output(Element), |
53 fn flush(self : Box<Self>, ctx : &Context); |
| 46 Ignore(Element), |
54 } |
| 47 Scan(Element, bool), |
55 |
| 48 } |
56 trait NestedRule : ChainRule { |
| 49 |
57 fn produce(&mut self, c : char, ctx : &Context); |
| 50 use Status::*; |
58 fn next(self : Box<Self>) -> AnyChainRule; |
| 51 use Element::*; |
59 fn produce_string(&mut self, s : String, ctx : &Context) { |
| |
60 s.chars().for_each(|c| self.produce(c, ctx)); |
| |
61 } |
| |
62 fn start_ignored_comment(&mut self, c : char); |
| |
63 } |
| |
64 |
| |
65 impl<W : Write + 'static> ChainRule for Out<W> { |
| |
66 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
67 basic_consume(self, c, ctx, true) |
| |
68 } |
| |
69 fn flush(mut self : Box<Self>, _ctx : &Context) { |
| |
70 self.output.flush().unwrap(); |
| |
71 } |
| |
72 } |
| |
73 |
| |
74 impl<W : Write + 'static> NestedRule for Out<W> { |
| |
75 fn produce(&mut self, c : char, ctx : &Context) { |
| |
76 if c == '\n' { |
| |
77 self.line_end(ctx.cli.strip_whitespace, ctx.input_only_ws) |
| |
78 } else if c.is_whitespace() { |
| |
79 self.stored_whitespace.push(c); |
| |
80 } else { |
| |
81 write!(self.output, "{}{}", self.stored_whitespace, c).unwrap(); |
| |
82 self.stored_whitespace.clear(); |
| |
83 self.only_whitespace = false; |
| |
84 self.whitespace_satisfied = false; |
| |
85 self.par_satisfied = false; |
| |
86 } |
| |
87 } |
| |
88 |
| |
89 fn next(self : Box<Self>) -> AnyChainRule { |
| |
90 self |
| |
91 } |
| |
92 |
| |
93 fn start_ignored_comment(&mut self, c : char) { |
| |
94 if self.stored_whitespace.is_empty() && !self.only_whitespace { |
| |
95 // The marker needs to be inserted if there is to be no whitespace inserted |
| |
96 write!(self.output, "{c}").unwrap(); |
| |
97 self.whitespace_satisfied = false; |
| |
98 self.par_satisfied = false; |
| |
99 self.only_whitespace = false; |
| |
100 } else if self.only_whitespace { |
| |
101 self.ignored_comment_only_line = true |
| |
102 } |
| |
103 } |
| |
104 } |
| |
105 |
| |
106 fn basic_consume(mut s : AnyNestedRule, c : char, ctx : &Context, print_end : bool) |
| |
107 -> AnyChainRule { |
| |
108 match c { |
| |
109 '{' => { |
| |
110 s.produce(c, ctx); |
| |
111 Box::new(Group(s)) |
| |
112 }, |
| |
113 '}' => { |
| |
114 if print_end { |
| |
115 s.produce(c, ctx); |
| |
116 } |
| |
117 s.next() |
| |
118 }, |
| |
119 '\\' => { |
| |
120 Box::new(CommandName{parent : s, command : "\\".to_string()}) |
| |
121 }, |
| |
122 '%' => { |
| |
123 if !ctx.cli.strip_comments { |
| |
124 s.produce(c, ctx); |
| |
125 Box::new(Comment(s)) |
| |
126 } else { |
| |
127 s.start_ignored_comment(c); |
| |
128 Box::new(IgnoreComment(s)) |
| |
129 } |
| |
130 }, |
| |
131 _ => { |
| |
132 s.produce(c, ctx); |
| |
133 s |
| |
134 } |
| |
135 } |
| |
136 } |
| |
137 |
| |
138 struct CommandName { |
| |
139 parent : AnyNestedRule, |
| |
140 command : String |
| |
141 } |
| |
142 |
| |
143 impl ChainRule for CommandName { |
| |
144 fn consume(mut self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
145 match c { |
| |
146 '}' | '{' | '\\' if self.command.len() <= 1 => { |
| |
147 self.command.push(c); |
| |
148 self.handle(ctx) |
| |
149 }, |
| |
150 c if c.is_alphanumeric() => { |
| |
151 self.command.push(c); |
| |
152 self |
| |
153 }, |
| |
154 c => { |
| |
155 self.handle(ctx) |
| |
156 .consume(c, ctx) |
| |
157 } |
| |
158 } |
| |
159 } |
| |
160 |
| |
161 fn flush(self : Box<Self>, ctx : &Context) { |
| |
162 self.handle(ctx) |
| |
163 .flush(ctx) |
| |
164 } |
| |
165 } |
| |
166 |
| |
167 impl CommandName { |
| |
168 fn handle(mut self, ctx : &Context) -> AnyChainRule { |
| |
169 match self.command.as_str() { |
| |
170 "\\added" => { |
| |
171 Scan::new(Added(self.parent)) |
| |
172 }, |
| |
173 "\\replaced" => { |
| |
174 Scan::new(Replaced(self.parent)) |
| |
175 }, |
| |
176 "\\deleted" => { |
| |
177 Scan::new(Deleted(self.parent)) |
| |
178 }, |
| |
179 _ => { |
| |
180 self.parent.produce_string(self.command, ctx); |
| |
181 self.parent |
| |
182 } |
| |
183 } |
| |
184 } |
| |
185 } |
| |
186 |
| |
187 struct Comment(AnyNestedRule); |
| |
188 |
| |
189 impl ChainRule for Comment { |
| |
190 fn consume(mut self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
191 if c == '\n' { |
| |
192 self.0.consume(c, ctx) |
| |
193 } else { |
| |
194 self.0.produce(c, ctx); |
| |
195 self |
| |
196 } |
| |
197 } |
| |
198 fn flush(self : Box<Self>, ctx : &Context) { |
| |
199 self.0.flush(ctx) |
| |
200 } |
| |
201 } |
| |
202 |
| |
203 struct IgnoreComment(AnyChainRule); |
| |
204 |
| |
205 impl ChainRule for IgnoreComment { |
| |
206 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
207 if c == '\n' { |
| |
208 self.0.consume(c, ctx) |
| |
209 } else { |
| |
210 self |
| |
211 } |
| |
212 } |
| |
213 fn flush(self : Box<Self>, ctx : &Context) { |
| |
214 self.0.flush(ctx) |
| |
215 } |
| |
216 } |
| |
217 |
| |
218 struct Group(AnyNestedRule); |
| |
219 |
| |
220 impl ChainRule for Group { |
| |
221 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
222 basic_consume(self, c, ctx, true) |
| |
223 } |
| |
224 fn flush(self : Box<Self>, ctx : &Context) { |
| |
225 self.0.flush(ctx) |
| |
226 } |
| |
227 } |
| |
228 |
| |
229 impl NestedRule for Group { |
| |
230 fn produce(&mut self, c : char, ctx : &Context) { |
| |
231 self.0.produce(c, ctx) |
| |
232 } |
| |
233 fn next(self : Box<Self>) -> AnyChainRule { |
| |
234 self.0 |
| |
235 } |
| |
236 fn start_ignored_comment(&mut self, c : char) { |
| |
237 self.0.start_ignored_comment(c) |
| |
238 } |
| |
239 } |
| |
240 |
| |
241 struct Added(AnyNestedRule); |
| |
242 |
| |
243 impl ChainRule for Added { |
| |
244 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
245 basic_consume(self, c, ctx, false) |
| |
246 } |
| |
247 fn flush(self : Box<Self>, ctx : &Context) { |
| |
248 self.0.flush(ctx) |
| |
249 } |
| |
250 } |
| |
251 |
| |
252 impl NestedRule for Added { |
| |
253 fn produce(&mut self, c : char, ctx : &Context) { |
| |
254 self.0.produce(c, ctx) |
| |
255 } |
| |
256 fn next(self : Box<Self>) -> AnyChainRule { |
| |
257 self.0 |
| |
258 } |
| |
259 fn start_ignored_comment(&mut self, c : char) { |
| |
260 self.0.start_ignored_comment(c) |
| |
261 } |
| |
262 } |
| |
263 struct Deleted(AnyNestedRule); |
| |
264 |
| |
265 impl ChainRule for Deleted { |
| |
266 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
267 basic_consume(self, c, ctx, false) |
| |
268 } |
| |
269 fn flush(self : Box<Self>, ctx : &Context) { |
| |
270 self.0.flush(ctx) |
| |
271 } |
| |
272 } |
| |
273 |
| |
274 impl NestedRule for Deleted { |
| |
275 fn produce(&mut self, _c : char, _ctx : &Context) { |
| |
276 } |
| |
277 fn next(self : Box<Self>) -> AnyChainRule { |
| |
278 self.0 |
| |
279 } |
| |
280 fn start_ignored_comment(&mut self, c : char) { |
| |
281 self.0.start_ignored_comment(c) |
| |
282 } |
| |
283 } |
| |
284 |
| |
285 struct Replaced(AnyNestedRule); |
| |
286 |
| |
287 impl ChainRule for Replaced { |
| |
288 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
289 basic_consume(self, c, ctx, false) |
| |
290 } |
| |
291 fn flush(self : Box<Self>, ctx : &Context) { |
| |
292 self.0.flush(ctx) |
| |
293 } |
| |
294 } |
| |
295 |
| |
296 impl NestedRule for Replaced { |
| |
297 fn produce(&mut self, c : char, ctx : &Context) { |
| |
298 self.0.produce(c, ctx) |
| |
299 } |
| |
300 fn next(self : Box<Self>) -> AnyChainRule { |
| |
301 Scan::new(Deleted(self.0)) |
| |
302 } |
| |
303 fn start_ignored_comment(&mut self, c : char) { |
| |
304 self.0.start_ignored_comment(c) |
| |
305 } |
| |
306 } |
| |
307 |
| |
308 struct Scan(AnyNestedRule); |
| |
309 |
| |
310 impl ChainRule for Scan { |
| |
311 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
| |
312 if c.is_whitespace() || c == '\n' { |
| |
313 self |
| |
314 } else if c == '{' { |
| |
315 self.0 |
| |
316 } else if c == '%' { |
| |
317 Box::new(IgnoreComment(self)) |
| |
318 } else { |
| |
319 panic!("Non-whitespace character ({c}) separating arguments on \ |
| |
320 line {lineno}", lineno = ctx.lineno) |
| |
321 } |
| |
322 } |
| |
323 fn flush(self : Box<Self>, ctx : &Context) { |
| |
324 self.0.flush(ctx) |
| |
325 } |
| |
326 } |
| |
327 |
| |
328 impl Scan { |
| |
329 fn new<R : NestedRule + 'static>(r : R) -> Box<dyn ChainRule> { |
| |
330 Box::new(Scan(Box::new(r))) |
| |
331 } |
| |
332 } |
| |
333 |
| 52 |
334 |
| 53 struct Out<W : Write> { |
335 struct Out<W : Write> { |
| 54 only_whitespace : bool, |
336 only_whitespace : bool, |
| 55 stored_whitespace : String, |
337 stored_whitespace : String, |
| 56 output : W, |
338 output : W, |
| 57 stack : Vec<Status>, |
|
| 58 whitespace_satisfied : bool, |
339 whitespace_satisfied : bool, |
| 59 par_satisfied : bool, |
340 par_satisfied : bool, |
| |
341 ignored_comment_only_line : bool |
| 60 } |
342 } |
| 61 |
343 |
| 62 impl<W : Write> Out<W> { |
344 impl<W : Write> Out<W> { |
| 63 fn current(&self) -> Status { |
|
| 64 self.stack.last().map_or(Output(Other), |s| *s) |
|
| 65 } |
|
| 66 |
|
| 67 fn raw_out(&mut self, c : char) { |
|
| 68 write!(self.output, "{}", c).unwrap(); |
|
| 69 } |
|
| 70 |
|
| 71 pub fn out(&mut self, c : char) { |
|
| 72 self.only_whitespace = false; |
|
| 73 write!(self.output, "{}{}", self.stored_whitespace, c).unwrap(); |
|
| 74 self.stored_whitespace.clear(); |
|
| 75 self.whitespace_satisfied = false; |
|
| 76 self.par_satisfied = false; |
|
| 77 } |
|
| 78 |
|
| 79 pub fn whitespace(&mut self, c : char) { |
|
| 80 self.stored_whitespace.push(c); |
|
| 81 } |
|
| 82 |
|
| 83 pub fn line_end(&mut self, strip_ws : bool, input_only_ws : bool) { |
345 pub fn line_end(&mut self, strip_ws : bool, input_only_ws : bool) { |
| 84 let cur = self.current(); |
|
| 85 let skip_linefeed = if input_only_ws { |
346 let skip_linefeed = if input_only_ws { |
| 86 // Need a paragraph break |
347 // Need a paragraph break |
| 87 strip_ws && self.par_satisfied |
348 strip_ws && self.par_satisfied |
| 88 } else if strip_ws { |
349 } else if strip_ws { |
| 89 self.only_whitespace && self.whitespace_satisfied |
350 self.only_whitespace && self.whitespace_satisfied |
| 90 } else if let Ignore(Comment) = cur { |
351 } else { |
| 91 // Skip comment-only lines if the comment is ignored |
352 // Skip comment-only lines if the comment is ignored |
| 92 self.only_whitespace |
353 self.ignored_comment_only_line |
| 93 } else if let Ignore(_) = cur { |
|
| 94 // Skip line feeds in ignored bits |
|
| 95 true |
|
| 96 } else { |
|
| 97 false |
|
| 98 }; |
354 }; |
| 99 |
355 |
| 100 if !skip_linefeed { |
356 if !skip_linefeed { |
| 101 if !strip_ws { |
357 if !strip_ws { |
| 102 write!(self.output, "{}", self.stored_whitespace).unwrap(); |
358 write!(self.output, "{}", self.stored_whitespace).unwrap(); |
| 103 } |
359 } |
| 104 self.raw_out('\n'); |
360 write!(self.output, "\n").unwrap(); |
| 105 self.whitespace_satisfied = true; |
361 self.whitespace_satisfied = true; |
| 106 self.par_satisfied = self.only_whitespace; |
362 self.par_satisfied = self.only_whitespace; |
| 107 } |
363 } |
| 108 |
364 |
| 109 if let Ignore(Comment) | Output(Comment) = cur { |
|
| 110 self.stack.pop(); |
|
| 111 } |
|
| 112 |
|
| 113 self.stored_whitespace.clear(); |
365 self.stored_whitespace.clear(); |
| 114 self.only_whitespace = true; |
366 self.only_whitespace = true; |
| 115 } |
367 self.ignored_comment_only_line = false; |
| 116 |
|
| 117 pub fn flush(&mut self) { |
|
| 118 self.output.flush().unwrap(); |
|
| 119 } |
368 } |
| 120 } |
369 } |
| 121 |
370 |
| 122 fn main() { |
371 fn main() { |
| 123 let cli = CommandLineArgs::parse(); |
372 let cli = CommandLineArgs::parse(); |
| 128 let output = cli.output.map_or_else( |
377 let output = cli.output.map_or_else( |
| 129 || Box::new(BufWriter::new(io::stdout())) as Box<dyn Write>, |
378 || Box::new(BufWriter::new(io::stdout())) as Box<dyn Write>, |
| 130 |f| Box::new(BufWriter::new(File::create(f).unwrap())) as Box<dyn Write> |
379 |f| Box::new(BufWriter::new(File::create(f).unwrap())) as Box<dyn Write> |
| 131 ); |
380 ); |
| 132 |
381 |
| 133 let mut o = Out { |
382 let mut rule : Box<dyn ChainRule> = Box::new(Out { |
| 134 only_whitespace : true, |
383 only_whitespace : true, |
| 135 stored_whitespace : String::new(), |
384 stored_whitespace : String::new(), |
| 136 output, |
385 output, |
| 137 stack : Vec::new(), |
|
| 138 whitespace_satisfied : true, |
386 whitespace_satisfied : true, |
| 139 par_satisfied : true, |
387 par_satisfied : true, |
| 140 }; |
388 ignored_comment_only_line : false |
| 141 |
389 }); |
| 142 let mut lineno = 0; |
390 |
| |
391 let mut ctx = Context{ lineno : 0, cli : cli.config, input_only_ws : true}; |
| 143 |
392 |
| 144 for l in input.lines().map(|l| l.unwrap()) { |
393 for l in input.lines().map(|l| l.unwrap()) { |
| 145 lineno += 1; |
394 ctx.lineno += 1; |
| 146 let mut chars = l.chars(); |
395 ctx.input_only_ws = true; |
| 147 let mut maybe_next_char = None; |
396 for c in l.chars() { |
| 148 let mut input_only_ws = true; |
397 ctx.input_only_ws = ctx.input_only_ws && c.is_whitespace(); |
| 149 |
398 rule = rule.consume(c, &ctx); |
| 150 'process_line: loop { |
399 } |
| 151 let next_char = match maybe_next_char { |
400 rule = rule.consume('\n', &ctx); |
| 152 None => chars.next(), |
401 } |
| 153 Some(c) => { |
402 |
| 154 maybe_next_char = None; |
403 rule.flush(&ctx); |
| 155 Some(c) |
404 } |
| 156 } |
|
| 157 }; |
|
| 158 input_only_ws = input_only_ws && next_char.map_or(true, |c| c.is_whitespace()); |
|
| 159 match(o.current(), next_char) { |
|
| 160 (_, None) => { |
|
| 161 break 'process_line; |
|
| 162 }, |
|
| 163 (st @ (Output(e) | Ignore(e)), Some('\\')) if e != Comment => { |
|
| 164 let mut command = String::new(); |
|
| 165 let mut first = true; |
|
| 166 maybe_next_char = 'scan_command: loop { |
|
| 167 match chars.next() { |
|
| 168 Some(c) if first && (c=='{' || c=='}' || c=='\\') => { |
|
| 169 command.push(c); |
|
| 170 break 'scan_command None; |
|
| 171 }, |
|
| 172 Some(c) if c.is_alphanumeric() => { |
|
| 173 command.push(c); |
|
| 174 }, |
|
| 175 maybe_c => { |
|
| 176 break 'scan_command maybe_c; |
|
| 177 } |
|
| 178 } |
|
| 179 first = false; |
|
| 180 }; |
|
| 181 let output_guard = if let Ignore(_) = st { false } else { true }; |
|
| 182 match command.as_str() { |
|
| 183 "added" => { |
|
| 184 o.stack.push(Scan(Added, true && output_guard)); |
|
| 185 }, |
|
| 186 "replaced" => { |
|
| 187 o.stack.push(Scan(Replaced, true && output_guard)); |
|
| 188 }, |
|
| 189 "deleted" => { |
|
| 190 o.stack.push(Scan(Deleted, false)); |
|
| 191 }, |
|
| 192 _ => { |
|
| 193 if output_guard { |
|
| 194 o.out('\\'); |
|
| 195 command.chars().for_each(|c| o.out(c.clone())); |
|
| 196 } |
|
| 197 } |
|
| 198 }; |
|
| 199 }, |
|
| 200 (Scan(next, out), Some(c)) => { |
|
| 201 match c { |
|
| 202 '{' => { |
|
| 203 o.stack.pop(); |
|
| 204 o.stack.push(if out { Output(next) } else { Ignore(next) }); |
|
| 205 }, |
|
| 206 ' ' => { |
|
| 207 }, |
|
| 208 _ => panic!("Non-whitespace character ({c}) separating arguments on\ |
|
| 209 line {lineno}"), |
|
| 210 } |
|
| 211 }, |
|
| 212 (Output(e), Some('{')) if e != Comment => { |
|
| 213 o.out('{'); |
|
| 214 o.stack.push(Output(Other)); |
|
| 215 }, |
|
| 216 (Ignore(e), Some('{')) if e != Comment => { |
|
| 217 o.stack.push(Ignore(Other)); |
|
| 218 }, |
|
| 219 (Output(Added) | Ignore(Added) | Output(Deleted) | Ignore(Deleted), Some('}')) => { |
|
| 220 o.stack.pop(); |
|
| 221 }, |
|
| 222 (Output(Replaced) | Ignore(Replaced), Some('}')) => { |
|
| 223 o.stack.pop(); |
|
| 224 o.stack.push(Scan(Deleted, false)); |
|
| 225 }, |
|
| 226 (Output(Other), Some('}')) => { |
|
| 227 o.out('}'); |
|
| 228 o.stack.pop(); |
|
| 229 }, |
|
| 230 (Ignore(e), Some('}')) if e != Comment => { |
|
| 231 o.stack.pop(); |
|
| 232 }, |
|
| 233 (Output(e), Some('%')) if e != Comment=> { |
|
| 234 if cli.strip_comments { |
|
| 235 if o.stored_whitespace.is_empty() && !o.only_whitespace { |
|
| 236 // Output comment marker if it is required to maintain |
|
| 237 // lack of whitespace. |
|
| 238 o.out('%'); |
|
| 239 } |
|
| 240 o.stack.push(Ignore(Comment)); |
|
| 241 } else { |
|
| 242 o.out('%'); |
|
| 243 o.stack.push(Output(Comment)); |
|
| 244 } |
|
| 245 }, |
|
| 246 (Ignore(e), Some('%')) if e != Comment => { |
|
| 247 o.stack.push(Ignore(Comment)); |
|
| 248 }, |
|
| 249 (Output(_), Some(c)) if c.is_whitespace() => { |
|
| 250 o.whitespace(c); |
|
| 251 }, |
|
| 252 (Output(_), Some(c)) => { |
|
| 253 o.out(c); |
|
| 254 }, |
|
| 255 (Ignore(_), Some(_)) => { |
|
| 256 }, |
|
| 257 }; |
|
| 258 } |
|
| 259 |
|
| 260 o.line_end(cli.strip_whitespace, input_only_ws); |
|
| 261 } |
|
| 262 |
|
| 263 o.flush(); |
|
| 264 } |
|