20 |
22 |
21 /// Output file (default is stdout) |
23 /// Output file (default is stdout) |
22 #[arg(long, short = 'o')] |
24 #[arg(long, short = 'o')] |
23 output : Option<String>, |
25 output : Option<String>, |
24 |
26 |
|
27 #[clap(flatten)] |
|
28 config : Config |
|
29 } |
|
30 |
|
31 #[derive(Parser, Debug)] |
|
32 struct Config { |
25 #[arg(long, short = 'c')] |
33 #[arg(long, short = 'c')] |
26 /// Strip comments |
34 /// Strip comments |
27 strip_comments : bool, |
35 strip_comments : bool, |
28 |
36 |
29 #[arg(long, short = 'w')] |
37 #[arg(long, short = 'w')] |
30 /// Strip unnecessary whitespace |
38 /// Strip unnecessary whitespace |
31 strip_whitespace : bool, |
39 strip_whitespace : bool, |
32 } |
40 } |
33 |
41 |
34 #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
42 struct Context { |
35 enum Element { |
43 lineno : usize, |
36 Added, |
44 input_only_ws : bool, |
37 Deleted, |
45 cli : Config |
38 Replaced, |
46 } |
39 Other, |
47 |
40 Comment, |
48 type AnyChainRule = Box<dyn ChainRule>; |
41 } |
49 type AnyNestedRule = Box<dyn NestedRule>; |
42 |
50 |
43 #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
51 trait ChainRule { |
44 enum Status { |
52 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule; |
45 Output(Element), |
53 fn flush(self : Box<Self>, ctx : &Context); |
46 Ignore(Element), |
54 } |
47 Scan(Element, bool), |
55 |
48 } |
56 trait NestedRule : ChainRule { |
49 |
57 fn produce(&mut self, c : char, ctx : &Context); |
50 use Status::*; |
58 fn next(self : Box<Self>) -> AnyChainRule; |
51 use Element::*; |
59 fn produce_string(&mut self, s : String, ctx : &Context) { |
|
60 s.chars().for_each(|c| self.produce(c, ctx)); |
|
61 } |
|
62 fn start_ignored_comment(&mut self, c : char); |
|
63 } |
|
64 |
|
65 impl<W : Write + 'static> ChainRule for Out<W> { |
|
66 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
67 basic_consume(self, c, ctx, true) |
|
68 } |
|
69 fn flush(mut self : Box<Self>, _ctx : &Context) { |
|
70 self.output.flush().unwrap(); |
|
71 } |
|
72 } |
|
73 |
|
74 impl<W : Write + 'static> NestedRule for Out<W> { |
|
75 fn produce(&mut self, c : char, ctx : &Context) { |
|
76 if c == '\n' { |
|
77 self.line_end(ctx.cli.strip_whitespace, ctx.input_only_ws) |
|
78 } else if c.is_whitespace() { |
|
79 self.stored_whitespace.push(c); |
|
80 } else { |
|
81 write!(self.output, "{}{}", self.stored_whitespace, c).unwrap(); |
|
82 self.stored_whitespace.clear(); |
|
83 self.only_whitespace = false; |
|
84 self.whitespace_satisfied = false; |
|
85 self.par_satisfied = false; |
|
86 } |
|
87 } |
|
88 |
|
89 fn next(self : Box<Self>) -> AnyChainRule { |
|
90 self |
|
91 } |
|
92 |
|
93 fn start_ignored_comment(&mut self, c : char) { |
|
94 if self.stored_whitespace.is_empty() && !self.only_whitespace { |
|
95 // The marker needs to be inserted if there is to be no whitespace inserted |
|
96 write!(self.output, "{c}").unwrap(); |
|
97 self.whitespace_satisfied = false; |
|
98 self.par_satisfied = false; |
|
99 self.only_whitespace = false; |
|
100 } else if self.only_whitespace { |
|
101 self.ignored_comment_only_line = true |
|
102 } |
|
103 } |
|
104 } |
|
105 |
|
106 fn basic_consume(mut s : AnyNestedRule, c : char, ctx : &Context, print_end : bool) |
|
107 -> AnyChainRule { |
|
108 match c { |
|
109 '{' => { |
|
110 s.produce(c, ctx); |
|
111 Box::new(Group(s)) |
|
112 }, |
|
113 '}' => { |
|
114 if print_end { |
|
115 s.produce(c, ctx); |
|
116 } |
|
117 s.next() |
|
118 }, |
|
119 '\\' => { |
|
120 Box::new(CommandName{parent : s, command : "\\".to_string()}) |
|
121 }, |
|
122 '%' => { |
|
123 if !ctx.cli.strip_comments { |
|
124 s.produce(c, ctx); |
|
125 Box::new(Comment(s)) |
|
126 } else { |
|
127 s.start_ignored_comment(c); |
|
128 Box::new(IgnoreComment(s)) |
|
129 } |
|
130 }, |
|
131 _ => { |
|
132 s.produce(c, ctx); |
|
133 s |
|
134 } |
|
135 } |
|
136 } |
|
137 |
|
138 struct CommandName { |
|
139 parent : AnyNestedRule, |
|
140 command : String |
|
141 } |
|
142 |
|
143 impl ChainRule for CommandName { |
|
144 fn consume(mut self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
145 match c { |
|
146 '}' | '{' | '\\' if self.command.len() <= 1 => { |
|
147 self.command.push(c); |
|
148 self.handle(ctx) |
|
149 }, |
|
150 c if c.is_alphanumeric() => { |
|
151 self.command.push(c); |
|
152 self |
|
153 }, |
|
154 c => { |
|
155 self.handle(ctx) |
|
156 .consume(c, ctx) |
|
157 } |
|
158 } |
|
159 } |
|
160 |
|
161 fn flush(self : Box<Self>, ctx : &Context) { |
|
162 self.handle(ctx) |
|
163 .flush(ctx) |
|
164 } |
|
165 } |
|
166 |
|
167 impl CommandName { |
|
168 fn handle(mut self, ctx : &Context) -> AnyChainRule { |
|
169 match self.command.as_str() { |
|
170 "\\added" => { |
|
171 Scan::new(Added(self.parent)) |
|
172 }, |
|
173 "\\replaced" => { |
|
174 Scan::new(Replaced(self.parent)) |
|
175 }, |
|
176 "\\deleted" => { |
|
177 Scan::new(Deleted(self.parent)) |
|
178 }, |
|
179 _ => { |
|
180 self.parent.produce_string(self.command, ctx); |
|
181 self.parent |
|
182 } |
|
183 } |
|
184 } |
|
185 } |
|
186 |
|
187 struct Comment(AnyNestedRule); |
|
188 |
|
189 impl ChainRule for Comment { |
|
190 fn consume(mut self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
191 if c == '\n' { |
|
192 self.0.consume(c, ctx) |
|
193 } else { |
|
194 self.0.produce(c, ctx); |
|
195 self |
|
196 } |
|
197 } |
|
198 fn flush(self : Box<Self>, ctx : &Context) { |
|
199 self.0.flush(ctx) |
|
200 } |
|
201 } |
|
202 |
|
203 struct IgnoreComment(AnyChainRule); |
|
204 |
|
205 impl ChainRule for IgnoreComment { |
|
206 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
207 if c == '\n' { |
|
208 self.0.consume(c, ctx) |
|
209 } else { |
|
210 self |
|
211 } |
|
212 } |
|
213 fn flush(self : Box<Self>, ctx : &Context) { |
|
214 self.0.flush(ctx) |
|
215 } |
|
216 } |
|
217 |
|
218 struct Group(AnyNestedRule); |
|
219 |
|
220 impl ChainRule for Group { |
|
221 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
222 basic_consume(self, c, ctx, true) |
|
223 } |
|
224 fn flush(self : Box<Self>, ctx : &Context) { |
|
225 self.0.flush(ctx) |
|
226 } |
|
227 } |
|
228 |
|
229 impl NestedRule for Group { |
|
230 fn produce(&mut self, c : char, ctx : &Context) { |
|
231 self.0.produce(c, ctx) |
|
232 } |
|
233 fn next(self : Box<Self>) -> AnyChainRule { |
|
234 self.0 |
|
235 } |
|
236 fn start_ignored_comment(&mut self, c : char) { |
|
237 self.0.start_ignored_comment(c) |
|
238 } |
|
239 } |
|
240 |
|
241 struct Added(AnyNestedRule); |
|
242 |
|
243 impl ChainRule for Added { |
|
244 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
245 basic_consume(self, c, ctx, false) |
|
246 } |
|
247 fn flush(self : Box<Self>, ctx : &Context) { |
|
248 self.0.flush(ctx) |
|
249 } |
|
250 } |
|
251 |
|
252 impl NestedRule for Added { |
|
253 fn produce(&mut self, c : char, ctx : &Context) { |
|
254 self.0.produce(c, ctx) |
|
255 } |
|
256 fn next(self : Box<Self>) -> AnyChainRule { |
|
257 self.0 |
|
258 } |
|
259 fn start_ignored_comment(&mut self, c : char) { |
|
260 self.0.start_ignored_comment(c) |
|
261 } |
|
262 } |
|
263 struct Deleted(AnyNestedRule); |
|
264 |
|
265 impl ChainRule for Deleted { |
|
266 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
267 basic_consume(self, c, ctx, false) |
|
268 } |
|
269 fn flush(self : Box<Self>, ctx : &Context) { |
|
270 self.0.flush(ctx) |
|
271 } |
|
272 } |
|
273 |
|
274 impl NestedRule for Deleted { |
|
275 fn produce(&mut self, _c : char, _ctx : &Context) { |
|
276 } |
|
277 fn next(self : Box<Self>) -> AnyChainRule { |
|
278 self.0 |
|
279 } |
|
280 fn start_ignored_comment(&mut self, c : char) { |
|
281 self.0.start_ignored_comment(c) |
|
282 } |
|
283 } |
|
284 |
|
285 struct Replaced(AnyNestedRule); |
|
286 |
|
287 impl ChainRule for Replaced { |
|
288 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
289 basic_consume(self, c, ctx, false) |
|
290 } |
|
291 fn flush(self : Box<Self>, ctx : &Context) { |
|
292 self.0.flush(ctx) |
|
293 } |
|
294 } |
|
295 |
|
296 impl NestedRule for Replaced { |
|
297 fn produce(&mut self, c : char, ctx : &Context) { |
|
298 self.0.produce(c, ctx) |
|
299 } |
|
300 fn next(self : Box<Self>) -> AnyChainRule { |
|
301 Scan::new(Deleted(self.0)) |
|
302 } |
|
303 fn start_ignored_comment(&mut self, c : char) { |
|
304 self.0.start_ignored_comment(c) |
|
305 } |
|
306 } |
|
307 |
|
308 struct Scan(AnyNestedRule); |
|
309 |
|
310 impl ChainRule for Scan { |
|
311 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule { |
|
312 if c.is_whitespace() || c == '\n' { |
|
313 self |
|
314 } else if c == '{' { |
|
315 self.0 |
|
316 } else if c == '%' { |
|
317 Box::new(IgnoreComment(self)) |
|
318 } else { |
|
319 panic!("Non-whitespace character ({c}) separating arguments on \ |
|
320 line {lineno}", lineno = ctx.lineno) |
|
321 } |
|
322 } |
|
323 fn flush(self : Box<Self>, ctx : &Context) { |
|
324 self.0.flush(ctx) |
|
325 } |
|
326 } |
|
327 |
|
328 impl Scan { |
|
329 fn new<R : NestedRule + 'static>(r : R) -> Box<dyn ChainRule> { |
|
330 Box::new(Scan(Box::new(r))) |
|
331 } |
|
332 } |
|
333 |
52 |
334 |
53 struct Out<W : Write> { |
335 struct Out<W : Write> { |
54 only_whitespace : bool, |
336 only_whitespace : bool, |
55 stored_whitespace : String, |
337 stored_whitespace : String, |
56 output : W, |
338 output : W, |
57 stack : Vec<Status>, |
|
58 whitespace_satisfied : bool, |
339 whitespace_satisfied : bool, |
59 par_satisfied : bool, |
340 par_satisfied : bool, |
|
341 ignored_comment_only_line : bool |
60 } |
342 } |
61 |
343 |
62 impl<W : Write> Out<W> { |
344 impl<W : Write> Out<W> { |
63 fn current(&self) -> Status { |
|
64 self.stack.last().map_or(Output(Other), |s| *s) |
|
65 } |
|
66 |
|
67 fn raw_out(&mut self, c : char) { |
|
68 write!(self.output, "{}", c).unwrap(); |
|
69 } |
|
70 |
|
71 pub fn out(&mut self, c : char) { |
|
72 self.only_whitespace = false; |
|
73 write!(self.output, "{}{}", self.stored_whitespace, c).unwrap(); |
|
74 self.stored_whitespace.clear(); |
|
75 self.whitespace_satisfied = false; |
|
76 self.par_satisfied = false; |
|
77 } |
|
78 |
|
79 pub fn whitespace(&mut self, c : char) { |
|
80 self.stored_whitespace.push(c); |
|
81 } |
|
82 |
|
83 pub fn line_end(&mut self, strip_ws : bool, input_only_ws : bool) { |
345 pub fn line_end(&mut self, strip_ws : bool, input_only_ws : bool) { |
84 let cur = self.current(); |
|
85 let skip_linefeed = if input_only_ws { |
346 let skip_linefeed = if input_only_ws { |
86 // Need a paragraph break |
347 // Need a paragraph break |
87 strip_ws && self.par_satisfied |
348 strip_ws && self.par_satisfied |
88 } else if strip_ws { |
349 } else if strip_ws { |
89 self.only_whitespace && self.whitespace_satisfied |
350 self.only_whitespace && self.whitespace_satisfied |
90 } else if let Ignore(Comment) = cur { |
351 } else { |
91 // Skip comment-only lines if the comment is ignored |
352 // Skip comment-only lines if the comment is ignored |
92 self.only_whitespace |
353 self.ignored_comment_only_line |
93 } else if let Ignore(_) = cur { |
|
94 // Skip line feeds in ignored bits |
|
95 true |
|
96 } else { |
|
97 false |
|
98 }; |
354 }; |
99 |
355 |
100 if !skip_linefeed { |
356 if !skip_linefeed { |
101 if !strip_ws { |
357 if !strip_ws { |
102 write!(self.output, "{}", self.stored_whitespace).unwrap(); |
358 write!(self.output, "{}", self.stored_whitespace).unwrap(); |
103 } |
359 } |
104 self.raw_out('\n'); |
360 write!(self.output, "\n").unwrap(); |
105 self.whitespace_satisfied = true; |
361 self.whitespace_satisfied = true; |
106 self.par_satisfied = self.only_whitespace; |
362 self.par_satisfied = self.only_whitespace; |
107 } |
363 } |
108 |
364 |
109 if let Ignore(Comment) | Output(Comment) = cur { |
|
110 self.stack.pop(); |
|
111 } |
|
112 |
|
113 self.stored_whitespace.clear(); |
365 self.stored_whitespace.clear(); |
114 self.only_whitespace = true; |
366 self.only_whitespace = true; |
115 } |
367 self.ignored_comment_only_line = false; |
116 |
|
117 pub fn flush(&mut self) { |
|
118 self.output.flush().unwrap(); |
|
119 } |
368 } |
120 } |
369 } |
121 |
370 |
122 fn main() { |
371 fn main() { |
123 let cli = CommandLineArgs::parse(); |
372 let cli = CommandLineArgs::parse(); |
128 let output = cli.output.map_or_else( |
377 let output = cli.output.map_or_else( |
129 || Box::new(BufWriter::new(io::stdout())) as Box<dyn Write>, |
378 || Box::new(BufWriter::new(io::stdout())) as Box<dyn Write>, |
130 |f| Box::new(BufWriter::new(File::create(f).unwrap())) as Box<dyn Write> |
379 |f| Box::new(BufWriter::new(File::create(f).unwrap())) as Box<dyn Write> |
131 ); |
380 ); |
132 |
381 |
133 let mut o = Out { |
382 let mut rule : Box<dyn ChainRule> = Box::new(Out { |
134 only_whitespace : true, |
383 only_whitespace : true, |
135 stored_whitespace : String::new(), |
384 stored_whitespace : String::new(), |
136 output, |
385 output, |
137 stack : Vec::new(), |
|
138 whitespace_satisfied : true, |
386 whitespace_satisfied : true, |
139 par_satisfied : true, |
387 par_satisfied : true, |
140 }; |
388 ignored_comment_only_line : false |
141 |
389 }); |
142 let mut lineno = 0; |
390 |
|
391 let mut ctx = Context{ lineno : 0, cli : cli.config, input_only_ws : true}; |
143 |
392 |
144 for l in input.lines().map(|l| l.unwrap()) { |
393 for l in input.lines().map(|l| l.unwrap()) { |
145 lineno += 1; |
394 ctx.lineno += 1; |
146 let mut chars = l.chars(); |
395 ctx.input_only_ws = true; |
147 let mut maybe_next_char = None; |
396 for c in l.chars() { |
148 let mut input_only_ws = true; |
397 ctx.input_only_ws = ctx.input_only_ws && c.is_whitespace(); |
149 |
398 rule = rule.consume(c, &ctx); |
150 'process_line: loop { |
399 } |
151 let next_char = match maybe_next_char { |
400 rule = rule.consume('\n', &ctx); |
152 None => chars.next(), |
401 } |
153 Some(c) => { |
402 |
154 maybe_next_char = None; |
403 rule.flush(&ctx); |
155 Some(c) |
404 } |
156 } |
|
157 }; |
|
158 input_only_ws = input_only_ws && next_char.map_or(true, |c| c.is_whitespace()); |
|
159 match(o.current(), next_char) { |
|
160 (_, None) => { |
|
161 break 'process_line; |
|
162 }, |
|
163 (st @ (Output(e) | Ignore(e)), Some('\\')) if e != Comment => { |
|
164 let mut command = String::new(); |
|
165 let mut first = true; |
|
166 maybe_next_char = 'scan_command: loop { |
|
167 match chars.next() { |
|
168 Some(c) if first && (c=='{' || c=='}' || c=='\\') => { |
|
169 command.push(c); |
|
170 break 'scan_command None; |
|
171 }, |
|
172 Some(c) if c.is_alphanumeric() => { |
|
173 command.push(c); |
|
174 }, |
|
175 maybe_c => { |
|
176 break 'scan_command maybe_c; |
|
177 } |
|
178 } |
|
179 first = false; |
|
180 }; |
|
181 let output_guard = if let Ignore(_) = st { false } else { true }; |
|
182 match command.as_str() { |
|
183 "added" => { |
|
184 o.stack.push(Scan(Added, true && output_guard)); |
|
185 }, |
|
186 "replaced" => { |
|
187 o.stack.push(Scan(Replaced, true && output_guard)); |
|
188 }, |
|
189 "deleted" => { |
|
190 o.stack.push(Scan(Deleted, false)); |
|
191 }, |
|
192 _ => { |
|
193 if output_guard { |
|
194 o.out('\\'); |
|
195 command.chars().for_each(|c| o.out(c.clone())); |
|
196 } |
|
197 } |
|
198 }; |
|
199 }, |
|
200 (Scan(next, out), Some(c)) => { |
|
201 match c { |
|
202 '{' => { |
|
203 o.stack.pop(); |
|
204 o.stack.push(if out { Output(next) } else { Ignore(next) }); |
|
205 }, |
|
206 ' ' => { |
|
207 }, |
|
208 _ => panic!("Non-whitespace character ({c}) separating arguments on\ |
|
209 line {lineno}"), |
|
210 } |
|
211 }, |
|
212 (Output(e), Some('{')) if e != Comment => { |
|
213 o.out('{'); |
|
214 o.stack.push(Output(Other)); |
|
215 }, |
|
216 (Ignore(e), Some('{')) if e != Comment => { |
|
217 o.stack.push(Ignore(Other)); |
|
218 }, |
|
219 (Output(Added) | Ignore(Added) | Output(Deleted) | Ignore(Deleted), Some('}')) => { |
|
220 o.stack.pop(); |
|
221 }, |
|
222 (Output(Replaced) | Ignore(Replaced), Some('}')) => { |
|
223 o.stack.pop(); |
|
224 o.stack.push(Scan(Deleted, false)); |
|
225 }, |
|
226 (Output(Other), Some('}')) => { |
|
227 o.out('}'); |
|
228 o.stack.pop(); |
|
229 }, |
|
230 (Ignore(e), Some('}')) if e != Comment => { |
|
231 o.stack.pop(); |
|
232 }, |
|
233 (Output(e), Some('%')) if e != Comment=> { |
|
234 if cli.strip_comments { |
|
235 if o.stored_whitespace.is_empty() && !o.only_whitespace { |
|
236 // Output comment marker if it is required to maintain |
|
237 // lack of whitespace. |
|
238 o.out('%'); |
|
239 } |
|
240 o.stack.push(Ignore(Comment)); |
|
241 } else { |
|
242 o.out('%'); |
|
243 o.stack.push(Output(Comment)); |
|
244 } |
|
245 }, |
|
246 (Ignore(e), Some('%')) if e != Comment => { |
|
247 o.stack.push(Ignore(Comment)); |
|
248 }, |
|
249 (Output(_), Some(c)) if c.is_whitespace() => { |
|
250 o.whitespace(c); |
|
251 }, |
|
252 (Output(_), Some(c)) => { |
|
253 o.out(c); |
|
254 }, |
|
255 (Ignore(_), Some(_)) => { |
|
256 }, |
|
257 }; |
|
258 } |
|
259 |
|
260 o.line_end(cli.strip_whitespace, input_only_ws); |
|
261 } |
|
262 |
|
263 o.flush(); |
|
264 } |
|