src/main.rs

changeset 6
de1cf8032322
parent 3
cec573b16b46
child 7
68538da191c7
equal deleted inserted replaced
5:3716d0eaa356 6:de1cf8032322
1 // The main documentation is in the README. 1 // The main documentation is in the README.
2 #![doc = include_str!("../README.md")] 2 #![doc = include_str!("../README.md")]
3
4 #![feature(trait_upcasting)]
3 5
4 use std::io; 6 use std::io;
5 use std::fs::File; 7 use std::fs::File;
6 use std::io::{BufWriter, BufRead, BufReader}; 8 use std::io::{BufWriter, BufRead, BufReader};
7 use std::io::Write; 9 use std::io::Write;
20 22
21 /// Output file (defalt is stdout) 23 /// Output file (defalt is stdout)
22 #[arg(long, short = 'o')] 24 #[arg(long, short = 'o')]
23 output : Option<String>, 25 output : Option<String>,
24 26
27 #[clap(flatten)]
28 config : Config
29 }
30
31 #[derive(Parser, Debug)]
32 struct Config {
25 #[arg(long, short = 'c')] 33 #[arg(long, short = 'c')]
26 /// Strip comments 34 /// Strip comments
27 strip_comments : bool, 35 strip_comments : bool,
28 36
29 #[arg(long, short = 'w')] 37 #[arg(long, short = 'w')]
30 /// Strip unnecessary whitespace 38 /// Strip unnecessary whitespace
31 strip_whitespace : bool, 39 strip_whitespace : bool,
32 } 40 }
33 41
34 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 42 struct Context {
35 enum Element { 43 lineno : usize,
36 Added, 44 input_only_ws : bool,
37 Deleted, 45 cli : Config
38 Replaced, 46 }
39 Other, 47
40 Comment, 48 type AnyChainRule = Box<dyn ChainRule>;
41 } 49 type AnyNestedRule = Box<dyn NestedRule>;
42 50
43 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 51 trait ChainRule {
44 enum Status { 52 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule;
45 Output(Element), 53 fn flush(self : Box<Self>, ctx : &Context);
46 Ignore(Element), 54 }
47 Scan(Element, bool), 55
48 } 56 trait NestedRule : ChainRule {
49 57 fn produce(&mut self, c : char, ctx : &Context);
50 use Status::*; 58 fn next(self : Box<Self>) -> AnyChainRule;
51 use Element::*; 59 fn produce_string(&mut self, s : String, ctx : &Context) {
60 s.chars().for_each(|c| self.produce(c, ctx));
61 }
62 fn start_ignored_comment(&mut self, c : char);
63 }
64
65 impl<W : Write + 'static> ChainRule for Out<W> {
66 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
67 basic_consume(self, c, ctx, true)
68 }
69 fn flush(mut self : Box<Self>, _ctx : &Context) {
70 self.output.flush().unwrap();
71 }
72 }
73
74 impl<W : Write + 'static> NestedRule for Out<W> {
75 fn produce(&mut self, c : char, ctx : &Context) {
76 if c == '\n' {
77 self.line_end(ctx.cli.strip_whitespace, ctx.input_only_ws)
78 } else if c.is_whitespace() {
79 self.stored_whitespace.push(c);
80 } else {
81 write!(self.output, "{}{}", self.stored_whitespace, c).unwrap();
82 self.stored_whitespace.clear();
83 self.only_whitespace = false;
84 self.whitespace_satisfied = false;
85 self.par_satisfied = false;
86 }
87 }
88
89 fn next(self : Box<Self>) -> AnyChainRule {
90 self
91 }
92
93 fn start_ignored_comment(&mut self, c : char) {
94 if self.stored_whitespace.is_empty() && !self.only_whitespace {
95 // The marker needs to be inserted if there is to be no whitespace inserted
96 write!(self.output, "{c}").unwrap();
97 self.whitespace_satisfied = false;
98 self.par_satisfied = false;
99 self.only_whitespace = false;
100 } else if self.only_whitespace {
101 self.ignored_comment_only_line = true
102 }
103 }
104 }
105
106 fn basic_consume(mut s : AnyNestedRule, c : char, ctx : &Context, print_end : bool)
107 -> AnyChainRule {
108 match c {
109 '{' => {
110 s.produce(c, ctx);
111 Box::new(Group(s))
112 },
113 '}' => {
114 if print_end {
115 s.produce(c, ctx);
116 }
117 s.next()
118 },
119 '\\' => {
120 Box::new(CommandName{parent : s, command : "\\".to_string()})
121 },
122 '%' => {
123 if !ctx.cli.strip_comments {
124 s.produce(c, ctx);
125 Box::new(Comment(s))
126 } else {
127 s.start_ignored_comment(c);
128 Box::new(IgnoreComment(s))
129 }
130 },
131 _ => {
132 s.produce(c, ctx);
133 s
134 }
135 }
136 }
137
138 struct CommandName {
139 parent : AnyNestedRule,
140 command : String
141 }
142
143 impl ChainRule for CommandName {
144 fn consume(mut self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
145 match c {
146 '}' | '{' | '\\' if self.command.len() <= 1 => {
147 self.command.push(c);
148 self.handle(ctx)
149 },
150 c if c.is_alphanumeric() => {
151 self.command.push(c);
152 self
153 },
154 c => {
155 self.handle(ctx)
156 .consume(c, ctx)
157 }
158 }
159 }
160
161 fn flush(self : Box<Self>, ctx : &Context) {
162 self.handle(ctx)
163 .flush(ctx)
164 }
165 }
166
167 impl CommandName {
168 fn handle(mut self, ctx : &Context) -> AnyChainRule {
169 match self.command.as_str() {
170 "\\added" => {
171 Scan::new(Added(self.parent))
172 },
173 "\\replaced" => {
174 Scan::new(Replaced(self.parent))
175 },
176 "\\deleted" => {
177 Scan::new(Deleted(self.parent))
178 },
179 _ => {
180 self.parent.produce_string(self.command, ctx);
181 self.parent
182 }
183 }
184 }
185 }
186
187 struct Comment(AnyNestedRule);
188
189 impl ChainRule for Comment {
190 fn consume(mut self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
191 if c == '\n' {
192 self.0.consume(c, ctx)
193 } else {
194 self.0.produce(c, ctx);
195 self
196 }
197 }
198 fn flush(self : Box<Self>, ctx : &Context) {
199 self.0.flush(ctx)
200 }
201 }
202
203 struct IgnoreComment(AnyChainRule);
204
205 impl ChainRule for IgnoreComment {
206 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
207 if c == '\n' {
208 self.0.consume(c, ctx)
209 } else {
210 self
211 }
212 }
213 fn flush(self : Box<Self>, ctx : &Context) {
214 self.0.flush(ctx)
215 }
216 }
217
218 struct Group(AnyNestedRule);
219
220 impl ChainRule for Group {
221 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
222 basic_consume(self, c, ctx, true)
223 }
224 fn flush(self : Box<Self>, ctx : &Context) {
225 self.0.flush(ctx)
226 }
227 }
228
229 impl NestedRule for Group {
230 fn produce(&mut self, c : char, ctx : &Context) {
231 self.0.produce(c, ctx)
232 }
233 fn next(self : Box<Self>) -> AnyChainRule {
234 self.0
235 }
236 fn start_ignored_comment(&mut self, c : char) {
237 self.0.start_ignored_comment(c)
238 }
239 }
240
241 struct Added(AnyNestedRule);
242
243 impl ChainRule for Added {
244 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
245 basic_consume(self, c, ctx, false)
246 }
247 fn flush(self : Box<Self>, ctx : &Context) {
248 self.0.flush(ctx)
249 }
250 }
251
252 impl NestedRule for Added {
253 fn produce(&mut self, c : char, ctx : &Context) {
254 self.0.produce(c, ctx)
255 }
256 fn next(self : Box<Self>) -> AnyChainRule {
257 self.0
258 }
259 fn start_ignored_comment(&mut self, c : char) {
260 self.0.start_ignored_comment(c)
261 }
262 }
263 struct Deleted(AnyNestedRule);
264
265 impl ChainRule for Deleted {
266 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
267 basic_consume(self, c, ctx, false)
268 }
269 fn flush(self : Box<Self>, ctx : &Context) {
270 self.0.flush(ctx)
271 }
272 }
273
274 impl NestedRule for Deleted {
275 fn produce(&mut self, _c : char, _ctx : &Context) {
276 }
277 fn next(self : Box<Self>) -> AnyChainRule {
278 self.0
279 }
280 fn start_ignored_comment(&mut self, c : char) {
281 self.0.start_ignored_comment(c)
282 }
283 }
284
285 struct Replaced(AnyNestedRule);
286
287 impl ChainRule for Replaced {
288 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
289 basic_consume(self, c, ctx, false)
290 }
291 fn flush(self : Box<Self>, ctx : &Context) {
292 self.0.flush(ctx)
293 }
294 }
295
296 impl NestedRule for Replaced {
297 fn produce(&mut self, c : char, ctx : &Context) {
298 self.0.produce(c, ctx)
299 }
300 fn next(self : Box<Self>) -> AnyChainRule {
301 Scan::new(Deleted(self.0))
302 }
303 fn start_ignored_comment(&mut self, c : char) {
304 self.0.start_ignored_comment(c)
305 }
306 }
307
308 struct Scan(AnyNestedRule);
309
310 impl ChainRule for Scan {
311 fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
312 if c.is_whitespace() || c == '\n' {
313 self
314 } else if c == '{' {
315 self.0
316 } else if c == '%' {
317 Box::new(IgnoreComment(self))
318 } else {
319 panic!("Non-whitespace character ({c}) separating arguments on \
320 line {lineno}", lineno = ctx.lineno)
321 }
322 }
323 fn flush(self : Box<Self>, ctx : &Context) {
324 self.0.flush(ctx)
325 }
326 }
327
328 impl Scan {
329 fn new<R : NestedRule + 'static>(r : R) -> Box<dyn ChainRule> {
330 Box::new(Scan(Box::new(r)))
331 }
332 }
333
52 334
53 struct Out<W : Write> { 335 struct Out<W : Write> {
54 only_whitespace : bool, 336 only_whitespace : bool,
55 stored_whitespace : String, 337 stored_whitespace : String,
56 output : W, 338 output : W,
57 stack : Vec<Status>,
58 whitespace_satisfied : bool, 339 whitespace_satisfied : bool,
59 par_satisfied : bool, 340 par_satisfied : bool,
341 ignored_comment_only_line : bool
60 } 342 }
61 343
62 impl<W : Write> Out<W> { 344 impl<W : Write> Out<W> {
63 fn current(&self) -> Status {
64 self.stack.last().map_or(Output(Other), |s| *s)
65 }
66
67 fn raw_out(&mut self, c : char) {
68 write!(self.output, "{}", c).unwrap();
69 }
70
71 pub fn out(&mut self, c : char) {
72 self.only_whitespace = false;
73 write!(self.output, "{}{}", self.stored_whitespace, c).unwrap();
74 self.stored_whitespace.clear();
75 self.whitespace_satisfied = false;
76 self.par_satisfied = false;
77 }
78
79 pub fn whitespace(&mut self, c : char) {
80 self.stored_whitespace.push(c);
81 }
82
83 pub fn line_end(&mut self, strip_ws : bool, input_only_ws : bool) { 345 pub fn line_end(&mut self, strip_ws : bool, input_only_ws : bool) {
84 let cur = self.current();
85 let skip_linefeed = if input_only_ws { 346 let skip_linefeed = if input_only_ws {
86 // Need a paragraph break 347 // Need a paragraph break
87 strip_ws && self.par_satisfied 348 strip_ws && self.par_satisfied
88 } else if strip_ws { 349 } else if strip_ws {
89 self.only_whitespace && self.whitespace_satisfied 350 self.only_whitespace && self.whitespace_satisfied
90 } else if let Ignore(Comment) = cur { 351 } else {
91 // Skip comment-only lines if the comment is ignored 352 // Skip comment-only lines if the comment is ignored
92 self.only_whitespace 353 self.ignored_comment_only_line
93 } else if let Ignore(_) = cur {
94 // Skip line feeds in ignored bits
95 true
96 } else {
97 false
98 }; 354 };
99 355
100 if !skip_linefeed { 356 if !skip_linefeed {
101 if !strip_ws { 357 if !strip_ws {
102 write!(self.output, "{}", self.stored_whitespace).unwrap(); 358 write!(self.output, "{}", self.stored_whitespace).unwrap();
103 } 359 }
104 self.raw_out('\n'); 360 write!(self.output, "\n").unwrap();
105 self.whitespace_satisfied = true; 361 self.whitespace_satisfied = true;
106 self.par_satisfied = self.only_whitespace; 362 self.par_satisfied = self.only_whitespace;
107 } 363 }
108 364
109 if let Ignore(Comment) | Output(Comment) = cur {
110 self.stack.pop();
111 }
112
113 self.stored_whitespace.clear(); 365 self.stored_whitespace.clear();
114 self.only_whitespace = true; 366 self.only_whitespace = true;
115 } 367 self.ignored_comment_only_line = false;
116
117 pub fn flush(&mut self) {
118 self.output.flush().unwrap();
119 } 368 }
120 } 369 }
121 370
122 fn main() { 371 fn main() {
123 let cli = CommandLineArgs::parse(); 372 let cli = CommandLineArgs::parse();
128 let output = cli.output.map_or_else( 377 let output = cli.output.map_or_else(
129 || Box::new(BufWriter::new(io::stdout())) as Box<dyn Write>, 378 || Box::new(BufWriter::new(io::stdout())) as Box<dyn Write>,
130 |f| Box::new(BufWriter::new(File::create(f).unwrap())) as Box<dyn Write> 379 |f| Box::new(BufWriter::new(File::create(f).unwrap())) as Box<dyn Write>
131 ); 380 );
132 381
133 let mut o = Out { 382 let mut rule : Box<dyn ChainRule> = Box::new(Out {
134 only_whitespace : true, 383 only_whitespace : true,
135 stored_whitespace : String::new(), 384 stored_whitespace : String::new(),
136 output, 385 output,
137 stack : Vec::new(),
138 whitespace_satisfied : true, 386 whitespace_satisfied : true,
139 par_satisfied : true, 387 par_satisfied : true,
140 }; 388 ignored_comment_only_line : false
141 389 });
142 let mut lineno = 0; 390
391 let mut ctx = Context{ lineno : 0, cli : cli.config, input_only_ws : true};
143 392
144 for l in input.lines().map(|l| l.unwrap()) { 393 for l in input.lines().map(|l| l.unwrap()) {
145 lineno += 1; 394 ctx.lineno += 1;
146 let mut chars = l.chars(); 395 ctx.input_only_ws = true;
147 let mut maybe_next_char = None; 396 for c in l.chars() {
148 let mut input_only_ws = true; 397 ctx.input_only_ws = ctx.input_only_ws && c.is_whitespace();
149 398 rule = rule.consume(c, &ctx);
150 'process_line: loop { 399 }
151 let next_char = match maybe_next_char { 400 rule = rule.consume('\n', &ctx);
152 None => chars.next(), 401 }
153 Some(c) => { 402
154 maybe_next_char = None; 403 rule.flush(&ctx);
155 Some(c) 404 }
156 }
157 };
158 input_only_ws = input_only_ws && next_char.map_or(true, |c| c.is_whitespace());
159 match(o.current(), next_char) {
160 (_, None) => {
161 break 'process_line;
162 },
163 (st @ (Output(e) | Ignore(e)), Some('\\')) if e != Comment => {
164 let mut command = String::new();
165 let mut first = true;
166 maybe_next_char = 'scan_command: loop {
167 match chars.next() {
168 Some(c) if first && (c=='{' || c=='}' || c=='\\') => {
169 command.push(c);
170 break 'scan_command None;
171 },
172 Some(c) if c.is_alphanumeric() => {
173 command.push(c);
174 },
175 maybe_c => {
176 break 'scan_command maybe_c;
177 }
178 }
179 first = false;
180 };
181 let output_guard = if let Ignore(_) = st { false } else { true };
182 match command.as_str() {
183 "added" => {
184 o.stack.push(Scan(Added, true && output_guard));
185 },
186 "replaced" => {
187 o.stack.push(Scan(Replaced, true && output_guard));
188 },
189 "deleted" => {
190 o.stack.push(Scan(Deleted, false));
191 },
192 _ => {
193 if output_guard {
194 o.out('\\');
195 command.chars().for_each(|c| o.out(c.clone()));
196 }
197 }
198 };
199 },
200 (Scan(next, out), Some(c)) => {
201 match c {
202 '{' => {
203 o.stack.pop();
204 o.stack.push(if out { Output(next) } else { Ignore(next) });
205 },
206 ' ' => {
207 },
208 _ => panic!("Non-whitespace character ({c}) separating arguments on\
209 line {lineno}"),
210 }
211 },
212 (Output(e), Some('{')) if e != Comment => {
213 o.out('{');
214 o.stack.push(Output(Other));
215 },
216 (Ignore(e), Some('{')) if e != Comment => {
217 o.stack.push(Ignore(Other));
218 },
219 (Output(Added) | Ignore(Added) | Output(Deleted) | Ignore(Deleted), Some('}')) => {
220 o.stack.pop();
221 },
222 (Output(Replaced) | Ignore(Replaced), Some('}')) => {
223 o.stack.pop();
224 o.stack.push(Scan(Deleted, false));
225 },
226 (Output(Other), Some('}')) => {
227 o.out('}');
228 o.stack.pop();
229 },
230 (Ignore(e), Some('}')) if e != Comment => {
231 o.stack.pop();
232 },
233 (Output(e), Some('%')) if e != Comment=> {
234 if cli.strip_comments {
235 if o.stored_whitespace.is_empty() && !o.only_whitespace {
236 // Output comment marker if it is required to maintain
237 // lack of whitespace.
238 o.out('%');
239 }
240 o.stack.push(Ignore(Comment));
241 } else {
242 o.out('%');
243 o.stack.push(Output(Comment));
244 }
245 },
246 (Ignore(e), Some('%')) if e != Comment => {
247 o.stack.push(Ignore(Comment));
248 },
249 (Output(_), Some(c)) if c.is_whitespace() => {
250 o.whitespace(c);
251 },
252 (Output(_), Some(c)) => {
253 o.out(c);
254 },
255 (Ignore(_), Some(_)) => {
256 },
257 };
258 }
259
260 o.line_end(cli.strip_whitespace, input_only_ws);
261 }
262
263 o.flush();
264 }

mercurial