# HG changeset patch # User Tuomo Valkonen # Date 1697746212 18000 # Node ID 254e1e4bd79517d06421f9759bcb37de0c8535b2 # Parent a88aed2bdf13d1dbd4396ef7a47e3eba3846bcc1 Add whitespace and comment stripping diff -r a88aed2bdf13 -r 254e1e4bd795 Cargo.toml --- a/Cargo.toml Thu Oct 19 12:08:07 2023 -0500 +++ b/Cargo.toml Thu Oct 19 15:10:12 2023 -0500 @@ -13,4 +13,5 @@ ] [dependencies] +clap = { version = "~4.0.27", features = ["derive", "unicode", "wrap_help"] } diff -r a88aed2bdf13 -r 254e1e4bd795 src/main.rs --- a/src/main.rs Thu Oct 19 12:08:07 2023 -0500 +++ b/src/main.rs Thu Oct 19 15:10:12 2023 -0500 @@ -1,6 +1,27 @@ +// The main documentation is in the README. +#![doc = include_str!("../README.md")] + use std::io; use std::io::BufWriter; use std::io::Write; +use clap::Parser; + +/// Command line parameters +#[derive(Parser, Debug)] +#[clap( + about = env!("CARGO_PKG_DESCRIPTION"), + author = env!("CARGO_PKG_AUTHORS"), + version = env!("CARGO_PKG_VERSION"), +)] +struct CommandLineArgs { + #[arg(long, short = 'c')] + /// Strip comments + strip_comments : bool, + + #[arg(long, short = 'w')] + /// Strip unnecessary whitespace + strip_whitespace : bool, +} #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum Element { @@ -18,23 +39,99 @@ Scan(Element, bool), } -fn main() { - let input = io::stdin(); - let mut output = BufWriter::new(io::stdout()); - let mut status_stack = Vec::new(); +use Status::*; +use Element::*; + +struct Out { + only_whitespace : bool, + stored_whitespace : String, + output : W, + stack : Vec, + whitespace_satisfied : bool, + par_satisfied : bool, +} + +impl Out { + fn current(&self) -> Status { + self.stack.last().map_or(Output(Other), |s| *s) + } + + fn raw_out(&mut self, c : char) { + write!(self.output, "{}", c).unwrap(); + } + + pub fn out(&mut self, c : char) { + self.only_whitespace = false; + write!(self.output, "{}{}", self.stored_whitespace, c).unwrap(); + self.stored_whitespace.clear(); + self.whitespace_satisfied = false; + self.par_satisfied = false; + } + + pub fn whitespace(&mut self, c : char) { + self.stored_whitespace.push(c); + } - use Status::*; - use Element::*; + pub fn line_end(&mut self, strip_ws : bool, input_only_ws : bool) { + let cur = self.current(); + let skip_linefeed = if input_only_ws { + // Need a paragraph break + strip_ws && self.par_satisfied + } else if strip_ws { + self.only_whitespace && self.whitespace_satisfied + } else if let Ignore(Comment) = cur { + // Skip comment-only lines if the comment is ignored + self.only_whitespace + } else if let Ignore(_) = cur { + // Skip line feeds in ignored bits + true + } else { + false + }; + + if !skip_linefeed { + if !strip_ws { + write!(self.output, "{}", self.stored_whitespace).unwrap(); + } + self.raw_out('\n'); + self.whitespace_satisfied = true; + self.par_satisfied = self.only_whitespace; + } - let current = |s : &Vec| s.last().map_or(Output(Other), |s| *s); - let mut out = |c : char| { write!(output, "{}", c).unwrap(); }; + if let Ignore(Comment) | Output(Comment) = cur { + self.stack.pop(); + } + + self.stored_whitespace.clear(); + self.only_whitespace = true; + } + + pub fn flush(&mut self) { + self.output.flush().unwrap(); + } +} + +fn main() { + let cli = CommandLineArgs::parse(); + let input = io::stdin(); + + let mut o = Out { + only_whitespace : true, + stored_whitespace : String::new(), + output : BufWriter::new(io::stdout()), + stack : Vec::new(), + whitespace_satisfied : true, + par_satisfied : true, + }; + let mut lineno = 0; for l in input.lines().map(|l| l.unwrap()) { lineno += 1; let mut chars = l.chars(); - let started_ignore = if let Ignore(_) = current(&status_stack) { true } else { false }; let mut maybe_next_char = None; + let mut input_only_ws = true; + 'process_line: loop { let next_char = match maybe_next_char { None => chars.next(), @@ -43,7 +140,8 @@ Some(c) } }; - match(current(&status_stack), next_char) { + input_only_ws = input_only_ws && next_char.map_or(true, |c| c.is_whitespace()); + match(o.current(), next_char) { (_, None) => { break 'process_line; }, @@ -68,27 +166,27 @@ let output_guard = if let Ignore(_) = st { false } else { true }; match command.as_str() { "added" => { - status_stack.push(Scan(Added, true && output_guard)); + o.stack.push(Scan(Added, true && output_guard)); }, "replaced" => { - status_stack.push(Scan(Replaced, true && output_guard)); + o.stack.push(Scan(Replaced, true && output_guard)); }, "deleted" => { - status_stack.push(Scan(Deleted, false)); + o.stack.push(Scan(Deleted, false)); }, _ => { if output_guard { - out('\\'); - command.chars().for_each(|c| out(c.clone())); + o.out('\\'); + command.chars().for_each(|c| o.out(c.clone())); } } }; }, - (Scan(next, o), Some(c)) => { + (Scan(next, out), Some(c)) => { match c { '{' => { - status_stack.pop(); - status_stack.push(if o { Output(next) } else { Ignore(next) }); + o.stack.pop(); + o.stack.push(if out { Output(next) } else { Ignore(next) }); }, ' ' => { }, @@ -97,59 +195,55 @@ } }, (Output(e), Some('{')) if e != Comment => { - out('{'); - status_stack.push(Output(Other)); + o.out('{'); + o.stack.push(Output(Other)); }, (Ignore(e), Some('{')) if e != Comment => { - status_stack.push(Ignore(Other)); + o.stack.push(Ignore(Other)); }, (Output(Added) | Ignore(Added) | Output(Deleted) | Ignore(Deleted), Some('}')) => { - status_stack.pop(); + o.stack.pop(); }, (Output(Replaced) | Ignore(Replaced), Some('}')) => { - status_stack.pop(); - status_stack.push(Scan(Deleted, false)); + o.stack.pop(); + o.stack.push(Scan(Deleted, false)); }, (Output(Other), Some('}')) => { - out('}'); - status_stack.pop(); + o.out('}'); + o.stack.pop(); }, - (Ignore(_), Some('}')) => { - status_stack.pop(); + (Ignore(e), Some('}')) if e != Comment => { + o.stack.pop(); }, (Output(e), Some('%')) if e != Comment=> { - out('%'); - status_stack.push(Output(Comment)); + if cli.strip_comments { + if o.stored_whitespace.is_empty() && !o.only_whitespace { + // Output comment marker if it is required to maintain + // lack of whitespace. + o.out('%'); + } + o.stack.push(Ignore(Comment)); + } else { + o.out('%'); + o.stack.push(Output(Comment)); + } }, (Ignore(e), Some('%')) if e != Comment => { - status_stack.push(Ignore(Comment)); + o.stack.push(Ignore(Comment)); + }, + (Output(_), Some(c)) if c.is_whitespace() => { + o.whitespace(c); }, (Output(_), Some(c)) => { - out(c); + o.out(c); }, (Ignore(_), Some(_)) => { }, }; } - match current(&status_stack) { - Ignore(e) => { - if !started_ignore { - out('\n'); - } - if e == Comment { - status_stack.pop(); - } - }, - Output(e) => { - out('\n'); - if e == Comment { - status_stack.pop(); - } - }, - Scan(_, _) => { - }, - } + + o.line_end(cli.strip_whitespace, input_only_ws); } - output.flush().unwrap(); + o.flush(); }