Thu, 19 Oct 2023 15:36:39 -0500
doc typofix
// The main documentation is in the README. #![doc = include_str!("../README.md")] use std::io; use std::fs::File; use std::io::{BufWriter, BufRead, BufReader}; use std::io::Write; use clap::Parser; /// Command line parameters #[derive(Parser, Debug)] #[clap( about = env!("CARGO_PKG_DESCRIPTION"), author = env!("CARGO_PKG_AUTHORS"), version = env!("CARGO_PKG_VERSION"), )] struct CommandLineArgs { /// Input file (default is stdin) input : Option<String>, /// Output file (defalt is stdout) #[arg(long, short = 'o')] output : Option<String>, #[arg(long, short = 'c')] /// Strip comments strip_comments : bool, #[arg(long, short = 'w')] /// Strip unnecessary whitespace strip_whitespace : bool, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum Element { Added, Deleted, Replaced, Other, Comment, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum Status { Output(Element), Ignore(Element), Scan(Element, bool), } use Status::*; use Element::*; struct Out<W : Write> { only_whitespace : bool, stored_whitespace : String, output : W, stack : Vec<Status>, whitespace_satisfied : bool, par_satisfied : bool, } impl<W : Write> Out<W> { fn current(&self) -> Status { self.stack.last().map_or(Output(Other), |s| *s) } fn raw_out(&mut self, c : char) { write!(self.output, "{}", c).unwrap(); } pub fn out(&mut self, c : char) { self.only_whitespace = false; write!(self.output, "{}{}", self.stored_whitespace, c).unwrap(); self.stored_whitespace.clear(); self.whitespace_satisfied = false; self.par_satisfied = false; } pub fn whitespace(&mut self, c : char) { self.stored_whitespace.push(c); } pub fn line_end(&mut self, strip_ws : bool, input_only_ws : bool) { let cur = self.current(); let skip_linefeed = if input_only_ws { // Need a paragraph break strip_ws && self.par_satisfied } else if strip_ws { self.only_whitespace && self.whitespace_satisfied } else if let Ignore(Comment) = cur { // Skip comment-only lines if the comment is ignored self.only_whitespace } else if let Ignore(_) = cur { // Skip line feeds in ignored bits true } else { false }; if !skip_linefeed { if !strip_ws { write!(self.output, "{}", self.stored_whitespace).unwrap(); } self.raw_out('\n'); self.whitespace_satisfied = true; self.par_satisfied = self.only_whitespace; } if let Ignore(Comment) | Output(Comment) = cur { self.stack.pop(); } self.stored_whitespace.clear(); self.only_whitespace = true; } pub fn flush(&mut self) { self.output.flush().unwrap(); } } fn main() { let cli = CommandLineArgs::parse(); let input = cli.input.map_or_else( || Box::new(BufReader::new(io::stdin())) as Box<dyn BufRead>, |f| Box::new(BufReader::new(File::open(f).unwrap())) as Box<dyn BufRead> ); let output = cli.output.map_or_else( || Box::new(BufWriter::new(io::stdout())) as Box<dyn Write>, |f| Box::new(BufWriter::new(File::create(f).unwrap())) as Box<dyn Write> ); let mut o = Out { only_whitespace : true, stored_whitespace : String::new(), output, stack : Vec::new(), whitespace_satisfied : true, par_satisfied : true, }; let mut lineno = 0; for l in input.lines().map(|l| l.unwrap()) { lineno += 1; let mut chars = l.chars(); let mut maybe_next_char = None; let mut input_only_ws = true; 'process_line: loop { let next_char = match maybe_next_char { None => chars.next(), Some(c) => { maybe_next_char = None; Some(c) } }; input_only_ws = input_only_ws && next_char.map_or(true, |c| c.is_whitespace()); match(o.current(), next_char) { (_, None) => { break 'process_line; }, (st @ (Output(e) | Ignore(e)), Some('\\')) if e != Comment => { let mut command = String::new(); let mut first = true; maybe_next_char = 'scan_command: loop { match chars.next() { Some(c) if first && (c=='{' || c=='}' || c=='\\') => { command.push(c); break 'scan_command None; }, Some(c) if c.is_alphanumeric() => { command.push(c); }, maybe_c => { break 'scan_command maybe_c; } } first = false; }; let output_guard = if let Ignore(_) = st { false } else { true }; match command.as_str() { "added" => { o.stack.push(Scan(Added, true && output_guard)); }, "replaced" => { o.stack.push(Scan(Replaced, true && output_guard)); }, "deleted" => { o.stack.push(Scan(Deleted, false)); }, _ => { if output_guard { o.out('\\'); command.chars().for_each(|c| o.out(c.clone())); } } }; }, (Scan(next, out), Some(c)) => { match c { '{' => { o.stack.pop(); o.stack.push(if out { Output(next) } else { Ignore(next) }); }, ' ' => { }, _ => panic!("Non-whitespace character ({c}) separating arguments on\ line {lineno}"), } }, (Output(e), Some('{')) if e != Comment => { o.out('{'); o.stack.push(Output(Other)); }, (Ignore(e), Some('{')) if e != Comment => { o.stack.push(Ignore(Other)); }, (Output(Added) | Ignore(Added) | Output(Deleted) | Ignore(Deleted), Some('}')) => { o.stack.pop(); }, (Output(Replaced) | Ignore(Replaced), Some('}')) => { o.stack.pop(); o.stack.push(Scan(Deleted, false)); }, (Output(Other), Some('}')) => { o.out('}'); o.stack.pop(); }, (Ignore(e), Some('}')) if e != Comment => { o.stack.pop(); }, (Output(e), Some('%')) if e != Comment=> { if cli.strip_comments { if o.stored_whitespace.is_empty() && !o.only_whitespace { // Output comment marker if it is required to maintain // lack of whitespace. o.out('%'); } o.stack.push(Ignore(Comment)); } else { o.out('%'); o.stack.push(Output(Comment)); } }, (Ignore(e), Some('%')) if e != Comment => { o.stack.push(Ignore(Comment)); }, (Output(_), Some(c)) if c.is_whitespace() => { o.whitespace(c); }, (Output(_), Some(c)) => { o.out(c); }, (Ignore(_), Some(_)) => { }, }; } o.line_end(cli.strip_whitespace, input_only_ws); } o.flush(); }