src/main.rs

Thu, 19 Oct 2023 15:10:12 -0500

author
Tuomo Valkonen <tuomov@iki.fi>
date
Thu, 19 Oct 2023 15:10:12 -0500
changeset 2
254e1e4bd795
parent 0
548bf3cc032e
child 3
cec573b16b46
permissions
-rw-r--r--

Add whitespace and comment stripping

// The main documentation is in the README.
#![doc = include_str!("../README.md")]

use std::io;
use std::io::BufWriter;
use std::io::Write;
use clap::Parser;

/// Command line parameters
#[derive(Parser, Debug)]
#[clap(
    about = env!("CARGO_PKG_DESCRIPTION"),
    author = env!("CARGO_PKG_AUTHORS"),
    version = env!("CARGO_PKG_VERSION"),
)]
struct CommandLineArgs {
    #[arg(long, short = 'c')]
    /// Strip comments
    strip_comments : bool,

    #[arg(long, short = 'w')]
    /// Strip unnecessary whitespace
    strip_whitespace : bool,
}

/// Kind of region the parser is currently inside.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum Element {
    /// Argument of an `\added` command.
    Added,
    /// Argument of a `\deleted` command, or the second argument of a
    /// `\replaced` command.
    Deleted,
    /// First argument of a `\replaced` command.
    Replaced,
    /// Any other brace group; also the implicit element at the top level.
    Other,
    /// A `%` comment, which lasts until the end of the current line.
    Comment,
}

/// One entry of the parser's state stack.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum Status {
    /// Inside the given element; its characters are written to the output.
    Output(Element),
    /// Inside the given element; its characters are discarded.
    Ignore(Element),
    /// A change command has been read and the opening `{` of its next
    /// argument is awaited. The flag selects what the argument becomes once
    /// the brace is seen: `Output` when `true`, `Ignore` when `false`.
    Scan(Element, bool),
}

use Status::*;
use Element::*;

/// Output sink together with the parser state stack and the bookkeeping
/// needed to decide which whitespace and line feeds are written.
struct Out<W: Write> {
    /// `true` while nothing but whitespace has been emitted on the current
    /// output line; cleared by `out` and set again by `line_end`.
    only_whitespace: bool,
    /// Whitespace seen since the last emitted character. It is written just
    /// before the next non-whitespace character, or at the end of the line
    /// when whitespace is not being stripped.
    stored_whitespace: String,
    /// Destination of the processed text.
    output: W,
    /// Nesting of parser states; an empty stack means top-level output.
    stack: Vec<Status>,
    /// Set when a line feed has just been written, cleared by `out`.
    whitespace_satisfied: bool,
    /// Set when the last written line feed ended a line on which only
    /// whitespace had been emitted, cleared by `out`.
    par_satisfied: bool,
}

impl<W: Write> Out<W> {
    /// Returns the innermost parser state. An empty stack is treated as
    /// ordinary output (`Output(Other)`).
    fn current(&self) -> Status {
        match self.stack.last() {
            Some(&top) => top,
            None => Output(Other),
        }
    }

    /// Writes one character directly to the sink without touching any of the
    /// whitespace bookkeeping. Panics if the write fails.
    fn raw_out(&mut self, c: char) {
        let mut utf8 = [0u8; 4];
        self.output
            .write_all(c.encode_utf8(&mut utf8).as_bytes())
            .unwrap();
    }

    /// Emits a non-whitespace character, preceded by any whitespace that was
    /// buffered since the previous emitted character. Panics if the write
    /// fails.
    pub fn out(&mut self, c: char) {
        // Flush the pending whitespace first so it ends up before `c`.
        self.output
            .write_all(self.stored_whitespace.as_bytes())
            .unwrap();
        self.stored_whitespace.clear();
        self.raw_out(c);

        // The current line now carries real content.
        self.only_whitespace = false;
        self.whitespace_satisfied = false;
        self.par_satisfied = false;
    }

    /// Buffers a whitespace character; whether it is ever written is decided
    /// later by `out` or `line_end`.
    pub fn whitespace(&mut self, c: char) {
        self.stored_whitespace.push(c);
    }

    /// Finishes the current input line.
    ///
    /// `strip_ws` tells whether unnecessary whitespace is being stripped, and
    /// `input_only_ws` whether the input line consisted of whitespace only.
    /// Decides whether a line feed is written, closes a comment that was open
    /// at the end of the line, and resets the per-line state. Panics if a
    /// write fails.
    pub fn line_end(&mut self, strip_ws: bool, input_only_ws: bool) {
        let cur = self.current();

        let skip_linefeed = match (input_only_ws, strip_ws, cur) {
            // Blank input line: it is a paragraph break, dropped only when
            // stripping and a blank line has already been written.
            (true, _, _) => strip_ws && self.par_satisfied,
            // Stripping: drop the line feed if nothing was emitted on this
            // line and a line feed has just been written.
            (false, true, _) => self.only_whitespace && self.whitespace_satisfied,
            // Skip comment-only lines if the comment is ignored.
            (false, false, Ignore(Comment)) => self.only_whitespace,
            // Skip line feeds inside ignored text.
            (false, false, Ignore(_)) => true,
            (false, false, _) => false,
        };

        if !skip_linefeed {
            if !strip_ws {
                // Trailing whitespace is preserved unless stripping.
                self.output
                    .write_all(self.stored_whitespace.as_bytes())
                    .unwrap();
            }
            self.raw_out('\n');
            self.whitespace_satisfied = true;
            self.par_satisfied = self.only_whitespace;
        }

        // A comment never extends past the end of its line.
        if matches!(cur, Ignore(Comment) | Output(Comment)) {
            self.stack.pop();
        }

        self.stored_whitespace.clear();
        self.only_whitespace = true;
    }

    /// Flushes the underlying sink. Panics if flushing fails.
    pub fn flush(&mut self) {
        self.output.flush().unwrap();
    }
}

/// Filters LaTeX source from standard input to standard output, resolving
/// `\added{..}`, `\deleted{..}` and `\replaced{..}{..}` change markup.
///
/// The argument of `\added` and the first argument of `\replaced` are kept,
/// while the argument of `\deleted` and the second argument of `\replaced`
/// are dropped. Depending on the command line flags, `%` comments and
/// unnecessary whitespace are stripped as well.
///
/// # Panics
///
/// Panics if a line cannot be read from standard input, if writing to
/// standard output fails, or if anything other than whitespace separates a
/// change command from the `{` of its argument on the same line.
fn main() {
    let cli = CommandLineArgs::parse();
    let input = io::stdin();

    let mut o = Out {
        only_whitespace : true,
        stored_whitespace : String::new(),
        output : BufWriter::new(io::stdout()),
        stack : Vec::new(),
        whitespace_satisfied : true,
        par_satisfied : true,
    };

    for (index, l) in input.lines().enumerate() {
        let l = l.expect("failed to read a line from standard input");
        // One-based line number, used in diagnostics only.
        let lineno = index + 1;
        let mut chars = l.chars();
        // Character read ahead by the command scanner that still has to be
        // processed by the main loop.
        let mut maybe_next_char : Option<char> = None;
        let mut input_only_ws = true;

        'process_line: loop {
            let next_char = maybe_next_char.take().or_else(|| chars.next());
            input_only_ws = input_only_ws && next_char.map_or(true, |c| c.is_whitespace());
            match (o.current(), next_char) {
                (_, None) => {
                    break 'process_line;
                },
                (st @ (Output(e) | Ignore(e)), Some('\\')) if e != Comment => {
                    // Scan the command name following the backslash. The
                    // value of the loop is the character that terminated the
                    // name, if it was not consumed as part of the command.
                    let mut command = String::new();
                    let mut first = true;
                    maybe_next_char = 'scan_command: loop {
                        match chars.next() {
                            // Single-character control symbols. `\%` must be
                            // consumed here, as otherwise the escaped `%`
                            // would be mistaken for the start of a comment.
                            Some(c) if first && matches!(c, '{' | '}' | '\\' | '%') => {
                                command.push(c);
                                break 'scan_command None;
                            },
                            Some(c) if c.is_alphanumeric() => {
                                command.push(c);
                            },
                            maybe_c => {
                                break 'scan_command maybe_c;
                            }
                        }
                        first = false;
                    };
                    // Nothing nested inside ignored text may produce output.
                    let output_guard = !matches!(st, Ignore(_));
                    match command.as_str() {
                        "added" => {
                            o.stack.push(Scan(Added, output_guard));
                        },
                        "replaced" => {
                            o.stack.push(Scan(Replaced, output_guard));
                        },
                        "deleted" => {
                            o.stack.push(Scan(Deleted, false));
                        },
                        _ => {
                            // Any other command is passed through verbatim.
                            if output_guard {
                                o.out('\\');
                                command.chars().for_each(|c| o.out(c));
                            }
                        }
                    };
                },
                (Scan(next, out), Some(c)) => {
                    match c {
                        '{' => {
                            // The argument starts: replace the scan state by
                            // the state the argument is processed in.
                            o.stack.pop();
                            o.stack.push(if out { Output(next) } else { Ignore(next) });
                        },
                        // Any whitespace (not only a plain space) may
                        // separate the command from its argument.
                        c if c.is_whitespace() => {
                        },
                        _ => panic!("Non-whitespace character ({c}) separating arguments on \
                                     line {lineno}"),
                    }
                },
                (Output(e), Some('{')) if e != Comment => {
                    o.out('{');
                    o.stack.push(Output(Other));
                },
                (Ignore(e), Some('{')) if e != Comment => {
                    o.stack.push(Ignore(Other));
                },
                (Output(Added) | Ignore(Added) | Output(Deleted) | Ignore(Deleted), Some('}')) => {
                    o.stack.pop();
                },
                (Output(Replaced) | Ignore(Replaced), Some('}')) => {
                    // The first argument of `\replaced` ended; its second
                    // argument holds the old text and is always dropped.
                    o.stack.pop();
                    o.stack.push(Scan(Deleted, false));
                },
                (Output(Other), Some('}')) => {
                    o.out('}');
                    o.stack.pop();
                },
                (Ignore(e), Some('}')) if e != Comment => {
                    o.stack.pop();
                },
                (Output(e), Some('%')) if e != Comment => {
                    if cli.strip_comments {
                        if o.stored_whitespace.is_empty() && !o.only_whitespace {
                            // Output comment marker if it is required to maintain
                            // lack of whitespace.
                            o.out('%');
                        }
                        o.stack.push(Ignore(Comment));
                    } else {
                        o.out('%');
                        o.stack.push(Output(Comment));
                    }
                },
                (Ignore(e), Some('%')) if e != Comment => {
                    o.stack.push(Ignore(Comment));
                },
                (Output(_), Some(c)) if c.is_whitespace() => {
                    o.whitespace(c);
                },
                (Output(_), Some(c)) => {
                    o.out(c);
                },
                (Ignore(_), Some(_)) => {
                },
            };
        }

        o.line_end(cli.strip_whitespace, input_only_ws);
    }

    o.flush();
}

mercurial