src/main.rs

Thu, 19 Oct 2023 23:25:34 -0500

author
Tuomo Valkonen <tuomov@iki.fi>
date
Thu, 19 Oct 2023 23:25:34 -0500
changeset 6
de1cf8032322
parent 3
cec573b16b46
child 7
68538da191c7
permissions
-rw-r--r--

Implement a more expansible parser

// The main documentation is in the README.
#![doc = include_str!("../README.md")]

#![feature(trait_upcasting)]

use std::io;
use std::fs::File;
use std::io::{BufWriter, BufRead, BufReader};
use std::io::Write;
use clap::Parser;

/// Command line parameters
#[derive(Parser, Debug)]
#[clap(
    about = env!("CARGO_PKG_DESCRIPTION"),
    author = env!("CARGO_PKG_AUTHORS"),
    version = env!("CARGO_PKG_VERSION"),
)]
struct CommandLineArgs {
    // The `///` comments on the fields double as the help texts of the arguments,
    // so they are kept short and user-facing.

    /// Input file (default is stdin)
    input : Option<String>,

    /// Output file (default is stdout)
    #[arg(long, short = 'o')]
    output : Option<String>,

    // The processing switches live in their own struct so that they can be
    // handed to the parser context as a unit (see `main`).
    #[clap(flatten)]
    config : Config
}

// Processing switches. `main` moves this struct into `Context::cli`, where the
// rules read it. The `///` comments on the fields are the user-visible help texts.
#[derive(Parser, Debug)]
struct Config {
    /// Strip comments
    #[arg(long, short = 'c')]
    strip_comments: bool,

    /// Strip unnecessary whitespace
    #[arg(long, short = 'w')]
    strip_whitespace: bool,
}

/// Read-only state that `main` passes to every rule along with each character.
struct Context {
    /// Number of the input line being processed; `main` increments it before
    /// feeding the line, so the first line is number 1.
    lineno : usize,
    /// `true` as long as every character of the current input line fed so far
    /// (including the one being consumed) has been whitespace.
    input_only_ws : bool,
    /// The processing switches given on the command line.
    cli : Config
}

/// An owned, dynamically typed parser state.
type AnyChainRule = Box<dyn ChainRule>;
/// An owned, dynamically typed parser state that can also receive output
/// from rules stacked on top of it.
type AnyNestedRule = Box<dyn NestedRule>;

/// A parser state. The input is fed to the current state one character at a
/// time, and every step yields the state for the following character.
trait ChainRule {
    /// Consumes the input character `c` and returns the rule that is to
    /// consume the next character (possibly this same rule).
    fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule;
    /// Finishes processing; `main` calls this once after the last character.
    fn flush(self : Box<Self>, ctx : &Context);
}

/// A [`ChainRule`] that other rules can wrap: besides consuming input it
/// accepts the output that the rules stacked on top of it decide to emit.
trait NestedRule: ChainRule {
    /// Accepts the output character `c`.
    fn produce(&mut self, c: char, ctx: &Context);

    /// Ends this rule and returns the rule that takes over after it.
    /// `basic_consume` calls this when it meets a closing brace.
    fn next(self: Box<Self>) -> AnyChainRule;

    /// Feeds the characters of `s`, in order, to [`produce`](Self::produce).
    fn produce_string(&mut self, s: String, ctx: &Context) {
        for ch in s.chars() {
            self.produce(ch, ctx);
        }
    }

    /// Announces that a comment introduced by `c` begins here and that its
    /// text will not be passed to [`produce`](Self::produce).
    fn start_ignored_comment(&mut self, c: char);
}

impl<W : Write + 'static> ChainRule for Out<W> {
    fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
        basic_consume(self, c, ctx, true)
    }
    fn flush(mut self : Box<Self>, _ctx : &Context) {
        self.output.flush().unwrap();
    }
}

impl<W: Write + 'static> NestedRule for Out<W> {
    /// Writes `c` to the output, holding whitespace back until it is known
    /// whether something visible follows on the same line.
    fn produce(&mut self, c: char, ctx: &Context) {
        match c {
            '\n' => self.line_end(ctx.cli.strip_whitespace, ctx.input_only_ws),
            blank if blank.is_whitespace() => self.stored_whitespace.push(blank),
            visible => {
                // Held-back whitespace is released in front of the visible character.
                write!(self.output, "{}{}", self.stored_whitespace, visible).unwrap();
                self.stored_whitespace.clear();
                self.only_whitespace = false;
                self.whitespace_satisfied = false;
                self.par_satisfied = false;
            }
        }
    }

    /// The output rule is the bottom of the stack: it continues as itself.
    fn next(self: Box<Self>) -> AnyChainRule {
        let this: AnyChainRule = self;
        this
    }

    /// Prepares the output for a comment whose text is being dropped.
    fn start_ignored_comment(&mut self, c: char) {
        if self.only_whitespace {
            // Nothing visible on this line so far: remember that the line
            // consists of an ignored comment only.
            self.ignored_comment_only_line = true;
            return;
        }
        if self.stored_whitespace.is_empty() {
            // The marker needs to be inserted if there is to be no whitespace inserted
            write!(self.output, "{c}").unwrap();
            self.whitespace_satisfied = false;
            self.par_satisfied = false;
            self.only_whitespace = false;
        }
    }
}

/// Shared character dispatcher for all rules that parse ordinary text.
///
/// * `{` is produced and opens a [`Group`] on top of `s`.
/// * `}` ends `s` (via `next`); it is produced first only if `print_end` is set.
/// * `\` starts collecting a command name.
/// * `%` starts a comment, which is either copied or ignored depending on
///   the `strip_comments` switch.
/// * Any other character is produced and `s` stays in charge.
fn basic_consume(mut s: AnyNestedRule, c: char, ctx: &Context, print_end: bool) -> AnyChainRule {
    match c {
        '{' => {
            s.produce('{', ctx);
            Box::new(Group(s))
        }
        '}' if print_end => {
            s.produce('}', ctx);
            s.next()
        }
        '}' => s.next(),
        '\\' => Box::new(CommandName {
            parent: s,
            command: String::from("\\"),
        }),
        '%' if ctx.cli.strip_comments => {
            s.start_ignored_comment('%');
            Box::new(IgnoreComment(s))
        }
        '%' => {
            s.produce('%', ctx);
            Box::new(Comment(s))
        }
        other => {
            s.produce(other, ctx);
            s
        }
    }
}

/// Rule that collects the name of a command after a backslash.
struct CommandName {
    /// The rule the command appeared in; it receives the command text if the
    /// command is not one that is handled specially.
    parent : AnyNestedRule,
    /// The command collected so far; `basic_consume` creates it holding just
    /// the backslash.
    command : String
}

impl ChainRule for CommandName {
    /// Extends the command name with `c`, or finishes the command if `c`
    /// cannot be part of it.
    ///
    /// Directly after the backslash the characters `{`, `}`, `\` and `%` form
    /// a one-character command (`\{`, `\}`, `\\`, `\%`) and lose the special
    /// meaning they have in `basic_consume`. In particular `\%` is an escaped
    /// percent sign and must not start a comment. Alphanumeric characters are
    /// appended to the name. Anything else finishes the command and is then
    /// consumed by whichever rule the finished command yields.
    fn consume(mut self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
        match c {
            // A length of one means only the initial backslash has been seen.
            '}' | '{' | '\\' | '%' if self.command.len() <= 1 => {
                self.command.push(c);
                self.handle(ctx)
            },
            c if c.is_alphanumeric() => {
                self.command.push(c);
                self
            },
            c => {
                self.handle(ctx)
                    .consume(c, ctx)
            }
        }
    }

    /// Finishes the command collected so far and flushes the resulting rule.
    fn flush(self : Box<Self>, ctx : &Context) {
        self.handle(ctx)
            .flush(ctx)
    }
}

impl CommandName {
    fn handle(mut self, ctx : &Context) -> AnyChainRule {
        match self.command.as_str() {
            "\\added" => {
                Scan::new(Added(self.parent))
            },
            "\\replaced" => {
                Scan::new(Replaced(self.parent))
            },
            "\\deleted" => {
                Scan::new(Deleted(self.parent))
            },
            _ => {
                self.parent.produce_string(self.command, ctx);
                self.parent
            }
        }
    }
}

/// Rule for the text of a comment that is kept: everything up to the end of
/// the line is produced to the wrapped rule without being interpreted.
struct Comment(AnyNestedRule);

impl ChainRule for Comment {
    fn consume(mut self: Box<Self>, c: char, ctx: &Context) -> AnyChainRule {
        if c != '\n' {
            self.0.produce(c, ctx);
            return self;
        }
        // The line break ends the comment and is handled by the wrapped rule.
        self.0.consume(c, ctx)
    }

    fn flush(self: Box<Self>, ctx: &Context) {
        let Comment(enclosing) = *self;
        enclosing.flush(ctx)
    }
}

/// Rule for the text of a comment that is dropped: characters are discarded
/// until the end of the line, which is handed to the wrapped rule.
struct IgnoreComment(AnyChainRule);

impl ChainRule for IgnoreComment {
    fn consume(self: Box<Self>, c: char, ctx: &Context) -> AnyChainRule {
        match c {
            '\n' => self.0.consume(c, ctx),
            _ => self,
        }
    }

    fn flush(self: Box<Self>, ctx: &Context) {
        let IgnoreComment(resume) = *self;
        resume.flush(ctx)
    }
}

/// Rule for a plain `{...}` group: all output goes to the wrapped rule, and
/// the closing brace (which is printed) returns control to it.
struct Group(AnyNestedRule);

impl ChainRule for Group {
    fn consume(self: Box<Self>, c: char, ctx: &Context) -> AnyChainRule {
        let this: AnyNestedRule = self;
        basic_consume(this, c, ctx, true)
    }

    fn flush(self: Box<Self>, ctx: &Context) {
        let Group(enclosing) = *self;
        enclosing.flush(ctx)
    }
}

impl NestedRule for Group {
    fn produce(&mut self, c: char, ctx: &Context) {
        let Group(enclosing) = self;
        enclosing.produce(c, ctx)
    }

    fn next(self: Box<Self>) -> AnyChainRule {
        let Group(enclosing) = *self;
        enclosing
    }

    fn start_ignored_comment(&mut self, c: char) {
        let Group(enclosing) = self;
        enclosing.start_ignored_comment(c)
    }
}

/// Rule for the argument of `\added`: the text is passed on to the wrapped
/// rule, while the closing brace of the argument is not printed.
struct Added(AnyNestedRule);

impl ChainRule for Added {
    fn consume(self: Box<Self>, c: char, ctx: &Context) -> AnyChainRule {
        let this: AnyNestedRule = self;
        basic_consume(this, c, ctx, false)
    }

    fn flush(self: Box<Self>, ctx: &Context) {
        let Added(enclosing) = *self;
        enclosing.flush(ctx)
    }
}

impl NestedRule for Added {
    fn produce(&mut self, c: char, ctx: &Context) {
        let Added(enclosing) = self;
        enclosing.produce(c, ctx)
    }

    fn next(self: Box<Self>) -> AnyChainRule {
        let Added(enclosing) = *self;
        enclosing
    }

    fn start_ignored_comment(&mut self, c: char) {
        let Added(enclosing) = self;
        enclosing.start_ignored_comment(c)
    }
}
/// Rule for deleted text (the argument of `\deleted` and the second argument
/// of `\replaced`): everything up to the closing brace is discarded.
struct Deleted(AnyNestedRule);

impl ChainRule for Deleted {
    fn consume(self : Box<Self>, c : char, ctx : &Context) -> AnyChainRule {
        // The closing brace of the argument is not printed.
        basic_consume(self, c, ctx, false)
    }
    fn flush(self : Box<Self>, ctx : &Context) {
        self.0.flush(ctx)
    }
}

impl NestedRule for Deleted {
    /// Deleted text produces no output.
    fn produce(&mut self, _c : char, _ctx : &Context) {
    }
    /// The end of the argument returns control to the wrapped rule.
    fn next(self : Box<Self>) -> AnyChainRule {
        self.0
    }
    /// An ignored comment inside deleted text must leave no trace either.
    ///
    /// Forwarding the notification would let the wrapped rule react to a
    /// comment whose surroundings (including the line break that ends it) are
    /// never produced. The output rule could then print a lone `%`, which
    /// comments out whatever follows on the same output line, or mark an
    /// unrelated line as consisting of an ignored comment only.
    fn start_ignored_comment(&mut self, _c : char) {
    }
}

/// Rule for the first argument of `\replaced` (the new text): it is passed on
/// to the wrapped rule. When the argument ends, the following argument is
/// looked for and treated as deleted text.
struct Replaced(AnyNestedRule);

impl ChainRule for Replaced {
    fn consume(self: Box<Self>, c: char, ctx: &Context) -> AnyChainRule {
        let this: AnyNestedRule = self;
        basic_consume(this, c, ctx, false)
    }

    fn flush(self: Box<Self>, ctx: &Context) {
        let Replaced(enclosing) = *self;
        enclosing.flush(ctx)
    }
}

impl NestedRule for Replaced {
    fn produce(&mut self, c: char, ctx: &Context) {
        let Replaced(enclosing) = self;
        enclosing.produce(c, ctx)
    }

    fn next(self: Box<Self>) -> AnyChainRule {
        let Replaced(enclosing) = *self;
        Scan::new(Deleted(enclosing))
    }

    fn start_ignored_comment(&mut self, c: char) {
        let Replaced(enclosing) = self;
        enclosing.start_ignored_comment(c)
    }
}

/// Rule that looks for the opening brace of a command argument and then
/// hands over to the wrapped rule.
struct Scan(AnyNestedRule);

impl ChainRule for Scan {
    /// Skips whitespace (line breaks included) and comments in front of the
    /// argument; the comments are dropped regardless of the command line
    /// switches.
    ///
    /// # Panics
    /// Panics if anything else precedes the opening brace.
    fn consume(self: Box<Self>, c: char, ctx: &Context) -> AnyChainRule {
        match c {
            blank if blank.is_whitespace() => self,
            '{' => self.0,
            '%' => Box::new(IgnoreComment(self)),
            _ => panic!("Non-whitespace character ({c}) separating arguments on \
                         line {lineno}", lineno = ctx.lineno),
        }
    }

    fn flush(self: Box<Self>, ctx: &Context) {
        let Scan(pending) = *self;
        pending.flush(ctx)
    }
}

impl Scan {
    /// Boxes `r` and wraps it in a `Scan` that waits for the argument of `r`.
    fn new<R: NestedRule + 'static>(r: R) -> Box<dyn ChainRule> {
        let pending: AnyNestedRule = Box::new(r);
        Box::new(Scan(pending))
    }
}


/// The bottom rule of every stack: writes the processed text to `output`
/// and keeps track of what is needed to tidy up whitespace and line breaks.
struct Out<W: Write> {
    /// `true` while nothing visible has been written since the last line end.
    only_whitespace: bool,
    /// Whitespace received since the last visible character; it is written
    /// only once it is known what follows.
    stored_whitespace: String,
    /// Destination of the processed text.
    output: W,
    /// `true` if a line break has been written since the last visible character.
    whitespace_satisfied: bool,
    /// `true` if the most recently written line break ended a line without
    /// visible characters, and nothing visible has been written since.
    par_satisfied: bool,
    /// `true` if an ignored comment started on the current line before
    /// anything visible was written.
    ignored_comment_only_line: bool,
}

impl<W: Write> Out<W> {
    /// Ends the current line, writing a line break unless it is redundant.
    ///
    /// * `strip_ws` - whether unnecessary whitespace is to be removed.
    /// * `input_only_ws` - whether the input line consisted of whitespace only.
    ///
    /// Panics if writing to the output fails.
    pub fn line_end(&mut self, strip_ws: bool, input_only_ws: bool) {
        let emit_linefeed = match (input_only_ws, strip_ws) {
            // Blank input line: a paragraph break is needed unless one is already there.
            (true, true) => !self.par_satisfied,
            (true, false) => true,
            // Lines that became empty are dropped once a line break is in place.
            (false, true) => !(self.only_whitespace && self.whitespace_satisfied),
            // Lines consisting of an ignored comment only are dropped.
            (false, false) => !self.ignored_comment_only_line,
        };

        if emit_linefeed {
            if !strip_ws {
                self.output
                    .write_all(self.stored_whitespace.as_bytes())
                    .unwrap();
            }
            self.output.write_all(b"\n").unwrap();
            self.whitespace_satisfied = true;
            self.par_satisfied = self.only_whitespace;
        }

        // Start the next line with a clean slate.
        self.stored_whitespace.clear();
        self.only_whitespace = true;
        self.ignored_comment_only_line = false;
    }
}

/// Reads the input line by line, feeds it character by character through the
/// rule chain, and writes the result to the output.
///
/// # Panics
/// Panics with a message naming the file or line if the input cannot be
/// opened or read, or if the output file cannot be created.
fn main() {
    let cli = CommandLineArgs::parse();
    // The bare `unwrap()` message would not say which file was involved.
    let input = cli.input.map_or_else(
        || Box::new(BufReader::new(io::stdin())) as Box<dyn BufRead>,
        |f| {
            let file = File::open(&f)
                .unwrap_or_else(|e| panic!("Cannot open input file '{f}': {e}"));
            Box::new(BufReader::new(file)) as Box<dyn BufRead>
        }
    );
    let output = cli.output.map_or_else(
        || Box::new(BufWriter::new(io::stdout())) as Box<dyn Write>,
        |f| {
            let file = File::create(&f)
                .unwrap_or_else(|e| panic!("Cannot create output file '{f}': {e}"));
            Box::new(BufWriter::new(file)) as Box<dyn Write>
        }
    );

    // Processing starts in the output rule, at the beginning of a line.
    let mut rule : Box<dyn ChainRule> = Box::new(Out {
        only_whitespace : true,
        stored_whitespace : String::new(),
        output,
        whitespace_satisfied : true,
        par_satisfied : true,
        ignored_comment_only_line : false
    });

    let mut ctx = Context{ lineno : 0, cli : cli.config, input_only_ws : true};

    for l in input.lines() {
        ctx.lineno += 1;
        let lineno = ctx.lineno;
        let l = l.unwrap_or_else(|e| panic!("Cannot read input line {lineno}: {e}"));
        ctx.input_only_ws = true;
        for c in l.chars() {
            ctx.input_only_ws = ctx.input_only_ws && c.is_whitespace();
            rule = rule.consume(c, &ctx);
        }
        // `lines()` strips the line break, so it is fed explicitly.
        rule = rule.consume('\n', &ctx);
    }

    rule.flush(&ctx);
}

mercurial