Initial rough version

Thu, 19 Oct 2023 11:49:38 -0500

author
Tuomo Valkonen <tuomov@iki.fi>
date
Thu, 19 Oct 2023 11:49:38 -0500
changeset 0
548bf3cc032e
child 1
a88aed2bdf13

Initial rough version

Cargo.toml file | annotate | diff | comparison | revisions
README.md file | annotate | diff | comparison | revisions
src/main.rs file | annotate | diff | comparison | revisions
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Cargo.toml	Thu Oct 19 11:49:38 2023 -0500
@@ -0,0 +1,16 @@
+[package]
+name = "strip-changes-markup"
+version = "0.1.0"
+edition = "2021"
+authors = ["Tuomo Valkonen <tuomov@iki.fi>"]
+description = "Removes changes-markup from LaTeX documents"
+homepage = "https://tuomov.iki.fi/software/strip-changes-markup/"
+repository = "https://tuomov.iki.fi/repos/strip-changes-markup/"
+license-file = "LICENSE"
+keywords = [
+    "latex",
+    "changes",
+]
+
+[dependencies]
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Thu Oct 19 11:49:38 2023 -0500
@@ -0,0 +1,16 @@
+
+# strip-changes-markup
+
+This program removes [changes](https://www.ctan.org/pkg/changes) package markup from LaTeX documents. It was written because none of the existing regular expression based scripts actually worked on any sort of complex markup. This program (written in [Rust](https://www.rust-lang.org)) is not based on regular expressions, but has an actual semi-proper parser.
+
+## Installation and usage
+
+1. Install [Rust](https://www.rust-lang.org) by following the instructions on its website.
+2. Run `cargo build` to build the program (optional; the next step does it if needed).
+3. To process a document, use
+   ```
+   cargo run < input.tex > output.tex
+   ```
+   You may also find the built binary under `target/debug/strip-changes-markup`, and copy it to a place that works for you, instead of using `cargo` to run the program.
+
+   
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main.rs	Thu Oct 19 11:49:38 2023 -0500
@@ -0,0 +1,155 @@
+use std::io;
+use std::io::BufWriter;
+use std::io::Write;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum Element { // The kind of construct a parser state refers to.
+    Added, // Argument of an `\added` command.
+    Deleted, // Argument of `\deleted`; also used for the second argument of `\replaced`.
+    Replaced, // First argument of a `\replaced` command.
+    Other, // An ordinary `{...}` group; also what an empty state stack is treated as.
+    Comment, // The rest of a line after `%`; popped again when the line ends.
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum Status { // One entry of the state stack kept by `main`.
+    Output(Element), // Inside the element; ordinary characters are written to the output.
+    Ignore(Element), // Inside the element; ordinary characters are discarded.
+    Scan(Element, bool), // Command seen, waiting for `{`; `true` => Output, `false` => Ignore.
+}
+
+/// Copies standard input to standard output, stripping `changes` markup from the LaTeX:
+/// `\added{x}` becomes `x`, `\deleted{x}` vanishes, `\replaced{new}{old}` becomes `new`.
+/// Panics on I/O errors, and if anything but spaces precedes the `{` of such a command.
+fn main() {
+    let input = io::stdin();
+    let mut output = BufWriter::new(io::stdout());
+    // Innermost parser state is last; an empty stack means plain top-level output.
+    let mut status_stack = Vec::new();
+
+    use Status::*;
+    use Element::*;
+
+    let current = |s : &Vec<Status>| s.last().map_or(Output(Other), |s| *s);
+    let mut out = |c : char| { write!(output, "{}", c).unwrap(); };
+    let mut lineno = 0;
+
+    for l in input.lines().map(|l| l.unwrap()) {
+        lineno += 1;
+        let mut chars = l.chars();
+        let started_ignore = matches!(current(&status_stack), Ignore(_));
+        // Lookahead slot: the character that ended a command name, still to be processed.
+        let mut maybe_next_char : Option<char> = None;
+        'process_line: loop {
+            let next_char = maybe_next_char.take().or_else(|| chars.next());
+            match(current(&status_stack), next_char) {
+                (_, None) => {
+                    break 'process_line;
+                },
+                (st @ (Output(e) | Ignore(e)), Some('\\')) if e != Comment => {
+                    let mut command = String::new();
+                    let mut first = true;
+                    maybe_next_char = 'scan_command: loop {
+                        match chars.next() {
+                            // A backslash followed by a non-alphanumeric character is a
+                            // complete one-character command (`\{`, `\}`, `\\`, `\%`, ...).
+                            // Consuming it here keeps `\%` from being read as a comment start.
+                            Some(c) if first && !c.is_alphanumeric() => {
+                                command.push(c);
+                                break 'scan_command None;
+                            },
+                            Some(c) if c.is_alphanumeric() => {
+                                command.push(c);
+                            },
+                            maybe_c => break 'scan_command maybe_c,
+                        }
+                        first = false;
+                    };
+                    let output_guard = !matches!(st, Ignore(_));
+                    match command.as_str() {
+                        "added" => {
+                            status_stack.push(Scan(Added, output_guard));
+                        },
+                        "replaced" => {
+                            status_stack.push(Scan(Replaced, output_guard));
+                        },
+                        "deleted" => {
+                            status_stack.push(Scan(Deleted, false));
+                        },
+                        _ => {
+                            if output_guard {
+                                out('\\');
+                                command.chars().for_each(|c| out(c));
+                            }
+                        }
+                    };
+                },
+                (Scan(next, o), Some(c)) => {
+                    match c {
+                        '{' => {
+                            status_stack.pop();
+                            status_stack.push(if o { Output(next) } else { Ignore(next) });
+                        },
+                        ' ' => {},
+                        // The space before `\` matters: the continuation eats the indentation.
+                        _ => panic!("Non-whitespace character ({c}) separating arguments on \
+                                     line {lineno}"),
+                    }
+                },
+                (Output(e), Some('{')) if e != Comment => {
+                    out('{');
+                    status_stack.push(Output(Other));
+                },
+                (Ignore(e), Some('{')) if e != Comment => {
+                    status_stack.push(Ignore(Other));
+                },
+                (Output(Added) | Ignore(Added) | Output(Deleted) | Ignore(Deleted), Some('}')) => {
+                    status_stack.pop();
+                },
+                (Output(Replaced) | Ignore(Replaced), Some('}')) => {
+                    status_stack.pop();
+                    status_stack.push(Scan(Deleted, false));
+                },
+                (Output(Other), Some('}')) => {
+                    out('}');
+                    status_stack.pop();
+                },
+                // A `}` inside a comment is inert and must not close the enclosing state.
+                (Ignore(e), Some('}')) if e != Comment => {
+                    status_stack.pop();
+                },
+                (Output(e), Some('%')) if e != Comment => {
+                    out('%');
+                    status_stack.push(Output(Comment));
+                },
+                (Ignore(e), Some('%')) if e != Comment => {
+                    status_stack.push(Ignore(Comment));
+                },
+                (Output(_), Some(c)) => {
+                    out(c);
+                },
+                (Ignore(_), Some(_)) => {},
+            };
+        }
+        // A comment never extends past its line, so its state is dropped at the line end.
+        match current(&status_stack) {
+            Ignore(e) => {
+                if !started_ignore {
+                    out('\n');
+                }
+                if e == Comment {
+                    status_stack.pop();
+                }
+            },
+            Output(e) => {
+                out('\n');
+                if e == Comment {
+                    status_stack.pop();
+                }
+            },
+            Scan(_, _) => {},
+        }
+    }
+
+    output.flush().unwrap();
+}

mercurial