Tue, 16 Jan 2018 16:39:48 +0000
Convert module stuff to lua 5.3
0 | 1 | #!/usr/bin/env lua |
2 | ||
3 | --[[ | |
4 | # markdown.lua -- version 0.32 | |
5 | ||
6 | <http://www.frykholm.se/files/markdown.lua> | |
7 | ||
8 | **Author:** Niklas Frykholm, <niklas@frykholm.se> | |
9 | **Date:** 31 May 2008 | |
10 | ||
11 | This is an implementation of the popular text markup language Markdown in pure Lua. | |
12 | Markdown can convert documents written in a simple and easy to read text format | |
13 | to well-formatted HTML. For a more thorough description of Markdown and the Markdown | |
14 | syntax, see <http://daringfireball.net/projects/markdown>. | |
15 | ||
16 | The original Markdown source is written in Perl and makes heavy use of advanced | |
17 | regular expression techniques (such as negative look-ahead, etc) which are not available | |
18 | in Lua's simple regex engine. Therefore this Lua port has been rewritten from the ground | |
19 | up. It is probably not completely bug free. If you notice any bugs, please report them to | |
20 | me. A unit test that exposes the error is helpful. | |
21 | ||
22 | ## Usage | |
23 | ||
24 | require "markdown" | |
25 | markdown(source) | |
26 | ||
27 | ``markdown.lua`` exposes a single global function named ``markdown(s)`` which applies the | |
28 | Markdown transformation to the specified string. | |
29 | ||
30 | ``markdown.lua`` can also be used directly from the command line: | |
31 | ||
32 | lua markdown.lua test.md | |
33 | ||
34 | Creates a file ``test.html`` with the converted content of ``test.md``. Run: | |
35 | ||
36 | lua markdown.lua -h | |
37 | ||
38 | For a description of the command-line options. | |
39 | ||
40 | ``markdown.lua`` uses the same license as Lua, the MIT license. | |
41 | ||
42 | ## License | |
43 | ||
44 | Copyright © 2008 Niklas Frykholm. | |
45 | ||
46 | Permission is hereby granted, free of charge, to any person obtaining a copy of this | |
47 | software and associated documentation files (the "Software"), to deal in the Software | |
48 | without restriction, including without limitation the rights to use, copy, modify, merge, | |
49 | publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons | |
50 | to whom the Software is furnished to do so, subject to the following conditions: | |
51 | ||
52 | The above copyright notice and this permission notice shall be included in all copies | |
53 | or substantial portions of the Software. | |
54 | ||
55 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
56 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
57 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
58 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
59 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
60 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
61 | THE SOFTWARE. | |
62 | ||
63 | ## Version history | |
64 | ||
65 | - **0.32** -- 31 May 2008 | |
66 | - Fix for links containing brackets | |
67 | - **0.31** -- 1 Mar 2008 | |
68 | - Fix for link definitions followed by spaces | |
69 | - **0.30** -- 25 Feb 2008 | |
70 | - Consistent behavior with Markdown when the same link reference is reused | |
71 | - **0.29** -- 24 Feb 2008 | |
72 | - Fix for <pre> blocks with spaces in them | |
73 | - **0.28** -- 18 Feb 2008 | |
74 | - Fix for link encoding | |
75 | - **0.27** -- 14 Feb 2008 | |
76 | - Fix for link database links with () | |
77 | - **0.26** -- 06 Feb 2008 | |
78 | - Fix for nested italic and bold markers | |
79 | - **0.25** -- 24 Jan 2008 | |
80 | - Fix for encoding of naked < | |
81 | - **0.24** -- 21 Jan 2008 | |
82 | - Fix for link behavior. | |
83 | - **0.23** -- 10 Jan 2008 | |
84 | - Fix for a regression bug in longer expressions in italic or bold. | |
85 | - **0.22** -- 27 Dec 2007 | |
86 | - Fix for crash when processing blocks with a percent sign in them. | |
87 | - **0.21** -- 27 Dec 2007 | |
88 | - Fix for combined strong and emphasis tags | |
89 | - **0.20** -- 13 Oct 2007 | |
90 | - Fix for < as well in image titles, now matches Dingus behavior | |
91 | - **0.19** -- 28 Sep 2007 | |
92 | - Fix for quotation marks " and ampersands & in link and image titles. | |
93 | - **0.18** -- 28 Jul 2007 | |
94 | - Does not crash on unmatched tags (behaves like standard markdown) | |
95 | - **0.17** -- 12 Apr 2007 | |
96 | - Fix for links with %20 in them. | |
97 | - **0.16** -- 12 Apr 2007 | |
98 | - Do not require arg global to exist. | |
99 | - **0.15** -- 28 Aug 2006 | |
100 | - Better handling of links with underscores in them. | |
101 | - **0.14** -- 22 Aug 2006 | |
102 | - Bug for *`foo()`* | |
103 | - **0.13** -- 12 Aug 2006 | |
104 | - Added -l option for including stylesheet inline in document. | |
105 | - Fixed bug in -s flag. | |
106 | - Fixed emphasis bug. | |
107 | - **0.12** -- 15 May 2006 | |
108 | - Fixed several bugs to comply with MarkdownTest 1.0 <http://six.pairlist.net/pipermail/markdown-discuss/2004-December/000909.html> | |
109 | - **0.11** -- 12 May 2006 | |
110 | - Fixed bug for escaping `*` and `_` inside code spans. | |
111 | - Added license terms. | |
112 | - Changed join() to table.concat(). | |
113 | - **0.10** -- 3 May 2006 | |
114 | - Initial public release. | |
115 | ||
116 | // Niklas | |
117 | ]] | |
118 | ||
119 | ||
-- Set up a table for holding the module's functions to avoid polluting the global
-- namespace. Every non-local name defined below is stored in M; reads of names
-- that M does not have fall through to the real globals via MT.__index.
local M = {}
local MT = {__index = _G}
setmetatable(M, MT)
if setfenv then
  -- Lua 5.1 / LuaJIT: replace the environment of the running chunk.
  setfenv(1, M)
else
  -- Lua 5.2 and later removed setfenv; the chunk environment is the _ENV upvalue.
  _ENV = M
end
125 | ||
126 | ---------------------------------------------------------------------- | |
127 | -- Utility functions | |
128 | ---------------------------------------------------------------------- | |
129 | ||
-- Locks table t against the creation of new fields. Assigning to a key that t does
-- not already hold raises an error that is reported at the assigning code (level 2).
-- This is useful for detecting variables that have accidentally been made global.
-- If t already has a metatable, its __index is carried over so lookups keep working.
function lock(t)
  -- Declared local so that calling lock() does not itself create a stray global.
  local function lock_new_index(_, k)
    -- tostring() keeps the message usable when the key is not a string or number.
    error("module has been locked -- " .. tostring(k) .. " must be declared local", 2)
  end

  local mt = {__newindex = lock_new_index}
  local previous = getmetatable(t)
  if previous then mt.__index = previous.__index end
  setmetatable(t, mt)
end
142 | ||
-- Builds and returns a new table in which every key of t is associated with
-- f(value, key). The input table is left untouched.
function map(t, f)
  local mapped = {}
  for key, value in pairs(t) do
    mapped[key] = f(value, key)
  end
  return mapped
end
149 | ||
-- Returns its argument unchanged; handy wherever a transformation
-- function is expected but nothing should be done.
function identity(text)
  return text
end
152 | ||
-- Functional-style conditional: yields a when t is truthy, otherwise b.
-- NOTE: both a and b are evaluated by the caller before the call, so there
-- is no short-circuit evaluation.
function iff(t, a, b)
  if t then
    return a
  end
  return b
end
155 | ||
-- Splits text at every match of sep and returns the pieces as an array.
-- sep is a Lua pattern and defaults to "\n", so by default the result is the
-- list of lines. The piece after the last separator is always included, even
-- when it is empty.
function split(text, sep)
  local pattern = sep or "\n"
  local pieces = {}
  local cursor = 1
  local first, last = text:find(pattern, cursor)
  while first do
    table.insert(pieces, text:sub(cursor, first - 1))
    cursor = last + 1
    first, last = text:find(pattern, cursor)
  end
  table.insert(pieces, text:sub(cursor))
  return pieces
end
169 | ||
-- Expands every tab into spaces, padding the text that precedes the tab
-- (since the previous tab or line start) up to the next multiple of four
-- characters. A tab always produces at least one space.
function detab(text)
  local tab_width = 4
  local expanded = text:gsub("([^\n]-)\t", function(prefix)
    -- 1..tab_width spaces, whatever reaches the next tab stop
    local padding = tab_width - (prefix:len() % tab_width)
    return prefix .. string.rep(" ", padding)
  end)
  return expanded
end
181 | ||
-- Runs string.find on s for every pattern in the list, starting at index, and
-- returns the results (start, stop, captures...) of the match that begins
-- earliest. On a tie the pattern listed first wins. Returns nothing when no
-- pattern matches.
function find_first(s, patterns, index)
  -- table.unpack exists from Lua 5.2 on; Lua 5.1 / LuaJIT only has the global unpack.
  local unpack = table.unpack or unpack
  local best = {}
  for _, p in ipairs(patterns) do
    local match = {s:find(p, index)}
    if #match > 0 and (#best == 0 or match[1] < best[1]) then
      best = match
    end
  end
  return unpack(best)
end
191 | ||
-- Two modes, selected by the presence of a replacement array:
--   * with replacement: removes the elements in [start, stop] from array (in
--     place), inserts the replacement elements at that position in their
--     original order, and returns array;
--   * without replacement: returns a new array holding the elements in
--     [start, stop]; array is not modified.
function splice(array, start, stop, replacement)
  if replacement then
    for _ = start, stop do
      table.remove(array, start)
    end
    for i, v in ipairs(replacement) do
      -- Offset by i so that a multi-element replacement keeps its order
      -- (inserting every element at `start` would reverse it).
      table.insert(array, start + i - 1, v)
    end
    return array
  end

  local res = {}
  for i = start, stop do
    table.insert(res, array[i])
  end
  return res
end
214 | ||
-- Outdents the text one step: removes up to four leading spaces from every
-- line. Four matches the indent width used elsewhere in this file (tab_width
-- in detab, line:sub(5) in classify), so a fully indented line loses its
-- whole indent rather than keeping one stray space.
function outdent(text)
  -- Prepend a newline so the first line is handled by the same pattern.
  text = "\n" .. text
  text = text:gsub("\n ? ? ? ?", "\n")
  return text:sub(2)
end
222 | ||
-- Indents the text one step by inserting the indent after every newline.
-- The first line is not touched; callers prepend its indent themselves.
function indent(text)
  return (text:gsub("\n", "\n "))
end
228 | ||
-- Does a simple tokenization of html data and returns the tokens as a list.
-- Each token is a table with a type field ("tag" or "text") and a text field
-- holding the original characters. Comments (<!-- -->) and processing
-- instructions (<? ?>) count as tags. An opener that is never closed is
-- emitted as a one-character text token and scanning resumes after it.
-- The final token is always a text token (possibly empty).
function tokenize_html(html)
  local openers = {"<!%-%-", "<[a-z/!$]", "<%?"}
  local tokens = {}
  local pos = 1

  while true do
    local start = find_first(html, openers, pos)
    if not start then break end

    -- plain text between the previous token and this opener
    if start ~= pos then
      table.insert(tokens, {type = "text", text = html:sub(pos, start - 1)})
    end

    -- choose the terminator that belongs to the kind of opener found
    local closer
    if html:match("^<!%-%-", start) then
      closer = "%-%->"
    elseif html:match("^<%?", start) then
      closer = "?>"
    else
      closer = "%b<>"
    end

    local _, stop = html:find(closer, start)
    if stop then
      table.insert(tokens, {type = "tag", text = html:sub(start, stop)})
      pos = stop + 1
    else
      -- unmatched opener: keep the "<" as text instead of raising an error
      table.insert(tokens, {type = "text", text = html:sub(start, start)})
      pos = start + 1
    end
  end

  table.insert(tokens, {type = "text", text = html:sub(pos)})
  return tokens
end
262 | ||
263 | ---------------------------------------------------------------------- | |
264 | -- Hash | |
265 | ---------------------------------------------------------------------- | |
266 | ||
-- "Hashing" maps arbitrary strings to alphanumeric placeholders that are
-- unique within the document. It is not a cryptographic hash (and not
-- one-way); it only serves to shield parts of the document from later
-- processing steps.

local HASH = {
  inited = false,   -- has init_hash been called?
  identifier = "",  -- prefix of every placeholder; chosen so it does not occur in the document
  counter = 0,      -- number of placeholders handed out so far
  table = {},       -- string -> placeholder
}

-- Resets the hash state and picks a placeholder prefix ("HASH" followed by
-- the smallest number, starting at 0) that does not occur anywhere in text.
function init_hash(text)
  HASH.inited = true
  HASH.identifier = ""
  HASH.counter = 0
  HASH.table = {}

  local n = 0
  while text:find("HASH" .. n, 1, true) do
    n = n + 1
  end
  HASH.identifier = "HASH" .. n
end

-- Returns the placeholder for s, creating one on first use. The same string
-- always yields the same placeholder until init_hash is called again.
-- Fails an assertion when init_hash has not been called yet.
function hash(s)
  assert(HASH.inited)
  local id = HASH.table[s]
  if not id then
    HASH.counter = HASH.counter + 1
    id = HASH.identifier .. HASH.counter .. "X"
    HASH.table[s] = id
  end
  return id
end
317 | ||
318 | ---------------------------------------------------------------------- | |
319 | -- Protection | |
320 | ---------------------------------------------------------------------- | |
321 | ||
322 | -- The protection module is used to "protect" parts of a document | |
323 | -- so that they are not modified by subsequent processing steps. | |
324 | -- Protected parts are saved in a table for later unprotection | |
325 | ||
-- Protection data
local PD = {
  -- Protected blocks, keyed by the placeholder that stands in for them
  blocks = {},

  -- Block level tags that will be protected
  tags = {
    "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
    "pre", "table", "dl", "ol", "ul", "script", "noscript", "form", "fieldset",
    "iframe", "math", "ins", "del",
  },
}

-- Returns a pattern matching a block of the given tag whose opening and
-- closing tags both start in the leftmost column. Anything may appear in
-- between, including indented nested tags of the same kind, i.e.
--     <div>
--         A nested block.
--         <div>
--             Nested data.
--         </div>
--     </div>
function block_pattern(tag)
  return string.format("\n<%s.-\n</%s>[ \t]*\n", tag, tag)
end

-- Returns a pattern matching a block of the given tag that starts right
-- after a newline and whose closing tag is followed only by blanks and a newline.
function line_pattern(tag)
  return string.format("\n<%s.-</%s>[ \t]*\n", tag, tag)
end
353 | ||
-- Protects the characters from start to stop in text and returns the new
-- text. The range is stored under its placeholder in PD.blocks. The
-- characters at positions start and stop themselves stay in the text (the
-- callers' patterns begin and end with a newline), with the placeholder
-- placed between them.
function protect_range(text, start, stop)
  local original = text:sub(start, stop)
  local placeholder = hash(original)
  PD.blocks[placeholder] = original
  local head = text:sub(1, start)
  local tail = text:sub(stop)
  return head .. placeholder .. tail
end
363 | ||
-- Protects every part of text that matches any of the patterns. The search
-- restarts from the beginning after each replacement, so the earliest
-- remaining match is always protected next. Returns the resulting text.
function protect_matches(text, patterns)
  local start, stop = find_first(text, patterns)
  while start do
    text = protect_range(text, start, stop)
    start, stop = find_first(text, patterns)
  end
  return text
end
374 | ||
-- Protects block level html in text and returns the protected text.
-- The passes run in a fixed order: potentially nested block tags first, then
-- block tags on the line level, then <hr> tags, and finally comments.
function protect(text)
  local passes = {
    map(PD.tags, block_pattern),
    map(PD.tags, line_pattern),
    {"\n<hr[^>]->[ \t]*\n"},
    {"\n<!%-%-.-%-%->[ \t]*\n"},
  }
  for _, patterns in ipairs(passes) do
    text = protect_matches(text, patterns)
  end
  return text
end
386 | ||
-- Looks s up among the protection placeholders. Returns the protected
-- original (a truthy value) when s is such a placeholder, nil otherwise.
function is_protected(s)
  local original = PD.blocks[s]
  return original
end
391 | ||
-- Unprotects text by replacing every protection placeholder with the block
-- it stands for, and returns the expanded text.
function unprotect(text)
  for placeholder, original in pairs(PD.blocks) do
    -- "%" is special in gsub replacement strings, so double it first
    local replacement = original:gsub("%%", "%%%%")
    text = text:gsub(placeholder, replacement)
  end
  return text
end
400 | ||
401 | ||
402 | ---------------------------------------------------------------------- | |
403 | -- Block transform | |
404 | ---------------------------------------------------------------------- | |
405 | ||
406 | -- The block transform functions transform the text on the block level. | |
407 | -- They work with the text as an array of lines rather than as individual | |
408 | -- characters. | |
409 | ||
-- Returns true if line is a ruler made of char: it must consist solely of
-- spaces and char characters and contain at least three char characters.
-- char is a single punctuation character; it is %-escaped before being
-- placed in the patterns.
function is_ruler_of(line, char)
  local c = "%" .. char
  local only_ruler_chars = line:match("^[ " .. c .. "]*$")
  local three_or_more = line:match(c .. ".*" .. c .. ".*" .. c)
  if only_ruler_chars and three_or_more then
    return true
  end
  return false
end
418 | ||
-- Identifies the block level formatting present in a line.
-- Returns an info table that always has `line` (the untouched input), `text`
-- (the content with block markers removed, where applicable) and `type`,
-- which is one of: "indented", "ruler", "blank", "header", "list_item",
-- "blockquote", "raw" or "normal". Type-specific fields:
--   indented:   outdented  -- the line without its four-space indent
--   ruler:      ruler_char
--   header:     level
--   list_item:  list_type ("numeric" or "bullet") plus number or bullet
--   raw:        html       -- the unprotected html
-- The checks run in the order listed; the first that applies wins.
function classify(line)
  local info = {line = line, text = line}

  -- A full indent step is four spaces, matching the line:sub(5) below.
  -- Lines with one to three leading spaces must fall through so that the
  -- list item patterns (which allow up to three) can see them.
  if line:match("^    ") then
    info.type = "indented"
    info.outdented = line:sub(5)
    return info
  end

  for _, c in ipairs({'*', '-', '_', '='}) do
    if is_ruler_of(line, c) then
      info.type = "ruler"
      info.ruler_char = c
      return info
    end
  end

  if line == "" then
    info.type = "blank"
    return info
  end

  -- "# title", optionally closed by trailing #s
  local hashes, title = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$")
  if hashes then
    info.type = "header"
    info.level = hashes:len()
    info.text = title
    return info
  end

  -- "1. item"
  local number, numbered_text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)")
  if number then
    info.type = "list_item"
    info.list_type = "numeric"
    info.number = tonumber(number)
    info.text = numbered_text
    return info
  end

  -- "* item", "+ item" or "- item"
  local bullet, bullet_text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)")
  if bullet then
    info.type = "list_item"
    info.list_type = "bullet"
    info.bullet = bullet
    info.text = bullet_text
    return info
  end

  -- "> quoted"; the capture may be the empty string, which is still truthy
  local quoted = line:match("^>[ \t]?(.*)")
  if quoted then
    info.type = "blockquote"
    info.text = quoted
    return info
  end

  if is_protected(line) then
    info.type = "raw"
    info.html = unprotect(line)
    return info
  end

  info.type = "normal"
  return info
end
483 | ||
-- Finds headers consisting of a normal line followed by a ruler of "=" or
-- "-" and merges each pair into a single header entry (level 1 for "=",
-- level 2 for "-"). The array is modified in place and returned.
function headers(array)
  local i = 1
  while i < #array do
    local current, following = array[i], array[i + 1]
    local ruler = following.ruler_char
    if current.type == "normal" and following.type == "ruler"
        and (ruler == "-" or ruler == "=") then
      local level = 2
      if ruler == "=" then level = 1 end
      table.remove(array, i + 1)
      array[i] = {
        line = current.line,
        text = current.line,
        type = "header",
        level = level,
      }
    end
    i = i + 1
  end
  return array
end
502 | ||
-- Finds list blocks in the array of classified lines and replaces each one
-- with a single "raw" entry holding the generated <ol>/<ul> html. When
-- sublist is true a list may start at any list item; otherwise it must start
-- on the first line or directly after a blank line. List items that end up
-- outside any list are downgraded to normal lines. Returns the array.
function lists(array, sublist)
  -- True if any of the entries is a blank line.
  local function any_blanks(entries)
    for _, entry in ipairs(entries) do
      if entry.type == "blank" then return true end
    end
    return false
  end

  -- Groups the entries per list item: every list_item entry starts a new
  -- group that also collects the lines following it.
  local function split_list_items(entries)
    local groups = {}
    local current = {entries[1]}
    for i = 2, #entries do
      local entry = entries[i]
      if entry.type == "list_item" then
        table.insert(groups, current)
        current = {entry}
      else
        table.insert(current, entry)
      end
    end
    table.insert(groups, current)
    return groups
  end

  -- Renders the lines of one list item as an <li> element. With block set
  -- the content gets the full block transform (paragraphs); otherwise only
  -- nested lists are processed and no paragraphs are produced.
  local function process_list_item(item_lines, block)
    while item_lines[#item_lines].type == "blank" do
      table.remove(item_lines)
    end

    local parts = {item_lines[1].text}
    for i = 2, #item_lines do
      parts[i] = outdent(item_lines[i].line)
    end
    local itemtext = table.concat(parts, "\n")

    if block then
      itemtext = block_transform(itemtext, true)
    else
      local nested = lists(map(split(itemtext), classify), true)
      itemtext = table.concat(blocks_to_html(nested, true), "\n")
    end
    -- content of <pre> must keep its exact whitespace
    if not itemtext:find("<pre>") then itemtext = indent(itemtext) end
    return " <li>" .. itemtext .. "</li>"
  end

  -- Renders the lines of a whole list; entries[1] is its first list item
  -- and decides between <ol> and <ul>.
  local function process_list(entries)
    local block_list = any_blanks(entries)
    local rendered = {}
    for _, item in ipairs(split_list_items(entries)) do
      table.insert(rendered, process_list_item(item, block_list) .. "\n")
    end
    local out = table.concat(rendered)
    if entries[1].list_type == "numeric" then
      return "<ol>\n" .. out .. "</ol>"
    end
    return "<ul>\n" .. out .. "</ul>"
  end

  -- Index of the first line of the first list, or nil if there is none.
  local function find_list_start(entries, in_sublist)
    if entries[1].type == "list_item" then return 1 end
    if in_sublist then
      for i = 1, #entries do
        if entries[i].type == "list_item" then return i end
      end
    else
      for i = 1, #entries - 1 do
        if entries[i].type == "blank" and entries[i + 1].type == "list_item" then
          return i + 1
        end
      end
    end
    return nil
  end

  -- Index of the last line of the list beginning at start. The list ends
  -- before a blank line that is followed by something other than a list
  -- item, an indented line or another blank; trailing blanks are excluded.
  local function find_list_end(entries, start)
    local pos = #entries
    for i = start, #entries - 1 do
      local after = entries[i + 1].type
      if entries[i].type == "blank" and after ~= "list_item"
          and after ~= "indented" and after ~= "blank" then
        pos = i - 1
        break
      end
    end
    while pos > start and entries[pos].type == "blank" do
      pos = pos - 1
    end
    return pos
  end

  -- Range (start, stop) of the first list in the entries, or nil.
  local function find_list(entries, in_sublist)
    local start = find_list_start(entries, in_sublist)
    if not start then return nil end
    return start, find_list_end(entries, start)
  end

  local start, stop = find_list(array, sublist)
  while start do
    local text = process_list(splice(array, start, stop))
    array = splice(array, start, stop, {{line = text, type = "raw", html = text}})
    start, stop = find_list(array, sublist)
  end

  -- Convert any remaining list items to normal
  for _, entry in ipairs(array) do
    if entry.type == "list_item" then entry.type = "normal" end
  end

  return array
end
625 | ||
-- Finds runs of blockquote lines and replaces each run with a single "raw"
-- entry holding the generated <blockquote> html. Returns the line array.
function blockquotes(lines)
  -- Range (start, stop) of the first blockquote, or nil if there is none.
  -- A quote continues over blank and blockquote lines and over normal lines
  -- that directly follow a non-blank line; trailing blanks are excluded.
  local function find_blockquote(entries)
    local start
    for i, entry in ipairs(entries) do
      if entry.type == "blockquote" then
        start = i
        break
      end
    end
    if not start then return nil end

    local stop = #entries
    for i = start + 1, #entries do
      local kind = entries[i].type
      local continues = kind == "blank" or kind == "blockquote"
        or (kind == "normal" and entries[i - 1].type ~= "blank")
      if not continues then
        stop = i - 1
        break
      end
    end
    while entries[stop].type == "blank" do stop = stop - 1 end
    return start, stop
  end

  -- Runs the block transform on the quoted text and wraps the result.
  local function process_blockquote(entries)
    local texts = {}
    for i = 1, #entries do
      texts[i] = entries[i].text
    end
    local bt = block_transform(table.concat(texts, "\n"))
    -- content of <pre> must keep its exact whitespace
    if not bt:find("<pre>") then bt = indent(bt) end
    return "<blockquote>\n " .. bt .. "\n</blockquote>"
  end

  local start, stop = find_blockquote(lines)
  while start do
    local text = process_blockquote(splice(lines, start, stop))
    lines = splice(lines, start, stop, {{line = text, type = "raw", html = text}})
    start, stop = find_blockquote(lines)
  end
  return lines
end
675 | ||
-- Finds runs of indented lines and replaces each run with a single "raw"
-- entry holding the generated <pre><code> html. Returns the line array.
function codeblocks(lines)
  -- Range (start, stop) of the first code block, or nil if there is none.
  -- Blank lines inside the block belong to it; trailing blanks do not.
  local function find_codeblock(entries)
    local start
    for i, entry in ipairs(entries) do
      if entry.type == "indented" then
        start = i
        break
      end
    end
    if not start then return nil end

    local stop = #entries
    for i = start + 1, #entries do
      local kind = entries[i].type
      if kind ~= "indented" and kind ~= "blank" then
        stop = i - 1
        break
      end
    end
    while entries[stop].type == "blank" do stop = stop - 1 end
    return start, stop
  end

  -- Outdents, encodes and detabs every line and wraps the result.
  local function process_codeblock(entries)
    local rendered = {}
    for i = 1, #entries do
      rendered[i] = detab(encode_code(outdent(entries[i].line)))
    end
    return "<pre><code>" .. table.concat(rendered, "\n") .. "\n</code></pre>"
  end

  local start, stop = find_codeblock(lines)
  while start do
    local text = process_codeblock(splice(lines, start, stop))
    lines = splice(lines, start, stop, {{line = text, type = "raw", html = text}})
    start, stop = find_codeblock(lines)
  end
  return lines
end
717 | ||
-- Converts the classified lines to an array of html strings.
-- Consecutive normal lines are joined into one paragraph; when
-- no_paragraphs is set the <p> wrapper is left out. Rulers become <hr/>,
-- raw entries contribute their html, headers become <hN> elements, and any
-- other line type is passed through as its original line.
function blocks_to_html(lines, no_paragraphs)
  local out = {}
  local i = 1
  while i <= #lines do
    local entry = lines[i]
    local kind = entry.type

    if kind == "ruler" then
      table.insert(out, "<hr/>")
    elseif kind == "raw" then
      table.insert(out, entry.html)
    elseif kind == "normal" then
      -- swallow the normal lines that follow into the same paragraph
      local paragraph = entry.line
      while i + 1 <= #lines and lines[i + 1].type == "normal" do
        i = i + 1
        paragraph = paragraph .. "\n" .. lines[i].line
      end

      local html = span_transform(paragraph)
      if not no_paragraphs then
        html = "<p>" .. html .. "</p>"
      end
      table.insert(out, html)
    elseif kind == "header" then
      local level = entry.level
      table.insert(out, "<h" .. level .. ">" .. span_transform(entry.text) .. "</h" .. level .. ">")
    else
      table.insert(out, entry.line)
    end

    i = i + 1
  end
  return out
end
751 | ||
-- Performs all the block level transforms on text and returns the html.
-- The stages run in a fixed order: classify every line, then headers, lists,
-- code blocks, blockquotes, and finally the conversion to html. sublist is
-- handed to lists().
function block_transform(text, sublist)
  local classified = map(split(text), classify)
  local stages = {
    headers,
    function(entries) return lists(entries, sublist) end,
    codeblocks,
    blockquotes,
    function(entries) return blocks_to_html(entries) end,
  }
  for _, stage in ipairs(stages) do
    classified = stage(classified)
  end
  return table.concat(classified, "\n")
end
764 | ||
-- Debug helper: prints index, type and text (or, when there is no text, the
-- original line) of every entry, to inspect the result of partial transforms.
function print_lines(lines)
  for index, entry in ipairs(lines) do
    local shown = entry.text or entry.line
    print(index, entry.type, shown)
  end
end
772 | ||
773 | ---------------------------------------------------------------------- | |
774 | -- Span transform | |
775 | ---------------------------------------------------------------------- | |
776 | ||
777 | -- Functions for transforming the text at the span level. | |
778 | ||
-- These characters may need to be escaped because they have a special
-- meaning in markdown.
escape_chars = "'\\`*_{}[]()>#+-.!'"

-- Maps each escaped string to the placeholder that stands in for it.
escape_table = {}

-- Rebuilds escape_table with one placeholder per character of escape_chars.
-- Hashing must have been initialised first.
function init_escape_table()
  escape_table = {}
  for c in escape_chars:gmatch(".") do
    escape_table[c] = hash(c)
  end
end
791 | ||
-- Registers text in the escape table if it is not there yet and returns the
-- placeholder that stands in for it.
function add_escape(text)
  local placeholder = escape_table[text]
  if not placeholder then
    placeholder = hash(text)
    escape_table[text] = placeholder
  end
  return placeholder
end
799 | ||
-- Escapes characters that should not be disturbed by markdown and returns
-- the escaped text. Inside html tags, * and _ are replaced by their
-- placeholders so they are not mistaken for emphasis markers; outside of
-- tags, backslash escapes are encoded.
function escape_special_chars(text)
  local pieces = {}
  for _, token in ipairs(tokenize_html(text)) do
    local piece = token.text
    if token.type == "tag" then
      piece = piece:gsub("%*", escape_table["*"])
      piece = piece:gsub("%_", escape_table["_"])
    else
      piece = encode_backslash_escapes(piece)
    end
    table.insert(pieces, piece)
  end
  return table.concat(pieces)
end
818 | ||
-- Encodes backslash-escaped characters in the markdown source: every
-- "\c", where c is one of escape_chars, is replaced by the placeholder for c.
function encode_backslash_escapes(t)
  for c in escape_chars:gmatch(".") do
    t = t:gsub("\\%" .. c, escape_table[c])
  end
  return t
end
827 | ||
-- Replaces every escape placeholder in t with the string it stands for.
-- An expansion can itself contain placeholders, so the function calls itself
-- again until a pass leaves the text unchanged.
function unescape_special_chars(t)
  local before = t
  for original, placeholder in pairs(escape_table) do
    -- "%" is special in gsub replacement strings, so double it first
    local replacement = original:gsub("%%", "%%%%")
    t = t:gsub(placeholder, replacement)
  end
  if t == before then
    return t
  end
  return unescape_special_chars(t)
end
838 | ||
-- Encodes/escapes certain characters inside Markdown code runs.
-- In code these characters are literals and lose their special Markdown
-- meanings:
--   * &, < and > become the html entities &amp;, &lt; and &gt; (& goes
--     first so the entities produced for < and > are not encoded again);
--   * every string in escape_table is replaced by its placeholder.
-- Returns the encoded string.
function encode_code(s)
  s = s:gsub("&", "&amp;")
  s = s:gsub("<", "&lt;")
  s = s:gsub(">", "&gt;")
  for k, v in pairs(escape_table) do
    -- Escape every non-alphanumeric character so that k is matched
    -- literally. Keys added through add_escape can be whole html snippets,
    -- whose punctuation would otherwise act as pattern syntax.
    local literal = k:gsub("%W", "%%%0")
    s = s:gsub(literal, v)
  end
  return s
end
851 | ||
-- Handles backtick code spans. A span opens with a run of backticks and
-- closes with an equally long run on the same line. Its content is trimmed,
-- encoded, wrapped in <code> tags and replaced by an escape placeholder so
-- later processing leaves it alone. Returns the processed string.
function code_spans(s)
  local esc_backslash = escape_table["\\"]
  local esc_backtick = escape_table["`"]

  -- hide escaped backslashes and backticks so they cannot open or close a span
  s = s:gsub("\\\\", esc_backslash)
  s = s:gsub("\\`", esc_backtick)

  local pos = 1
  local start, stop = s:find("`+", pos)
  while start do
    -- Find a matching number of backticks
    local fence = string.rep("`", stop - start + 1)
    local estart, estop = s:find(fence, stop + 1)
    local brstart = s:find("\n", stop + 1)

    if estart and (not brstart or estart < brstart) then
      local code = s:sub(stop + 1, estart - 1)
      code = code:gsub("^[ \t]+", "")
      code = code:gsub("[ \t]+$", "")
      -- inside code the hidden escapes stand for their literal two characters
      code = code:gsub(esc_backslash, esc_backslash .. esc_backslash)
      code = code:gsub(esc_backtick, esc_backslash .. esc_backtick)
      code = add_escape("<code>" .. encode_code(code) .. "</code>")
      s = s:sub(1, start - 1) .. code .. s:sub(estop + 1)
      pos = start + code:len()
    else
      -- no closing run on this line: leave the backticks as they are
      pos = stop + 1
    end

    start, stop = s:find("`+", pos)
  end
  return s
end
881 | ||
-- Encode alt text... encodes &, " and < as HTML entities so the result can
-- be placed inside a double-quoted attribute value.
-- A nil (or false) argument is returned unchanged.
function encode_alt(s)
	if not s then return s end
	-- "&" goes first so the entities produced below are not encoded again.
	s = s:gsub('&', '&amp;')
	s = s:gsub('"', '&quot;')
	s = s:gsub('<', '&lt;')
	return s
end
890 | ||
-- Handle image references
-- Rewrites ![alt][id] (looked up in link_database) and ![alt](url "title")
-- into <img> tags. Reference-style images whose id has no URL in
-- link_database are left untouched.
function images(text)
	-- ![alt][id] and ![alt][]
	local function reference_link(alt, id)
		local raw_alt = alt:match("%b[]"):sub(2,-2)
		alt = encode_alt(raw_alt)
		id = id:match("%[(.*)%]"):lower()
		-- An empty id means "use the alt text as the id". The unencoded alt
		-- text is used, not the enclosing document text.
		if id == "" then id = raw_alt:lower() end
		link_database[id] = link_database[id] or {}
		-- Returning nil makes gsub keep the original text.
		if not link_database[id].url then return nil end
		local url = encode_alt(link_database[id].url)
		local title = encode_alt(link_database[id].title)
		if title then title = " title=\"" .. title .. "\"" else title = "" end
		return add_escape ('<img src="' .. url .. '" alt="' .. alt .. '"' .. title .. "/>")
	end

	-- ![alt](url) and ![alt](url "title")
	local function inline_link(alt, link)
		alt = encode_alt(alt:match("%b[]"):sub(2,-2))
		local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
		-- Fall back to an empty URL (as anchors() does) rather than
		-- concatenating nil below.
		url = url or link:match("%(<?(.-)>?%)") or ""
		url = encode_alt(url)
		title = encode_alt(title)
		if title then
			return add_escape('<img src="' .. url .. '" alt="' .. alt .. '" title="' .. title .. '"/>')
		else
			return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>')
		end
	end

	text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
	text = text:gsub("!(%b[])(%b())", inline_link)
	return text
end
923 | ||
-- Handle anchor references
-- Rewrites [label][id] (looked up in link_database) and [label](url "title")
-- into <a> tags. Reference-style links whose id has no URL in link_database
-- are left as they are.
function anchors(text)
	-- [label][id]; an empty id falls back to the lower-cased label.
	local function reference_link(label, ref)
		local inner = label:match("%b[]"):sub(2, -2)
		local key = ref:match("%b[]"):sub(2, -2):lower()
		if key == "" then key = inner:lower() end

		local entry = link_database[key]
		if not entry then
			entry = {}
			link_database[key] = entry
		end
		-- Returning nil tells gsub to keep the matched text.
		if not entry.url then return nil end

		local href = encode_alt(entry.url)
		local title = encode_alt(entry.title)
		local title_attr = ""
		if title then title_attr = " title=\"" .. title .. "\"" end
		return add_escape("<a href=\"" .. href .. "\"" .. title_attr .. ">") .. inner .. add_escape("</a>")
	end

	-- [label](url) and [label](url "title")
	local function inline_link(label, target)
		local inner = label:match("%b[]"):sub(2, -2)
		local href, title = target:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
		title = encode_alt(title)
		if not href then
			href = target:match("%(<?(.-)>?%)") or ""
		end
		href = encode_alt(href)
		if not title then
			return add_escape("<a href=\"" .. href .. "\">") .. inner .. add_escape("</a>")
		end
		-- The closing tag is emitted as-is on this path.
		return add_escape("<a href=\"" .. href .. "\" title=\"" .. title .. "\">") .. inner .. "</a>"
	end

	text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
	text = text:gsub("(%b[])(%b())", inline_link)
	return text
end
956 | ||
-- Handle auto links, i.e. <http://www.google.com/>.
-- http(s) and ftp URLs in angle brackets become plain anchors; mail addresses
-- (with or without a "mailto:" prefix) become anchors whose address and text
-- are written as a mix of character entities.
function auto_links(text)
	local function link(url)
		return add_escape("<a href=\"" .. url .. "\">") .. url .. "</a>"
	end

	-- Encode chars as a mix of dec and hex entities to (perhaps) fool
	-- spambots.
	local function encode_email_address(s)
		-- Use a deterministic encoding to make unit testing possible.
		-- Code 45% hex, 45% dec, 10% plain.
		local hex = {
			count = 1, rate = 0.45,
			code = function(c) return "&#x" .. string.format("%x", c:byte()) .. ";" end,
		}
		local dec = {
			count = 0, rate = 0.45,
			code = function(c) return "&#" .. c:byte() .. ";" end,
		}
		local plain = {
			count = 0, rate = 0.1,
			code = function(c) return c end,
		}
		local codes = {hex, dec, plain}

		-- Moves the entry with the larger count to the front of the pair.
		local function order(i, j)
			if codes[i].count < codes[j].count then
				codes[i], codes[j] = codes[j], codes[i]
			end
		end

		local pieces = {}
		for i = 1, s:len() do
			for _, candidate in ipairs(codes) do
				candidate.count = candidate.count + candidate.rate
			end
			-- Three compare-and-swap steps sort the entries by descending count.
			order(1, 2)
			order(2, 3)
			order(1, 2)

			local chosen = codes[1]
			local c = s:sub(i, i)
			-- Force encoding of "@" to make email address more invisible.
			if c == "@" and chosen == plain then chosen = codes[2] end
			pieces[#pieces + 1] = chosen.code(c)
			chosen.count = chosen.count - 1
		end
		return table.concat(pieces)
	end

	local function mail(s)
		s = unescape_special_chars(s)
		local address = encode_email_address("mailto:" .. s)
		local label = encode_email_address(s)
		return add_escape("<a href=\"" .. address .. "\">") .. label .. "</a>"
	end

	-- Applied in this order: links first, then mail addresses.
	local rules = {
		{"<(https?:[^'\">%s]+)>", link},
		{"<(ftp:[^'\">%s]+)>", link},
		{"<mailto:([^'\">%s]+)>", mail},
		{"<([-.%w]+%@[-.%w]+)>", mail},
	}
	for _, rule in ipairs(rules) do
		text = text:gsub(rule[1], rule[2])
	end
	return text
end
1004 | ||
-- Encode free standing amps (&) and angles (<)... note that this does not
-- encode free >.
-- An "&" is left alone only when it looks like the start of an entity: a ";"
-- follows within 15 characters and no space, tab, newline or further "&"
-- comes before that ";".
function amps_and_angles(s)
	-- encode amps not part of &..; expression
	local pos = 1
	while true do
		local amp = s:find("&", pos)
		if not amp then break end
		local semi = s:find(";", amp+1)
		local stop = s:find("[ \t\n&]", amp+1)
		if not semi or (stop and stop < semi) or (semi - amp) > 15 then
			s = s:sub(1,amp-1) .. "&amp;" .. s:sub(amp+1)
		end
		-- Continue after this "&"; the inserted "amp;" contains no further "&".
		pos = amp+1
	end

	-- encode naked <'s: a "<" that cannot start a tag, and a "<" that ends the text
	s = s:gsub("<([^a-zA-Z/?$!])", "&lt;%1")
	s = s:gsub("<$", "&lt;")

	-- what about >, nothing done in the original markdown source to handle them
	return s
end
1030 | ||
-- Handles emphasis markers (* and _) in the text.
-- Doubled markers are converted to <strong> first, then single markers to <em>.
function emphasis(text)
	-- Replaces delim .. body .. delim by the body capture wrapped in <tag>.
	local function wrap(delim, body, tag)
		text = text:gsub(delim .. body .. delim, "<" .. tag .. ">%1</" .. tag .. ">")
	end

	for _, delim in ipairs({"%*%*", "%_%_"}) do
		wrap(delim, "([^%s][%*%_]?)", "strong")
		wrap(delim, "([^%s][^<>]-[^%s][%*%_]?)", "strong")
	end
	for _, delim in ipairs({"%*", "%_"}) do
		wrap(delim, "([^%s_])", "em")
		wrap(delim, "(<strong>[^%s_]</strong>)", "em")
		wrap(delim, "([^%s_][^<>_]-[^%s_])", "em")
		wrap(delim, "([^<>_]-<strong>[^<>_]-</strong>[^<>_]-)", "em")
	end
	return text
end
1045 | ||
-- Handles line break markers in the text.
-- A line that ends in two or more spaces gets a <br/> before its newline;
-- a single trailing space is not a line break marker.
-- Returns the results of gsub: the new text followed by the substitution count.
function line_breaks(text)
	return text:gsub("  +\n", " <br/>\n")
end
1050 | ||
-- Perform all span level transforms.
-- The passes run in a fixed order, each one receiving the first result of
-- the pass before it.
function span_transform(text)
	local passes = {
		code_spans,
		escape_special_chars,
		images,
		anchors,
		auto_links,
		amps_and_angles,
		emphasis,
		line_breaks,
	}
	for i = 1, #passes do
		text = passes[i](text)
	end
	return text
end
1063 | ||
1064 | ---------------------------------------------------------------------- | |
1065 | -- Markdown | |
1066 | ---------------------------------------------------------------------- | |
1067 | ||
-- Cleanup the text by normalizing some possible variations to make further
-- processing easier. The result always starts and ends with a newline.
function cleanup(text)
	-- Standardize line endings: both "\r\n" (DOS) and a lone "\r" (Mac)
	-- become "\n".
	text = text:gsub("\r\n?", "\n")

	-- Convert all tabs to spaces
	text = detab(text)

	-- Strip lines with only spaces and tabs. One pass cannot catch two such
	-- lines in a row (they share a newline), so repeat until nothing changes.
	local replaced
	repeat
		text, replaced = text:gsub("\n[ \t]+\n", "\n\n")
	until replaced == 0

	return "\n" .. text .. "\n"
end
1087 | ||
-- Strips link definitions from the text and stores the data in a lookup table.
-- Returns the text without the definitions, followed by a table that maps
-- each lower-cased id to {url = ..., title = ...}.
function strip_link_definitions(text)
	local linkdb = {}

	-- gsub callback: records one definition and removes it from the text.
	local function link_def(id, url, title)
		local key = id:match("%[(.+)%]"):lower()
		local entry = linkdb[key]
		if not entry then
			entry = {}
			linkdb[key] = entry
		end
		-- Values already stored are kept when a capture is missing.
		if url then entry.url = url end
		if title then entry.title = title end
		return ""
	end

	-- "[id]: url", optionally preceded by up to three spaces and with the
	-- url optionally wrapped in <>.
	local def_no_title = "\n ? ? ?(%b[]):[ \t]*\n?[ \t]*<?([^%s>]+)>?[ \t]*"
	-- A title wrapped in quotes or parentheses.
	local title = "[\"'(]([^\n]+)[\"')][ \t]*"

	-- The titled variants differ in the separator between url and title;
	-- they are tried before the plain form so that titles are not left behind.
	local patterns = {
		def_no_title .. "[ \t]+\n?[ \t]*" .. title,
		def_no_title .. "[ \t]*\n[ \t]*" .. title,
		def_no_title .. "[ \t]*\n?[ \t]+" .. title,
		def_no_title,
	}
	for _, pattern in ipairs(patterns) do
		text = text:gsub(pattern, link_def)
	end
	return text, linkdb
end
1111 | ||
-- Link definitions of the document being processed, keyed by id.
-- Replaced on every call to markdown().
link_database = {}

-- Main markdown processing function
-- Takes Markdown source text and returns the transformed text.
function markdown(text)
	init_hash(text)
	init_escape_table()

	local cleaned = cleanup(text)
	local protected = protect(cleaned)
	local body, definitions = strip_link_definitions(protected)
	link_database = definitions

	local transformed = block_transform(body)
	local result = unescape_special_chars(transformed)
	return result
end
1126 | ||
1127 | ---------------------------------------------------------------------- | |
1128 | -- End of module | |
1129 | ---------------------------------------------------------------------- | |
1130 | ||
-- Switch the environment of this chunk (stack level 1) back to the global table.
-- NOTE(review): setfenv is a Lua 5.1 function; confirm it is available or
-- shimmed earlier in the file on the Lua version this targets.
setfenv(1, _G)
-- NOTE(review): M and its lock function are defined outside this section;
-- presumably this seals the module table. Verify against the definition.
M:lock()

-- Expose markdown function to the world
markdown = M.markdown
1136 | ||
-- Class for parsing command-line options
local OptionParser = {}
OptionParser.__index = OptionParser

-- Creates a new option parser
function OptionParser:new()
	local o = {short = {}, long = {}}
	setmetatable(o, self)
	return o
end

-- Calls f() whenever a flag with specified short and long name is encountered
function OptionParser:flag(short, long, f)
	local info = {type = "flag", f = f}
	if short then self.short[short] = info end
	if long then self.long[long] = info end
end

-- Calls f(param) whenever a parameter flag with specified short and long name is encountered
function OptionParser:param(short, long, f)
	local info = {type = "param", f = f}
	if short then self.short[short] = info end
	if long then self.long[long] = info end
end

-- Calls f(v) for each non-flag argument
-- The handler is stored in the instance field "arg", which shadows this method
-- on that instance.
function OptionParser:arg(f)
	self.arg = f
end

-- Runs the option parser for the specified set of arguments. Returns true if all arguments
-- were successfully parsed and false otherwise (after printing a message).
--
-- Supported forms: "--long", "--long VALUE", "-s", "-s VALUE", "-sVALUE",
-- combined short flags ("-ab"), and "--" which makes every remaining argument
-- a non-flag argument.
function OptionParser:run(args)
	-- Passes a non-flag argument to the registered handler, if any.
	-- rawget is required: without a registered handler, self.arg would resolve
	-- through __index to the OptionParser.arg method itself.
	local function positional(value)
		local handler = rawget(self, "arg")
		if handler then handler(value) end
	end

	local pos = 1
	while pos <= #args do
		local arg = args[pos]
		if arg == "--" then
			-- Everything after "--" is a non-flag argument, including
			-- values that start with "-".
			for i = pos + 1, #args do
				positional(args[i])
			end
			return true
		end
		if arg:match("^%-%-") then
			local info = self.long[arg:sub(3)]
			if not info then print("Unknown flag: " .. arg) return false end
			if info.type == "flag" then
				info.f()
				pos = pos + 1
			else
				local param = args[pos+1]
				if not param then print("No parameter for flag: " .. arg) return false end
				info.f(param)
				pos = pos+2
			end
		elseif arg:match("^%-") then
			-- One or more short options packed into a single argument.
			for i=2,arg:len() do
				local c = arg:sub(i,i)
				local info = self.short[c]
				if not info then print("Unknown flag: -" .. c) return false end
				if info.type == "flag" then
					info.f()
				else
					local param
					if i == arg:len() then
						-- The value is the next argument; skip over it.
						param = args[pos+1]
						if not param then print("No parameter for flag: -" .. c) return false end
						info.f(param)
						pos = pos + 1
					else
						-- The value is the rest of this argument.
						param = arg:sub(i+1)
						info.f(param)
					end
					break
				end
			end
			pos = pos + 1
		else
			positional(arg)
			pos = pos + 1
		end
	end
	return true
end
1219 | ||
-- Handles the case when markdown is run from the command line
-- arg is the argument table: arg[1..n] are the options and input paths, and
-- arg[0] is read by the --test flag to locate markdown-tests.lua.
-- Each input file is converted and written next to it; if no file, --help or
-- --test was handled, standard input is converted to standard output.
local function run_command_line(arg)
	-- Doubles every "%" so the string is inserted verbatim when it is used as
	-- a gsub replacement ("%" is magic there, and CSS or titles often contain it).
	local function literal(repl)
		return (repl:gsub("%%", "%%%%"))
	end

	-- Generate output for input s given options
	local function run(s, options)
		s = markdown(s)
		if not options.wrap_header then return s end
		local header = ""
		if options.header then
			local f = io.open(options.header) or error("Could not open file: " .. options.header)
			header = f:read("*a")
			f:close()
		else
			header = [[
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=CHARSET" />
<title>TITLE</title>
<link rel="stylesheet" type="text/css" href="STYLESHEET" />
</head>
<body>
]]
			-- Title: explicit option, else the first h1/h2/h3 of the output.
			local title = options.title or s:match("<h1>(.-)</h1>") or s:match("<h2>(.-)</h2>") or
				s:match("<h3>(.-)</h3>") or "Untitled"
			header = header:gsub("TITLE", literal(title))
			if options.inline_style then
				local style = ""
				local f = io.open(options.stylesheet)
				if f then
					style = f:read("*a") f:close()
				else
					error("Could not include style sheet " .. options.stylesheet .. ": File not found")
				end
				header = header:gsub('<link rel="stylesheet" type="text/css" href="STYLESHEET" />',
					literal("<style type=\"text/css\"><!--\n" .. style .. "\n--></style>"))
			else
				header = header:gsub("STYLESHEET", literal(options.stylesheet))
			end
			header = header:gsub("CHARSET", literal(options.charset))
		end
		local footer = "</body></html>"
		if options.footer then
			local f = io.open(options.footer) or error("Could not open file: " .. options.footer)
			footer = f:read("*a")
			f:close()
		end
		return header .. s .. footer
	end

	-- Generate output path name from input path name given options.
	local function outpath(path, options)
		if options.append then return path .. ".html" end
		local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end
		m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end
		return path .. ".html"
	end

	-- Default commandline options
	local options = {
		wrap_header = true,
		header = nil,
		footer = nil,
		charset = "utf-8",
		title = nil,
		stylesheet = "default.css",
		inline_style = false
	}
	local help = [[
Usage: markdown.lua [OPTION] [FILE]
Runs the markdown text markup to HTML converter on each file specified on the
command line. If no files are specified, runs on standard input.

No header:
-n, --no-wrap Don't wrap the output in <html>... tags.
Custom header:
-e, --header FILE Use content of FILE for header.
-f, --footer FILE Use content of FILE for footer.
Generated header:
-c, --charset SET Specifies charset (default utf-8).
-i, --title TITLE Specifies title (default from first <h1> tag).
-s, --style STYLE Specifies style sheet file (default default.css).
-l, --inline-style Include the style sheet file inline in the header.
Generated files:
-a, --append Append .html extension (instead of replacing).
Other options:
-h, --help Print this help text.
-t, --test Run the unit tests.
]]

	-- Cleared as soon as a file, --help or --test has been handled, or parsing fails.
	local run_stdin = true
	local op = OptionParser:new()
	op:flag("n", "no-wrap", function () options.wrap_header = false end)
	op:param("e", "header", function (x) options.header = x end)
	op:param("f", "footer", function (x) options.footer = x end)
	op:param("c", "charset", function (x) options.charset = x end)
	op:param("i", "title", function(x) options.title = x end)
	op:param("s", "style", function(x) options.stylesheet = x end)
	op:flag("l", "inline-style", function(x) options.inline_style = true end)
	op:flag("a", "append", function() options.append = true end)
	op:flag("t", "test", function()
		-- The test file name is derived from this script's own path.
		local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua")
		local f = io.open(n)
		if f then
			f:close() dofile(n)
		else
			error("Cannot find markdown-tests.lua")
		end
		run_stdin = false
	end)
	op:flag("h", "help", function() print(help) run_stdin = false end)
	-- Files are converted as they are encountered, so only the options that
	-- precede a file on the command line apply to it.
	op:arg(function(path)
			local file = io.open(path) or error("Could not open file: " .. path)
			local s = file:read("*a")
			file:close()
			s = run(s, options)
			file = io.open(outpath(path, options), "w") or error("Could not open output file: " .. outpath(path, options))
			file:write(s)
			file:close()
			run_stdin = false
		end
	)

	if not op:run(arg) then
		print(help)
		run_stdin = false
	end

	if run_stdin then
		local s = io.read("*a")
		s = run(s, options)
		io.write(s)
	end
end
1353 | ||
-- If we are being run from the command-line, act accordingly
-- The script counts as "run from the command line" when the global arg table
-- exists and arg[0] (the script name) ends in "markdown.lua". Otherwise the
-- chunk returns the markdown function to whoever loaded it.
-- arg[0] is checked before use so a host that defines arg without a script
-- name does not raise an error here.
if type(arg) == "table" and type(arg[0]) == "string" and arg[0]:find("markdown%.lua$") then
	run_command_line(arg)
else
	return markdown
end