|
1 #!/usr/bin/env lua |
|
2 |
|
3 --[[ |
|
4 # markdown.lua -- version 0.32 |
|
5 |
|
6 <http://www.frykholm.se/files/markdown.lua> |
|
7 |
|
8 **Author:** Niklas Frykholm, <niklas@frykholm.se> |
|
9 **Date:** 31 May 2008 |
|
10 |
|
11 This is an implementation of the popular text markup language Markdown in pure Lua. |
|
12 Markdown can convert documents written in a simple and easy to read text format |
|
13 to well-formatted HTML. For a more thorough description of Markdown and the Markdown |
|
14 syntax, see <http://daringfireball.net/projects/markdown>. |
|
15 |
|
16 The original Markdown source is written in Perl and makes heavy use of advanced |
|
17 regular expression techniques (such as negative look-ahead, etc) which are not available |
|
18 in Lua's simple regex engine. Therefore this Lua port has been rewritten from the ground |
|
19 up. It is probably not completely bug free. If you notice any bugs, please report them to |
|
20 me. A unit test that exposes the error is helpful. |
|
21 |
|
22 ## Usage |
|
23 |
|
24 require "markdown" |
|
25 markdown(source) |
|
26 |
|
27 ``markdown.lua`` exposes a single global function named ``markdown(s)`` which applies the |
|
28 Markdown transformation to the specified string. |
|
29 |
|
30 ``markdown.lua`` can also be used directly from the command line: |
|
31 |
|
32 lua markdown.lua test.md |
|
33 |
|
34 Creates a file ``test.html`` with the converted content of ``test.md``. Run: |
|
35 |
|
36 lua markdown.lua -h |
|
37 |
|
38 For a description of the command-line options. |
|
39 |
|
40 ``markdown.lua`` uses the same license as Lua, the MIT license. |
|
41 |
|
42 ## License |
|
43 |
|
44 Copyright © 2008 Niklas Frykholm. |
|
45 |
|
46 Permission is hereby granted, free of charge, to any person obtaining a copy of this |
|
47 software and associated documentation files (the "Software"), to deal in the Software |
|
48 without restriction, including without limitation the rights to use, copy, modify, merge, |
|
49 publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons |
|
50 to whom the Software is furnished to do so, subject to the following conditions: |
|
51 |
|
52 The above copyright notice and this permission notice shall be included in all copies |
|
53 or substantial portions of the Software. |
|
54 |
|
55 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
56 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
57 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
58 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
59 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
60 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
|
61 THE SOFTWARE. |
|
62 |
|
63 ## Version history |
|
64 |
|
65 - **0.32** -- 31 May 2008 |
|
66 - Fix for links containing brackets |
|
67 - **0.31** -- 1 Mar 2008 |
|
68 - Fix for link definitions followed by spaces |
|
69 - **0.30** -- 25 Feb 2008 |
|
70 - Consistent behavior with Markdown when the same link reference is reused |
|
71 - **0.29** -- 24 Feb 2008 |
|
72 - Fix for <pre> blocks with spaces in them |
|
73 - **0.28** -- 18 Feb 2008 |
|
74 - Fix for link encoding |
|
75 - **0.27** -- 14 Feb 2008 |
|
76 - Fix for link database links with () |
|
77 - **0.26** -- 06 Feb 2008 |
|
78 - Fix for nested italic and bold markers |
|
79 - **0.25** -- 24 Jan 2008 |
|
80 - Fix for encoding of naked < |
|
81 - **0.24** -- 21 Jan 2008 |
|
82 - Fix for link behavior. |
|
83 - **0.23** -- 10 Jan 2008 |
|
84 - Fix for a regression bug in longer expressions in italic or bold. |
|
85 - **0.22** -- 27 Dec 2007 |
|
86 - Fix for crash when processing blocks with a percent sign in them. |
|
87 - **0.21** -- 27 Dec 2007 |
|
88 - Fix for combined strong and emphasis tags |
|
89 - **0.20** -- 13 Oct 2007 |
|
90 - Fix for < as well in image titles, now matches Dingus behavior |
|
91 - **0.19** -- 28 Sep 2007 |
|
92 - Fix for quotation marks " and ampersands & in link and image titles. |
|
93 - **0.18** -- 28 Jul 2007 |
|
94 - Does not crash on unmatched tags (behaves like standard markdown) |
|
95 - **0.17** -- 12 Apr 2007 |
|
96 - Fix for links with %20 in them. |
|
97 - **0.16** -- 12 Apr 2007 |
|
98 - Do not require arg global to exist. |
|
99 - **0.15** -- 28 Aug 2006 |
|
100 - Better handling of links with underscores in them. |
|
101 - **0.14** -- 22 Aug 2006 |
|
102 - Bug for *`foo()`* |
|
103 - **0.13** -- 12 Aug 2006 |
|
104 - Added -l option for including stylesheet inline in document. |
|
105 - Fixed bug in -s flag. |
|
106 - Fixed emphasis bug. |
|
107 - **0.12** -- 15 May 2006 |
|
108 - Fixed several bugs to comply with MarkdownTest 1.0 <http://six.pairlist.net/pipermail/markdown-discuss/2004-December/000909.html> |
|
109 - **0.11** -- 12 May 2006 |
|
110 - Fixed bug for escaping `*` and `_` inside code spans. |
|
111 - Added license terms. |
|
112 - Changed join() to table.concat(). |
|
113 - **0.10** -- 3 May 2006 |
|
114 - Initial public release. |
|
115 |
|
116 // Niklas |
|
117 ]] |
|
118 |
|
119 |
|
-- Create the module environment. After setfenv, every non-local name assigned
-- in this chunk becomes a field of M instead of a true global, while reads
-- that miss in M fall through to _G via the metatable.
local MT = {__index = _G}
local M = setmetatable({}, MT)
setfenv(1, M)
|
125 |
|
126 ---------------------------------------------------------------------- |
|
127 -- Utility functions |
|
128 ---------------------------------------------------------------------- |
|
129 |
|
-- Locks table t against the creation of new fields: assigning to a key that
-- does not already exist in t raises an error attributed to the caller.
-- This is useful for detecting variables that have accidentally been made
-- global. If t already has a metatable, its __index is carried over so that
-- existing lookups keep working. Existing fields can still be reassigned.
function lock(t)
	-- Kept local so that calling lock() does not itself create a new field
	-- in the module environment.
	local function lock_new_index(_, k)
		-- tostring() keeps the message well-formed for non-string keys.
		error("module has been locked -- " .. tostring(k) .. " must be declared local", 2)
	end

	local mt = {__newindex = lock_new_index}
	local previous = getmetatable(t)
	if previous then mt.__index = previous.__index end
	setmetatable(t, mt)
end
|
142 |
|
-- Builds and returns a new table in which every key of t is associated with
-- f(value, key). The input table is not modified.
function map(t, f)
	local mapped = {}
	for key, value in pairs(t) do
		mapped[key] = f(value, key)
	end
	return mapped
end
|
149 |
|
-- Placeholder transform: hands its argument back unchanged.
function identity(text)
	return text
end
|
152 |
|
-- Expression-style conditional: yields a when t is truthy and b otherwise.
-- Both a and b are evaluated by the caller before the call, so there is no
-- short-circuit evaluation.
function iff(t, a, b)
	if not t then return b end
	return a
end
|
155 |
|
-- Splits text into an array of the pieces found between occurrences of sep.
-- sep is a Lua pattern (not a plain string) and defaults to "\n". The result
-- always has at least one element; a trailing separator yields a final "".
-- Raises an error if sep matches the empty string, because the scan position
-- could then never advance and the loop would not terminate.
function split(text, sep)
	sep = sep or "\n"
	local lines = {}
	local pos = 1
	while true do
		local b, e = text:find(sep, pos)
		if not b then
			lines[#lines + 1] = text:sub(pos)
			break
		end
		-- An empty match reports e == b - 1.
		if e < b then
			error("split: separator pattern must not match the empty string", 2)
		end
		lines[#lines + 1] = text:sub(pos, b - 1)
		pos = e + 1
	end
	return lines
end
|
169 |
|
-- Replaces every tab with one to four spaces. The amount is chosen so that
-- the text captured before the tab (back to the line start or the previous
-- tab) plus the padding is a multiple of four characters long.
function detab(text)
	local tab_width = 4
	local expanded = text:gsub("([^\n]-)\t", function(prefix)
		-- Padding that completes the current tab stop; always in 1..tab_width.
		local pad = tab_width - prefix:len() % tab_width
		return prefix .. string.rep(" ", pad)
	end)
	return expanded
end
|
181 |
|
-- Runs string.find(s, pattern, index) for each entry of patterns and returns
-- the complete result (start, stop, captures...) of the match that begins
-- earliest in s. When two patterns start at the same position the one listed
-- first wins. Returns nothing if no pattern matches.
function find_first(s, patterns, index)
	local best = {}
	for _, pattern in ipairs(patterns) do
		local found = {s:find(pattern, index)}
		local matched = #found > 0
		if matched and (#best == 0 or found[1] < best[1]) then
			best = found
		end
	end
	return unpack(best)
end
|
191 |
|
-- With a replacement array: removes the elements start..stop from array (in
-- place), inserts the elements of replacement at that position in their
-- original order, and returns array.
-- Without a replacement: returns a new array holding array[start..stop];
-- array itself is left untouched.
function splice(array, start, stop, replacement)
	if replacement then
		for _ = start, stop do
			table.remove(array, start)
		end
		for i, v in ipairs(replacement) do
			-- Offset by i so that later elements land after earlier ones;
			-- inserting each one at `start` would reverse the replacement.
			table.insert(array, start + i - 1, v)
		end
		return array
	end

	local res = {}
	for i = start, stop do
		res[#res + 1] = array[i]
	end
	return res
end
|
214 |
|
-- Outdents the text one step: removes up to four leading spaces from every
-- line. Four is the indentation step used for indented lines elsewhere in
-- this file (classify drops four characters from an indented line), so a
-- full step has to be removable here as well.
function outdent(text)
	-- Prefix a newline so the first line is handled by the same pattern as
	-- all following lines; it is stripped again before returning.
	text = "\n" .. text
	text = text:gsub("\n ? ? ? ?", "\n")
	return text:sub(2)
end
|
222 |
|
-- Indents the text one step by inserting the indentation after every newline.
-- The first line is not changed.
-- NOTE(review): one step here is a single space, whereas other code in this
-- file works in four-column steps -- confirm the intended width.
function indent(text)
	local indented = text:gsub("\n", "\n ")
	return indented
end
|
228 |
|
-- Performs a simple tokenization of html data and returns a list of tokens.
-- Each token is a table {type = "tag" | "text", text = <original data>}.
-- A tag starts at "<!--", "<?" or "<" followed by a lowercase letter, "/",
-- "!" or "$". Comments run to "-->", "<?" sections to "?>", everything else
-- to the balancing ">". A "<" whose end cannot be found is emitted as a
-- one-character text token. The list always ends with a text token holding
-- whatever follows the last tag (possibly the empty string).
function tokenize_html(html)
	local tokens = {}
	local openers = {"<!%-%-", "<[a-z/!$]", "<%?"}
	local pos = 1
	while true do
		local start = find_first(html, openers, pos)
		if not start then
			tokens[#tokens + 1] = {type = "text", text = html:sub(pos)}
			return tokens
		end
		if start ~= pos then
			tokens[#tokens + 1] = {type = "text", text = html:sub(pos, start - 1)}
		end

		-- Pick the terminator that belongs to the kind of tag found.
		local closer
		if html:match("^<!%-%-", start) then
			closer = "%-%->"
		elseif html:match("^<%?", start) then
			closer = "?>"
		else
			closer = "%b<>"
		end

		local _, stop = html:find(closer, start)
		if stop then
			tokens[#tokens + 1] = {type = "tag", text = html:sub(start, stop)}
			pos = stop + 1
		else
			-- Unterminated tag: pass the "<" through as text and carry on.
			tokens[#tokens + 1] = {type = "text", text = html:sub(start, start)}
			pos = start + 1
		end
	end
end
|
262 |
|
263 ---------------------------------------------------------------------- |
|
264 -- Hash |
|
265 ---------------------------------------------------------------------- |
|
266 |
|
-- "Hashing" maps a piece of data to an alphanumeric token that is unique in
-- the document. (This is not a cryptographic hash and it is not one-way.)
-- The tokens stand in for parts of the document that must be protected from
-- further processing.
local HASH = {
	-- Maps already hashed data to its token.
	table = {},

	-- Number of tokens handed out so far.
	counter = 0,

	-- Prefix shared by all tokens; chosen so that it does not accidentally
	-- coincide with a string that exists in the document.
	identifier = "",

	-- Set once init_hash has run.
	inited = false
}

-- Resets the hash state and picks a token prefix "HASH<n>", using the
-- smallest n >= 0 for which the prefix does not occur anywhere in text.
function init_hash(text)
	HASH.inited = true
	HASH.identifier = ""
	HASH.counter = 0
	HASH.table = {}

	local n = 0
	-- Plain (non-pattern) search for each candidate prefix.
	while text:find("HASH" .. n, 1, true) do
		n = n + 1
	end
	HASH.identifier = "HASH" .. n
end

-- Returns the token for s. The same s always yields the same token until the
-- next init_hash call. Fails an assertion if init_hash has not been called.
function hash(s)
	assert(HASH.inited)
	local id = HASH.table[s]
	if not id then
		HASH.counter = HASH.counter + 1
		id = HASH.identifier .. HASH.counter .. "X"
		HASH.table[s] = id
	end
	return id
end
|
317 |
|
318 ---------------------------------------------------------------------- |
|
319 -- Protection |
|
320 ---------------------------------------------------------------------- |
|
321 |
|
-- The protection module "protects" parts of a document so that they are not
-- modified by subsequent processing steps. Protected parts are saved in a
-- table so that they can be restored later.

-- Protection data
local PD = {
	-- Block level tags that will be protected
	tags = {
		"p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
		"pre", "table", "dl", "ol", "ul", "script", "noscript", "form",
		"fieldset", "iframe", "math", "ins", "del"
	},

	-- Saved blocks, keyed by the token that replaced them in the text
	blocks = {}
}
|
336 |
|
-- Builds the pattern for a block tag whose opening and closing tags both
-- start in the leftmost column. Anything may sit in between, including
-- indented nested tags of the same kind, i.e.
--     <div>
--         A nested block.
--         <div>
--             Nested data.
--         </div>
--     </div>
-- The match starts at the newline before the opening tag and ends at the
-- newline after the closing tag (trailing spaces and tabs allowed).
function block_pattern(tag)
	return table.concat({"\n<", tag, ".-\n</", tag, ">[ \t]*\n"})
end
|
348 |
|
-- Builds the pattern for a block tag that opens right after a newline and
-- whose closing tag is followed only by spaces or tabs up to the next
-- newline. The closing tag does not have to start in the leftmost column.
function line_pattern(tag)
	local opening = "\n<" .. tag
	local closing = "</" .. tag .. ">[ \t]*\n"
	return opening .. ".-" .. closing
end
|
353 |
|
-- Protects the characters start..stop of text: they are saved in PD.blocks
-- under their hash token, and the token is placed in the text. Returns the
-- new text.
-- The characters at start and stop themselves are kept in the text on either
-- side of the token (and are also part of the saved block).
function protect_range(text, start, stop)
	local original = text:sub(start, stop)
	local token = hash(original)
	PD.blocks[token] = original
	return text:sub(1, start) .. token .. text:sub(stop)
end
|
363 |
|
-- Repeatedly protects the earliest match of any of the patterns until none
-- of them matches the text any more. Returns the resulting text.
function protect_matches(text, patterns)
	local start, stop = find_first(text, patterns)
	while start do
		text = protect_range(text, start, stop)
		start, stop = find_first(text, patterns)
	end
	return text
end
|
374 |
|
-- Protects block level tags in the specified text. The passes run in a fixed
-- order: potentially nested block tags first, then block tags at the line
-- level, then <hr> tags, and finally comments.
function protect(text)
	local passes = {
		map(PD.tags, block_pattern),
		map(PD.tags, line_pattern),
		{"\n<hr[^>]->[ \t]*\n"},
		{"\n<!%-%-.-%-%->[ \t]*\n"},
	}
	for _, patterns in ipairs(passes) do
		text = protect_matches(text, patterns)
	end
	return text
end
|
386 |
|
-- Tells whether s is a token produced by protection. The result is the saved
-- block itself (truthy) when s is such a token and nil otherwise, rather
-- than a strict boolean.
function is_protected(s)
	local saved = PD.blocks[s]
	return saved
end
|
391 |
|
-- Unprotects the specified text by replacing every protection token that
-- occurs in it with the block saved for that token.
function unprotect(text)
	for token, saved in pairs(PD.blocks) do
		-- A function replacement inserts the saved block verbatim, so "%"
		-- characters in it are not treated as capture references.
		text = text:gsub(token, function() return saved end)
	end
	return text
end
|
400 |
|
401 |
|
402 ---------------------------------------------------------------------- |
|
403 -- Block transform |
|
404 ---------------------------------------------------------------------- |
|
405 |
|
406 -- The block transform functions transform the text on the block level. |
|
407 -- They work with the text as an array of lines rather than as individual |
|
408 -- characters. |
|
409 |
|
-- Returns true if the line is a ruler made of (char) characters: it must
-- consist of nothing but spaces and char, and contain char at least three
-- times. Returns false otherwise.
function is_ruler_of(line, char)
	-- char is always used escaped, since ruler characters are pattern magic.
	local c = "%" .. char
	if line:match("^[ " .. c .. "]*$") == nil then return false end
	return line:match(c .. ".*" .. c .. ".*" .. c) ~= nil
end
|
418 |
|
-- Identifies the block level formatting present in the line and returns an
-- info table describing it. Every info table has `line` (the original line),
-- `text` (the line with block markers removed, where applicable) and `type`,
-- which is one of:
--   "indented"   -- starts with four spaces; `outdented` holds the rest
--   "ruler"      -- see is_ruler_of; `ruler_char` is one of * - _ =
--   "blank"      -- the empty string
--   "header"     -- "#" style header; `level` is the number of leading #
--   "list_item"  -- `list_type` is "numeric" (with `number`) or "bullet"
--                   (with `bullet`)
--   "blockquote" -- starts with ">"
--   "raw"        -- the line is a protection token; `html` is its content
--   "normal"     -- anything else
-- The checks run in the order listed and the first one that applies wins.
function classify(line)
	local info = {line = line, text = line}

	-- A full indentation step is four spaces, matching the four characters
	-- dropped below. Lines with fewer leading spaces fall through to the list
	-- patterns, which accept up to three.
	if line:match("^    ") then
		info.type = "indented"
		info.outdented = line:sub(5)
		return info
	end

	for _, c in ipairs({'*', '-', '_', '='}) do
		if is_ruler_of(line, c) then
			info.type = "ruler"
			info.ruler_char = c
			return info
		end
	end

	if line == "" then
		info.type = "blank"
		return info
	end

	-- "## Title ##": leading # give the level, trailing # are decoration.
	local hashes, title = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$")
	if hashes then
		info.type = "header"
		info.level = hashes:len()
		info.text = title
		return info
	end

	local number, numbered_text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)")
	if number then
		info.type = "list_item"
		info.list_type = "numeric"
		info.number = tonumber(number)
		info.text = numbered_text
		return info
	end

	local bullet, bullet_text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)")
	if bullet then
		info.type = "list_item"
		info.list_type = "bullet"
		info.bullet = bullet
		info.text = bullet_text
		return info
	end

	-- The capture may be the empty string, which is still truthy.
	local quoted = line:match("^>[ \t]?(.*)")
	if quoted then
		info.type = "blockquote"
		info.text = quoted
		return info
	end

	if is_protected(line) then
		info.type = "raw"
		info.html = unprotect(line)
		return info
	end

	info.type = "normal"
	return info
end
|
483 |
|
-- Finds headers consisting of a normal line followed by a ruler of "=" or
-- "-" characters and converts each such pair into a single header entry:
-- "=" gives level 1 and "-" gives level 2. The ruler entry is removed.
-- The array is modified in place and returned.
function headers(array)
	local i = 1
	while i < #array do
		local current, following = array[i], array[i + 1]
		local underline = following.type == "ruler" and following.ruler_char
		if current.type == "normal" and (underline == "-" or underline == "=") then
			table.remove(array, i + 1)
			array[i] = {
				line = current.line,
				text = current.line,
				type = "header",
				level = (underline == "=") and 1 or 2,
			}
		end
		i = i + 1
	end
	return array
end
|
502 |
|
-- Finds list blocks in the array of classified lines and replaces each one
-- with a single "raw" entry holding the generated <ol>/<ul> html. With
-- sublist set, a list may start at any list item; otherwise it must start on
-- the first line or directly after a blank line. List items that did not end
-- up in a list are turned into normal lines. Returns the array.
function lists(array, sublist)
	-- True if any entry of the list is a blank line. Such a list is rendered
	-- in "block" mode, where items go through the full block transform.
	local function has_blank(entries)
		for i = 1, #entries do
			if entries[i].type == "blank" then return true end
		end
		return false
	end

	-- Groups the entries per list item: each group starts with a list item
	-- (the first group with entries[1]) followed by its continuation lines.
	local function group_items(entries)
		local groups = {}
		local current = {entries[1]}
		for i = 2, #entries do
			local entry = entries[i]
			if entry.type == "list_item" then
				groups[#groups + 1] = current
				current = {entry}
			else
				current[#current + 1] = entry
			end
		end
		groups[#groups + 1] = current
		return groups
	end

	-- Renders the lines of one list item as an <li> element.
	local function render_item(item, block)
		-- Trailing blank lines do not belong to the item.
		while item[#item].type == "blank" do
			table.remove(item)
		end

		-- The first line contributes its text without the list marker, the
		-- continuation lines are outdented one step.
		local pieces = {item[1].text}
		for i = 2, #item do
			pieces[#pieces + 1] = outdent(item[i].line)
		end
		local itemtext = table.concat(pieces, "\n")

		if block then
			itemtext = block_transform(itemtext, true)
		else
			-- Only look for nested lists; no paragraphs are generated.
			local sub = map(split(itemtext), classify)
			sub = lists(sub, true)
			sub = blocks_to_html(sub, true)
			itemtext = table.concat(sub, "\n")
		end
		-- Text containing a <pre> tag is left alone so that its content is
		-- not altered by the indentation.
		if not itemtext:find("<pre>") then itemtext = indent(itemtext) end
		return " <li>" .. itemtext .. "</li>"
	end

	-- Renders all entries of one list as an <ol> or <ul> element, depending
	-- on the kind of the first item.
	local function render_list(entries)
		local block = has_blank(entries)
		local rendered = {}
		for _, item in ipairs(group_items(entries)) do
			rendered[#rendered + 1] = render_item(item, block) .. "\n"
		end
		local body = table.concat(rendered)
		if entries[1].list_type == "numeric" then
			return "<ol>\n" .. body .. "</ol>"
		end
		return "<ul>\n" .. body .. "</ul>"
	end

	-- Index of the first line of the first list, or nil if there is none.
	local function list_start(entries)
		if entries[1].type == "list_item" then return 1 end
		if sublist then
			for i = 1, #entries do
				if entries[i].type == "list_item" then return i end
			end
		else
			for i = 2, #entries do
				if entries[i - 1].type == "blank" and entries[i].type == "list_item" then
					return i
				end
			end
		end
		return nil
	end

	-- Index of the last line of the list starting at first. The list ends
	-- before the first blank line that is followed by something other than a
	-- list item, an indented line or another blank line; trailing blank lines
	-- are not part of the list.
	local function list_end(entries, first)
		local last = #entries
		for i = first, #entries - 1 do
			local following = entries[i + 1].type
			if entries[i].type == "blank" and following ~= "list_item"
				and following ~= "indented" and following ~= "blank" then
				last = i - 1
				break
			end
		end
		while last > first and entries[last].type == "blank" do
			last = last - 1
		end
		return last
	end

	while true do
		local first = list_start(array)
		if not first then break end
		local last = list_end(array, first)
		local html = render_list(splice(array, first, last))
		array = splice(array, first, last, {{line = html, type = "raw", html = html}})
	end

	-- Convert any remaining list items to normal
	for _, entry in ipairs(array) do
		if entry.type == "list_item" then entry.type = "normal" end
	end

	return array
end
|
625 |
|
-- Finds runs of blockquote lines and replaces each run with a single "raw"
-- entry holding the generated <blockquote> html. Returns the array of lines.
function blockquotes(lines)
	-- Returns the first and last index of the first blockquote, or nil if
	-- there is none. The quote continues over blockquote and blank lines, and
	-- over normal lines unless they directly follow a blank line; any other
	-- line type ends it. Trailing blank lines are excluded.
	local function quote_range(entries)
		local first
		for i, entry in ipairs(entries) do
			if entry.type == "blockquote" then
				first = i
				break
			end
		end
		if not first then return nil end

		local last = #entries
		for i = first + 1, #entries do
			local kind = entries[i].type
			if kind == "normal" then
				if entries[i - 1].type == "blank" then
					last = i - 1
					break
				end
			elseif kind ~= "blank" and kind ~= "blockquote" then
				last = i - 1
				break
			end
		end
		while entries[last].type == "blank" do
			last = last - 1
		end
		return first, last
	end

	-- Joins the text of the quoted lines, runs the block transform on it and
	-- wraps the result in a <blockquote> element.
	local function render_quote(entries)
		local texts = {}
		for i = 1, #entries do
			texts[i] = entries[i].text
		end
		local html = block_transform(table.concat(texts, "\n"))
		if not html:find("<pre>") then html = indent(html) end
		return "<blockquote>\n " .. html ..
			"\n</blockquote>"
	end

	while true do
		local first, last = quote_range(lines)
		if not first then break end
		local html = render_quote(splice(lines, first, last))
		lines = splice(lines, first, last, {{line = html, type = "raw", html = html}})
	end
	return lines
end
|
675 |
|
-- Finds runs of indented lines and replaces each run with a single "raw"
-- entry holding the generated <pre><code> html. Returns the array of lines.
function codeblocks(lines)
	-- Returns the first and last index of the first code block, or nil if
	-- there is none. The block extends over indented and blank lines;
	-- trailing blank lines are excluded.
	local function code_range(entries)
		local first
		for i, entry in ipairs(entries) do
			if entry.type == "indented" then
				first = i
				break
			end
		end
		if not first then return nil end

		local last = #entries
		for i = first + 1, #entries do
			local kind = entries[i].type
			if kind ~= "indented" and kind ~= "blank" then
				last = i - 1
				break
			end
		end
		while entries[last].type == "blank" do
			last = last - 1
		end
		return first, last
	end

	-- Outdents, encodes and detabs every line and wraps the result in
	-- <pre><code>.
	local function render_code(entries)
		local rows = {}
		for i = 1, #entries do
			rows[i] = detab(encode_code(outdent(entries[i].line)))
		end
		return "<pre><code>" .. table.concat(rows, "\n") .. "\n</code></pre>"
	end

	while true do
		local first, last = code_range(lines)
		if not first then break end
		local html = render_code(splice(lines, first, last))
		lines = splice(lines, first, last, {{line = html, type = "raw", html = html}})
	end
	return lines
end
|
717 |
|
-- Converts classified lines to an array of html strings:
--   ruler  -> "<hr/>"
--   raw    -> the stored html
--   normal -> consecutive normal lines are joined with newlines and span
--             transformed; wrapped in <p> unless no_paragraphs is set
--   header -> <hN> around the span transformed text
-- Any other line is passed through as is.
function blocks_to_html(lines, no_paragraphs)
	local out = {}
	local i = 1
	while i <= #lines do
		local line = lines[i]
		local kind = line.type
		if kind == "ruler" then
			table.insert(out, "<hr/>")
		elseif kind == "raw" then
			table.insert(out, line.html)
		elseif kind == "normal" then
			-- Collect the whole run of normal lines into one paragraph.
			local paragraph = {line.line}
			while i < #lines and lines[i + 1].type == "normal" do
				i = i + 1
				paragraph[#paragraph + 1] = lines[i].line
			end
			local s = table.concat(paragraph, "\n")

			if no_paragraphs then
				table.insert(out, span_transform(s))
			else
				table.insert(out, "<p>" .. span_transform(s) .. "</p>")
			end
		elseif kind == "header" then
			local open = "<h" .. line.level .. ">"
			local close = "</h" .. line.level .. ">"
			table.insert(out, open .. span_transform(line.text) .. close)
		else
			table.insert(out, line.line)
		end
		i = i + 1
	end
	return out
end
|
751 |
|
-- Performs all the block level transforms on text and returns the resulting
-- html. The stages run in this order: split into lines, classify, headers,
-- lists (honouring sublist), codeblocks, blockquotes, html generation.
function block_transform(text, sublist)
	local classified = map(split(text), classify)
	local with_lists = lists(headers(classified), sublist)
	local with_quotes = blockquotes(codeblocks(with_lists))
	local html = blocks_to_html(with_quotes)
	return table.concat(html, "\n")
end
|
764 |
|
-- Debug helper: prints index, type and text (or, when there is no text, the
-- raw line) of every entry in a line array, to inspect the result of partial
-- transforms.
function print_lines(lines)
	for index, entry in ipairs(lines) do
		local shown = entry.text or entry.line
		print(index, entry.type, shown)
	end
end
|
772 |
|
773 ---------------------------------------------------------------------- |
|
774 -- Span transform |
|
775 ---------------------------------------------------------------------- |
|
776 |
|
777 -- Functions for transforming the text at the span level. |
|
778 |
|
-- escape_table maps each escaped item to the token that replaces it; it is
-- filled by init_escape_table and add_escape.
escape_table = {}

-- The characters that may need to be escaped because they have a special
-- meaning in markdown.
escape_chars = "'\\`*_{}[]()>#+-.!'"
|
783 |
|
-- Rebuilds escape_table from scratch with one hash token for every
-- character in escape_chars.
function init_escape_table()
	escape_table = {}
	for c in escape_chars:gmatch(".") do
		escape_table[c] = hash(c)
	end
end
|
791 |
|
-- Returns the escape token for text, registering text in the escape table
-- first if it is not there yet.
function add_escape(text)
	local token = escape_table[text]
	if token then return token end
	token = hash(text)
	escape_table[text] = token
	return token
end
|
799 |
|
-- Escapes characters that should not be disturbed by markdown. Inside html
-- tags, * and _ are replaced with their escape tokens so that they do not
-- conflict with their use in markdown; in the text between tags,
-- backslash escapes are encoded.
function escape_special_chars(text)
	local pieces = {}
	for i, token in ipairs(tokenize_html(text)) do
		local t = token.text
		if token.type == "tag" then
			t = t:gsub("%*", escape_table["*"])
			t = t:gsub("%_", escape_table["_"])
		else
			t = encode_backslash_escapes(t)
		end
		pieces[i] = t
	end
	return table.concat(pieces)
end
|
818 |
|
-- Encodes backslash-escaped characters in the markdown source: a backslash
-- followed by one of the characters in escape_chars is replaced with the
-- escape token of that character.
function encode_backslash_escapes(t)
	for c in escape_chars:gmatch(".") do
		t = t:gsub("\\%" .. c, escape_table[c])
	end
	return t
end
|
827 |
|
-- Restores everything that has been replaced with an escape token. Restored
-- text may itself contain tokens, so full passes over the escape table are
-- repeated until a pass no longer changes the text.
function unescape_special_chars(t)
	local before
	repeat
		before = t
		for original, token in pairs(escape_table) do
			-- "%" must be doubled to stand for itself in a replacement string.
			t = t:gsub(token, (original:gsub("%%", "%%%%")))
		end
	until t == before
	return t
end
|
838 |
|
-- Encodes/escapes certain characters inside Markdown code runs. In code
-- these characters are literals and lose their special Markdown meaning:
-- &, < and > become html entities, and every item registered in
-- escape_table is replaced with its escape token.
function encode_code(s)
	-- Ampersands go first so that the entities produced below are not
	-- encoded a second time.
	s = s:gsub("&", "&amp;")
	s = s:gsub("<", "&lt;")
	s = s:gsub(">", "&gt;")
	for k, v in pairs(escape_table) do
		-- Escape every non-alphanumeric character so that the key is matched
		-- literally. Keys can be longer than one character (see add_escape);
		-- used as raw patterns they could be malformed and raise an error.
		s = s:gsub((k:gsub("%W", "%%%0")), v)
	end
	return s
end
|
851 |
|
-- Handles backtick blocks: text between an opening run of backticks and a
-- run of the same number of backticks later on the same line becomes a
-- <code> element, which is stored in the escape table and represented in the
-- result by its token. Backslash-escaped backslashes and backticks are
-- replaced with their escape tokens first.
function code_spans(s)
	local esc_backslash = escape_table["\\"]
	local esc_backtick = escape_table["`"]
	s = s:gsub("\\\\", esc_backslash)
	s = s:gsub("\\`", esc_backtick)

	local pos = 1
	while true do
		local start, stop = s:find("`+", pos)
		if not start then return s end

		-- Look for a closing run with the same number of backticks; it only
		-- counts if it comes before the next line break.
		local fence = string.rep("`", stop - start + 1)
		local estart, estop = s:find(fence, stop + 1)
		local brstart = s:find("\n", stop + 1)
		local closes_on_line = estart and (not brstart or estart < brstart)

		if not closes_on_line then
			pos = stop + 1
		else
			local code = s:sub(stop + 1, estart - 1)
			code = code:gsub("^[ \t]+", "")
			code = code:gsub("[ \t]+$", "")
			-- Inside code the backslash is a literal: an escaped backslash or
			-- backtick stands for both of the characters that were typed.
			code = code:gsub(esc_backslash, esc_backslash .. esc_backslash)
			code = code:gsub(esc_backtick, esc_backslash .. esc_backtick)
			local token = add_escape("<code>" .. encode_code(code) .. "</code>")
			s = s:sub(1, start - 1) .. token .. s:sub(estop + 1)
			pos = start + token:len()
		end
	end
end
|
881 |
|
-- Encodes text for use inside a double-quoted html attribute (alt text,
-- titles, urls): &, " and < are replaced with html entities. A nil or false
-- argument is returned unchanged.
function encode_alt(s)
	if not s then return s end
	-- Ampersands go first so that the entities produced below are not
	-- encoded a second time.
	s = s:gsub("&", "&amp;")
	s = s:gsub('"', "&quot;")
	s = s:gsub("<", "&lt;")
	return s
end
|
890 |
|
-- Handles image references and returns the text with every recognised image
-- replaced by the escape token of its <img> tag. Two forms are supported:
--   ![alt][id]              reference style, looked up in link_database;
--                           ![alt][] uses the alt text as the id
--   ![alt](url "title")     inline style, the title is optional
-- A reference whose id has no url in link_database is left untouched.
function images(text)
	local function reference_link(alt, id)
		local raw_alt = alt:match("%b[]"):sub(2, -2)
		alt = encode_alt(raw_alt)
		id = id:match("%[(.*)%]"):lower()
		-- Implicit reference: the id is the (unencoded) alt text, not the
		-- document text that the enclosing function received.
		if id == "" then id = raw_alt:lower() end
		link_database[id] = link_database[id] or {}
		local entry = link_database[id]
		-- Returning nil makes gsub keep the original text.
		if not entry.url then return nil end
		local url = encode_alt(entry.url)
		local title = encode_alt(entry.title)
		if title then title = " title=\"" .. title .. "\"" else title = "" end
		return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"' .. title .. "/>")
	end

	local function inline_link(alt, link)
		alt = encode_alt(alt:match("%b[]"):sub(2, -2))
		-- Try the form with a quoted title first, then the bare url.
		local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
		url = url or link:match("%(<?(.-)>?%)") or ""
		url = encode_alt(url)
		title = encode_alt(title)
		if title then
			return add_escape('<img src="' .. url .. '" alt="' .. alt .. '" title="' .. title .. '"/>')
		else
			return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>')
		end
	end

	text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
	text = text:gsub("!(%b[])(%b())", inline_link)
	return text
end
|
923 |
|
-- Handle anchor references
-- Rewrites the two Markdown link forms found in text into <a> elements:
--   reference form  [text][id]   (id is looked up in link_database;
--                                 an empty id means "use the link text")
--   inline form     [text](url "title")
-- The opening tag (and, in most branches, the closing tag) is passed
-- through add_escape, which is defined elsewhere in this file.
-- @param text  span-level text
-- @return      text with all recognised links replaced
function anchors(text)
    -- [text][id]
    local function reference_link(text, id)
        text = text:match("%b[]"):sub(2,-2)
        id = id:match("%b[]"):sub(2,-2):lower()
        if id == "" then id = text:lower() end
        -- Read-only lookup: an unknown id must not create a database entry.
        local def = link_database[id]
        -- Returning nil makes gsub keep the original text untouched.
        if not (def and def.url) then return nil end
        local url = encode_alt(def.url)
        local title = encode_alt(def.title)
        if title then title = " title=\"" .. title .. "\"" else title = "" end
        return add_escape("<a href=\"" .. url .. "\"" .. title .. ">") .. text .. add_escape("</a>")
    end

    -- [text](url) or [text](url "title"); the url may be wrapped in <>.
    local function inline_link(text, link)
        text = text:match("%b[]"):sub(2,-2)
        -- First try the form with a quoted title, then fall back to url only.
        local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
        title = encode_alt(title)
        url = url or link:match("%(<?(.-)>?%)") or ""
        url = encode_alt(url)
        if title then
            return add_escape("<a href=\"" .. url .. "\" title=\"" .. title .. "\">") .. text .. "</a>"
        else
            return add_escape("<a href=\"" .. url .. "\">") .. text .. add_escape("</a>")
        end
    end

    -- Reference links first: "[a][b]" must not be read as an inline link.
    text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
    text = text:gsub("(%b[])(%b())", inline_link)
    return text
end
|
956 |
|
-- Handle auto links, i.e. <http://www.google.com/>.
-- <http(s):...> and <ftp:...> become anchors whose text is the URL itself;
-- <mailto:addr> and <user@host> become mailto anchors whose address is
-- written as character entities.
-- @param text  span-level text
-- @return      text with the auto links replaced
function auto_links(text)
    -- Anchor for a URL auto link; only the opening tag goes through add_escape.
    local function url_anchor(url)
        return add_escape('<a href="' .. url .. '">') .. url .. "</a>"
    end

    -- Encode chars as a mix of dec and hex entities to (perhaps) fool
    -- spambots. The mix is deterministic so that unit tests are possible:
    -- roughly 45% hex, 45% dec, 10% plain.
    local function encode_email_address(s)
        local hex = {count = 1, rate = 0.45,
            code = function(c) return "&#x" .. string.format("%x", c:byte()) .. ";" end}
        local dec = {count = 0, rate = 0.45,
            code = function(c) return "&#" .. c:byte() .. ";" end}
        local plain = {count = 0, rate = 0.1,
            code = function(c) return c end}
        local codes = {hex, dec, plain}

        -- Puts the encoder with the larger count in slot a.
        local function order(a, b)
            if codes[a].count < codes[b].count then
                codes[a], codes[b] = codes[b], codes[a]
            end
        end

        local pieces = {}
        for i = 1, #s do
            -- Every encoder earns credit at its rate ...
            for k = 1, #codes do
                codes[k].count = codes[k].count + codes[k].rate
            end
            -- ... and the three compare-and-swap steps sort them by credit,
            -- highest first.
            order(1, 2)
            order(2, 3)
            order(1, 2)

            local chosen = codes[1]
            local ch = s:sub(i, i)
            -- Force encoding of "@" to make email address more invisible.
            if ch == "@" and chosen == plain then chosen = codes[2] end
            pieces[i] = chosen.code(ch)
            -- Using an encoder costs it one unit of credit.
            chosen.count = chosen.count - 1
        end
        return table.concat(pieces)
    end

    -- Anchor for a mail auto link; href and link text are encoded separately.
    local function mail_anchor(addr)
        addr = unescape_special_chars(addr)
        local href = encode_email_address("mailto:" .. addr)
        local label = encode_email_address(addr)
        return add_escape('<a href="' .. href .. '">') .. label .. "</a>"
    end

    -- links
    for _, pattern in ipairs {"<(https?:[^'\">%s]+)>", "<(ftp:[^'\">%s]+)>"} do
        text = text:gsub(pattern, url_anchor)
    end

    -- mail
    for _, pattern in ipairs {"<mailto:([^'\">%s]+)>", "<([-.%w]+%@[-.%w]+)>"} do
        text = text:gsub(pattern, mail_anchor)
    end
    return text
end
|
1004 |
|
-- Encode free standing amps (&) and angles (<)... note that this does not
-- encode free >.
-- An "&" is left alone only when it looks like the start of an entity: a
-- ";" follows within 15 characters and no space, tab, newline or further
-- "&" comes before that ";".
-- A "<" is encoded when it ends the string or when the next character is
-- not a letter, "/", "?", "$" or "!".
-- @param s  span-level text
-- @return   the text with free "&" and "<" replaced by &amp; and &lt;
function amps_and_angles(s)
    -- encode amps not part of &..; expression
    local pos = 1
    while true do
        local amp = s:find("&", pos, true)
        if not amp then break end
        local semi = s:find(";", amp+1, true)
        local stop = s:find("[ \t\n&]", amp+1)
        if not semi or (stop and stop < semi) or (semi - amp) > 15 then
            s = s:sub(1,amp-1) .. "&amp;" .. s:sub(amp+1)
        end
        -- Continue right behind the "&": the inserted "amp;" holds no "&".
        pos = amp+1
    end

    -- encode naked <'s
    s = s:gsub("<([^a-zA-Z/?$!])", "&lt;%1")
    s = s:gsub("<$", "&lt;")

    -- what about >, nothing done in the original markdown source to handle them
    return s
end
|
1030 |
|
-- Handles emphasis markers (* and _) in the text.
-- Double markers are turned into <strong> first, then single markers into
-- <em>; within each step "*" is handled before "_".
-- @param text  span-level text
-- @return      text with emphasis markers replaced by tags
function emphasis(text)
    -- Bodies accepted between a pair of double markers.
    local strong_bodies = {
        "([^%s][%*%_]?)",
        "([^%s][^<>]-[^%s][%*%_]?)",
    }
    -- Bodies accepted between a pair of single markers; the second and the
    -- fourth allow an already generated <strong> element inside the <em>.
    local em_bodies = {
        "([^%s_])",
        "(<strong>[^%s_]</strong>)",
        "([^%s_][^<>_]-[^%s_])",
        "([^<>_]-<strong>[^<>_]-</strong>[^<>_]-)",
    }

    for _, marker in ipairs {"%*%*", "%_%_"} do
        for _, body in ipairs(strong_bodies) do
            text = text:gsub(marker .. body .. marker, "<strong>%1</strong>")
        end
    end
    for _, marker in ipairs {"%*", "%_"} do
        for _, body in ipairs(em_bodies) do
            text = text:gsub(marker .. body .. marker, "<em>%1</em>")
        end
    end
    return text
end
|
1045 |
|
-- Handles line break markers in the text.
-- A line that ends in two or more spaces gets a hard break: the run of
-- spaces is replaced by " <br/>". A single trailing space is ordinary
-- whitespace and is left alone.
-- @param text  span-level text
-- @return      the converted text, followed by the number of breaks
--              inserted (the second result of string.gsub)
function line_breaks(text)
    return text:gsub("  +\n", " <br/>\n")
end
|
1050 |
|
-- Perform all span level transforms.
-- The order of the stages is significant: code spans and escapes are
-- handled before anything else, images before anchors, and emphasis and
-- line breaks come last.
-- @param text  the text of one block
-- @return      the transformed text
function span_transform(text)
    local protected = escape_special_chars(code_spans(text))
    local linked = auto_links(anchors(images(protected)))
    local styled = emphasis(amps_and_angles(linked))
    -- The parentheses drop any extra results of the last stage.
    return (line_breaks(styled))
end
|
1063 |
|
1064 ---------------------------------------------------------------------- |
|
1065 -- Markdown |
|
1066 ---------------------------------------------------------------------- |
|
1067 |
|
-- Cleanup the text by normalizing some possible variations to make further
-- processing easier: line endings become "\n", the text is passed through
-- detab, lines made up of spaces/tabs only are emptied, and one newline is
-- added at each end.
-- @param text  raw input text
-- @return      the normalized text
function cleanup(text)
    -- DOS line endings first, so that the "\r" of a "\r\n" pair is not
    -- counted as a line ending of its own by the Mac conversion.
    local unix = text:gsub("\r\n", "\n"):gsub("\r", "\n")

    -- Convert all tabs to spaces
    local flat = detab(unix)

    -- Strip lines with only spaces and tabs. Neighbouring blank lines share
    -- a newline, so one gsub pass can miss every other line; repeat until
    -- nothing is replaced any more.
    local replaced
    repeat
        flat, replaced = flat:gsub("\n[ \t]+\n", "\n\n")
    until replaced == 0

    return "\n" .. flat .. "\n"
end
|
1087 |
|
-- Strips link definitions from the text and stores the data in a lookup table.
-- A definition has the form
--     [id]: url "optional title"
-- at the start of a line (indented by at most three spaces); the url may be
-- wrapped in <> and the title may be quoted with "", '' or ().
-- @param text  document text; definitions are only found after a "\n"
-- @return      the text without the definitions
-- @return      a table mapping the lower-cased id to {url = ..., title = ...}
function strip_link_definitions(text)
    local linkdb = {}

    -- gsub callback: records one definition and removes it from the text.
    local function link_def(id, url, title)
        id = id:match("%[(.+)%]")
        -- "[]" has no label to store the definition under. Returning nil
        -- makes gsub keep the matched text instead of raising an error.
        if not id then return nil end
        id = id:lower()
        local entry = linkdb[id] or {}
        linkdb[id] = entry
        entry.url = url or entry.url
        entry.title = title or entry.title
        return ""
    end

    local def_no_title = "\n ? ? ?(%b[]):[ \t]*\n?[ \t]*<?([^%s>]+)>?[ \t]*"
    local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*"
    local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*"
    local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*"

    -- The variants with a title have to run before the bare one; otherwise
    -- the bare pattern would remove the definition and leave its title
    -- behind in the text.
    text = text:gsub(def_title1, link_def)
    text = text:gsub(def_title2, link_def)
    text = text:gsub(def_title3, link_def)
    text = text:gsub(def_no_title, link_def)
    return text, linkdb
end
|
1111 |
|
-- Link definitions of the document currently being converted; replaced on
-- every call to markdown() and read by the span-level link handling.
link_database = {}

-- Main markdown processing function
-- @param text  Markdown source
-- @return      the result of running the source through the whole pipeline
function markdown(text)
    init_hash(text)
    init_escape_table()

    local normalized = cleanup(text)
    local shielded = protect(normalized)
    -- The definitions have to be in place before the block transform runs.
    local body, definitions = strip_link_definitions(shielded)
    link_database = definitions
    local html = block_transform(body)
    -- The parentheses drop any extra results of the last step.
    return (unescape_special_chars(html))
end
|
1126 |
|
1127 ---------------------------------------------------------------------- |
|
1128 -- End of module |
|
1129 ---------------------------------------------------------------------- |
|
1130 |
|
-- Switch the environment of this chunk back to the global table, so that
-- everything below defines and reads ordinary globals again.
setfenv(1, _G)
-- M.lock is defined earlier in this file (not visible from here).
M:lock()

-- Expose markdown function to the world
markdown = M.markdown
|
1136 |
|
-- Class for parsing command-line options
local OptionParser = {}
OptionParser.__index = OptionParser

-- Creates a new option parser
function OptionParser:new()
    local o = {short = {}, long = {}}
    setmetatable(o, self)
    return o
end

-- Calls f() whenever a flag with specified short and long name is encountered
function OptionParser:flag(short, long, f)
    local info = {type = "flag", f = f}
    if short then self.short[short] = info end
    if long then self.long[long] = info end
end

-- Calls f(param) whenever a parameter flag with specified short and long name is encountered
function OptionParser:param(short, long, f)
    local info = {type = "param", f = f}
    if short then self.short[short] = info end
    if long then self.long[long] = info end
end

-- Calls f(v) for each non-flag argument
-- The handler is stored in the instance field "arg", which from then on
-- shadows this method on that instance.
function OptionParser:arg(f)
    self.arg = f
end

-- Runs the option parser for the specified set of arguments. Returns true if all arguments
-- were successfully parsed and false otherwise.
--   --name [value]   long flag / long parameter
--   -abc             bundle of short flags; a short parameter takes the rest
--                    of the bundle, or the next argument, as its value
--   --               everything after it is a plain argument
function OptionParser:run(args)
    -- rawget: without a registered handler, self.arg would resolve through
    -- __index to the arg() method above, which must not be called as a handler.
    local handler = rawget(self, "arg")
    local pos = 1
    while pos <= #args do
        local arg = args[pos]
        if arg == "--" then
            -- Hand over ALL remaining arguments before returning.
            if handler then
                for i=pos+1,#args do
                    handler(args[i])
                end
            end
            return true
        end
        if arg:match("^%-%-") then
            local info = self.long[arg:sub(3)]
            if not info then print("Unknown flag: " .. arg) return false end
            if info.type == "flag" then
                info.f()
                pos = pos + 1
            else
                local param = args[pos+1]
                if not param then print("No parameter for flag: " .. arg) return false end
                info.f(param)
                pos = pos+2
            end
        elseif arg:match("^%-") then
            for i=2,arg:len() do
                local c = arg:sub(i,i)
                local info = self.short[c]
                if not info then print("Unknown flag: -" .. c) return false end
                if info.type == "flag" then
                    info.f()
                else
                    if i == arg:len() then
                        -- Last letter of the bundle: the value is the next argument.
                        local param = args[pos+1]
                        if not param then print("No parameter for flag: -" .. c) return false end
                        info.f(param)
                        pos = pos + 1
                    else
                        -- The value is glued to the flag: -ofile
                        info.f(arg:sub(i+1))
                    end
                    -- A parameter always ends the bundle.
                    break
                end
            end
            pos = pos + 1
        else
            if handler then handler(arg) end
            pos = pos + 1
        end
    end
    return true
end
|
1219 |
|
-- Handles the case when markdown is run from the command line
-- Parses the options in arg, converts every file named on the command line
-- and writes the result next to it; when no file, -h or -t was given (and
-- the options were valid) it converts standard input to standard output.
-- @param arg  the command-line argument table; arg[0] is used by -t to
--             locate markdown-tests.lua
local function run_command_line(arg)
    -- Escapes "%" so that s is inserted verbatim when used as a gsub
    -- replacement string (a raw "%" would be read as a capture reference).
    local function literal(s)
        return (s:gsub("%%", "%%%%"))
    end

    -- Generate output for input s given options
    local function run(s, options)
        s = markdown(s)
        if not options.wrap_header then return s end
        local header = ""
        if options.header then
            -- A custom header file is used as is, without any substitution.
            local f = io.open(options.header) or error("Could not open file: " .. options.header)
            header = f:read("*a")
            f:close()
        else
            header = [[
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=CHARSET" />
<title>TITLE</title>
<link rel="stylesheet" type="text/css" href="STYLESHEET" />
</head>
<body>
]]
            -- The title defaults to the content of the first heading found.
            local title = options.title or s:match("<h1>(.-)</h1>") or s:match("<h2>(.-)</h2>") or
                s:match("<h3>(.-)</h3>") or "Untitled"
            header = header:gsub("TITLE", literal(title))
            if options.inline_style then
                local style = ""
                local f = io.open(options.stylesheet)
                if f then
                    style = f:read("*a") f:close()
                else
                    error("Could not include style sheet " .. options.stylesheet .. ": File not found")
                end
                -- Style sheets routinely contain "%" (e.g. "width: 100%").
                header = header:gsub('<link rel="stylesheet" type="text/css" href="STYLESHEET" />',
                    literal("<style type=\"text/css\"><!--\n" .. style .. "\n--></style>"))
            else
                header = header:gsub("STYLESHEET", literal(options.stylesheet))
            end
            header = header:gsub("CHARSET", literal(options.charset))
        end
        local footer = "</body></html>"
        if options.footer then
            local f = io.open(options.footer) or error("Could not open file: " .. options.footer)
            footer = f:read("*a")
            f:close()
        end
        return header .. s .. footer
    end

    -- Generate output path name from input path name given options.
    local function outpath(path, options)
        if options.append then return path .. ".html" end
        -- "x.html.md" -> "x.html"
        local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end
        -- "x.md" -> "x.html"; never map an input file onto itself
        m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end
        return path .. ".html"
    end

    -- Default commandline options
    local options = {
        wrap_header = true,
        header = nil,
        footer = nil,
        charset = "utf-8",
        title = nil,
        stylesheet = "default.css",
        inline_style = false
    }
    local help = [[
Usage: markdown.lua [OPTION] [FILE]
Runs the markdown text markup to HTML converter on each file specified on the
command line. If no files are specified, runs on standard input.

No header:
-n, --no-wrap Don't wrap the output in <html>... tags.
Custom header:
-e, --header FILE Use content of FILE for header.
-f, --footer FILE Use content of FILE for footer.
Generated header:
-c, --charset SET Specifies charset (default utf-8).
-i, --title TITLE Specifies title (default from first <h1> tag).
-s, --style STYLE Specifies style sheet file (default default.css).
-l, --inline-style Include the style sheet file inline in the header.
Generated files:
-a, --append Append .html extension (instead of replacing).
Other options:
-h, --help Print this help text.
-t, --test Run the unit tests.
]]

    -- Cleared as soon as a file, -h or -t has been handled.
    local run_stdin = true
    local op = OptionParser:new()
    op:flag("n", "no-wrap", function () options.wrap_header = false end)
    op:param("e", "header", function (x) options.header = x end)
    op:param("f", "footer", function (x) options.footer = x end)
    op:param("c", "charset", function (x) options.charset = x end)
    op:param("i", "title", function(x) options.title = x end)
    op:param("s", "style", function(x) options.stylesheet = x end)
    op:flag("l", "inline-style", function() options.inline_style = true end)
    op:flag("a", "append", function() options.append = true end)
    op:flag("t", "test", function()
        -- Only the file name at the end of the path is rewritten; a directory
        -- whose name resembles "markdown.lua" is left alone.
        local n = arg[0]:gsub("markdown%.lua$", "markdown-tests.lua")
        local f = io.open(n)
        if f then
            f:close() dofile(n)
        else
            error("Cannot find markdown-tests.lua")
        end
        run_stdin = false
    end)
    op:flag("h", "help", function() print(help) run_stdin = false end)
    -- Files are converted while the options are parsed, so an option only
    -- affects the files that follow it on the command line.
    op:arg(function(path)
            local file = io.open(path) or error("Could not open file: " .. path)
            local s = file:read("*a")
            file:close()
            s = run(s, options)
            local target = outpath(path, options)
            file = io.open(target, "w") or error("Could not open output file: " .. target)
            file:write(s)
            file:close()
            run_stdin = false
        end
    )

    if not op:run(arg) then
        print(help)
        run_stdin = false
    end

    if run_stdin then
        local s = io.read("*a")
        s = run(s, options)
        io.write(s)
    end
end
|
1353 |
|
-- If we are being run from the command-line, act accordingly
-- The file counts as "run from the command line" when the script name in
-- arg[0] ends in "markdown.lua"; otherwise it was loaded as a library and
-- returns the markdown function to the caller.
-- The type checks keep hosts that define no arg table, or one without a
-- script name at index 0, on the library path instead of raising an error.
local script_name = type(arg) == "table" and arg[0]
if type(script_name) == "string" and script_name:find("markdown%.lua$") then
    run_command_line(arg)
else
    return markdown
end