markdown.lua

changeset 8
836dac92eced
parent 6
219d7a7304f8
equal deleted inserted replaced
7:038275cd92ed 8:836dac92eced
1 #!/usr/bin/env lua 1 #!/usr/bin/env lua
2 2
3 --[[
4 # markdown.lua -- version 0.32
5
6 <http://www.frykholm.se/files/markdown.lua>
7
8 **Author:** Niklas Frykholm, <niklas@frykholm.se>
9 **Date:** 31 May 2008
10
11 This is an implementation of the popular text markup language Markdown in pure Lua.
12 Markdown can convert documents written in a simple and easy to read text format
13 to well-formatted HTML. For a more thorough description of Markdown and the Markdown
14 syntax, see <http://daringfireball.net/projects/markdown>.
15
16 The original Markdown source is written in Perl and makes heavy use of advanced
17 regular expression techniques (such as negative look-ahead, etc) which are not available
18 in Lua's simple regex engine. Therefore this Lua port has been rewritten from the ground
19 up. It is probably not completely bug free. If you notice any bugs, please report them to
20 me. A unit test that exposes the error is helpful.
21
22 ## Usage
23
24 require "markdown"
25 markdown(source)
26
27 ``markdown.lua`` exposes a single global function named ``markdown(s)`` which applies the
28 Markdown transformation to the specified string.
29
30 ``markdown.lua`` can also be used directly from the command line:
31
32 lua markdown.lua test.md
33
34 Creates a file ``test.html`` with the converted content of ``test.md``. Run:
35
36 lua markdown.lua -h
37
38 For a description of the command-line options.
39
40 ``markdown.lua`` uses the same license as Lua, the MIT license.
41
42 ## License
43
44 Copyright &copy; 2008 Niklas Frykholm.
45
46 Permission is hereby granted, free of charge, to any person obtaining a copy of this
47 software and associated documentation files (the "Software"), to deal in the Software
48 without restriction, including without limitation the rights to use, copy, modify, merge,
49 publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
50 to whom the Software is furnished to do so, subject to the following conditions:
51
52 The above copyright notice and this permission notice shall be included in all copies
53 or substantial portions of the Software.
54
55 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
56 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
57 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
58 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
59 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
60 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
61 THE SOFTWARE.
62
63 ## Version history
64
65 - **0.32** -- 31 May 2008
66 - Fix for links containing brackets
67 - **0.31** -- 1 Mar 2008
68 - Fix for link definitions followed by spaces
69 - **0.30** -- 25 Feb 2008
70 - Consistent behavior with Markdown when the same link reference is reused
71 - **0.29** -- 24 Feb 2008
72 - Fix for <pre> blocks with spaces in them
73 - **0.28** -- 18 Feb 2008
74 - Fix for link encoding
75 - **0.27** -- 14 Feb 2008
76 - Fix for link database links with ()
77 - **0.26** -- 06 Feb 2008
78 - Fix for nested italic and bold markers
79 - **0.25** -- 24 Jan 2008
80 - Fix for encoding of naked <
81 - **0.24** -- 21 Jan 2008
82 - Fix for link behavior.
83 - **0.23** -- 10 Jan 2008
84 - Fix for a regression bug in longer expressions in italic or bold.
85 - **0.22** -- 27 Dec 2007
86 - Fix for crash when processing blocks with a percent sign in them.
87 - **0.21** -- 27 Dec 2007
88 - Fix for combined strong and emphasis tags
89 - **0.20** -- 13 Oct 2007
90 - Fix for < as well in image titles, now matches Dingus behavior
91 - **0.19** -- 28 Sep 2007
92 - Fix for quotation marks " and ampersands & in link and image titles.
93 - **0.18** -- 28 Jul 2007
94 - Does not crash on unmatched tags (behaves like standard markdown)
95 - **0.17** -- 12 Apr 2007
96 - Fix for links with %20 in them.
97 - **0.16** -- 12 Apr 2007
98 - Do not require arg global to exist.
99 - **0.15** -- 28 Aug 2006
100 - Better handling of links with underscores in them.
101 - **0.14** -- 22 Aug 2006
102 - Bug for *`foo()`*
103 - **0.13** -- 12 Aug 2006
104 - Added -l option for including stylesheet inline in document.
105 - Fixed bug in -s flag.
106 - Fixed emphasis bug.
107 - **0.12** -- 15 May 2006
108 - Fixed several bugs to comply with MarkdownTest 1.0 <http://six.pairlist.net/pipermail/markdown-discuss/2004-December/000909.html>
109 - **0.11** -- 12 May 2006
110 - Fixed bug for escaping `*` and `_` inside code spans.
111 - Added license terms.
112 - Changed join() to table.concat().
113 - **0.10** -- 3 May 2006
114 - Initial public release.
115
116 // Niklas
117 ]]
118
119
120 -- Set up a table for holding local functions to avoid polluting the global namespace
121 local M = {}
122 local MT = {__index = _G}
123 setmetatable(M, MT)
124 setfenv(1, M)
125
126 ---------------------------------------------------------------------- 3 ----------------------------------------------------------------------
127 -- Utility functions 4 -- Utility functions
128 ---------------------------------------------------------------------- 5 ----------------------------------------------------------------------
129 6
130 -- Locks table t from changes, writes an error if someone attempts to change the table. 7 local unpack = table.unpack or unpack
131 -- This is useful for detecting variables that have "accidentally" been made global. Something
132 -- I tend to do all too much.
133 function lock(t)
134 function lock_new_index(t, k, v)
135 error("module has been locked -- " .. k .. " must be declared local", 2)
136 end
137
138 local mt = {__newindex = lock_new_index}
139 if getmetatable(t) then mt.__index = getmetatable(t).__index end
140 setmetatable(t, mt)
141 end
142 8
143 -- Returns the result of mapping the values in table t through the function f 9 -- Returns the result of mapping the values in table t through the function f
144 function map(t, f) 10 local function map(t, f)
145 local out = {} 11 local out = {}
146 for k,v in pairs(t) do out[k] = f(v,k) end 12 for k,v in pairs(t) do out[k] = f(v,k) end
147 return out 13 return out
148 end 14 end
149
150 -- The identity function, useful as a placeholder.
151 function identity(text) return text end
152 15
153 -- Functional style if statement. (NOTE: no short circuit evaluation) 16 -- Functional style if statement. (NOTE: no short circuit evaluation)
154 function iff(t, a, b) if t then return a else return b end end 17 local function iff(t, a, b) if t then return a else return b end end
155 18
156 -- Splits the text into an array of separate lines. 19 -- Splits the text into an array of separate lines.
157 function split(text, sep) 20 local function split(text, sep)
158 sep = sep or "\n" 21 sep = sep or "\n"
159 local lines = {} 22 local lines = {}
160 local pos = 1 23 local pos = 1
161 while true do 24 while true do
162 local b,e = text:find(sep, pos) 25 local b,e = text:find(sep, pos)
163 if not b then table.insert(lines, text:sub(pos)) break end 26 if not b then table.insert(lines, text:sub(pos)) break end
164 table.insert(lines, text:sub(pos, b-1)) 27 table.insert(lines, text:sub(pos, b-1))
165 pos = e + 1 28 pos = e + 1
166 end 29 end
167 return lines 30 return lines
168 end 31 end
169 32
170 -- Converts tabs to spaces 33 -- Converts tabs to spaces
171 function detab(text) 34 local function detab(text)
172 local tab_width = 4 35 local tab_width = 4
173 local function rep(match) 36 local function rep(match)
174 local spaces = -match:len() 37 local spaces = -match:len()
175 while spaces<1 do spaces = spaces + tab_width end 38 while spaces<1 do spaces = spaces + tab_width end
176 return match .. string.rep(" ", spaces) 39 return match .. string.rep(" ", spaces)
177 end 40 end
178 text = text:gsub("([^\n]-)\t", rep) 41 text = text:gsub("([^\n]-)\t", rep)
179 return text 42 return text
180 end 43 end
181 44
182 -- Applies string.find for every pattern in the list and returns the first match 45 -- Applies string.find for every pattern in the list and returns the first match
183 function find_first(s, patterns, index) 46 local function find_first(s, patterns, index)
184 local res = {} 47 local res = {}
185 for _,p in ipairs(patterns) do 48 for _,p in ipairs(patterns) do
186 local match = {s:find(p, index)} 49 local match = {s:find(p, index)}
187 if #match>0 and (#res==0 or match[1] < res[1]) then res = match end 50 if #match>0 and (#res==0 or match[1] < res[1]) then res = match end
188 end 51 end
189 return table.unpack(res) 52 return unpack(res)
190 end 53 end
191 54
192 -- If a replacement array is specified, the range [start, stop] in the array is replaced 55 -- If a replacement array is specified, the range [start, stop] in the array is replaced
193 -- with the replacement array and the resulting array is returned. Without a replacement 56 -- with the replacement array and the resulting array is returned. Without a replacement
194 -- array the section of the array between start and stop is returned. 57 -- array the section of the array between start and stop is returned.
195 function splice(array, start, stop, replacement) 58 local function splice(array, start, stop, replacement)
196 if replacement then 59 if replacement then
197 local n = stop - start + 1 60 local n = stop - start + 1
198 while n > 0 do 61 while n > 0 do
199 table.remove(array, start) 62 table.remove(array, start)
200 n = n - 1 63 n = n - 1
201 end 64 end
202 for i,v in ipairs(replacement) do 65 for _,v in ipairs(replacement) do
203 table.insert(array, start, v) 66 table.insert(array, start, v)
204 end 67 end
205 return array 68 return array
206 else 69 else
207 local res = {} 70 local res = {}
208 for i = start,stop do 71 for i = start,stop do
209 table.insert(res, array[i]) 72 table.insert(res, array[i])
210 end 73 end
211 return res 74 return res
212 end 75 end
213 end 76 end
214 77
215 -- Outdents the text one step. 78 -- Outdents the text one step.
216 function outdent(text) 79 local function outdent(text)
217 text = "\n" .. text 80 text = "\n" .. text
218 text = text:gsub("\n ? ? ?", "\n") 81 text = text:gsub("\n ? ? ?", "\n")
219 text = text:sub(2) 82 text = text:sub(2)
220 return text 83 return text
221 end 84 end
222 85
223 -- Indents the text one step. 86 -- Indents the text one step.
224 function indent(text) 87 local function indent(text)
225 text = text:gsub("\n", "\n ") 88 text = text:gsub("\n", "\n ")
226 return text 89 return text
227 end 90 end
228 91
229 -- Does a simple tokenization of html data. Returns the data as a list of tokens. 92 -- Does a simple tokenization of html data. Returns the data as a list of tokens.
230 -- Each token is a table with a type field (which is either "tag" or "text") and 93 -- Each token is a table with a type field (which is either "tag" or "text") and
231 -- a text field (which contains the original token data). 94 -- a text field (which contains the original token data).
232 function tokenize_html(html) 95 local function tokenize_html(html)
233 local tokens = {} 96 local tokens = {}
234 local pos = 1 97 local pos = 1
235 while true do 98 while true do
236 local start = find_first(html, {"<!%-%-", "<[a-z/!$]", "<%?"}, pos) 99 local start = find_first(html, {"<!%-%-", "<[a-z/!$]", "<%?"}, pos)
237 if not start then 100 if not start then
238 table.insert(tokens, {type="text", text=html:sub(pos)}) 101 table.insert(tokens, {type="text", text=html:sub(pos)})
239 break 102 break
240 end 103 end
241 if start ~= pos then table.insert(tokens, {type="text", text = html:sub(pos, start-1)}) end 104 if start ~= pos then table.insert(tokens, {type="text", text = html:sub(pos, start-1)}) end
242 105
243 local _, stop 106 local _, stop
244 if html:match("^<!%-%-", start) then 107 if html:match("^<!%-%-", start) then
245 _,stop = html:find("%-%->", start) 108 _,stop = html:find("%-%->", start)
246 elseif html:match("^<%?", start) then 109 elseif html:match("^<%?", start) then
247 _,stop = html:find("?>", start) 110 _,stop = html:find("?>", start)
248 else 111 else
249 _,stop = html:find("%b<>", start) 112 _,stop = html:find("%b<>", start)
250 end 113 end
251 if not stop then 114 if not stop then
252 -- error("Could not match html tag " .. html:sub(start,start+30)) 115 -- error("Could not match html tag " .. html:sub(start,start+30))
253 table.insert(tokens, {type="text", text=html:sub(start, start)}) 116 table.insert(tokens, {type="text", text=html:sub(start, start)})
254 pos = start + 1 117 pos = start + 1
255 else 118 else
256 table.insert(tokens, {type="tag", text=html:sub(start, stop)}) 119 table.insert(tokens, {type="tag", text=html:sub(start, stop)})
257 pos = stop + 1 120 pos = stop + 1
258 end 121 end
259 end 122 end
260 return tokens 123 return tokens
261 end 124 end
262 125
263 ---------------------------------------------------------------------- 126 ----------------------------------------------------------------------
264 -- Hash 127 -- Hash
265 ---------------------------------------------------------------------- 128 ----------------------------------------------------------------------
268 -- in the document. (Note that this is not a cryptographic hash; the hash 131 -- in the document. (Note that this is not a cryptographic hash; the hash
269 -- function is not one-way.) The hash procedure is used to protect parts 132 -- function is not one-way.) The hash procedure is used to protect parts
270 -- of the document from further processing. 133 -- of the document from further processing.
271 134
272 local HASH = { 135 local HASH = {
273 -- Has the hash been inited. 136 -- Has the hash been inited.
274 inited = false, 137 inited = false,
275 138
276 -- The unique string prepended to all hash values. This is to ensure 139 -- The unique string prepended to all hash values. This is to ensure
277 -- that hash values do not accidentally coincide with an actual existing 140 -- that hash values do not accidentally coincide with an actual existing
278 -- string in the document. 141 -- string in the document.
279 identifier = "", 142 identifier = "",
280 143
281 -- Counter that counts up for each new hash instance. 144 -- Counter that counts up for each new hash instance.
282 counter = 0, 145 counter = 0,
283 146
284 -- Hash table. 147 -- Hash table.
285 table = {} 148 table = {}
286 } 149 }
287 150
288 -- Inits hashing. Creates a hash_identifier that doesn't occur anywhere 151 -- Inits hashing. Creates a hash_identifier that doesn't occur anywhere
289 -- in the text. 152 -- in the text.
290 function init_hash(text) 153 local function init_hash(text)
291 HASH.inited = true 154 HASH.inited = true
292 HASH.identifier = "" 155 HASH.identifier = ""
293 HASH.counter = 0 156 HASH.counter = 0
294 HASH.table = {} 157 HASH.table = {}
295 158
296 local s = "HASH" 159 local s = "HASH"
297 local counter = 0 160 local counter = 0
298 local id 161 local id
299 while true do 162 while true do
300 id = s .. counter 163 id = s .. counter
301 if not text:find(id, 1, true) then break end 164 if not text:find(id, 1, true) then break end
302 counter = counter + 1 165 counter = counter + 1
303 end 166 end
304 HASH.identifier = id 167 HASH.identifier = id
305 end 168 end
306 169
307 -- Returns the hashed value for s. 170 -- Returns the hashed value for s.
308 function hash(s) 171 local function hash(s)
309 assert(HASH.inited) 172 assert(HASH.inited)
310 if not HASH.table[s] then 173 if not HASH.table[s] then
311 HASH.counter = HASH.counter + 1 174 HASH.counter = HASH.counter + 1
312 local id = HASH.identifier .. HASH.counter .. "X" 175 local id = HASH.identifier .. HASH.counter .. "X"
313 HASH.table[s] = id 176 HASH.table[s] = id
314 end 177 end
315 return HASH.table[s] 178 return HASH.table[s]
316 end 179 end
317 180
318 ---------------------------------------------------------------------- 181 ----------------------------------------------------------------------
319 -- Protection 182 -- Protection
320 ---------------------------------------------------------------------- 183 ----------------------------------------------------------------------
321 184
322 -- The protection module is used to "protect" parts of a document 185 -- The protection module is used to "protect" parts of a document
323 -- so that they are not modified by subsequent processing steps. 186 -- so that they are not modified by subsequent processing steps.
324 -- Protected parts are saved in a table for later unprotection 187 -- Protected parts are saved in a table for later unprotection
325 188
326 -- Protection data 189 -- Protection data
327 local PD = { 190 local PD = {
328 -- Saved blocks that have been converted 191 -- Saved blocks that have been converted
329 blocks = {}, 192 blocks = {},
330 193
331 -- Block level tags that will be protected 194 -- Block level tags that will be protected
332 tags = {"p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", 195 tags = {"p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
333 "pre", "table", "dl", "ol", "ul", "script", "noscript", "form", "fieldset", 196 "pre", "table", "dl", "ol", "ul", "script", "noscript", "form", "fieldset",
334 "iframe", "math", "ins", "del"} 197 "iframe", "math", "ins", "del"}
335 } 198 }
336 199
337 -- Pattern for matching a block tag that begins and ends in the leftmost 200 -- Pattern for matching a block tag that begins and ends in the leftmost
338 -- column and may contain indented subtags, i.e. 201 -- column and may contain indented subtags, i.e.
339 -- <div> 202 -- <div>
340 -- A nested block. 203 -- A nested block.
341 -- <div> 204 -- <div>
342 -- Nested data. 205 -- Nested data.
343 -- </div> 206 -- </div>
344 -- </div> 207 -- </div>
345 function block_pattern(tag) 208 local function block_pattern(tag)
346 return "\n<" .. tag .. ".-\n</" .. tag .. ">[ \t]*\n" 209 return "\n<" .. tag .. ".-\n</" .. tag .. ">[ \t]*\n"
347 end 210 end
348 211
349 -- Pattern for matching a block tag that begins and ends with a newline 212 -- Pattern for matching a block tag that begins and ends with a newline
350 function line_pattern(tag) 213 local function line_pattern(tag)
351 return "\n<" .. tag .. ".-</" .. tag .. ">[ \t]*\n" 214 return "\n<" .. tag .. ".-</" .. tag .. ">[ \t]*\n"
352 end 215 end
353 216
354 -- Protects the range of characters from start to stop in the text and 217 -- Protects the range of characters from start to stop in the text and
355 -- returns the protected string. 218 -- returns the protected string.
356 function protect_range(text, start, stop) 219 local function protect_range(text, start, stop)
357 local s = text:sub(start, stop) 220 local s = text:sub(start, stop)
358 local h = hash(s) 221 local h = hash(s)
359 PD.blocks[h] = s 222 PD.blocks[h] = s
360 text = text:sub(1,start) .. h .. text:sub(stop) 223 text = text:sub(1,start) .. h .. text:sub(stop)
361 return text 224 return text
362 end 225 end
363 226
364 -- Protect every part of the text that matches any of the patterns. The first 227 -- Protect every part of the text that matches any of the patterns. The first
365 -- matching pattern is protected first, etc. 228 -- matching pattern is protected first, etc.
366 function protect_matches(text, patterns) 229 local function protect_matches(text, patterns)
367 while true do 230 while true do
368 local start, stop = find_first(text, patterns) 231 local start, stop = find_first(text, patterns)
369 if not start then break end 232 if not start then break end
370 text = protect_range(text, start, stop) 233 text = protect_range(text, start, stop)
371 end 234 end
372 return text 235 return text
373 end 236 end
374 237
375 -- Protects blocklevel tags in the specified text 238 -- Protects blocklevel tags in the specified text
376 function protect(text) 239 local function protect(text)
377 -- First protect potentially nested block tags 240 -- First protect potentially nested block tags
378 text = protect_matches(text, map(PD.tags, block_pattern)) 241 text = protect_matches(text, map(PD.tags, block_pattern))
379 -- Then protect block tags at the line level. 242 -- Then protect block tags at the line level.
380 text = protect_matches(text, map(PD.tags, line_pattern)) 243 text = protect_matches(text, map(PD.tags, line_pattern))
381 -- Protect <hr> and comment tags 244 -- Protect <hr> and comment tags
382 text = protect_matches(text, {"\n<hr[^>]->[ \t]*\n"}) 245 text = protect_matches(text, {"\n<hr[^>]->[ \t]*\n"})
383 text = protect_matches(text, {"\n<!%-%-.-%-%->[ \t]*\n"}) 246 text = protect_matches(text, {"\n<!%-%-.-%-%->[ \t]*\n"})
384 return text 247 return text
385 end 248 end
386 249
387 -- Returns true if the string s is a hash resulting from protection 250 -- Returns true if the string s is a hash resulting from protection
388 function is_protected(s) 251 local function is_protected(s)
389 return PD.blocks[s] 252 return PD.blocks[s]
390 end 253 end
391 254
392 -- Unprotects the specified text by expanding all the nonces 255 -- Unprotects the specified text by expanding all the nonces
393 function unprotect(text) 256 local function unprotect(text)
394 for k,v in pairs(PD.blocks) do 257 for k,v in pairs(PD.blocks) do
395 v = v:gsub("%%", "%%%%") 258 v = v:gsub("%%", "%%%%")
396 text = text:gsub(k, v) 259 text = text:gsub(k, v)
397 end 260 end
398 return text 261 return text
399 end 262 end
400 263
401 264
402 ---------------------------------------------------------------------- 265 ----------------------------------------------------------------------
403 -- Block transform 266 -- Block transform
408 -- characters. 271 -- characters.
409 272
410 -- Returns true if the line is a ruler of (char) characters. 273 -- Returns true if the line is a ruler of (char) characters.
411 -- The line must contain at least three char characters and contain only spaces and 274 -- The line must contain at least three char characters and contain only spaces and
412 -- char characters. 275 -- char characters.
413 function is_ruler_of(line, char) 276 local function is_ruler_of(line, char)
414 if not line:match("^[ %" .. char .. "]*$") then return false end 277 if not line:match("^[ %" .. char .. "]*$") then return false end
415 if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end 278 if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end
416 return true 279 return true
417 end 280 end
418 281
419 -- Identifies the block level formatting present in the line 282 -- Identifies the block level formatting present in the line
420 function classify(line) 283 local function classify(line)
421 local info = {line = line, text = line} 284 local info = {line = line, text = line}
422 285
423 if line:match("^ ") then 286 if line:match("^ ") then
424 info.type = "indented" 287 info.type = "indented"
425 info.outdented = line:sub(5) 288 info.outdented = line:sub(5)
426 return info 289 return info
427 end 290 end
428 291
429 for _,c in ipairs({'*', '-', '_', '='}) do 292 for _,c in ipairs({'*', '-', '_', '='}) do
430 if is_ruler_of(line, c) then 293 if is_ruler_of(line, c) then
431 info.type = "ruler" 294 info.type = "ruler"
432 info.ruler_char = c 295 info.ruler_char = c
433 return info 296 return info
434 end 297 end
435 end 298 end
436 299
437 if line == "" then 300 if line == "" then
438 info.type = "blank" 301 info.type = "blank"
439 return info 302 return info
440 end 303 end
441 304
442 if line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") then 305 if line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") then
443 local m1, m2 = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") 306 local m1, m2 = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$")
444 info.type = "header" 307 info.type = "header"
445 info.level = m1:len() 308 info.level = m1:len()
446 info.text = m2 309 info.text = m2
447 return info 310 return info
448 end 311 end
449 312
450 if line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") then 313 if line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") then
451 local number, text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") 314 local number, text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)")
452 info.type = "list_item" 315 info.type = "list_item"
453 info.list_type = "numeric" 316 info.list_type = "numeric"
454 info.number = 0 + number 317 info.number = 0 + number
455 info.text = text 318 info.text = text
456 return info 319 return info
457 end 320 end
458 321
459 if line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") then 322 if line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") then
460 local bullet, text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") 323 local bullet, text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)")
461 info.type = "list_item" 324 info.type = "list_item"
462 info.list_type = "bullet" 325 info.list_type = "bullet"
463 info.bullet = bullet 326 info.bullet = bullet
464 info.text= text 327 info.text= text
465 return info 328 return info
466 end 329 end
467 330
468 if line:match("^>[ \t]?(.*)") then 331 if line:match("^>[ \t]?(.*)") then
469 info.type = "blockquote" 332 info.type = "blockquote"
470 info.text = line:match("^>[ \t]?(.*)") 333 info.text = line:match("^>[ \t]?(.*)")
471 return info 334 return info
472 end 335 end
473 336
474 if is_protected(line) then 337 if is_protected(line) then
475 info.type = "raw" 338 info.type = "raw"
476 info.html = unprotect(line) 339 info.html = unprotect(line)
477 return info 340 return info
478 end 341 end
479 342
480 info.type = "normal" 343 info.type = "normal"
481 return info 344 return info
482 end 345 end
483 346
484 -- Finds headers consisting of a normal line followed by a ruler and converts them to 347 -- Finds headers consisting of a normal line followed by a ruler and converts them to
485 -- header entries. 348 -- header entries.
486 function headers(array) 349 local function headers(array)
487 local i = 1 350 local i = 1
488 while i <= #array - 1 do 351 while i <= #array - 1 do
489 if array[i].type == "normal" and array[i+1].type == "ruler" and 352 if array[i].type == "normal" and array[i+1].type == "ruler" and
490 (array[i+1].ruler_char == "-" or array[i+1].ruler_char == "=") then 353 (array[i+1].ruler_char == "-" or array[i+1].ruler_char == "=") then
491 local info = {line = array[i].line} 354 local info = {line = array[i].line}
492 info.text = info.line 355 info.text = info.line
493 info.type = "header" 356 info.type = "header"
494 info.level = iff(array[i+1].ruler_char == "=", 1, 2) 357 info.level = iff(array[i+1].ruler_char == "=", 1, 2)
495 table.remove(array, i+1) 358 table.remove(array, i+1)
496 array[i] = info 359 array[i] = info
497 end 360 end
498 i = i + 1 361 i = i + 1
499 end 362 end
500 return array 363 return array
364 end
365
366 -- Forward declarations
367 local block_transform, span_transform, encode_code
368
369 -- Convert lines to html code
370 local function blocks_to_html(lines, no_paragraphs)
371 local out = {}
372 local i = 1
373 while i <= #lines do
374 local line = lines[i]
375 if line.type == "ruler" then
376 table.insert(out, "<hr/>")
377 elseif line.type == "raw" then
378 table.insert(out, line.html)
379 elseif line.type == "normal" then
380 local s = line.line
381
382 while i+1 <= #lines and lines[i+1].type == "normal" do
383 i = i + 1
384 s = s .. "\n" .. lines[i].line
385 end
386
387 if no_paragraphs then
388 table.insert(out, span_transform(s))
389 else
390 table.insert(out, "<p>" .. span_transform(s) .. "</p>")
391 end
392 elseif line.type == "header" then
393 local s = "<h" .. line.level .. ">" .. span_transform(line.text) .. "</h" .. line.level .. ">"
394 table.insert(out, s)
395 else
396 table.insert(out, line.line)
397 end
398 i = i + 1
399 end
400 return out
501 end 401 end
502 402
503 -- Find list blocks and convert them to protected data blocks 403 -- Find list blocks and convert them to protected data blocks
504 function lists(array, sublist) 404 local function lists(array, sublist)
505 local function process_list(arr) 405 local function process_list(arr)
506 local function any_blanks(arr) 406 local function any_blanks(arr)
507 for i = 1, #arr do 407 for i = 1, #arr do
508 if arr[i].type == "blank" then return true end 408 if arr[i].type == "blank" then return true end
509 end 409 end
510 return false 410 return false
511 end 411 end
512 412
513 local function split_list_items(arr) 413 local function split_list_items(arr)
514 local acc = {arr[1]} 414 local acc = {arr[1]}
515 local res = {} 415 local res = {}
516 for i=2,#arr do 416 for i=2,#arr do
517 if arr[i].type == "list_item" then 417 if arr[i].type == "list_item" then
518 table.insert(res, acc) 418 table.insert(res, acc)
519 acc = {arr[i]} 419 acc = {arr[i]}
520 else 420 else
521 table.insert(acc, arr[i]) 421 table.insert(acc, arr[i])
522 end 422 end
523 end 423 end
524 table.insert(res, acc) 424 table.insert(res, acc)
525 return res 425 return res
526 end 426 end
527 427
528 local function process_list_item(lines, block) 428 local function process_list_item(lines, block)
529 while lines[#lines].type == "blank" do 429 while lines[#lines].type == "blank" do
530 table.remove(lines) 430 table.remove(lines)
531 end 431 end
532 432
533 local itemtext = lines[1].text 433 local itemtext = lines[1].text
534 for i=2,#lines do 434 for i=2,#lines do
535 itemtext = itemtext .. "\n" .. outdent(lines[i].line) 435 itemtext = itemtext .. "\n" .. outdent(lines[i].line)
536 end 436 end
537 if block then 437 if block then
538 itemtext = block_transform(itemtext, true) 438 itemtext = block_transform(itemtext, true)
539 if not itemtext:find("<pre>") then itemtext = indent(itemtext) end 439 if not itemtext:find("<pre>") then itemtext = indent(itemtext) end
540 return " <li>" .. itemtext .. "</li>" 440 return " <li>" .. itemtext .. "</li>"
541 else 441 else
542 local lines = split(itemtext) 442 local lines = split(itemtext)
543 lines = map(lines, classify) 443 lines = map(lines, classify)
544 lines = lists(lines, true) 444 lines = lists(lines, true)
545 lines = blocks_to_html(lines, true) 445 lines = blocks_to_html(lines, true)
546 itemtext = table.concat(lines, "\n") 446 itemtext = table.concat(lines, "\n")
547 if not itemtext:find("<pre>") then itemtext = indent(itemtext) end 447 if not itemtext:find("<pre>") then itemtext = indent(itemtext) end
548 return " <li>" .. itemtext .. "</li>" 448 return " <li>" .. itemtext .. "</li>"
549 end 449 end
550 end 450 end
551 451
552 local block_list = any_blanks(arr) 452 local block_list = any_blanks(arr)
553 local items = split_list_items(arr) 453 local items = split_list_items(arr)
554 local out = "" 454 local out = ""
555 for _, item in ipairs(items) do 455 for _, item in ipairs(items) do
556 out = out .. process_list_item(item, block_list) .. "\n" 456 out = out .. process_list_item(item, block_list) .. "\n"
557 end 457 end
558 if arr[1].list_type == "numeric" then 458 if arr[1].list_type == "numeric" then
559 return "<ol>\n" .. out .. "</ol>" 459 return "<ol>\n" .. out .. "</ol>"
560 else 460 else
561 return "<ul>\n" .. out .. "</ul>" 461 return "<ul>\n" .. out .. "</ul>"
562 end 462 end
563 end 463 end
564 464
565 -- Finds the range of lines composing the first list in the array. A list 465 -- Finds the range of lines composing the first list in the array. A list
566 -- starts with (^ list_item) or (blank list_item) and ends with 466 -- starts with (^ list_item) or (blank list_item) and ends with
567 -- (blank* $) or (blank normal). 467 -- (blank* $) or (blank normal).
568 -- 468 --
569 -- A sublist can start with just (list_item); it does not need a blank... 469 -- A sublist can start with just (list_item); it does not need a blank...
570 local function find_list(array, sublist) 470 local function find_list(array, sublist)
571 local function find_list_start(array, sublist) 471 local function find_list_start(array, sublist)
572 if array[1].type == "list_item" then return 1 end 472 if array[1].type == "list_item" then return 1 end
573 if sublist then 473 if sublist then
574 for i = 1,#array do 474 for i = 1,#array do
575 if array[i].type == "list_item" then return i end 475 if array[i].type == "list_item" then return i end
576 end 476 end
577 else 477 else
578 for i = 1, #array-1 do 478 for i = 1, #array-1 do
579 if array[i].type == "blank" and array[i+1].type == "list_item" then 479 if array[i].type == "blank" and array[i+1].type == "list_item" then
580 return i+1 480 return i+1
581 end 481 end
582 end 482 end
583 end 483 end
584 return nil 484 return nil
585 end 485 end
586 local function find_list_end(array, start) 486 local function find_list_end(array, start)
587 local pos = #array 487 local pos = #array
588 for i = start, #array-1 do 488 for i = start, #array-1 do
589 if array[i].type == "blank" and array[i+1].type ~= "list_item" 489 if array[i].type == "blank" and array[i+1].type ~= "list_item"
590 and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then 490 and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then
591 pos = i-1 491 pos = i-1
592 break 492 break
593 end 493 end
594 end 494 end
595 while pos > start and array[pos].type == "blank" do 495 while pos > start and array[pos].type == "blank" do
596 pos = pos - 1 496 pos = pos - 1
597 end 497 end
598 return pos 498 return pos
599 end 499 end
600 500
601 local start = find_list_start(array, sublist) 501 local start = find_list_start(array, sublist)
602 if not start then return nil end 502 if not start then return nil end
603 return start, find_list_end(array, start) 503 return start, find_list_end(array, start)
604 end 504 end
605 505
606 while true do 506 while true do
607 local start, stop = find_list(array, sublist) 507 local start, stop = find_list(array, sublist)
608 if not start then break end 508 if not start then break end
609 local text = process_list(splice(array, start, stop)) 509 local text = process_list(splice(array, start, stop))
610 local info = { 510 local info = {
611 line = text, 511 line = text,
612 type = "raw", 512 type = "raw",
613 html = text 513 html = text
614 } 514 }
615 array = splice(array, start, stop, {info}) 515 array = splice(array, start, stop, {info})
616 end 516 end
617 517
618 -- Convert any remaining list items to normal 518 -- Convert any remaining list items to normal
619 for _,line in ipairs(array) do 519 for _,line in ipairs(array) do
620 if line.type == "list_item" then line.type = "normal" end 520 if line.type == "list_item" then line.type = "normal" end
621 end 521 end
622 522
623 return array 523 return array
624 end 524 end
625 525
626 -- Find and convert blockquote markers. 526 -- Find and convert blockquote markers.
627 function blockquotes(lines) 527 local function blockquotes(lines)
628 local function find_blockquote(lines) 528 local function find_blockquote(lines)
629 local start 529 local start
630 for i,line in ipairs(lines) do 530 for i,line in ipairs(lines) do
631 if line.type == "blockquote" then 531 if line.type == "blockquote" then
632 start = i 532 start = i
633 break 533 break
634 end 534 end
635 end 535 end
636 if not start then return nil end 536 if not start then return nil end
637 537
638 local stop = #lines 538 local stop = #lines
639 for i = start+1, #lines do 539 for i = start+1, #lines do
640 if lines[i].type == "blank" or lines[i].type == "blockquote" then 540 if lines[i].type == "blank" or lines[i].type == "blockquote" then
641 elseif lines[i].type == "normal" then 541 elseif lines[i].type == "normal" then
642 if lines[i-1].type == "blank" then stop = i-1 break end 542 if lines[i-1].type == "blank" then stop = i-1 break end
643 else 543 else
644 stop = i-1 break 544 stop = i-1 break
645 end 545 end
646 end 546 end
647 while lines[stop].type == "blank" do stop = stop - 1 end 547 while lines[stop].type == "blank" do stop = stop - 1 end
648 return start, stop 548 return start, stop
649 end 549 end
650 550
651 local function process_blockquote(lines) 551 local function process_blockquote(lines)
652 local raw = lines[1].text 552 local raw = lines[1].text
653 for i = 2,#lines do 553 for i = 2,#lines do
654 raw = raw .. "\n" .. lines[i].text 554 raw = raw .. "\n" .. lines[i].text
655 end 555 end
656 local bt = block_transform(raw) 556 local bt = block_transform(raw)
657 if not bt:find("<pre>") then bt = indent(bt) end 557 if not bt:find("<pre>") then bt = indent(bt) end
658 return "<blockquote>\n " .. bt .. 558 return "<blockquote>\n " .. bt ..
659 "\n</blockquote>" 559 "\n</blockquote>"
660 end 560 end
661 561
662 while true do 562 while true do
663 local start, stop = find_blockquote(lines) 563 local start, stop = find_blockquote(lines)
664 if not start then break end 564 if not start then break end
665 local text = process_blockquote(splice(lines, start, stop)) 565 local text = process_blockquote(splice(lines, start, stop))
666 local info = { 566 local info = {
667 line = text, 567 line = text,
668 type = "raw", 568 type = "raw",
669 html = text 569 html = text
670 } 570 }
671 lines = splice(lines, start, stop, {info}) 571 lines = splice(lines, start, stop, {info})
672 end 572 end
673 return lines 573 return lines
674 end 574 end
675 575
676 -- Find and convert codeblocks. 576 -- Find and convert codeblocks.
677 function codeblocks(lines) 577 local function codeblocks(lines)
678 local function find_codeblock(lines) 578 local function find_codeblock(lines)
679 local start 579 local start
680 for i,line in ipairs(lines) do 580 for i,line in ipairs(lines) do
681 if line.type == "indented" then start = i break end 581 if line.type == "indented" then start = i break end
682 end 582 end
683 if not start then return nil end 583 if not start then return nil end
684 584
685 local stop = #lines 585 local stop = #lines
686 for i = start+1, #lines do 586 for i = start+1, #lines do
687 if lines[i].type ~= "indented" and lines[i].type ~= "blank" then 587 if lines[i].type ~= "indented" and lines[i].type ~= "blank" then
688 stop = i-1 588 stop = i-1
689 break 589 break
690 end 590 end
691 end 591 end
692 while lines[stop].type == "blank" do stop = stop - 1 end 592 while lines[stop].type == "blank" do stop = stop - 1 end
693 return start, stop 593 return start, stop
694 end 594 end
695 595
696 local function process_codeblock(lines) 596 local function process_codeblock(lines)
697 local raw = detab(encode_code(outdent(lines[1].line))) 597 local raw = detab(encode_code(outdent(lines[1].line)))
698 for i = 2,#lines do 598 for i = 2,#lines do
699 raw = raw .. "\n" .. detab(encode_code(outdent(lines[i].line))) 599 raw = raw .. "\n" .. detab(encode_code(outdent(lines[i].line)))
700 end 600 end
701 return "<pre><code>" .. raw .. "\n</code></pre>" 601 return "<pre><code>" .. raw .. "\n</code></pre>"
702 end 602 end
703 603
704 while true do 604 while true do
705 local start, stop = find_codeblock(lines) 605 local start, stop = find_codeblock(lines)
706 if not start then break end 606 if not start then break end
707 local text = process_codeblock(splice(lines, start, stop)) 607 local text = process_codeblock(splice(lines, start, stop))
708 local info = { 608 local info = {
709 line = text, 609 line = text,
710 type = "raw", 610 type = "raw",
711 html = text 611 html = text
712 } 612 }
713 lines = splice(lines, start, stop, {info}) 613 lines = splice(lines, start, stop, {info})
714 end 614 end
715 return lines 615 return lines
716 end
717
718 -- Convert lines to html code
719 function blocks_to_html(lines, no_paragraphs)
720 local out = {}
721 local i = 1
722 while i <= #lines do
723 local line = lines[i]
724 if line.type == "ruler" then
725 table.insert(out, "<hr/>")
726 elseif line.type == "raw" then
727 table.insert(out, line.html)
728 elseif line.type == "normal" then
729 local s = line.line
730
731 while i+1 <= #lines and lines[i+1].type == "normal" do
732 i = i + 1
733 s = s .. "\n" .. lines[i].line
734 end
735
736 if no_paragraphs then
737 table.insert(out, span_transform(s))
738 else
739 table.insert(out, "<p>" .. span_transform(s) .. "</p>")
740 end
741 elseif line.type == "header" then
742 local s = "<h" .. line.level .. ">" .. span_transform(line.text) .. "</h" .. line.level .. ">"
743 table.insert(out, s)
744 else
745 table.insert(out, line.line)
746 end
747 i = i + 1
748 end
749 return out
750 end 616 end
751 617
752 -- Perform all the block level transforms 618 -- Perform all the block level transforms
753 function block_transform(text, sublist) 619 function block_transform(text, sublist)
754 local lines = split(text) 620 local lines = split(text)
755 lines = map(lines, classify) 621 lines = map(lines, classify)
756 lines = headers(lines) 622 lines = headers(lines)
757 lines = lists(lines, sublist) 623 lines = lists(lines, sublist)
758 lines = codeblocks(lines) 624 lines = codeblocks(lines)
759 lines = blockquotes(lines) 625 lines = blockquotes(lines)
760 lines = blocks_to_html(lines) 626 lines = blocks_to_html(lines)
761 local text = table.concat(lines, "\n") 627 local text = table.concat(lines, "\n")
762 return text 628 return text
763 end
764
765 -- Debug function for printing a line array to see the result
766 -- of partial transforms.
767 function print_lines(lines)
768 for i, line in ipairs(lines) do
769 print(i, line.type, line.text or line.line)
770 end
771 end 629 end
772 630
773 ---------------------------------------------------------------------- 631 ----------------------------------------------------------------------
774 -- Span transform 632 -- Span transform
775 ---------------------------------------------------------------------- 633 ----------------------------------------------------------------------
776 634
777 -- Functions for transforming the text at the span level. 635 -- Functions for transforming the text at the span level.
778 636
779 -- These characters may need to be escaped because they have a special 637 -- These characters may need to be escaped because they have a special
780 -- meaning in markdown. 638 -- meaning in markdown.
781 escape_chars = "'\\`*_{}[]()>#+-.!'" 639 local escape_chars = "'\\`*_{}[]()>#+-.!'"
782 escape_table = {} 640 local escape_table = {}
783 641
784 function init_escape_table() 642 local function init_escape_table()
785 escape_table = {} 643 escape_table = {}
786 for i = 1,#escape_chars do 644 for i = 1,#escape_chars do
787 local c = escape_chars:sub(i,i) 645 local c = escape_chars:sub(i,i)
788 escape_table[c] = hash(c) 646 escape_table[c] = hash(c)
789 end 647 end
790 end 648 end
791 649
792 -- Adds a new escape to the escape table. 650 -- Adds a new escape to the escape table.
793 function add_escape(text) 651 local function add_escape(text)
794 if not escape_table[text] then 652 if not escape_table[text] then
795 escape_table[text] = hash(text) 653 escape_table[text] = hash(text)
796 end 654 end
797 return escape_table[text] 655 return escape_table[text]
798 end 656 end
657
658 -- Encode backslash-escaped characters in the markdown source.
659 local function encode_backslash_escapes(t)
660 for i=1,escape_chars:len() do
661 local c = escape_chars:sub(i,i)
662 t = t:gsub("\\%" .. c, escape_table[c])
663 end
664 return t
665 end
799 666
800 -- Escape characters that should not be disturbed by markdown. 667 -- Escape characters that should not be disturbed by markdown.
801 function escape_special_chars(text) 668 local function escape_special_chars(text)
802 local tokens = tokenize_html(text) 669 local tokens = tokenize_html(text)
803 670
804 local out = "" 671 local out = ""
805 for _, token in ipairs(tokens) do 672 for _, token in ipairs(tokens) do
806 local t = token.text 673 local t = token.text
807 if token.type == "tag" then 674 if token.type == "tag" then
808 -- In tags, encode * and _ so they don't conflict with their use in markdown. 675 -- In tags, encode * and _ so they don't conflict with their use in markdown.
809 t = t:gsub("%*", escape_table["*"]) 676 t = t:gsub("%*", escape_table["*"])
810 t = t:gsub("%_", escape_table["_"]) 677 t = t:gsub("%_", escape_table["_"])
811 else 678 else
812 t = encode_backslash_escapes(t) 679 t = encode_backslash_escapes(t)
813 end 680 end
814 out = out .. t 681 out = out .. t
815 end 682 end
816 return out 683 return out
817 end
818
819 -- Encode backslash-escaped characters in the markdown source.
820 function encode_backslash_escapes(t)
821 for i=1,escape_chars:len() do
822 local c = escape_chars:sub(i,i)
823 t = t:gsub("\\%" .. c, escape_table[c])
824 end
825 return t
826 end 684 end
827 685
828 -- Unescape characters that have been encoded. 686 -- Unescape characters that have been encoded.
829 function unescape_special_chars(t) 687 local function unescape_special_chars(t)
830 local tin = t 688 local tin = t
831 for k,v in pairs(escape_table) do 689 for k,v in pairs(escape_table) do
832 k = k:gsub("%%", "%%%%") 690 k = k:gsub("%%", "%%%%")
833 t = t:gsub(v,k) 691 t = t:gsub(v,k)
834 end 692 end
835 if t ~= tin then t = unescape_special_chars(t) end 693 if t ~= tin then t = unescape_special_chars(t) end
836 return t 694 return t
837 end 695 end
838 696
839 -- Encode/escape certain characters inside Markdown code runs. 697 -- Encode/escape certain characters inside Markdown code runs.
840 -- The point is that in code, these characters are literals, 698 -- The point is that in code, these characters are literals,
841 -- and lose their special Markdown meanings. 699 -- and lose their special Markdown meanings.
842 function encode_code(s) 700 function encode_code(s)
843 s = s:gsub("%&", "&amp;") 701 s = s:gsub("%&", "&amp;")
844 s = s:gsub("<", "&lt;") 702 s = s:gsub("<", "&lt;")
845 s = s:gsub(">", "&gt;") 703 s = s:gsub(">", "&gt;")
846 for k,v in pairs(escape_table) do 704 for k,v in pairs(escape_table) do
847 s = s:gsub("%"..k, v) 705 s = s:gsub("%"..k, v)
848 end 706 end
849 return s 707 return s
850 end 708 end
851 709
852 -- Handle backtick blocks. 710 -- Handle backtick blocks.
853 function code_spans(s) 711 local function code_spans(s)
854 s = s:gsub("\\\\", escape_table["\\"]) 712 s = s:gsub("\\\\", escape_table["\\"])
855 s = s:gsub("\\`", escape_table["`"]) 713 s = s:gsub("\\`", escape_table["`"])
856 714
857 local pos = 1 715 local pos = 1
858 while true do 716 while true do
859 local start, stop = s:find("`+", pos) 717 local start, stop = s:find("`+", pos)
860 if not start then return s end 718 if not start then return s end
861 local count = stop - start + 1 719 local count = stop - start + 1
862 -- Find a matching number of backticks 720 -- Find a matching number of backticks
863 local estart, estop = s:find(string.rep("`", count), stop+1) 721 local estart, estop = s:find(string.rep("`", count), stop+1)
864 local brstart = s:find("\n", stop+1) 722 local brstart = s:find("\n", stop+1)
865 if estart and (not brstart or estart < brstart) then 723 if estart and (not brstart or estart < brstart) then
866 local code = s:sub(stop+1, estart-1) 724 local code = s:sub(stop+1, estart-1)
867 code = code:gsub("^[ \t]+", "") 725 code = code:gsub("^[ \t]+", "")
868 code = code:gsub("[ \t]+$", "") 726 code = code:gsub("[ \t]+$", "")
869 code = code:gsub(escape_table["\\"], escape_table["\\"] .. escape_table["\\"]) 727 code = code:gsub(escape_table["\\"], escape_table["\\"] .. escape_table["\\"])
870 code = code:gsub(escape_table["`"], escape_table["\\"] .. escape_table["`"]) 728 code = code:gsub(escape_table["`"], escape_table["\\"] .. escape_table["`"])
871 code = "<code>" .. encode_code(code) .. "</code>" 729 code = "<code>" .. encode_code(code) .. "</code>"
872 code = add_escape(code) 730 code = add_escape(code)
873 s = s:sub(1, start-1) .. code .. s:sub(estop+1) 731 s = s:sub(1, start-1) .. code .. s:sub(estop+1)
874 pos = start + code:len() 732 pos = start + code:len()
875 else 733 else
876 pos = stop + 1 734 pos = stop + 1
877 end 735 end
878 end 736 end
879 return s 737 return s
880 end 738 end
881 739
882 -- Encode alt text... encodes &, ", and <. 740 -- Encode alt text... encodes &, ", and <.
883 function encode_alt(s) 741 local function encode_alt(s)
884 if not s then return s end 742 if not s then return s end
885 s = s:gsub('&', '&amp;') 743 s = s:gsub('&', '&amp;')
886 s = s:gsub('"', '&quot;') 744 s = s:gsub('"', '&quot;')
887 s = s:gsub('<', '&lt;') 745 s = s:gsub('<', '&lt;')
888 return s 746 return s
889 end 747 end
748
749 -- Forward declaration for link_db as returned by strip_link_definitions.
750 local link_database
890 751
891 -- Handle image references 752 -- Handle image references
892 function images(text) 753 local function images(text)
893 local function reference_link(alt, id) 754 local function reference_link(alt, id)
894 alt = encode_alt(alt:match("%b[]"):sub(2,-2)) 755 alt = encode_alt(alt:match("%b[]"):sub(2,-2))
895 id = id:match("%[(.*)%]"):lower() 756 id = id:match("%[(.*)%]"):lower()
896 if id == "" then id = text:lower() end 757 if id == "" then id = text:lower() end
897 link_database[id] = link_database[id] or {} 758 link_database[id] = link_database[id] or {}
898 if not link_database[id].url then return nil end 759 if not link_database[id].url then return nil end
899 local url = link_database[id].url or id 760 local url = link_database[id].url or id
900 url = encode_alt(url) 761 url = encode_alt(url)
901 local title = encode_alt(link_database[id].title) 762 local title = encode_alt(link_database[id].title)
902 if title then title = " title=\"" .. title .. "\"" else title = "" end 763 if title then title = " title=\"" .. title .. "\"" else title = "" end
903 return add_escape ('<img src="' .. url .. '" alt="' .. alt .. '"' .. title .. "/>") 764 return add_escape ('<img src="' .. url .. '" alt="' .. alt .. '"' .. title .. "/>")
904 end 765 end
905 766
906 local function inline_link(alt, link) 767 local function inline_link(alt, link)
907 alt = encode_alt(alt:match("%b[]"):sub(2,-2)) 768 alt = encode_alt(alt:match("%b[]"):sub(2,-2))
908 local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]") 769 local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
909 url = url or link:match("%(<?(.-)>?%)") 770 url = url or link:match("%(<?(.-)>?%)")
910 url = encode_alt(url) 771 url = encode_alt(url)
911 title = encode_alt(title) 772 title = encode_alt(title)
912 if title then 773 if title then
913 return add_escape('<img src="' .. url .. '" alt="' .. alt .. '" title="' .. title .. '"/>') 774 return add_escape('<img src="' .. url .. '" alt="' .. alt .. '" title="' .. title .. '"/>')
914 else 775 else
915 return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>') 776 return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>')
916 end 777 end
917 end 778 end
918 779
919 text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) 780 text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
920 text = text:gsub("!(%b[])(%b())", inline_link) 781 text = text:gsub("!(%b[])(%b())", inline_link)
921 return text 782 return text
922 end 783 end
923 784
924 -- Handle anchor references 785 -- Handle anchor references
925 function anchors(text) 786 local function anchors(text)
926 local function reference_link(text, id) 787 local function reference_link(text, id)
927 text = text:match("%b[]"):sub(2,-2) 788 text = text:match("%b[]"):sub(2,-2)
928 id = id:match("%b[]"):sub(2,-2):lower() 789 id = id:match("%b[]"):sub(2,-2):lower()
929 if id == "" then id = text:lower() end 790 if id == "" then id = text:lower() end
930 link_database[id] = link_database[id] or {} 791 link_database[id] = link_database[id] or {}
931 if not link_database[id].url then return nil end 792 if not link_database[id].url then return nil end
932 local url = link_database[id].url or id 793 local url = link_database[id].url or id
933 url = encode_alt(url) 794 url = encode_alt(url)
934 local title = encode_alt(link_database[id].title) 795 local title = encode_alt(link_database[id].title)
935 if title then title = " title=\"" .. title .. "\"" else title = "" end 796 if title then title = " title=\"" .. title .. "\"" else title = "" end
936 return add_escape("<a href=\"" .. url .. "\"" .. title .. ">") .. text .. add_escape("</a>") 797 return add_escape("<a href=\"" .. url .. "\"" .. title .. ">") .. text .. add_escape("</a>")
937 end 798 end
938 799
939 local function inline_link(text, link) 800 local function inline_link(text, link)
940 text = text:match("%b[]"):sub(2,-2) 801 text = text:match("%b[]"):sub(2,-2)
941 local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]") 802 local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
942 title = encode_alt(title) 803 title = encode_alt(title)
943 url = url or link:match("%(<?(.-)>?%)") or "" 804 url = url or link:match("%(<?(.-)>?%)") or ""
944 url = encode_alt(url) 805 url = encode_alt(url)
945 if title then 806 if title then
946 return add_escape("<a href=\"" .. url .. "\" title=\"" .. title .. "\">") .. text .. "</a>" 807 return add_escape("<a href=\"" .. url .. "\" title=\"" .. title .. "\">") .. text .. "</a>"
947 else 808 else
948 return add_escape("<a href=\"" .. url .. "\">") .. text .. add_escape("</a>") 809 return add_escape("<a href=\"" .. url .. "\">") .. text .. add_escape("</a>")
949 end 810 end
950 end 811 end
951 812
952 text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) 813 text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
953 text = text:gsub("(%b[])(%b())", inline_link) 814 text = text:gsub("(%b[])(%b())", inline_link)
954 return text 815 return text
955 end 816 end
956 817
957 -- Handle auto links, i.e. <http://www.google.com/>. 818 -- Handle auto links, i.e. <http://www.google.com/>.
958 function auto_links(text) 819 local function auto_links(text)
959 local function link(s) 820 local function link(s)
960 return add_escape("<a href=\"" .. s .. "\">") .. s .. "</a>" 821 return add_escape("<a href=\"" .. s .. "\">") .. s .. "</a>"
961 end 822 end
962 -- Encode chars as a mix of dec and hex entities to (perhaps) fool 823 -- Encode chars as a mix of dec and hex entities to (perhaps) fool
963 -- spambots. 824 -- spambots.
964 local function encode_email_address(s) 825 local function encode_email_address(s)
965 -- Use a deterministic encoding to make unit testing possible. 826 -- Use a deterministic encoding to make unit testing possible.
966 -- Code 45% hex, 45% dec, 10% plain. 827 -- Code 45% hex, 45% dec, 10% plain.
967 local hex = {code = function(c) return "&#x" .. string.format("%x", c:byte()) .. ";" end, count = 1, rate = 0.45} 828 local hex = {code = function(c) return "&#x" .. string.format("%x", c:byte()) .. ";" end, count = 1, rate = 0.45}
968 local dec = {code = function(c) return "&#" .. c:byte() .. ";" end, count = 0, rate = 0.45} 829 local dec = {code = function(c) return "&#" .. c:byte() .. ";" end, count = 0, rate = 0.45}
969 local plain = {code = function(c) return c end, count = 0, rate = 0.1} 830 local plain = {code = function(c) return c end, count = 0, rate = 0.1}
970 local codes = {hex, dec, plain} 831 local codes = {hex, dec, plain}
971 local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end 832 local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end
972 833
973 local out = "" 834 local out = ""
974 for i = 1,s:len() do 835 for i = 1,s:len() do
975 for _,code in ipairs(codes) do code.count = code.count + code.rate end 836 for _,code in ipairs(codes) do code.count = code.count + code.rate end
976 if codes[1].count < codes[2].count then swap(codes,1,2) end 837 if codes[1].count < codes[2].count then swap(codes,1,2) end
977 if codes[2].count < codes[3].count then swap(codes,2,3) end 838 if codes[2].count < codes[3].count then swap(codes,2,3) end
978 if codes[1].count < codes[2].count then swap(codes,1,2) end 839 if codes[1].count < codes[2].count then swap(codes,1,2) end
979 840
980 local code = codes[1] 841 local code = codes[1]
981 local c = s:sub(i,i) 842 local c = s:sub(i,i)
982 -- Force encoding of "@" to make email address more invisible. 843 -- Force encoding of "@" to make email address more invisible.
983 if c == "@" and code == plain then code = codes[2] end 844 if c == "@" and code == plain then code = codes[2] end
984 out = out .. code.code(c) 845 out = out .. code.code(c)
985 code.count = code.count - 1 846 code.count = code.count - 1
986 end 847 end
987 return out 848 return out
988 end 849 end
989 local function mail(s) 850 local function mail(s)
990 s = unescape_special_chars(s) 851 s = unescape_special_chars(s)
991 local address = encode_email_address("mailto:" .. s) 852 local address = encode_email_address("mailto:" .. s)
992 local text = encode_email_address(s) 853 local text = encode_email_address(s)
993 return add_escape("<a href=\"" .. address .. "\">") .. text .. "</a>" 854 return add_escape("<a href=\"" .. address .. "\">") .. text .. "</a>"
994 end 855 end
995 -- links 856 -- links
996 text = text:gsub("<(https?:[^'\">%s]+)>", link) 857 text = text:gsub("<(https?:[^'\">%s]+)>", link)
997 text = text:gsub("<(ftp:[^'\">%s]+)>", link) 858 text = text:gsub("<(ftp:[^'\">%s]+)>", link)
998 859
999 -- mail 860 -- mail
1000 text = text:gsub("<mailto:([^'\">%s]+)>", mail) 861 text = text:gsub("<mailto:([^'\">%s]+)>", mail)
1001 text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail) 862 text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail)
1002 return text 863 return text
1003 end 864 end
1004 865
1005 -- Encode free standing amps (&) and angles (<)... note that this does not 866 -- Encode free standing amps (&) and angles (<)... note that this does not
1006 -- encode free >. 867 -- encode free >.
1007 function amps_and_angles(s) 868 local function amps_and_angles(s)
1008 -- encode amps not part of &..; expression 869 -- encode amps not part of &..; expression
1009 local pos = 1 870 local pos = 1
1010 while true do 871 while true do
1011 local amp = s:find("&", pos) 872 local amp = s:find("&", pos)
1012 if not amp then break end 873 if not amp then break end
1013 local semi = s:find(";", amp+1) 874 local semi = s:find(";", amp+1)
1014 local stop = s:find("[ \t\n&]", amp+1) 875 local stop = s:find("[ \t\n&]", amp+1)
1015 if not semi or (stop and stop < semi) or (semi - amp) > 15 then 876 if not semi or (stop and stop < semi) or (semi - amp) > 15 then
1016 s = s:sub(1,amp-1) .. "&amp;" .. s:sub(amp+1) 877 s = s:sub(1,amp-1) .. "&amp;" .. s:sub(amp+1)
1017 pos = amp+1 878 pos = amp+1
1018 else 879 else
1019 pos = amp+1 880 pos = amp+1
1020 end 881 end
1021 end 882 end
1022 883
1023 -- encode naked <'s 884 -- encode naked <'s
1024 s = s:gsub("<([^a-zA-Z/?$!])", "&lt;%1") 885 s = s:gsub("<([^a-zA-Z/?$!])", "&lt;%1")
1025 s = s:gsub("<$", "&lt;") 886 s = s:gsub("<$", "&lt;")
1026 887
1027 -- what about >, nothing done in the original markdown source to handle them 888 -- what about >, nothing done in the original markdown source to handle them
1028 return s 889 return s
1029 end 890 end
1030 891
1031 -- Handles emphasis markers (* and _) in the text. 892 -- Handles emphasis markers (* and _) in the text.
1032 function emphasis(text) 893 local function emphasis(text)
1033 for _, s in ipairs {"%*%*", "%_%_"} do 894 for _, s in ipairs {"%*%*", "%_%_"} do
1034 text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "<strong>%1</strong>") 895 text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "<strong>%1</strong>")
1035 text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "<strong>%1</strong>") 896 text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "<strong>%1</strong>")
1036 end 897 end
1037 for _, s in ipairs {"%*", "%_"} do 898 for _, s in ipairs {"%*", "%_"} do
1038 text = text:gsub(s .. "([^%s_])" .. s, "<em>%1</em>") 899 text = text:gsub(s .. "([^%s_])" .. s, "<em>%1</em>")
1039 text = text:gsub(s .. "(<strong>[^%s_]</strong>)" .. s, "<em>%1</em>") 900 text = text:gsub(s .. "(<strong>[^%s_]</strong>)" .. s, "<em>%1</em>")
1040 text = text:gsub(s .. "([^%s_][^<>_]-[^%s_])" .. s, "<em>%1</em>") 901 text = text:gsub(s .. "([^%s_][^<>_]-[^%s_])" .. s, "<em>%1</em>")
1041 text = text:gsub(s .. "([^<>_]-<strong>[^<>_]-</strong>[^<>_]-)" .. s, "<em>%1</em>") 902 text = text:gsub(s .. "([^<>_]-<strong>[^<>_]-</strong>[^<>_]-)" .. s, "<em>%1</em>")
1042 end 903 end
1043 return text 904 return text
1044 end 905 end
1045 906
1046 -- Handles line break markers in the text. 907 -- Handles line break markers in the text.
1047 function line_breaks(text) 908 local function line_breaks(text)
1048 return text:gsub(" +\n", " <br/>\n") 909 return text:gsub(" +\n", " <br/>\n")
1049 end 910 end
1050 911
1051 -- Perform all span level transforms. 912 -- Perform all span level transforms.
1052 function span_transform(text) 913 function span_transform(text)
1053 text = code_spans(text) 914 text = code_spans(text)
1054 text = escape_special_chars(text) 915 text = escape_special_chars(text)
1055 text = images(text) 916 text = images(text)
1056 text = anchors(text) 917 text = anchors(text)
1057 text = auto_links(text) 918 text = auto_links(text)
1058 text = amps_and_angles(text) 919 text = amps_and_angles(text)
1059 text = emphasis(text) 920 text = emphasis(text)
1060 text = line_breaks(text) 921 text = line_breaks(text)
1061 return text 922 return text
1062 end 923 end
1063 924
1064 ---------------------------------------------------------------------- 925 ----------------------------------------------------------------------
1065 -- Markdown 926 -- Markdown
1066 ---------------------------------------------------------------------- 927 ----------------------------------------------------------------------
1067 928
1068 -- Clean up the text by normalizing some possible variations to make further 929 -- Clean up the text by normalizing some possible variations to make further
1069 -- processing easier. 930 -- processing easier.
1070 function cleanup(text) 931 local function cleanup(text)
1071 -- Standardize line endings 932 -- Standardize line endings
1072 text = text:gsub("\r\n", "\n") -- DOS to UNIX 933 text = text:gsub("\r\n", "\n") -- DOS to UNIX
1073 text = text:gsub("\r", "\n") -- Mac to UNIX 934 text = text:gsub("\r", "\n") -- Mac to UNIX
1074 935
1075 -- Convert all tabs to spaces 936 -- Convert all tabs to spaces
1076 text = detab(text) 937 text = detab(text)
1077 938
1078 -- Strip lines with only spaces and tabs 939 -- Strip lines with only spaces and tabs
1079 while true do 940 while true do
1080 local subs 941 local subs
1081 text, subs = text:gsub("\n[ \t]+\n", "\n\n") 942 text, subs = text:gsub("\n[ \t]+\n", "\n\n")
1082 if subs == 0 then break end 943 if subs == 0 then break end
1083 end 944 end
1084 945
1085 return "\n" .. text .. "\n" 946 return "\n" .. text .. "\n"
1086 end 947 end
1087 948
1088 -- Strips link definitions from the text and stores the data in a lookup table. 949 -- Strips link definitions from the text and stores the data in a lookup table.
1089 function strip_link_definitions(text) 950 local function strip_link_definitions(text)
1090 local linkdb = {} 951 local linkdb = {}
1091 952
1092 local function link_def(id, url, title) 953 local function link_def(id, url, title)
1093 id = id:match("%[(.+)%]"):lower() 954 id = id:match("%[(.+)%]"):lower()
1094 linkdb[id] = linkdb[id] or {} 955 linkdb[id] = linkdb[id] or {}
1095 linkdb[id].url = url or linkdb[id].url 956 linkdb[id].url = url or linkdb[id].url
1096 linkdb[id].title = title or linkdb[id].title 957 linkdb[id].title = title or linkdb[id].title
1097 return "" 958 return ""
1098 end 959 end
1099 960
1100 local def_no_title = "\n ? ? ?(%b[]):[ \t]*\n?[ \t]*<?([^%s>]+)>?[ \t]*" 961 local def_no_title = "\n ? ? ?(%b[]):[ \t]*\n?[ \t]*<?([^%s>]+)>?[ \t]*"
1101 local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" 962 local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*"
1102 local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" 963 local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*"
1103 local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*" 964 local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*"
1104 965
1105 text = text:gsub(def_title1, link_def) 966 text = text:gsub(def_title1, link_def)
1106 text = text:gsub(def_title2, link_def) 967 text = text:gsub(def_title2, link_def)
1107 text = text:gsub(def_title3, link_def) 968 text = text:gsub(def_title3, link_def)
1108 text = text:gsub(def_no_title, link_def) 969 text = text:gsub(def_no_title, link_def)
1109 return text, linkdb 970 return text, linkdb
1110 end 971 end
1111
1112 link_database = {}
1113 972
1114 -- Main markdown processing function 973 -- Main markdown processing function
1115 function markdown(text) 974 local function markdown(text)
1116 init_hash(text) 975 init_hash(text)
1117 init_escape_table() 976 init_escape_table()
1118 977
1119 text = cleanup(text) 978 text = cleanup(text)
1120 text = protect(text) 979 text = protect(text)
1121 text, link_database = strip_link_definitions(text) 980 text, link_database = strip_link_definitions(text)
1122 text = block_transform(text) 981 text = block_transform(text)
1123 text = unescape_special_chars(text) 982 text = unescape_special_chars(text)
1124 return text 983 return text
1125 end 984 end
1126 985
1127 ---------------------------------------------------------------------- 986 ----------------------------------------------------------------------
1128 -- End of module 987 -- End of module
1129 ---------------------------------------------------------------------- 988 ----------------------------------------------------------------------
1130 989
1131 setfenv(1, _G) 990 -- For compatibility, set markdown function as a global
1132 M.lock(M) 991 _G.markdown = markdown
1133
1134 -- Expose markdown function to the world
1135 markdown = M.markdown
1136 992
1137 -- Class for parsing command-line options 993 -- Class for parsing command-line options
1138 local OptionParser = {} 994 local OptionParser = {}
1139 OptionParser.__index = OptionParser 995 OptionParser.__index = OptionParser
1140 996
1141 -- Creates a new option parser 997 -- Creates a new option parser
1142 function OptionParser:new() 998 function OptionParser:new()
1143 local o = {short = {}, long = {}} 999 local o = {short = {}, long = {}}
1144 setmetatable(o, self) 1000 setmetatable(o, self)
1145 return o 1001 return o
1146 end 1002 end
1147 1003
1148 -- Calls f() whenever a flag with specified short and long name is encountered 1004 -- Calls f() whenever a flag with specified short and long name is encountered
1149 function OptionParser:flag(short, long, f) 1005 function OptionParser:flag(short, long, f)
1150 local info = {type = "flag", f = f} 1006 local info = {type = "flag", f = f}
1151 if short then self.short[short] = info end 1007 if short then self.short[short] = info end
1152 if long then self.long[long] = info end 1008 if long then self.long[long] = info end
1153 end 1009 end
1154 1010
1155 -- Calls f(param) whenever a parameter flag with specified short and long name is encountered 1011 -- Calls f(param) whenever a parameter flag with specified short and long name is encountered
1156 function OptionParser:param(short, long, f) 1012 function OptionParser:param(short, long, f)
1157 local info = {type = "param", f = f} 1013 local info = {type = "param", f = f}
1158 if short then self.short[short] = info end 1014 if short then self.short[short] = info end
1159 if long then self.long[long] = info end 1015 if long then self.long[long] = info end
1160 end 1016 end
1161 1017
1162 -- Calls f(v) for each non-flag argument 1018 -- Calls f(v) for each non-flag argument
1163 function OptionParser:arg(f) 1019 function OptionParser:arg(f)
1164 self.arg = f 1020 self.arg = f
1165 end 1021 end
1166 1022
1167 -- Runs the option parser for the specified set of arguments. Returns true if all arguments 1023 -- Runs the option parser for the specified set of arguments. Returns true if all arguments
1168 -- where successfully parsed and false otherwise. 1024 -- where successfully parsed and false otherwise.
1169 function OptionParser:run(args) 1025 function OptionParser:run(args)
1170 local pos = 1 1026 local pos = 1
1171 while pos <= #args do 1027 while pos <= #args do
1172 local arg = args[pos] 1028 local arg = args[pos]
1173 if arg == "--" then 1029 if arg == "--" then
1174 for i=pos+1,#args do 1030 for i=pos+1,#args do
1175 if self.arg then self.arg(args[i]) end 1031 if self.arg then self.arg(args[i]) end
1176 return true 1032 return true
1177 end 1033 end
1178 end 1034 end
1179 if arg:match("^%-%-") then 1035 if arg:match("^%-%-") then
1180 local info = self.long[arg:sub(3)] 1036 local info = self.long[arg:sub(3)]
1181 if not info then print("Unknown flag: " .. arg) return false end 1037 if not info then print("Unknown flag: " .. arg) return false end
1182 if info.type == "flag" then 1038 if info.type == "flag" then
1183 info.f() 1039 info.f()
1184 pos = pos + 1 1040 pos = pos + 1
1185 else 1041 else
1186 param = args[pos+1] 1042 local param = args[pos+1]
1187 if not param then print("No parameter for flag: " .. arg) return false end 1043 if not param then print("No parameter for flag: " .. arg) return false end
1188 info.f(param) 1044 info.f(param)
1189 pos = pos+2 1045 pos = pos+2
1190 end 1046 end
1191 elseif arg:match("^%-") then 1047 elseif arg:match("^%-") then
1192 for i=2,arg:len() do 1048 for i=2,arg:len() do
1193 local c = arg:sub(i,i) 1049 local c = arg:sub(i,i)
1194 local info = self.short[c] 1050 local info = self.short[c]
1195 if not info then print("Unknown flag: -" .. c) return false end 1051 if not info then print("Unknown flag: -" .. c) return false end
1196 if info.type == "flag" then 1052 if info.type == "flag" then
1197 info.f() 1053 info.f()
1198 else 1054 else
1199 if i == arg:len() then 1055 if i == arg:len() then
1200 param = args[pos+1] 1056 local param = args[pos+1]
1201 if not param then print("No parameter for flag: -" .. c) return false end 1057 if not param then print("No parameter for flag: -" .. c) return false end
1202 info.f(param) 1058 info.f(param)
1203 pos = pos + 1 1059 pos = pos + 1
1204 else 1060 else
1205 param = arg:sub(i+1) 1061 local param = arg:sub(i+1)
1206 info.f(param) 1062 info.f(param)
1207 end 1063 end
1208 break 1064 break
1209 end 1065 end
1210 end 1066 end
1211 pos = pos + 1 1067 pos = pos + 1
1212 else 1068 else
1213 if self.arg then self.arg(arg) end 1069 if self.arg then self.arg(arg) end
1214 pos = pos + 1 1070 pos = pos + 1
1215 end 1071 end
1216 end 1072 end
1217 return true 1073 return true
1074 end
1075
1076 local function read_file(path, descr)
1077 local file = io.open(path) or error("Could not open " .. descr .. " file: " .. path)
1078 local contents = file:read("*a") or error("Could not read " .. descr .. " from " .. path)
1079 file:close()
1080 return contents
1218 end 1081 end
1219 1082
1220 -- Handles the case when markdown is run from the command line 1083 -- Handles the case when markdown is run from the command line
1221 local function run_command_line(arg) 1084 local function run_command_line(arg)
1222 -- Generate output for input s given options 1085 -- Generate output for input s given options
1223 local function run(s, options) 1086 local function run(s, options)
1224 s = markdown(s) 1087 s = markdown(s)
1225 if not options.wrap_header then return s end 1088 if not options.wrap_header then return s end
1226 local header = "" 1089 local header
1227 if options.header then 1090 if options.header then
1228 local f = io.open(options.header) or error("Could not open file: " .. options.header) 1091 header = read_file(options.header, "header")
1229 header = f:read("*a") 1092 else
1230 f:close() 1093 header = [[
1231 else
1232 header = [[
1233 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 1094 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
1234 <html> 1095 <html>
1235 <head> 1096 <head>
1236 <meta http-equiv="content-type" content="text/html; charset=CHARSET" /> 1097 <meta http-equiv="content-type" content="text/html; charset=CHARSET" />
1237 <title>TITLE</title> 1098 <title>TITLE</title>
1238 <link rel="stylesheet" type="text/css" href="STYLESHEET" /> 1099 <link rel="stylesheet" type="text/css" href="STYLESHEET" />
1239 </head> 1100 </head>
1240 <body> 1101 <body>
1241 ]] 1102 ]]
1242 local title = options.title or s:match("<h1>(.-)</h1>") or s:match("<h2>(.-)</h2>") or 1103 local title = options.title or s:match("<h1>(.-)</h1>") or s:match("<h2>(.-)</h2>") or
1243 s:match("<h3>(.-)</h3>") or "Untitled" 1104 s:match("<h3>(.-)</h3>") or "Untitled"
1244 header = header:gsub("TITLE", title) 1105 header = header:gsub("TITLE", title)
1245 if options.inline_style then 1106 if options.inline_style then
1246 local style = "" 1107 local style = read_file(options.stylesheet, "style sheet")
1247 local f = io.open(options.stylesheet) 1108 header = header:gsub('<link rel="stylesheet" type="text/css" href="STYLESHEET" />',
1248 if f then 1109 "<style type=\"text/css\"><!--\n" .. style .. "\n--></style>")
1249 style = f:read("*a") f:close() 1110 else
1250 else 1111 header = header:gsub("STYLESHEET", options.stylesheet)
1251 error("Could not include style sheet " .. options.stylesheet .. ": File not found") 1112 end
1252 end 1113 header = header:gsub("CHARSET", options.charset)
1253 header = header:gsub('<link rel="stylesheet" type="text/css" href="STYLESHEET" />', 1114 end
1254 "<style type=\"text/css\"><!--\n" .. style .. "\n--></style>") 1115 local footer = "</body></html>"
1255 else 1116 if options.footer then
1256 header = header:gsub("STYLESHEET", options.stylesheet) 1117 footer = read_file(options.footer, "footer")
1257 end 1118 end
1258 header = header:gsub("CHARSET", options.charset) 1119 return header .. s .. footer
1259 end 1120 end
1260 local footer = "</body></html>" 1121
1261 if options.footer then 1122 -- Generate output path name from input path name given options.
1262 local f = io.open(options.footer) or error("Could not open file: " .. options.footer) 1123 local function outpath(path, options)
1263 footer = f:read("*a") 1124 if options.append then return path .. ".html" end
1264 f:close() 1125 local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end
1265 end 1126 m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end
1266 return header .. s .. footer 1127 return path .. ".html"
1267 end 1128 end
1268 1129
1269 -- Generate output path name from input path name given options. 1130 -- Default commandline options
1270 local function outpath(path, options) 1131 local options = {
1271 if options.append then return path .. ".html" end 1132 wrap_header = true,
1272 local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end 1133 header = nil,
1273 m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end 1134 footer = nil,
1274 return path .. ".html" 1135 charset = "utf-8",
1275 end 1136 title = nil,
1276 1137 stylesheet = "default.css",
1277 -- Default commandline options 1138 inline_style = false
1278 local options = { 1139 }
1279 wrap_header = true, 1140 local help = [[
1280 header = nil,
1281 footer = nil,
1282 charset = "utf-8",
1283 title = nil,
1284 stylesheet = "default.css",
1285 inline_style = false
1286 }
1287 local help = [[
1288 Usage: markdown.lua [OPTION] [FILE] 1141 Usage: markdown.lua [OPTION] [FILE]
1289 Runs the markdown text markup to HTML converter on each file specified on the 1142 Runs the markdown text markup to HTML converter on each file specified on the
1290 command line. If no files are specified, runs on standard input. 1143 command line. If no files are specified, runs on standard input.
1291 1144
1292 No header: 1145 No header:
1296 -f, --footer FILE Use content of FILE for footer. 1149 -f, --footer FILE Use content of FILE for footer.
1297 Generated header: 1150 Generated header:
1298 -c, --charset SET Specifies charset (default utf-8). 1151 -c, --charset SET Specifies charset (default utf-8).
1299 -i, --title TITLE Specifies title (default from first <h1> tag). 1152 -i, --title TITLE Specifies title (default from first <h1> tag).
1300 -s, --style STYLE Specifies style sheet file (default default.css). 1153 -s, --style STYLE Specifies style sheet file (default default.css).
1301 -l, --inline-style Include the style sheet file inline in the header. 1154 -l, --inline-style Include the style sheet file inline in the header.
1302 Generated files: 1155 Generated files:
1303 -a, --append Append .html extension (instead of replacing). 1156 -a, --append Append .html extension (instead of replacing).
1304 Other options: 1157 Other options:
1305 -h, --help Print this help text. 1158 -h, --help Print this help text.
1306 -t, --test Run the unit tests. 1159 -t, --test Run the unit tests.
1307 ]] 1160 ]]
1308 1161
1309 local run_stdin = true 1162 local run_stdin = true
1310 local op = OptionParser:new() 1163 local op = OptionParser:new()
1311 op:flag("n", "no-wrap", function () options.wrap_header = false end) 1164 op:flag("n", "no-wrap", function () options.wrap_header = false end)
1312 op:param("e", "header", function (x) options.header = x end) 1165 op:param("e", "header", function (x) options.header = x end)
1313 op:param("f", "footer", function (x) options.footer = x end) 1166 op:param("f", "footer", function (x) options.footer = x end)
1314 op:param("c", "charset", function (x) options.charset = x end) 1167 op:param("c", "charset", function (x) options.charset = x end)
1315 op:param("i", "title", function(x) options.title = x end) 1168 op:param("i", "title", function(x) options.title = x end)
1316 op:param("s", "style", function(x) options.stylesheet = x end) 1169 op:param("s", "style", function(x) options.stylesheet = x end)
1317 op:flag("l", "inline-style", function(x) options.inline_style = true end) 1170 op:flag("l", "inline-style", function() options.inline_style = true end)
1318 op:flag("a", "append", function() options.append = true end) 1171 op:flag("a", "append", function() options.append = true end)
1319 op:flag("t", "test", function() 1172 op:flag("t", "test", function()
1320 local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua") 1173 local n = arg[0]:gsub("markdown%.lua", "markdown-tests.lua")
1321 local f = io.open(n) 1174 local f = io.open(n)
1322 if f then 1175 if f then
1323 f:close() dofile(n) 1176 f:close()
1324 else 1177 package.loaded.markdown = markdown
1325 error("Cannot find markdown-tests.lua") 1178 dofile(n)
1326 end 1179 else
1327 run_stdin = false 1180 error("Cannot find markdown-tests.lua")
1328 end) 1181 end
1329 op:flag("h", "help", function() print(help) run_stdin = false end) 1182 run_stdin = false
1330 op:arg(function(path) 1183 end)
1331 local file = io.open(path) or error("Could not open file: " .. path) 1184 op:flag("h", "help", function() print(help) run_stdin = false end)
1332 local s = file:read("*a") 1185 op:arg(function(path)
1333 file:close() 1186 local s = read_file(path, "input")
1334 s = run(s, options) 1187 s = run(s, options)
1335 file = io.open(outpath(path, options), "w") or error("Could not open output file: " .. outpath(path, options)) 1188 local file = io.open(outpath(path, options), "w") or error("Could not open output file: " .. outpath(path, options))
1336 file:write(s) 1189 file:write(s)
1337 file:close() 1190 file:close()
1338 run_stdin = false 1191 run_stdin = false
1339 end 1192 end
1340 ) 1193 )
1341 1194
1342 if not op:run(arg) then 1195 if not op:run(arg) then
1343 print(help) 1196 print(help)
1344 run_stdin = false 1197 run_stdin = false
1345 end 1198 end
1346 1199
1347 if run_stdin then 1200 if run_stdin then
1348 local s = io.read("*a") 1201 local s = io.read("*a")
1349 s = run(s, options) 1202 s = run(s, options)
1350 io.write(s) 1203 io.write(s)
1351 end 1204 end
1352 end 1205 end
1353 1206
1354 -- If we are being run from the command-line, act accordingly 1207 -- If we are being run from the command-line, act accordingly
1355 if arg and arg[0]:find("markdown%.lua$") then 1208 if arg and arg[0]:find("markdown%.lua$") then
1356 run_command_line(arg) 1209 run_command_line(arg)
1357 else 1210 else
1358 return markdown 1211 return markdown
1359 end 1212 end

mercurial