smartypants.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. //
  2. // Blackfriday Markdown Processor
  3. // Available at http://github.com/russross/blackfriday
  4. //
  5. // Copyright © 2011 Russ Ross <russ@russross.com>.
  6. // Distributed under the Simplified BSD License.
  7. // See README.md for details.
  8. //
  9. //
  10. //
  11. // SmartyPants rendering
  12. //
  13. //
  14. package blackfriday
  15. import (
  16. "bytes"
  17. )
  18. type smartypantsData struct {
  19. inSingleQuote bool
  20. inDoubleQuote bool
  21. }
  22. func wordBoundary(c byte) bool {
  23. return c == 0 || isspace(c) || ispunct(c)
  24. }
  25. func tolower(c byte) byte {
  26. if c >= 'A' && c <= 'Z' {
  27. return c - 'A' + 'a'
  28. }
  29. return c
  30. }
  31. func isdigit(c byte) bool {
  32. return c >= '0' && c <= '9'
  33. }
  34. func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool {
  35. // edge of the buffer is likely to be a tag that we don't get to see,
  36. // so we treat it like text sometimes
  37. // enumerate all sixteen possibilities for (previousChar, nextChar)
  38. // each can be one of {0, space, punct, other}
  39. switch {
  40. case previousChar == 0 && nextChar == 0:
  41. // context is not any help here, so toggle
  42. *isOpen = !*isOpen
  43. case isspace(previousChar) && nextChar == 0:
  44. // [ "] might be [ "<code>foo...]
  45. *isOpen = true
  46. case ispunct(previousChar) && nextChar == 0:
  47. // [!"] hmm... could be [Run!"] or [("<code>...]
  48. *isOpen = false
  49. case /* isnormal(previousChar) && */ nextChar == 0:
  50. // [a"] is probably a close
  51. *isOpen = false
  52. case previousChar == 0 && isspace(nextChar):
  53. // [" ] might be [...foo</code>" ]
  54. *isOpen = false
  55. case isspace(previousChar) && isspace(nextChar):
  56. // [ " ] context is not any help here, so toggle
  57. *isOpen = !*isOpen
  58. case ispunct(previousChar) && isspace(nextChar):
  59. // [!" ] is probably a close
  60. *isOpen = false
  61. case /* isnormal(previousChar) && */ isspace(nextChar):
  62. // [a" ] this is one of the easy cases
  63. *isOpen = false
  64. case previousChar == 0 && ispunct(nextChar):
  65. // ["!] hmm... could be ["$1.95] or [</code>"!...]
  66. *isOpen = false
  67. case isspace(previousChar) && ispunct(nextChar):
  68. // [ "!] looks more like [ "$1.95]
  69. *isOpen = true
  70. case ispunct(previousChar) && ispunct(nextChar):
  71. // [!"!] context is not any help here, so toggle
  72. *isOpen = !*isOpen
  73. case /* isnormal(previousChar) && */ ispunct(nextChar):
  74. // [a"!] is probably a close
  75. *isOpen = false
  76. case previousChar == 0 /* && isnormal(nextChar) */ :
  77. // ["a] is probably an open
  78. *isOpen = true
  79. case isspace(previousChar) /* && isnormal(nextChar) */ :
  80. // [ "a] this is one of the easy cases
  81. *isOpen = true
  82. case ispunct(previousChar) /* && isnormal(nextChar) */ :
  83. // [!"a] is probably an open
  84. *isOpen = true
  85. default:
  86. // [a'b] maybe a contraction?
  87. *isOpen = false
  88. }
  89. out.WriteByte('&')
  90. if *isOpen {
  91. out.WriteByte('l')
  92. } else {
  93. out.WriteByte('r')
  94. }
  95. out.WriteByte(quote)
  96. out.WriteString("quo;")
  97. return true
  98. }
  99. func smartSingleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  100. if len(text) >= 2 {
  101. t1 := tolower(text[1])
  102. if t1 == '\'' {
  103. nextChar := byte(0)
  104. if len(text) >= 3 {
  105. nextChar = text[2]
  106. }
  107. if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote) {
  108. return 1
  109. }
  110. }
  111. if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
  112. out.WriteString("&rsquo;")
  113. return 0
  114. }
  115. if len(text) >= 3 {
  116. t2 := tolower(text[2])
  117. if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
  118. (len(text) < 4 || wordBoundary(text[3])) {
  119. out.WriteString("&rsquo;")
  120. return 0
  121. }
  122. }
  123. }
  124. nextChar := byte(0)
  125. if len(text) > 1 {
  126. nextChar = text[1]
  127. }
  128. if smartQuoteHelper(out, previousChar, nextChar, 's', &smrt.inSingleQuote) {
  129. return 0
  130. }
  131. out.WriteByte(text[0])
  132. return 0
  133. }
  134. func smartParens(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  135. if len(text) >= 3 {
  136. t1 := tolower(text[1])
  137. t2 := tolower(text[2])
  138. if t1 == 'c' && t2 == ')' {
  139. out.WriteString("&copy;")
  140. return 2
  141. }
  142. if t1 == 'r' && t2 == ')' {
  143. out.WriteString("&reg;")
  144. return 2
  145. }
  146. if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
  147. out.WriteString("&trade;")
  148. return 3
  149. }
  150. }
  151. out.WriteByte(text[0])
  152. return 0
  153. }
  154. func smartDash(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  155. if len(text) >= 2 {
  156. if text[1] == '-' {
  157. out.WriteString("&mdash;")
  158. return 1
  159. }
  160. if wordBoundary(previousChar) && wordBoundary(text[1]) {
  161. out.WriteString("&ndash;")
  162. return 0
  163. }
  164. }
  165. out.WriteByte(text[0])
  166. return 0
  167. }
  168. func smartDashLatex(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  169. if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
  170. out.WriteString("&mdash;")
  171. return 2
  172. }
  173. if len(text) >= 2 && text[1] == '-' {
  174. out.WriteString("&ndash;")
  175. return 1
  176. }
  177. out.WriteByte(text[0])
  178. return 0
  179. }
  180. func smartAmpVariant(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte, quote byte) int {
  181. if bytes.HasPrefix(text, []byte("&quot;")) {
  182. nextChar := byte(0)
  183. if len(text) >= 7 {
  184. nextChar = text[6]
  185. }
  186. if smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote) {
  187. return 5
  188. }
  189. }
  190. if bytes.HasPrefix(text, []byte("&#0;")) {
  191. return 3
  192. }
  193. out.WriteByte('&')
  194. return 0
  195. }
  196. func smartAmp(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  197. return smartAmpVariant(out, smrt, previousChar, text, 'd')
  198. }
  199. func smartAmpAngledQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  200. return smartAmpVariant(out, smrt, previousChar, text, 'a')
  201. }
  202. func smartPeriod(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  203. if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
  204. out.WriteString("&hellip;")
  205. return 2
  206. }
  207. if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
  208. out.WriteString("&hellip;")
  209. return 4
  210. }
  211. out.WriteByte(text[0])
  212. return 0
  213. }
  214. func smartBacktick(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  215. if len(text) >= 2 && text[1] == '`' {
  216. nextChar := byte(0)
  217. if len(text) >= 3 {
  218. nextChar = text[2]
  219. }
  220. if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote) {
  221. return 1
  222. }
  223. }
  224. out.WriteByte(text[0])
  225. return 0
  226. }
  227. func smartNumberGeneric(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  228. if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
  229. // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
  230. // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
  231. // and avoid changing dates like 1/23/2005 into fractions.
  232. numEnd := 0
  233. for len(text) > numEnd && isdigit(text[numEnd]) {
  234. numEnd++
  235. }
  236. if numEnd == 0 {
  237. out.WriteByte(text[0])
  238. return 0
  239. }
  240. denStart := numEnd + 1
  241. if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
  242. denStart = numEnd + 3
  243. } else if len(text) < numEnd+2 || text[numEnd] != '/' {
  244. out.WriteByte(text[0])
  245. return 0
  246. }
  247. denEnd := denStart
  248. for len(text) > denEnd && isdigit(text[denEnd]) {
  249. denEnd++
  250. }
  251. if denEnd == denStart {
  252. out.WriteByte(text[0])
  253. return 0
  254. }
  255. if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
  256. out.WriteString("<sup>")
  257. out.Write(text[:numEnd])
  258. out.WriteString("</sup>&frasl;<sub>")
  259. out.Write(text[denStart:denEnd])
  260. out.WriteString("</sub>")
  261. return denEnd - 1
  262. }
  263. }
  264. out.WriteByte(text[0])
  265. return 0
  266. }
  267. func smartNumber(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  268. if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
  269. if text[0] == '1' && text[1] == '/' && text[2] == '2' {
  270. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
  271. out.WriteString("&frac12;")
  272. return 2
  273. }
  274. }
  275. if text[0] == '1' && text[1] == '/' && text[2] == '4' {
  276. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
  277. out.WriteString("&frac14;")
  278. return 2
  279. }
  280. }
  281. if text[0] == '3' && text[1] == '/' && text[2] == '4' {
  282. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
  283. out.WriteString("&frac34;")
  284. return 2
  285. }
  286. }
  287. }
  288. out.WriteByte(text[0])
  289. return 0
  290. }
  291. func smartDoubleQuoteVariant(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte, quote byte) int {
  292. nextChar := byte(0)
  293. if len(text) > 1 {
  294. nextChar = text[1]
  295. }
  296. if !smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote) {
  297. out.WriteString("&quot;")
  298. }
  299. return 0
  300. }
  301. func smartDoubleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  302. return smartDoubleQuoteVariant(out, smrt, previousChar, text, 'd')
  303. }
  304. func smartAngledDoubleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  305. return smartDoubleQuoteVariant(out, smrt, previousChar, text, 'a')
  306. }
  307. func smartLeftAngle(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  308. i := 0
  309. for i < len(text) && text[i] != '>' {
  310. i++
  311. }
  312. out.Write(text[:i+1])
  313. return i
  314. }
  315. type smartCallback func(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int
  316. type smartypantsRenderer [256]smartCallback
  317. func smartypants(flags int) *smartypantsRenderer {
  318. r := new(smartypantsRenderer)
  319. if flags&HTML_SMARTYPANTS_ANGLED_QUOTES == 0 {
  320. r['"'] = smartDoubleQuote
  321. r['&'] = smartAmp
  322. } else {
  323. r['"'] = smartAngledDoubleQuote
  324. r['&'] = smartAmpAngledQuote
  325. }
  326. r['\''] = smartSingleQuote
  327. r['('] = smartParens
  328. if flags&HTML_SMARTYPANTS_DASHES != 0 {
  329. if flags&HTML_SMARTYPANTS_LATEX_DASHES == 0 {
  330. r['-'] = smartDash
  331. } else {
  332. r['-'] = smartDashLatex
  333. }
  334. }
  335. r['.'] = smartPeriod
  336. if flags&HTML_SMARTYPANTS_FRACTIONS == 0 {
  337. r['1'] = smartNumber
  338. r['3'] = smartNumber
  339. } else {
  340. for ch := '1'; ch <= '9'; ch++ {
  341. r[ch] = smartNumberGeneric
  342. }
  343. }
  344. r['<'] = smartLeftAngle
  345. r['`'] = smartBacktick
  346. return r
  347. }