helpers.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. // Copyright (c) 2014, David Kitchen <david@buro9.com>
  2. //
  3. // All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice, this
  9. // list of conditions and the following disclaimer.
  10. //
  11. // * Redistributions in binary form must reproduce the above copyright notice,
  12. // this list of conditions and the following disclaimer in the documentation
  13. // and/or other materials provided with the distribution.
  14. //
  15. // * Neither the name of the organisation (Microcosm) nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22. // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  23. // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  25. // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  26. // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  27. // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. package bluemonday
  30. import (
  31. "encoding/base64"
  32. "net/url"
  33. "regexp"
  34. )
  35. // A selection of regular expressions that can be used as .Matching() rules on
  36. // HTML attributes.
  37. var (
  38. // CellAlign handles the `align` attribute
  39. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align
  40. CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)
  41. // CellVerticalAlign handles the `valign` attribute
  42. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign
  43. CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)
  44. // Direction handles the `dir` attribute
  45. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir
  46. Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)
  47. // ImageAlign handles the `align` attribute on the `image` tag
  48. // http://www.w3.org/MarkUp/Test/Img/imgtest.html
  49. ImageAlign = regexp.MustCompile(
  50. `(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,
  51. )
  52. // Integer describes whole positive integers (including 0) used in places
  53. // like td.colspan
  54. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan
  55. Integer = regexp.MustCompile(`^[0-9]+$`)
  56. // ISO8601 according to the W3 group is only a subset of the ISO8601
  57. // standard: http://www.w3.org/TR/NOTE-datetime
  58. //
  59. // Used in places like time.datetime
  60. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime
  61. //
  62. // Matches patterns:
  63. // Year:
  64. // YYYY (eg 1997)
  65. // Year and month:
  66. // YYYY-MM (eg 1997-07)
  67. // Complete date:
  68. // YYYY-MM-DD (eg 1997-07-16)
  69. // Complete date plus hours and minutes:
  70. // YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
  71. // Complete date plus hours, minutes and seconds:
  72. // YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
  73. // Complete date plus hours, minutes, seconds and a decimal fraction of a
  74. // second
  75. // YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
  76. ISO8601 = regexp.MustCompile(
  77. `^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` +
  78. `?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,
  79. )
  80. // ListType encapsulates the common value as well as the latest spec
  81. // values for lists
  82. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
  83. ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)
  84. // SpaceSeparatedTokens is used in places like `a.rel` and the common attribute
  85. // `class` which both contain space delimited lists of data tokens
  86. // http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def
  87. // Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers
  88. SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)
  89. // Number is a double value used on HTML5 meter and progress elements
  90. // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element
  91. Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)
  92. // NumberOrPercent is used predominantly as units of measurement in width
  93. // and height attributes
  94. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height
  95. NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)
  96. // Paragraph of text in an attribute such as *.'title', img.alt, etc
  97. // https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title
  98. // Note that we are not allowing chars that could close tags like '>'
  99. Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)
  100. // dataURIImagePrefix is used by AllowDataURIImages to define the acceptable
  101. // prefix of data URIs that contain common web image formats.
  102. //
  103. // This is not exported as it's not useful by itself, and only has value
  104. // within the AllowDataURIImages func
  105. dataURIImagePrefix = regexp.MustCompile(
  106. `^image/(gif|jpeg|png|webp);base64,`,
  107. )
  108. )
  109. // AllowStandardURLs is a convenience function that will enable rel="nofollow"
  110. // on "a", "area" and "link" (if you have allowed those elements) and will
  111. // ensure that the URL values are parseable and either relative or belong to the
  112. // "mailto", "http", or "https" schemes
  113. func (p *Policy) AllowStandardURLs() {
  114. // URLs must be parseable by net/url.Parse()
  115. p.RequireParseableURLs(true)
  116. // !url.IsAbs() is permitted
  117. p.AllowRelativeURLs(true)
  118. // Most common URL schemes only
  119. p.AllowURLSchemes("mailto", "http", "https")
  120. // For all anchors we will add rel="nofollow" if it does not already exist
  121. // This applies to "a" "area" "link"
  122. p.RequireNoFollowOnLinks(true)
  123. }
  124. // AllowStandardAttributes will enable "id", "title" and the language specific
  125. // attributes "dir" and "lang" on all elements that are whitelisted
  126. func (p *Policy) AllowStandardAttributes() {
  127. // "dir" "lang" are permitted as both language attributes affect charsets
  128. // and direction of text.
  129. p.AllowAttrs("dir").Matching(Direction).Globally()
  130. p.AllowAttrs(
  131. "lang",
  132. ).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally()
  133. // "id" is permitted. This is pretty much as some HTML elements require this
  134. // to work well ("dfn" is an example of a "id" being value)
  135. // This does create a risk that JavaScript and CSS within your web page
  136. // might identify the wrong elements. Ensure that you select things
  137. // accurately
  138. p.AllowAttrs("id").Matching(
  139. regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`),
  140. ).Globally()
  141. // "title" is permitted as it improves accessibility.
  142. p.AllowAttrs("title").Matching(Paragraph).Globally()
  143. }
  144. // AllowStyling presently enables the class attribute globally.
  145. //
  146. // Note: When bluemonday ships a CSS parser and we can safely sanitise that,
  147. // this will also allow sanitized styling of elements via the style attribute.
  148. func (p *Policy) AllowStyling() {
  149. // "class" is permitted globally
  150. p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally()
  151. }
  152. // AllowImages enables the img element and some popular attributes. It will also
  153. // ensure that URL values are parseable. This helper does not enable data URI
  154. // images, for that you should also use the AllowDataURIImages() helper.
  155. func (p *Policy) AllowImages() {
  156. // "img" is permitted
  157. p.AllowAttrs("align").Matching(ImageAlign).OnElements("img")
  158. p.AllowAttrs("alt").Matching(Paragraph).OnElements("img")
  159. p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img")
  160. // Standard URLs enabled
  161. p.AllowStandardURLs()
  162. p.AllowAttrs("src").OnElements("img")
  163. }
  164. // AllowDataURIImages permits the use of inline images defined in RFC2397
  165. // http://tools.ietf.org/html/rfc2397
  166. // http://en.wikipedia.org/wiki/Data_URI_scheme
  167. //
  168. // Images must have a mimetype matching:
  169. // image/gif
  170. // image/jpeg
  171. // image/png
  172. // image/webp
  173. //
  174. // NOTE: There is a potential security risk to allowing data URIs and you should
  175. // only permit them on content you already trust.
  176. // http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/
  177. // https://capec.mitre.org/data/definitions/244.html
  178. func (p *Policy) AllowDataURIImages() {
  179. // URLs must be parseable by net/url.Parse()
  180. p.RequireParseableURLs(true)
  181. // Supply a function to validate images contained within data URI
  182. p.AllowURLSchemeWithCustomPolicy(
  183. "data",
  184. func(url *url.URL) (allowUrl bool) {
  185. if url.RawQuery != "" || url.Fragment != "" {
  186. return false
  187. }
  188. matched := dataURIImagePrefix.FindString(url.Opaque)
  189. if matched == "" {
  190. return false
  191. }
  192. _, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):])
  193. if err != nil {
  194. return false
  195. }
  196. return true
  197. },
  198. )
  199. }
  200. // AllowLists will enabled ordered and unordered lists, as well as definition
  201. // lists
  202. func (p *Policy) AllowLists() {
  203. // "ol" "ul" are permitted
  204. p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul")
  205. // "li" is permitted
  206. p.AllowAttrs("type").Matching(ListType).OnElements("li")
  207. p.AllowAttrs("value").Matching(Integer).OnElements("li")
  208. // "dl" "dt" "dd" are permitted
  209. p.AllowElements("dl", "dt", "dd")
  210. }
  211. // AllowTables will enable a rich set of elements and attributes to describe
  212. // HTML tables
  213. func (p *Policy) AllowTables() {
  214. // "table" is permitted
  215. p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table")
  216. p.AllowAttrs("summary").Matching(Paragraph).OnElements("table")
  217. // "caption" is permitted
  218. p.AllowElements("caption")
  219. // "col" "colgroup" are permitted
  220. p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup")
  221. p.AllowAttrs("height", "width").Matching(
  222. NumberOrPercent,
  223. ).OnElements("col", "colgroup")
  224. p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col")
  225. p.AllowAttrs("valign").Matching(
  226. CellVerticalAlign,
  227. ).OnElements("col", "colgroup")
  228. // "thead" "tr" are permitted
  229. p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr")
  230. p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr")
  231. // "td" "th" are permitted
  232. p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th")
  233. p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th")
  234. p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th")
  235. p.AllowAttrs("headers").Matching(
  236. SpaceSeparatedTokens,
  237. ).OnElements("td", "th")
  238. p.AllowAttrs("height", "width").Matching(
  239. NumberOrPercent,
  240. ).OnElements("td", "th")
  241. p.AllowAttrs(
  242. "scope",
  243. ).Matching(
  244. regexp.MustCompile(`(?i)(?:row|col)(?:group)?`),
  245. ).OnElements("td", "th")
  246. p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th")
  247. p.AllowAttrs("nowrap").Matching(
  248. regexp.MustCompile(`(?i)|nowrap`),
  249. ).OnElements("td", "th")
  250. // "tbody" "tfoot"
  251. p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot")
  252. p.AllowAttrs("valign").Matching(
  253. CellVerticalAlign,
  254. ).OnElements("tbody", "tfoot")
  255. }