123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648 |
- // Copyright 2012 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // +build ignore
- package main
- // This program generates table.go and table_test.go.
- // Invoke as
- //
- // go run gen.go |gofmt >table.go
- // go run gen.go -test |gofmt >table_test.go
- import (
- "flag"
- "fmt"
- "math/rand"
- "os"
- "sort"
- "strings"
- )
- // identifier converts s to a Go exported identifier.
- // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
- func identifier(s string) string {
- b := make([]byte, 0, len(s))
- cap := true
- for _, c := range s {
- if c == '-' {
- cap = true
- continue
- }
- if cap && 'a' <= c && c <= 'z' {
- c -= 'a' - 'A'
- }
- cap = false
- b = append(b, byte(c))
- }
- return string(b)
- }
- var test = flag.Bool("test", false, "generate table_test.go")
- func main() {
- flag.Parse()
- var all []string
- all = append(all, elements...)
- all = append(all, attributes...)
- all = append(all, eventHandlers...)
- all = append(all, extra...)
- sort.Strings(all)
- if *test {
- fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
- fmt.Printf("package atom\n\n")
- fmt.Printf("var testAtomList = []string{\n")
- for _, s := range all {
- fmt.Printf("\t%q,\n", s)
- }
- fmt.Printf("}\n")
- return
- }
- // uniq - lists have dups
- // compute max len too
- maxLen := 0
- w := 0
- for _, s := range all {
- if w == 0 || all[w-1] != s {
- if maxLen < len(s) {
- maxLen = len(s)
- }
- all[w] = s
- w++
- }
- }
- all = all[:w]
- // Find hash that minimizes table size.
- var best *table
- for i := 0; i < 1000000; i++ {
- if best != nil && 1<<(best.k-1) < len(all) {
- break
- }
- h := rand.Uint32()
- for k := uint(0); k <= 16; k++ {
- if best != nil && k >= best.k {
- break
- }
- var t table
- if t.init(h, k, all) {
- best = &t
- break
- }
- }
- }
- if best == nil {
- fmt.Fprintf(os.Stderr, "failed to construct string table\n")
- os.Exit(1)
- }
- // Lay out strings, using overlaps when possible.
- layout := append([]string{}, all...)
- // Remove strings that are substrings of other strings
- for changed := true; changed; {
- changed = false
- for i, s := range layout {
- if s == "" {
- continue
- }
- for j, t := range layout {
- if i != j && t != "" && strings.Contains(s, t) {
- changed = true
- layout[j] = ""
- }
- }
- }
- }
- // Join strings where one suffix matches another prefix.
- for {
- // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
- // maximizing overlap length k.
- besti := -1
- bestj := -1
- bestk := 0
- for i, s := range layout {
- if s == "" {
- continue
- }
- for j, t := range layout {
- if i == j {
- continue
- }
- for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
- if s[len(s)-k:] == t[:k] {
- besti = i
- bestj = j
- bestk = k
- }
- }
- }
- }
- if bestk > 0 {
- layout[besti] += layout[bestj][bestk:]
- layout[bestj] = ""
- continue
- }
- break
- }
- text := strings.Join(layout, "")
- atom := map[string]uint32{}
- for _, s := range all {
- off := strings.Index(text, s)
- if off < 0 {
- panic("lost string " + s)
- }
- atom[s] = uint32(off<<8 | len(s))
- }
- // Generate the Go code.
- fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
- fmt.Printf("package atom\n\nconst (\n")
- for _, s := range all {
- fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
- }
- fmt.Printf(")\n\n")
- fmt.Printf("const hash0 = %#x\n\n", best.h0)
- fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
- fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
- for i, s := range best.tab {
- if s == "" {
- continue
- }
- fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
- }
- fmt.Printf("}\n")
- datasize := (1 << best.k) * 4
- fmt.Printf("const atomText =\n")
- textsize := len(text)
- for len(text) > 60 {
- fmt.Printf("\t%q +\n", text[:60])
- text = text[60:]
- }
- fmt.Printf("\t%q\n\n", text)
- fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
- }
- type byLen []string
- func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
- func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
- func (x byLen) Len() int { return len(x) }
- // fnv computes the FNV hash with an arbitrary starting value h.
- func fnv(h uint32, s string) uint32 {
- for i := 0; i < len(s); i++ {
- h ^= uint32(s[i])
- h *= 16777619
- }
- return h
- }
- // A table represents an attempt at constructing the lookup table.
- // The lookup table uses cuckoo hashing, meaning that each string
- // can be found in one of two positions.
- type table struct {
- h0 uint32
- k uint
- mask uint32
- tab []string
- }
- // hash returns the two hashes for s.
- func (t *table) hash(s string) (h1, h2 uint32) {
- h := fnv(t.h0, s)
- h1 = h & t.mask
- h2 = (h >> 16) & t.mask
- return
- }
- // init initializes the table with the given parameters.
- // h0 is the initial hash value,
- // k is the number of bits of hash value to use, and
- // x is the list of strings to store in the table.
- // init returns false if the table cannot be constructed.
- func (t *table) init(h0 uint32, k uint, x []string) bool {
- t.h0 = h0
- t.k = k
- t.tab = make([]string, 1<<k)
- t.mask = 1<<k - 1
- for _, s := range x {
- if !t.insert(s) {
- return false
- }
- }
- return true
- }
- // insert inserts s in the table.
- func (t *table) insert(s string) bool {
- h1, h2 := t.hash(s)
- if t.tab[h1] == "" {
- t.tab[h1] = s
- return true
- }
- if t.tab[h2] == "" {
- t.tab[h2] = s
- return true
- }
- if t.push(h1, 0) {
- t.tab[h1] = s
- return true
- }
- if t.push(h2, 0) {
- t.tab[h2] = s
- return true
- }
- return false
- }
- // push attempts to push aside the entry in slot i.
- func (t *table) push(i uint32, depth int) bool {
- if depth > len(t.tab) {
- return false
- }
- s := t.tab[i]
- h1, h2 := t.hash(s)
- j := h1 + h2 - i
- if t.tab[j] != "" && !t.push(j, depth+1) {
- return false
- }
- t.tab[j] = s
- return true
- }
- // The lists of element names and attribute keys were taken from
- // https://html.spec.whatwg.org/multipage/indices.html#index
- // as of the "HTML Living Standard - Last Updated 21 February 2015" version.
- var elements = []string{
- "a",
- "abbr",
- "address",
- "area",
- "article",
- "aside",
- "audio",
- "b",
- "base",
- "bdi",
- "bdo",
- "blockquote",
- "body",
- "br",
- "button",
- "canvas",
- "caption",
- "cite",
- "code",
- "col",
- "colgroup",
- "command",
- "data",
- "datalist",
- "dd",
- "del",
- "details",
- "dfn",
- "dialog",
- "div",
- "dl",
- "dt",
- "em",
- "embed",
- "fieldset",
- "figcaption",
- "figure",
- "footer",
- "form",
- "h1",
- "h2",
- "h3",
- "h4",
- "h5",
- "h6",
- "head",
- "header",
- "hgroup",
- "hr",
- "html",
- "i",
- "iframe",
- "img",
- "input",
- "ins",
- "kbd",
- "keygen",
- "label",
- "legend",
- "li",
- "link",
- "map",
- "mark",
- "menu",
- "menuitem",
- "meta",
- "meter",
- "nav",
- "noscript",
- "object",
- "ol",
- "optgroup",
- "option",
- "output",
- "p",
- "param",
- "pre",
- "progress",
- "q",
- "rp",
- "rt",
- "ruby",
- "s",
- "samp",
- "script",
- "section",
- "select",
- "small",
- "source",
- "span",
- "strong",
- "style",
- "sub",
- "summary",
- "sup",
- "table",
- "tbody",
- "td",
- "template",
- "textarea",
- "tfoot",
- "th",
- "thead",
- "time",
- "title",
- "tr",
- "track",
- "u",
- "ul",
- "var",
- "video",
- "wbr",
- }
- // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
- var attributes = []string{
- "abbr",
- "accept",
- "accept-charset",
- "accesskey",
- "action",
- "alt",
- "async",
- "autocomplete",
- "autofocus",
- "autoplay",
- "challenge",
- "charset",
- "checked",
- "cite",
- "class",
- "cols",
- "colspan",
- "command",
- "content",
- "contenteditable",
- "contextmenu",
- "controls",
- "coords",
- "crossorigin",
- "data",
- "datetime",
- "default",
- "defer",
- "dir",
- "dirname",
- "disabled",
- "download",
- "draggable",
- "dropzone",
- "enctype",
- "for",
- "form",
- "formaction",
- "formenctype",
- "formmethod",
- "formnovalidate",
- "formtarget",
- "headers",
- "height",
- "hidden",
- "high",
- "href",
- "hreflang",
- "http-equiv",
- "icon",
- "id",
- "inputmode",
- "ismap",
- "itemid",
- "itemprop",
- "itemref",
- "itemscope",
- "itemtype",
- "keytype",
- "kind",
- "label",
- "lang",
- "list",
- "loop",
- "low",
- "manifest",
- "max",
- "maxlength",
- "media",
- "mediagroup",
- "method",
- "min",
- "minlength",
- "multiple",
- "muted",
- "name",
- "novalidate",
- "open",
- "optimum",
- "pattern",
- "ping",
- "placeholder",
- "poster",
- "preload",
- "radiogroup",
- "readonly",
- "rel",
- "required",
- "reversed",
- "rows",
- "rowspan",
- "sandbox",
- "spellcheck",
- "scope",
- "scoped",
- "seamless",
- "selected",
- "shape",
- "size",
- "sizes",
- "sortable",
- "sorted",
- "span",
- "src",
- "srcdoc",
- "srclang",
- "start",
- "step",
- "style",
- "tabindex",
- "target",
- "title",
- "translate",
- "type",
- "typemustmatch",
- "usemap",
- "value",
- "width",
- "wrap",
- }
- var eventHandlers = []string{
- "onabort",
- "onautocomplete",
- "onautocompleteerror",
- "onafterprint",
- "onbeforeprint",
- "onbeforeunload",
- "onblur",
- "oncancel",
- "oncanplay",
- "oncanplaythrough",
- "onchange",
- "onclick",
- "onclose",
- "oncontextmenu",
- "oncuechange",
- "ondblclick",
- "ondrag",
- "ondragend",
- "ondragenter",
- "ondragleave",
- "ondragover",
- "ondragstart",
- "ondrop",
- "ondurationchange",
- "onemptied",
- "onended",
- "onerror",
- "onfocus",
- "onhashchange",
- "oninput",
- "oninvalid",
- "onkeydown",
- "onkeypress",
- "onkeyup",
- "onlanguagechange",
- "onload",
- "onloadeddata",
- "onloadedmetadata",
- "onloadstart",
- "onmessage",
- "onmousedown",
- "onmousemove",
- "onmouseout",
- "onmouseover",
- "onmouseup",
- "onmousewheel",
- "onoffline",
- "ononline",
- "onpagehide",
- "onpageshow",
- "onpause",
- "onplay",
- "onplaying",
- "onpopstate",
- "onprogress",
- "onratechange",
- "onreset",
- "onresize",
- "onscroll",
- "onseeked",
- "onseeking",
- "onselect",
- "onshow",
- "onsort",
- "onstalled",
- "onstorage",
- "onsubmit",
- "onsuspend",
- "ontimeupdate",
- "ontoggle",
- "onunload",
- "onvolumechange",
- "onwaiting",
- }
- // extra are ad-hoc values not covered by any of the lists above.
- var extra = []string{
- "align",
- "annotation",
- "annotation-xml",
- "applet",
- "basefont",
- "bgsound",
- "big",
- "blink",
- "center",
- "color",
- "desc",
- "face",
- "font",
- "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
- "foreignobject",
- "frame",
- "frameset",
- "image",
- "isindex",
- "listing",
- "malignmark",
- "marquee",
- "math",
- "mglyph",
- "mi",
- "mn",
- "mo",
- "ms",
- "mtext",
- "nobr",
- "noembed",
- "noframes",
- "plaintext",
- "prompt",
- "public",
- "spacer",
- "strike",
- "svg",
- "system",
- "tt",
- "xmp",
- }
|