gen_index.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. // This file generates derivative tables based on the language package itself.
  7. import (
  8. "bytes"
  9. "flag"
  10. "fmt"
  11. "io/ioutil"
  12. "log"
  13. "reflect"
  14. "sort"
  15. "strings"
  16. "golang.org/x/text/internal/gen"
  17. "golang.org/x/text/language"
  18. "golang.org/x/text/unicode/cldr"
  19. )
  20. var (
  21. test = flag.Bool("test", false,
  22. "test existing tables; can be used to compare web data with package data.")
  23. draft = flag.String("draft",
  24. "contributed",
  25. `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
  26. )
  27. func main() {
  28. gen.Init()
  29. // Read the CLDR zip file.
  30. r := gen.OpenCLDRCoreZip()
  31. defer r.Close()
  32. d := &cldr.Decoder{}
  33. data, err := d.DecodeZip(r)
  34. if err != nil {
  35. log.Fatalf("DecodeZip: %v", err)
  36. }
  37. w := gen.NewCodeWriter()
  38. defer func() {
  39. buf := &bytes.Buffer{}
  40. if _, err = w.WriteGo(buf, "language"); err != nil {
  41. log.Fatalf("Error formatting file index.go: %v", err)
  42. }
  43. // Since we're generating a table for our own package we need to rewrite
  44. // doing the equivalent of go fmt -r 'language.b -> b'. Using
  45. // bytes.Replace will do.
  46. out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1)
  47. if err := ioutil.WriteFile("index.go", out, 0600); err != nil {
  48. log.Fatalf("Could not create file index.go: %v", err)
  49. }
  50. }()
  51. m := map[language.Tag]bool{}
  52. for _, lang := range data.Locales() {
  53. // We include all locales unconditionally to be consistent with en_US.
  54. // We want en_US, even though it has no data associated with it.
  55. // TODO: put any of the languages for which no data exists at the end
  56. // of the index. This allows all components based on ICU to use that
  57. // as the cutoff point.
  58. // if x := data.RawLDML(lang); false ||
  59. // x.LocaleDisplayNames != nil ||
  60. // x.Characters != nil ||
  61. // x.Delimiters != nil ||
  62. // x.Measurement != nil ||
  63. // x.Dates != nil ||
  64. // x.Numbers != nil ||
  65. // x.Units != nil ||
  66. // x.ListPatterns != nil ||
  67. // x.Collations != nil ||
  68. // x.Segmentations != nil ||
  69. // x.Rbnf != nil ||
  70. // x.Annotations != nil ||
  71. // x.Metadata != nil {
  72. // TODO: support POSIX natively, albeit non-standard.
  73. tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
  74. m[tag] = true
  75. // }
  76. }
  77. // Include locales for plural rules, which uses a different structure.
  78. for _, plurals := range data.Supplemental().Plurals {
  79. for _, rules := range plurals.PluralRules {
  80. for _, lang := range strings.Split(rules.Locales, " ") {
  81. m[language.Make(lang)] = true
  82. }
  83. }
  84. }
  85. var core, special []language.Tag
  86. for t := range m {
  87. if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
  88. log.Fatalf("Unexpected extension %v in %v", x, t)
  89. }
  90. if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
  91. core = append(core, t)
  92. } else {
  93. special = append(special, t)
  94. }
  95. }
  96. w.WriteComment(`
  97. NumCompactTags is the number of common tags. The maximum tag is
  98. NumCompactTags-1.`)
  99. w.WriteConst("NumCompactTags", len(core)+len(special))
  100. sort.Sort(byAlpha(special))
  101. w.WriteVar("specialTags", special)
  102. // TODO: order by frequency?
  103. sort.Sort(byAlpha(core))
  104. // Size computations are just an estimate.
  105. w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
  106. w.Size += len(core) * 6 // size of uint32 and uint16
  107. fmt.Fprintln(w)
  108. fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
  109. fmt.Fprintln(w, "0x0: 0, // und")
  110. i := len(special) + 1 // Und and special tags already written.
  111. for _, t := range core {
  112. if t == language.Und {
  113. continue
  114. }
  115. fmt.Fprint(w.Hash, t, i)
  116. b, s, r := t.Raw()
  117. fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
  118. getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number
  119. getIndex(s, 2),
  120. getIndex(r, 3),
  121. i, t)
  122. i++
  123. }
  124. fmt.Fprintln(w, "}")
  125. }
  126. // getIndex prints the subtag type and extracts its index of size nibble.
  127. // If the index is less than n nibbles, the result is prefixed with 0s.
  128. func getIndex(x interface{}, n int) string {
  129. s := fmt.Sprintf("%#v", x) // s is of form Type{typeID: 0x00}
  130. s = s[strings.Index(s, "0x")+2 : len(s)-1]
  131. return strings.Repeat("0", n-len(s)) + s
  132. }
  133. type byAlpha []language.Tag
  134. func (a byAlpha) Len() int { return len(a) }
  135. func (a byAlpha) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  136. func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() }