123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- package encoding
- import (
- "errors"
- "io"
- "strconv"
- "unicode/utf8"
- "golang.org/x/text/encoding/internal/identifier"
- "golang.org/x/text/transform"
- )
- type Encoding interface {
-
- NewDecoder() *Decoder
-
- NewEncoder() *Encoder
- }
- type Decoder struct {
- transform.Transformer
-
-
-
- _ struct{}
- }
- func (d *Decoder) Bytes(b []byte) ([]byte, error) {
- b, _, err := transform.Bytes(d, b)
- if err != nil {
- return nil, err
- }
- return b, nil
- }
- func (d *Decoder) String(s string) (string, error) {
- s, _, err := transform.String(d, s)
- if err != nil {
- return "", err
- }
- return s, nil
- }
- func (d *Decoder) Reader(r io.Reader) io.Reader {
- return transform.NewReader(r, d)
- }
- type Encoder struct {
- transform.Transformer
-
-
-
- _ struct{}
- }
- func (e *Encoder) Bytes(b []byte) ([]byte, error) {
- b, _, err := transform.Bytes(e, b)
- if err != nil {
- return nil, err
- }
- return b, nil
- }
- func (e *Encoder) String(s string) (string, error) {
- s, _, err := transform.String(e, s)
- if err != nil {
- return "", err
- }
- return s, nil
- }
- func (e *Encoder) Writer(w io.Writer) io.Writer {
- return transform.NewWriter(w, e)
- }
// ASCIISub is the ASCII substitute (SUB) control character, 0x1A. It is an
// untyped rune constant, so it can be used as either a rune or a byte.
const ASCIISub = '\x1a'
- var Nop Encoding = nop{}
- type nop struct{}
- func (nop) NewDecoder() *Decoder {
- return &Decoder{Transformer: transform.Nop}
- }
- func (nop) NewEncoder() *Encoder {
- return &Encoder{Transformer: transform.Nop}
- }
- var Replacement Encoding = replacement{}
- type replacement struct{}
- func (replacement) NewDecoder() *Decoder {
- return &Decoder{Transformer: replacementDecoder{}}
- }
- func (replacement) NewEncoder() *Encoder {
- return &Encoder{Transformer: replacementEncoder{}}
- }
- func (replacement) ID() (mib identifier.MIB, other string) {
- return identifier.Replacement, ""
- }
- type replacementDecoder struct{ transform.NopResetter }
- func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- if len(dst) < 3 {
- return 0, 0, transform.ErrShortDst
- }
- if atEOF {
- const fffd = "\ufffd"
- dst[0] = fffd[0]
- dst[1] = fffd[1]
- dst[2] = fffd[2]
- nDst = 3
- }
- return nDst, len(src), nil
- }
- type replacementEncoder struct{ transform.NopResetter }
- func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- r, size := rune(0), 0
- for ; nSrc < len(src); nSrc += size {
- r = rune(src[nSrc])
-
- if r < utf8.RuneSelf {
- size = 1
- } else {
-
- r, size = utf8.DecodeRune(src[nSrc:])
- if size == 1 {
-
-
-
- if !atEOF && !utf8.FullRune(src[nSrc:]) {
- err = transform.ErrShortSrc
- break
- }
- r = '\ufffd'
- }
- }
- if nDst+utf8.RuneLen(r) > len(dst) {
- err = transform.ErrShortDst
- break
- }
- nDst += utf8.EncodeRune(dst[nDst:], r)
- }
- return nDst, nSrc, err
- }
- func HTMLEscapeUnsupported(e *Encoder) *Encoder {
- return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
- }
- func ReplaceUnsupported(e *Encoder) *Encoder {
- return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
- }
- type errorHandler struct {
- *Encoder
- handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
- }
// repertoireError is the extra behavior errorHandler looks for on an error
// returned by an encoder: such an error can name the single byte to use in
// place of the rune that could not be encoded.
type repertoireError interface {
	// Replacement returns the substitute byte.
	Replacement() byte
}
- func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
- for err != nil {
- rerr, ok := err.(repertoireError)
- if !ok {
- return nDst, nSrc, err
- }
- r, sz := utf8.DecodeRune(src[nSrc:])
- n, ok := h.handler(dst[nDst:], r, rerr)
- if !ok {
- return nDst, nSrc, transform.ErrShortDst
- }
- err = nil
- nDst += n
- if nSrc += sz; nSrc < len(src) {
- var dn, sn int
- dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
- nDst += dn
- nSrc += sn
- }
- }
- return nDst, nSrc, err
- }
- func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
- buf := [8]byte{}
- b := strconv.AppendUint(buf[:0], uint64(r), 10)
- if n = len(b) + len("&#;"); n >= len(dst) {
- return 0, false
- }
- dst[0] = '&'
- dst[1] = '#'
- dst[copy(dst[2:], b)+2] = ';'
- return n, true
- }
- func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
- if len(dst) == 0 {
- return 0, false
- }
- dst[0] = err.Replacement()
- return 1, true
- }
// ErrInvalidUTF8 is the error UTF8Validator's Transform returns when it meets
// bytes that are not valid UTF-8.
var ErrInvalidUTF8 error = errors.New("encoding: invalid UTF-8")
- var UTF8Validator transform.Transformer = utf8Validator{}
- type utf8Validator struct{ transform.NopResetter }
- func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- n := len(src)
- if n > len(dst) {
- n = len(dst)
- }
- for i := 0; i < n; {
- if c := src[i]; c < utf8.RuneSelf {
- dst[i] = c
- i++
- continue
- }
- _, size := utf8.DecodeRune(src[i:])
- if size == 1 {
-
-
-
- err = ErrInvalidUTF8
- if !atEOF && !utf8.FullRune(src[i:]) {
- err = transform.ErrShortSrc
- }
- return i, i, err
- }
- if i+size > len(dst) {
- return i, i, transform.ErrShortDst
- }
- for ; size > 0; size-- {
- dst[i] = src[i]
- i++
- }
- }
- if len(src) > len(dst) {
- err = transform.ErrShortDst
- }
- return n, n, err
- }
|