snappy.go

// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Modified for deflate by Klaus Post (c) 2015.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

// emitLiteral writes the bytes of lit to the token buffer as literal tokens.
func emitLiteral(dst *tokens, lit []byte) {
	ol := int(dst.n)
	for i, v := range lit {
		dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
	}
	dst.n += uint16(len(lit))
}

// emitCopy writes a single copy token for the given offset and length.
func emitCopy(dst *tokens, offset, length int) {
	dst.tokens[dst.n] = matchToken(uint32(length-3), uint32(offset-minOffsetSize))
	dst.n++
}
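
// Illustrative example: a 7-byte match starting 10 bytes back is encoded as
// matchToken(uint32(7-3), uint32(10-1)); lengths are stored relative to the
// 3-byte minimum match length and offsets relative to the 1-byte minimum offset.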

// snappyEnc is implemented by the level 1-4 Snappy-style encoders.
type snappyEnc interface {
	Encode(dst *tokens, src []byte)
	Reset()
}

// newSnappy returns a Snappy-style encoder for compression levels 1 through 4.
// It panics on any other level.
func newSnappy(level int) snappyEnc {
	switch level {
	case 1:
		return &snappyL1{}
	case 2:
		return &snappyL2{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
	case 3:
		return &snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
	case 4:
		return &snappyL4{snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}}
	default:
		panic("invalid level specified")
	}
}
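
// Usage sketch (illustrative, not part of the original source): the deflate
// writer keeps one encoder per stream and reuses it across store-sized blocks,
// along the lines of
//
//	enc := newSnappy(2)
//	var t tokens
//	enc.Encode(&t, block) // fills t with literal and match tokens for block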

const (
	tableBits  = 14             // Bits used in the table.
	tableSize  = 1 << tableBits // Size of the table.
	tableMask  = tableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.

	baseMatchOffset = 1       // The smallest match offset.
	baseMatchLength = 3       // The smallest match length per RFC 1951, section 3.2.5.
	maxMatchOffset  = 1 << 15 // The largest match offset.
)

func load32(b []byte, i int) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load64(b []byte, i int) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// hash returns a table index computed from the four bytes in u, using
// multiplicative hashing: the odd constant spreads the input bits and the
// shift keeps the tableBits most significant bits of the 32-bit product.
func hash(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> tableShift
}
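
// Worked example (illustrative): with tableBits = 14, tableSize = 1<<14 = 16384
// and tableShift = 18, hash returns the top 14 bits of the 32-bit product, so
// the result is already in [0, tableSize); the & tableMask at call sites is
// redundant but helps the compiler drop bounds checks.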

// snappyL1 encapsulates level 1 compression.
type snappyL1 struct{}

func (e *snappyL1) Reset() {}

func (e *snappyL1) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 16 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by the caller.
		dst.n = uint16(len(src))
		return
	}

	// Initialize the hash table.
	//
	// The table element type is uint16, as s < sLimit and sLimit < len(src)
	// and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535.
	var table [tableSize]uint16

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	nextHash := hash(load32(src, s))

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (i.e. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
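		// Concretely: skip starts at 32, so skip>>5 is 1 while skip is in
		// [32,64) (the first 32 probes advance one byte each), 2 while skip
		// is in [64,96), and so on; the stride grows by one for every
		// additional 32 added to skip.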
		skip := 32

		nextS := s
		candidate := 0
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = int(table[nextHash&tableMask])
			table[nextHash&tableMask] = uint16(s)
			nextHash = hash(load32(src, nextS))
			if s-candidate <= maxMatchOffset && load32(src, s) == load32(src, candidate) {
				break
			}
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s

			// Extend the 4-byte match as long as possible.
			//
			// This is an inlined version of Snappy's:
			//	s = extendMatch(src, candidate+4, s+4)
			s += 4
			s1 := base + maxMatchLength
			if s1 > len(src) {
				s1 = len(src)
			}
			a := src[s:s1]
			b := src[candidate+4:]
			b = b[:len(a)]
			l := len(a)
			for i := range a {
				if a[i] != b[i] {
					l = i
					break
				}
			}
			s += l

			// matchToken is flate's equivalent of Snappy's emitCopy.
			dst.tokens[dst.n] = matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset))
			dst.n++
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load64(src, s-1)
			prevHash := hash(uint32(x >> 0))
			table[prevHash&tableMask] = uint16(s - 1)
			currHash := hash(uint32(x >> 8))
			candidate = int(table[currHash&tableMask])
			table[currHash&tableMask] = uint16(s)
			if s-candidate > maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
				nextHash = hash(uint32(x >> 16))
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		emitLiteral(dst, src[nextEmit:])
	}
}

type tableEntry struct {
	val    uint32
	offset int32
}

func load3232(b []byte, i int32) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load6432(b []byte, i int32) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// snappyGen maintains the offset base and the previous byte block,
// the state shared by the level 2-4 encoders.
type snappyGen struct {
	prev []byte
	cur  int32
}
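
// Table offsets are stored as position + e.cur rather than as plain block
// positions. Recovering a position as candidate.offset - e.cur makes entries
// written during earlier blocks appear far away, so the usual
// offset > maxMatchOffset check rejects stale entries without the table
// having to be cleared between blocks.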

// snappyL2 is the level 2 encoder. It uses a single-entry hash table
// and can match against the previous block.
type snappyL2 struct {
	snappyGen
	table [tableSize]tableEntry
}

// Encode uses a similar algorithm to level 1, but is capable
// of matching across blocks, giving better compression at a small slowdown.
func (e *snappyL2) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 8 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	if e.cur > 1<<30 {
		for i := range e.table {
			e.table[i] = tableEntry{}
		}
		e.cur = maxStoreBlockSize
	}

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by the caller.
		dst.n = uint16(len(src))
		e.cur += maxStoreBlockSize
		e.prev = e.prev[:0]
		return
	}

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := int32(0)
	s := int32(0)
	cv := load3232(src, s)
	nextHash := hash(cv)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (i.e. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := int32(32)

		nextS := s
		var candidate tableEntry
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = e.table[nextHash&tableMask]
			now := load3232(src, nextS)
			e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
			nextHash = hash(now)

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || cv != candidate.val {
				// Out of range or not matched.
				cv = now
				continue
			}
			break
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			s += 4
			t := candidate.offset - e.cur + 4
			l := e.matchlen(s, t, src)
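			// t is relative to the start of the current block and may be
			// negative when the candidate lies in the previous block;
			// matchlen handles that case by reading from e.prev.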

			// matchToken is flate's equivalent of Snappy's emitCopy. (length, offset)
			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
			dst.n++
			s += l
			nextEmit = s
			if s >= sLimit {
				t += l
				// Index first pair after match end.
				if int(t+4) < len(src) && t > 0 {
					cv := load3232(src, t)
					e.table[hash(cv)&tableMask] = tableEntry{offset: t + e.cur, val: cv}
				}
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-1)
			prevHash := hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
			x >>= 8
			currHash := hash(uint32(x))
			candidate = e.table[currHash&tableMask]
			e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || uint32(x) != candidate.val {
				cv = uint32(x >> 8)
				nextHash = hash(cv)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		emitLiteral(dst, src[nextEmit:])
	}
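
	// Save the block so the next call can find matches that start inside it.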
	e.cur += int32(len(src))
	e.prev = e.prev[:len(src)]
	copy(e.prev, src)
}

type tableEntryPrev struct {
	Cur  tableEntry
	Prev tableEntry
}
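
// A tableEntryPrev bucket keeps the two most recent entries for its hash:
// on insert, Cur is moved to Prev, so lookups can fall back to an older
// candidate when the newest one doesn't match.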

// snappyL3 is the level 3 encoder, using the two-entry hash table.
type snappyL3 struct {
	snappyGen
	table [tableSize]tableEntryPrev
}

// Encode uses a similar algorithm to level 2, but checks up to two
// candidates per hash bucket.
func (e *snappyL3) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 8 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	if e.cur > 1<<30 {
		for i := range e.table {
			e.table[i] = tableEntryPrev{}
		}
		e.snappyGen = snappyGen{cur: maxStoreBlockSize, prev: e.prev[:0]}
	}

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by the caller.
		dst.n = uint16(len(src))
		e.cur += maxStoreBlockSize
		e.prev = e.prev[:0]
		return
	}

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := int32(0)
	s := int32(0)
	cv := load3232(src, s)
	nextHash := hash(cv)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (i.e. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := int32(32)

		nextS := s
		var candidate tableEntry
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidates := e.table[nextHash&tableMask]
			now := load3232(src, nextS)
			e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
			nextHash = hash(now)

			// Check both candidates
			candidate = candidates.Cur
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset <= maxMatchOffset {
					break
				}
			} else {
				// Only try Prev if Cur's value mismatched: if Cur matched but
				// its offset was out of range, the older Prev entry must be
				// out of range too.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset <= maxMatchOffset {
						break
					}
				}
			}
			cv = now
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			s += 4
			t := candidate.offset - e.cur + 4
			l := e.matchlen(s, t, src)

			// matchToken is flate's equivalent of Snappy's emitCopy. (length, offset)
			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
			dst.n++
			s += l
			nextEmit = s
			if s >= sLimit {
				t += l
				// Index first pair after match end.
				if int(t+4) < len(src) && t > 0 {
					cv := load3232(src, t)
					nextHash = hash(cv)
					e.table[nextHash&tableMask] = tableEntryPrev{
						Prev: e.table[nextHash&tableMask].Cur,
						Cur:  tableEntry{offset: e.cur + t, val: cv},
					}
				}
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-3 to s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these hash calculations are
			// faster as one load64 call (with some shifts) instead of
			// several load32 calls.
			x := load6432(src, s-3)
			prevHash := hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 3, val: uint32(x)},
			}
			x >>= 8
			prevHash = hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
			}
			x >>= 8
			prevHash = hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
			}
			x >>= 8
			currHash := hash(uint32(x))
			candidates := e.table[currHash&tableMask]
			cv = uint32(x)
			e.table[currHash&tableMask] = tableEntryPrev{
				Prev: candidates.Cur,
				Cur:  tableEntry{offset: s + e.cur, val: cv},
			}

			// Check both candidates
			candidate = candidates.Cur
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset <= maxMatchOffset {
					continue
				}
			} else {
				// Only try Prev if Cur's value mismatched: if Cur matched but
				// its offset was out of range, the older Prev entry must be
				// out of range too.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset <= maxMatchOffset {
						continue
					}
				}
			}
			cv = uint32(x >> 8)
			nextHash = hash(cv)
			s++
			break
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		emitLiteral(dst, src[nextEmit:])
	}
	e.cur += int32(len(src))
	e.prev = e.prev[:len(src)]
	copy(e.prev, src)
}

// snappyL4 is the level 4 encoder: level 3 with a second-candidate fallback.
type snappyL4 struct {
	snappyL3
}

// Encode uses a similar algorithm to level 3, but also tries the older
// candidate when the first match isn't long enough.
func (e *snappyL4) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 8 - 3
		minNonLiteralBlockSize = 1 + 1 + inputMargin
		matchLenGood           = 12
	)

	// Protect against e.cur wraparound.
	if e.cur > 1<<30 {
		for i := range e.table {
			e.table[i] = tableEntryPrev{}
		}
		e.snappyGen = snappyGen{cur: maxStoreBlockSize, prev: e.prev[:0]}
	}

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by the caller.
		dst.n = uint16(len(src))
		e.cur += maxStoreBlockSize
		e.prev = e.prev[:0]
		return
	}

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := int32(0)
	s := int32(0)
	cv := load3232(src, s)
	nextHash := hash(cv)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (i.e. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := int32(32)

		nextS := s
		var candidate tableEntry
		var candidateAlt tableEntry
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidates := e.table[nextHash&tableMask]
			now := load3232(src, nextS)
			e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
			nextHash = hash(now)

			// Check both candidates
			candidate = candidates.Cur
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset < maxMatchOffset {
					offset = s - (candidates.Prev.offset - e.cur)
					if cv == candidates.Prev.val && offset < maxMatchOffset {
						candidateAlt = candidates.Prev
					}
					break
				}
			} else {
				// Only try Prev if Cur's value mismatched: if Cur matched but
				// its offset was out of range, the older Prev entry must be
				// out of range too.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset < maxMatchOffset {
						break
					}
				}
			}
			cv = now
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			s += 4
			t := candidate.offset - e.cur + 4
			l := e.matchlen(s, t, src)
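
			// l excludes the 4 bytes already matched, so the fallback below
			// only fires when the total match is shorter than matchLenGood
			// (12 bytes); longer first matches are accepted as-is to keep
			// the common path cheap.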
			// Try the alternative candidate if the match length is below matchLenGood.
			if l < matchLenGood-4 && candidateAlt.offset != 0 {
				t2 := candidateAlt.offset - e.cur + 4
				l2 := e.matchlen(s, t2, src)
				if l2 > l {
					l = l2
					t = t2
				}
			}

			// matchToken is flate's equivalent of Snappy's emitCopy. (length, offset)
			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
			dst.n++
			s += l
			nextEmit = s
			if s >= sLimit {
				t += l
				// Index first pair after match end.
				if int(t+4) < len(src) && t > 0 {
					cv := load3232(src, t)
					nextHash = hash(cv)
					e.table[nextHash&tableMask] = tableEntryPrev{
						Prev: e.table[nextHash&tableMask].Cur,
						Cur:  tableEntry{offset: e.cur + t, val: cv},
					}
				}
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-3 to s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these hash calculations are
			// faster as one load64 call (with some shifts) instead of
			// several load32 calls.
			x := load6432(src, s-3)
			prevHash := hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 3, val: uint32(x)},
			}
			x >>= 8
			prevHash = hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
			}
			x >>= 8
			prevHash = hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
			}
			x >>= 8
			currHash := hash(uint32(x))
			candidates := e.table[currHash&tableMask]
			cv = uint32(x)
			e.table[currHash&tableMask] = tableEntryPrev{
				Prev: candidates.Cur,
				Cur:  tableEntry{offset: s + e.cur, val: cv},
			}

			// Check both candidates
			candidate = candidates.Cur
			candidateAlt = tableEntry{}
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset <= maxMatchOffset {
					offset = s - (candidates.Prev.offset - e.cur)
					if cv == candidates.Prev.val && offset <= maxMatchOffset {
						candidateAlt = candidates.Prev
					}
					continue
				}
			} else {
				// Only try Prev if Cur's value mismatched: if Cur matched but
				// its offset was out of range, the older Prev entry must be
				// out of range too.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset <= maxMatchOffset {
						continue
					}
				}
			}
			cv = uint32(x >> 8)
			nextHash = hash(cv)
			s++
			break
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		emitLiteral(dst, src[nextEmit:])
	}
	e.cur += int32(len(src))
	e.prev = e.prev[:len(src)]
	copy(e.prev, src)
}

// matchlen returns the number of bytes that match at src[s] and offset t,
// where the first 4 bytes have already been matched and the result is capped
// so the total match does not exceed maxMatchLength. A negative t means the
// candidate starts len(e.prev)+t bytes into the previous block.
func (e *snappyGen) matchlen(s, t int32, src []byte) int32 {
	s1 := int(s) + maxMatchLength - 4
	if s1 > len(src) {
		s1 = len(src)
	}

	// If we are inside the current block
	if t >= 0 {
		b := src[t:]
		a := src[s:s1]
		b = b[:len(a)]
		// Extend the match to be as long as possible.
		for i := range a {
			if a[i] != b[i] {
				return int32(i)
			}
		}
		return int32(len(a))
	}

	// We found a match in the previous block.
	tp := int32(len(e.prev)) + t
	if tp < 0 {
		return 0
	}

	// Extend the match to be as long as possible.
	a := src[s:s1]
	b := e.prev[tp:]
	if len(b) > len(a) {
		b = b[:len(a)]
	}
	a = a[:len(b)]
	for i := range b {
		if a[i] != b[i] {
			return int32(i)
		}
	}

	// If we reached our limit, we matched everything we are
	// allowed to in the previous block and we return.
	n := int32(len(b))
	if int(s+n) == s1 {
		return n
	}

	// Continue looking for more matches in the current block.
	a = src[s+n : s1]
	b = src[:len(a)]
	for i := range a {
		if a[i] != b[i] {
			return int32(i) + n
		}
	}
	return int32(len(a)) + n
}
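
// Illustrative example of the cross-block case: with len(e.prev) == 100 and
// t == -10, the candidate starts at e.prev[90]. If all 10 remaining bytes of
// e.prev match src[s:s+10], matching continues by comparing src[s+10:] with
// src[0:], since the candidate data runs on into the current block.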

// Reset resets the encoder state, invalidating the match table.
func (e *snappyGen) Reset() {
	e.prev = e.prev[:0]
	// Advancing cur moves previously stored table offsets out of the valid
	// match-offset window, so entries written before the reset are rejected
	// by the offset checks instead of the table having to be cleared.
	e.cur += maxMatchOffset
}