// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"errors"
	"fmt"
	"math"
	"math/bits"
	"slices"

	"github.com/klauspost/compress/huff0"
)

type blockEnc struct {
	size      int
	literals  []byte
	sequences []seq
	coders    seqCoders

	litEnc     *huff0.Scratch
	dictLitEnc *huff0.Scratch
	wr         bitWriter

	extraLits         int
	output            []byte
	recentOffsets     [3]uint32
	prevRecentOffsets [3]uint32

	last   bool
	lowMem bool
}

// init should be used once the block has been created.
// If called more than once, the effect is the same as calling reset.
func (b *blockEnc) init() {
	if b.lowMem {
		// 1K literals
		if cap(b.literals) < 1<<10 {
			b.literals = make([]byte, 0, 1<<10)
		}
		const defSeqs = 20
		if cap(b.sequences) < defSeqs {
			b.sequences = make([]seq, 0, defSeqs)
		}
		// 1K
		if cap(b.output) < 1<<10 {
			b.output = make([]byte, 0, 1<<10)
		}
	} else {
		if cap(b.literals) < maxCompressedBlockSize {
			b.literals = make([]byte, 0, maxCompressedBlockSize)
		}
		const defSeqs = 2000
		if cap(b.sequences) < defSeqs {
			b.sequences = make([]seq, 0, defSeqs)
		}
		if cap(b.output) < maxCompressedBlockSize {
			b.output = make([]byte, 0, maxCompressedBlockSize)
		}
	}

	if b.coders.mlEnc == nil {
		b.coders.mlEnc = &fseEncoder{}
		b.coders.mlPrev = &fseEncoder{}
		b.coders.ofEnc = &fseEncoder{}
		b.coders.ofPrev = &fseEncoder{}
		b.coders.llEnc = &fseEncoder{}
		b.coders.llPrev = &fseEncoder{}
	}
	b.litEnc = &huff0.Scratch{WantLogLess: 4}
	b.reset(nil)
}

// initNewEncode can be used to reset offsets and encoders to the initial state.
func (b *blockEnc) initNewEncode() {
	// The zstd format defines the initial repeat offsets as 1, 4 and 8.
	b.recentOffsets = [3]uint32{1, 4, 8}
	b.litEnc.Reuse = huff0.ReusePolicyNone
	b.coders.setPrev(nil, nil, nil)
}

// reset will reset the block for a new encode, but in the same stream,
// meaning that state will be carried over, but the block content is reset.
// If a previous block is provided, the recent offsets are carried over.
func (b *blockEnc) reset(prev *blockEnc) {
	b.extraLits = 0
	b.literals = b.literals[:0]
	b.size = 0
	b.sequences = b.sequences[:0]
	b.output = b.output[:0]
	b.last = false
	if prev != nil {
		b.recentOffsets = prev.prevRecentOffsets
	}
	b.dictLitEnc = nil
}

// swapEncoders will swap the FSE coders and the literal encoder with those
// of the previous block, so their state can be reused.
func (b *blockEnc) swapEncoders(prev *blockEnc) {
	b.coders.swap(&prev.coders)
	b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
}

// blockHeader contains the information for a block header.
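// It mirrors the 3-byte on-wire block header (RFC 8878):
//
//	bit 0       last-block flag
//	bits 1-2    block type
//	bits 3-23   block size
//
// For example, a raw (type 0) last block of 100 bytes has the header value
// 100<<3|1 = 0x321 and is emitted as the bytes 0x21 0x03 0x00.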
type blockHeader uint32

// setLast sets the 'last' indicator on a block.
func (h *blockHeader) setLast(b bool) {
	if b {
		*h = *h | 1
	} else {
		const mask = (1 << 24) - 2
		*h = *h & mask
	}
}

// setSize will store the compressed size of a block.
func (h *blockHeader) setSize(v uint32) {
	const mask = 7
	*h = (*h)&mask | blockHeader(v<<3)
}

// setType sets the block type.
func (h *blockHeader) setType(t blockType) {
	const mask = 1 | (((1 << 24) - 1) ^ 7)
	*h = (*h & mask) | blockHeader(t<<1)
}

// appendTo will append the block header to a slice.
func (h blockHeader) appendTo(b []byte) []byte {
	return append(b, uint8(h), uint8(h>>8), uint8(h>>16))
}

// String returns a string representation of the block.
func (h blockHeader) String() string {
	return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1)
}

// literalsHeader contains literals header information.
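// The low bits follow the on-wire layout: bits 0-1 hold the literals block
// type and bits 2-3 the size format, followed by the regenerated (and, for
// compressed blocks, the compressed) size. Bits 60-63 are not part of the
// wire format; they record how many header bytes appendTo must emit.
//
// For example, setSize(10) on a raw literals block produces the single
// header byte 10<<3 = 0x50.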
type literalsHeader uint64

// setType can be used to set the type of literal block.
func (h *literalsHeader) setType(t literalsBlockType) {
	const mask = math.MaxUint64 - 3
	*h = (*h & mask) | literalsHeader(t)
}

// setSize can be used to set a single size, for uncompressed and RLE content.
func (h *literalsHeader) setSize(regenLen int) {
	inBits := bits.Len32(uint32(regenLen))
	// Only retain 2 bits
	const mask = 3
	lh := uint64(*h & mask)
	switch {
	case inBits < 5:
		lh |= (uint64(regenLen) << 3) | (1 << 60)
		if debugEncoder {
			got := int(lh>>3) & 0xff
			if got != regenLen {
				panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
			}
		}
	case inBits < 12:
		lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60)
	case inBits < 20:
		lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60)
	default:
		panic(fmt.Errorf("internal error: block too big (%d)", regenLen))
	}
	*h = literalsHeader(lh)
}

// setSizes will set the size of a compressed literals section and the input length.
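// Sizes up to 10 bits each fit the 3-byte header form, up to 14 bits the
// 4-byte form and up to 18 bits the 5-byte form; a single-stream block is
// only representable in the 10-bit form.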
func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
	compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
	// Only retain 2 bits
	const mask = 3
	lh := uint64(*h & mask)
	switch {
	case compBits <= 10 && inBits <= 10:
		if !single {
			lh |= 1 << 2
		}
		lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
		if debugEncoder {
			const mmask = (1 << 24) - 1
			n := (lh >> 4) & mmask
			if int(n&1023) != inLen {
				panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits))
			}
			if int(n>>10) != compLen {
				panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits))
			}
		}
	case compBits <= 14 && inBits <= 14:
		lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
		if single {
			panic("single stream used with more than 10 bits length.")
		}
	case compBits <= 18 && inBits <= 18:
		lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
		if single {
			panic("single stream used with more than 10 bits length.")
		}
	default:
		panic("internal error: block too big")
	}
	*h = literalsHeader(lh)
}

// appendTo will append the literals header to a byte slice.
func (h literalsHeader) appendTo(b []byte) []byte {
	size := uint8(h >> 60)
	switch size {
	case 1:
		b = append(b, uint8(h))
	case 2:
		b = append(b, uint8(h), uint8(h>>8))
	case 3:
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16))
	case 4:
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24))
	case 5:
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32))
	default:
		panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size))
	}
	return b
}

// size returns the output size with currently set values.
func (h literalsHeader) size() int {
	return int(h >> 60)
}

func (h literalsHeader) String() string {
	return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: %#x, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60)
}

// pushOffsets will push the recent offsets to the backup store.
func (b *blockEnc) pushOffsets() {
	b.prevRecentOffsets = b.recentOffsets
}

// popOffsets will restore the recent offsets from the backup store.
func (b *blockEnc) popOffsets() {
	b.recentOffsets = b.prevRecentOffsets
}

// matchOffset will adjust recent offsets and return the adjusted one,
// if it matches a previous offset.
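// In the zstd format, offset values 1-3 are repeat codes referring to the
// most recent offsets, and anything larger encodes a real offset plus 3.
// When a sequence has no literals the repeat codes shift by one, and the
// third code means recentOffsets[0]-1 (RFC 8878).
//
// For example, with the initial recent offsets {1, 4, 8}, a match at
// offset 4 preceded by at least one literal returns 2 and updates the
// recent offsets to {4, 1, 8}.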
func (b *blockEnc) matchOffset(offset, lits uint32) uint32 {
	// Check if offset is one of the recent offsets.
	// Adjusts the output offset accordingly.
	// Gives a tiny bit of compression, typically around 1%.
	if lits > 0 {
		switch offset {
		case b.recentOffsets[0]:
			offset = 1
		case b.recentOffsets[1]:
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 2
		case b.recentOffsets[2]:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 3
		default:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset += 3
		}
	} else {
		switch offset {
		case b.recentOffsets[1]:
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 1
		case b.recentOffsets[2]:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 2
		case b.recentOffsets[0] - 1:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 3
		default:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset += 3
		}
	}
	return offset
}

// encodeRaw can be used to set the output to a raw representation of supplied bytes.
func (b *blockEnc) encodeRaw(a []byte) {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(uint32(len(a)))
	bh.setType(blockTypeRaw)
	b.output = bh.appendTo(b.output[:0])
	b.output = append(b.output, a...)
	if debugEncoder {
		println("Adding RAW block, length", len(a), "last:", b.last)
	}
}

// encodeRawTo will append a raw block header and the supplied bytes to dst
// and return the updated slice.
func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(uint32(len(src)))
	bh.setType(blockTypeRaw)
	dst = bh.appendTo(dst)
	dst = append(dst, src...)
	if debugEncoder {
		println("Adding RAW block, length", len(src), "last:", b.last)
	}
	return dst
}

// encodeLits can be used if the block is only litLen.
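// It picks between raw, RLE and Huffman-compressed literals: four huff0
// streams for 1KB of literals or more, a single stream above 16 bytes, and
// raw output whenever compression would not pay for its own headers.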
func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(uint32(len(lits)))

	// Don't compress extremely small blocks
	if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
		if debugEncoder {
			println("Adding RAW block, length", len(lits), "last:", b.last)
		}
		bh.setType(blockTypeRaw)
		b.output = bh.appendTo(b.output)
		b.output = append(b.output, lits...)
		return nil
	}

	var (
		out            []byte
		reUsed, single bool
		err            error
	)
	if b.dictLitEnc != nil {
		b.litEnc.TransferCTable(b.dictLitEnc)
		b.litEnc.Reuse = huff0.ReusePolicyAllow
		b.dictLitEnc = nil
	}
	if len(lits) >= 1024 {
		// Use 4 Streams.
		out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
	} else if len(lits) > 16 {
		// Use 1 stream
		single = true
		out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
	} else {
		err = huff0.ErrIncompressible
	}
	if err == nil && len(out)+5 > len(lits) {
		// If we are close, we may still be worse or equal to raw.
		var lh literalsHeader
		lh.setSizes(len(out), len(lits), single)
		if len(out)+lh.size() >= len(lits) {
			err = huff0.ErrIncompressible
		}
	}
	switch err {
	case huff0.ErrIncompressible:
		if debugEncoder {
			println("Adding RAW block, length", len(lits), "last:", b.last)
		}
		bh.setType(blockTypeRaw)
		b.output = bh.appendTo(b.output)
		b.output = append(b.output, lits...)
		return nil
	case huff0.ErrUseRLE:
		if debugEncoder {
			println("Adding RLE block, length", len(lits))
		}
		bh.setType(blockTypeRLE)
		b.output = bh.appendTo(b.output)
		b.output = append(b.output, lits[0])
		return nil
	case nil:
	default:
		return err
	}
	// Compressed...
	// Now, allow reuse
	b.litEnc.Reuse = huff0.ReusePolicyAllow
	bh.setType(blockTypeCompressed)
	var lh literalsHeader
	if reUsed {
		if debugEncoder {
			println("Reused tree, compressed to", len(out))
		}
		lh.setType(literalsBlockTreeless)
	} else {
		if debugEncoder {
			println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
		}
		lh.setType(literalsBlockCompressed)
	}
	// Set sizes
	lh.setSizes(len(out), len(lits), single)
	bh.setSize(uint32(len(out) + lh.size() + 1))
	// Write block headers.
	b.output = bh.appendTo(b.output)
	b.output = lh.appendTo(b.output)
	// Add compressed data.
	b.output = append(b.output, out...)
	// No sequences.
	b.output = append(b.output, 0)
	return nil
}

// encodeRLE will encode an RLE block.
func (b *blockEnc) encodeRLE(val byte, length uint32) {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(length)
	bh.setType(blockTypeRLE)
	b.output = bh.appendTo(b.output)
	b.output = append(b.output, val)
}

// fuzzFseEncoder can be used to fuzz the FSE encoder.
func fuzzFseEncoder(data []byte) int {
	if len(data) > maxSequences || len(data) < 2 {
		return 0
	}
	enc := fseEncoder{}
	hist := enc.Histogram()
	maxSym := uint8(0)
	for i, v := range data {
		v = v & 63
		data[i] = v
		hist[v]++
		if v > maxSym {
			maxSym = v
		}
	}
	if maxSym == 0 {
		// All 0
		return 0
	}
	cnt := int(slices.Max(hist[:maxSym]))
	if cnt == len(data) {
		// RLE
		return 0
	}
	enc.HistogramFinished(maxSym, cnt)
	err := enc.normalizeCount(len(data))
	if err != nil {
		return 0
	}
	_, err = enc.writeCount(nil)
	if err != nil {
		panic(err)
	}
	return 1
}

// encode will encode the block and append the output in b.output.
// Previous offset codes must be pushed if more blocks are expected.
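// The output follows the compressed-block layout: literals section,
// sequence count, compression-modes byte, the three FSE table descriptions
// (literal lengths, offsets, match lengths) and finally the interleaved
// FSE/extra-bits stream, which is written in reverse sequence order.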
func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
	if len(b.sequences) == 0 {
		return b.encodeLits(b.literals, rawAllLits)
	}
	if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 {
		// Check common RLE cases.
		seq := b.sequences[0]
		if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 {
			// Offset == 1 and 0 or 1 literals.
			b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen)
			return nil
		}
	}

	// We want some difference to at least account for the headers.
	saved := b.size - len(b.literals) - (b.size >> 6)
	if saved < 16 {
		if org == nil {
			return errIncompressible
		}
		b.popOffsets()
		return b.encodeLits(org, rawAllLits)
	}

	var bh blockHeader
	var lh literalsHeader
	bh.setLast(b.last)
	bh.setType(blockTypeCompressed)
	// Store offset of the block header. Needed when we know the size.
	bhOffset := len(b.output)
	b.output = bh.appendTo(b.output)

	var (
		out            []byte
		reUsed, single bool
		err            error
	)
	if b.dictLitEnc != nil {
		b.litEnc.TransferCTable(b.dictLitEnc)
		b.litEnc.Reuse = huff0.ReusePolicyAllow
		b.dictLitEnc = nil
	}
	if len(b.literals) >= 1024 && !raw {
		// Use 4 Streams.
		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
	} else if len(b.literals) > 16 && !raw {
		// Use 1 stream
		single = true
		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
	} else {
		err = huff0.ErrIncompressible
	}
	if err == nil && len(out)+5 > len(b.literals) {
		// If we are close, we may still be worse or equal to raw.
		var lh literalsHeader
		lh.setSize(len(b.literals))
		szRaw := lh.size()
		lh.setSizes(len(out), len(b.literals), single)
		szComp := lh.size()
		if len(out)+szComp >= len(b.literals)+szRaw {
			err = huff0.ErrIncompressible
		}
	}
	switch err {
	case huff0.ErrIncompressible:
		lh.setType(literalsBlockRaw)
		lh.setSize(len(b.literals))
		b.output = lh.appendTo(b.output)
		b.output = append(b.output, b.literals...)
		if debugEncoder {
			println("Adding literals RAW, length", len(b.literals))
		}
	case huff0.ErrUseRLE:
		lh.setType(literalsBlockRLE)
		lh.setSize(len(b.literals))
		b.output = lh.appendTo(b.output)
		b.output = append(b.output, b.literals[0])
		if debugEncoder {
			println("Adding literals RLE")
		}
	case nil:
		// Compressed litLen...
		if reUsed {
			if debugEncoder {
				println("reused tree")
			}
			lh.setType(literalsBlockTreeless)
		} else {
			if debugEncoder {
				println("new tree, size:", len(b.litEnc.OutTable))
			}
			lh.setType(literalsBlockCompressed)
			if debugEncoder {
				_, _, err := huff0.ReadTable(out, nil)
				if err != nil {
					panic(err)
				}
			}
		}
		lh.setSizes(len(out), len(b.literals), single)
		if debugEncoder {
			printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
			println("Adding literal header:", lh)
		}
		b.output = lh.appendTo(b.output)
		b.output = append(b.output, out...)
		b.litEnc.Reuse = huff0.ReusePolicyAllow
		if debugEncoder {
			println("Adding literals compressed")
		}
	default:
		if debugEncoder {
			println("Adding literals ERROR:", err)
		}
		return err
	}

	// Sequence compression

	// Write the number of sequences
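	// The count is stored in 1-3 bytes: a single byte below 128, two bytes
	// as ((byte0-128)<<8)|byte1 up to 0x7eff, and otherwise 0xff followed
	// by a little-endian 16-bit value holding count-0x7f00.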
	switch {
	case len(b.sequences) < 128:
		b.output = append(b.output, uint8(len(b.sequences)))
	case len(b.sequences) < 0x7f00: // Max for the 2-byte form is 0x7eff.
		n := len(b.sequences)
		b.output = append(b.output, 128+uint8(n>>8), uint8(n))
	default:
		n := len(b.sequences) - 0x7f00
		b.output = append(b.output, 255, uint8(n), uint8(n>>8))
	}
	if debugEncoder {
		println("Encoding", len(b.sequences), "sequences")
	}
	b.genCodes()
	llEnc := b.coders.llEnc
	ofEnc := b.coders.ofEnc
	mlEnc := b.coders.mlEnc
	err = llEnc.normalizeCount(len(b.sequences))
	if err != nil {
		return err
	}
	err = ofEnc.normalizeCount(len(b.sequences))
	if err != nil {
		return err
	}
	err = mlEnc.normalizeCount(len(b.sequences))
	if err != nil {
		return err
	}

	// Choose the best compression mode for each type.
	// Will evaluate the new vs predefined and previous.
	chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) {
		// See if predefined/previous is better
		hist := cur.count[:cur.symbolLen]
		nSize := cur.approxSize(hist) + cur.maxHeaderSize()
		predefSize := preDef.approxSize(hist)
		prevSize := prev.approxSize(hist)

		// Add a small penalty for new encoders.
		// Don't bother with extremely small (<2 byte gains).
		nSize = nSize + (nSize+2*8*16)>>4
		switch {
		case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
			if debugEncoder {
				println("Using predefined", predefSize>>3, "<=", nSize>>3)
			}
			return preDef, compModePredefined
		case prevSize <= nSize:
			if debugEncoder {
				println("Using previous", prevSize>>3, "<=", nSize>>3)
			}
			return prev, compModeRepeat
		default:
			if debugEncoder {
				println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
				println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
			}
			return cur, compModeFSE
		}
	}

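	// The compression-modes byte packs two bits per table: literal lengths
	// in bits 6-7, offsets in bits 4-5 and match lengths in bits 2-3. The
	// low two bits are reserved and stay zero.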
	// Write compression mode
	var mode uint8
	if llEnc.useRLE {
		mode |= uint8(compModeRLE) << 6
		llEnc.setRLE(b.sequences[0].llCode)
		if debugEncoder {
			println("llEnc.useRLE")
		}
	} else {
		var m seqCompMode
		llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths])
		mode |= uint8(m) << 6
	}
	if ofEnc.useRLE {
		mode |= uint8(compModeRLE) << 4
		ofEnc.setRLE(b.sequences[0].ofCode)
		if debugEncoder {
			println("ofEnc.useRLE")
		}
	} else {
		var m seqCompMode
		ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets])
		mode |= uint8(m) << 4
	}
	if mlEnc.useRLE {
		mode |= uint8(compModeRLE) << 2
		mlEnc.setRLE(b.sequences[0].mlCode)
		if debugEncoder {
			println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
		}
	} else {
		var m seqCompMode
		mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths])
		mode |= uint8(m) << 2
	}
	b.output = append(b.output, mode)
	if debugEncoder {
		printf("Compression modes: 0b%b", mode)
	}
	b.output, err = llEnc.writeCount(b.output)
	if err != nil {
		return err
	}
	start := len(b.output)
	b.output, err = ofEnc.writeCount(b.output)
	if err != nil {
		return err
	}
	if false {
		// Debug: dump the written offset table.
		println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount)
		fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen)
		for i, v := range ofEnc.norm[:ofEnc.symbolLen] {
			fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v)
		}
	}
	b.output, err = mlEnc.writeCount(b.output)
	if err != nil {
		return err
	}
	// Maybe in block?
	wr := &b.wr
	wr.reset(b.output)

	var ll, of, ml cState

	// Current sequence
	seq := len(b.sequences) - 1
	s := b.sequences[seq]
	llEnc.setBits(llBitsTable[:])
	mlEnc.setBits(mlBitsTable[:])
	ofEnc.setBits(nil)

	llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256]

	// We have 3 bounds checks here (and in the loop).
	// Since we are iterating backwards it is kinda hard to avoid.
	llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
	ll.init(wr, &llEnc.ct, llB)
	of.init(wr, &ofEnc.ct, ofB)
	wr.flush32()
	ml.init(wr, &mlEnc.ct, mlB)

	// Each of these lookups also generates a bounds check.
	wr.addBits32NC(s.litLen, llB.outBits)
	wr.addBits32NC(s.matchLen, mlB.outBits)
	wr.flush32()
	wr.addBits32NC(s.offset, ofB.outBits)
	if debugSequences {
		println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
	}
	seq--
	// Store sequences in reverse...
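	// The decoder reads the bitstream backwards, so the remaining sequences
	// are emitted last-to-first. Each iteration inlines cState.encode for
	// the three states: the bit count is (state+deltaNbBits)>>16 and the
	// next state is stateTable[(state>>nbBits)+deltaFindState], with the
	// offset, match-length and literal-length extra bits accumulated and
	// flushed together.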
	for seq >= 0 {
		s = b.sequences[seq]

		ofB := ofTT[s.ofCode]
		wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits.
		//of.encode(ofB)
		nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16
		dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState)
		wr.addBits16NC(of.state, uint8(nbBitsOut))
		of.state = of.stateTable[dstState]

		// Accumulate extra bits.
		outBits := ofB.outBits & 31
		extraBits := uint64(s.offset & bitMask32[outBits])
		extraBitsN := outBits

		mlB := mlTT[s.mlCode]
		//ml.encode(mlB)
		nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16
		dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState)
		wr.addBits16NC(ml.state, uint8(nbBitsOut))
		ml.state = ml.stateTable[dstState]

		outBits = mlB.outBits & 31
		extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits])
		extraBitsN += outBits

		llB := llTT[s.llCode]
		//ll.encode(llB)
		nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16
		dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState)
		wr.addBits16NC(ll.state, uint8(nbBitsOut))
		ll.state = ll.stateTable[dstState]

		outBits = llB.outBits & 31
		extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits])
		extraBitsN += outBits

		wr.flush32()
		wr.addBits64NC(extraBits, extraBitsN)

		if debugSequences {
			println("Encoded seq", seq, s)
		}

		seq--
	}
	ml.flush(mlEnc.actualTableLog)
	of.flush(ofEnc.actualTableLog)
	ll.flush(llEnc.actualTableLog)
	wr.close()
	b.output = wr.out

	// Maybe even add a bigger margin.
	if len(b.output)-3-bhOffset >= b.size {
		// Discard and encode as raw block.
		b.output = b.encodeRawTo(b.output[:bhOffset], org)
		b.popOffsets()
		b.litEnc.Reuse = huff0.ReusePolicyNone
		return nil
	}

	// Size is output minus block header.
	bh.setSize(uint32(len(b.output)-bhOffset) - 3)
	if debugEncoder {
		println("Rewriting block header", bh)
	}
	_ = bh.appendTo(b.output[bhOffset:bhOffset])
	b.coders.setPrev(llEnc, mlEnc, ofEnc)
	return nil
}

var errIncompressible = errors.New("incompressible")

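// genCodes assigns the literal-length, offset and match-length codes to
// every sequence and builds the histograms the FSE encoders are normalized
// from.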
func (b *blockEnc) genCodes() {
	if len(b.sequences) == 0 {
		// nothing to do
		return
	}
	if len(b.sequences) > math.MaxUint16 {
		panic("can only encode up to 64K sequences")
	}
	// No bounds checks after here:
	llH := b.coders.llEnc.Histogram()
	ofH := b.coders.ofEnc.Histogram()
	mlH := b.coders.mlEnc.Histogram()
	for i := range llH {
		llH[i] = 0
	}
	for i := range ofH {
		ofH[i] = 0
	}
	for i := range mlH {
		mlH[i] = 0
	}

	var llMax, ofMax, mlMax uint8
	for i := range b.sequences {
		seq := &b.sequences[i]
		v := llCode(seq.litLen)
		seq.llCode = v
		llH[v]++
		if v > llMax {
			llMax = v
		}

		v = ofCode(seq.offset)
		seq.ofCode = v
		ofH[v]++
		if v > ofMax {
			ofMax = v
		}

		v = mlCode(seq.matchLen)
		seq.mlCode = v
		mlH[v]++
		if v > mlMax {
			mlMax = v
			if debugAsserts && mlMax > maxMatchLengthSymbol {
				panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
			}
		}
	}

	if debugAsserts && mlMax > maxMatchLengthSymbol {
		panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
	}
	if debugAsserts && ofMax > maxOffsetBits {
		panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
	}
	if debugAsserts && llMax > maxLiteralLengthSymbol {
		panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
	}

	b.coders.mlEnc.HistogramFinished(mlMax, int(slices.Max(mlH[:mlMax+1])))
	b.coders.ofEnc.HistogramFinished(ofMax, int(slices.Max(ofH[:ofMax+1])))
	b.coders.llEnc.HistogramFinished(llMax, int(slices.Max(llH[:llMax+1])))
}