001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020/*
021 * This package is based on the work done by Keiron Liddle, Aftex Software
022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
023 * great code.
024 */
025package org.apache.commons.compress.compressors.bzip2;
026
027import java.io.IOException;
028import java.io.InputStream;
029import java.nio.ByteOrder;
030import java.util.Arrays;
031
032import org.apache.commons.compress.compressors.CompressorInputStream;
033import org.apache.commons.compress.utils.BitInputStream;
034import org.apache.commons.compress.utils.CloseShieldFilterInputStream;
035import org.apache.commons.compress.utils.InputStreamStatistics;
036
037/**
038 * An input stream that decompresses from the BZip2 format to be read as any other stream.
039 *
040 * @NotThreadSafe
041 */
042public class BZip2CompressorInputStream extends CompressorInputStream
043    implements BZip2Constants, InputStreamStatistics {
044
045    private static final class Data {
046
047        // (with blockSize 900k)
048        final boolean[] inUse = new boolean[256]; // 256 byte
049
050        final byte[] seqToUnseq = new byte[256]; // 256 byte
051        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
052        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
053
054        /**
055         * Freq table collected to save a pass over the data during
056         * decompression.
057         */
058        final int[] unzftab = new int[256]; // 1024 byte
059
060        final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
061        final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
062        final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
063        final int[] minLens = new int[N_GROUPS]; // 24 byte
064
065        final int[] cftab = new int[257]; // 1028 byte
066        final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
067        final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
068        // byte
069        final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
070        // ---------------
071        // 60798 byte
072
073        int[] tt; // 3600000 byte
074        final byte[] ll8; // 900000 byte
075
076        // ---------------
077        // 4560782 byte
078        // ===============
079
080        Data(final int blockSize100k) {
081            this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
082        }
083
084        /**
085         * Initializes the {@link #tt} array.
086         *
087         * This method is called when the required length of the array is known.
088         * I don't initialize it at construction time to avoid unnecessary
089         * memory allocation when compressing small files.
090         */
091        int[] initTT(final int length) {
092            int[] ttShadow = this.tt;
093
094            // tt.length should always be >= length, but theoretically
095            // it can happen, if the compressor mixed small and large
096            // blocks. Normally only the last block will be smaller
097            // than others.
098            if (ttShadow == null || ttShadow.length < length) {
099                this.tt = ttShadow = new int[length];
100            }
101
102            return ttShadow;
103        }
104
105    }
106
107    private static final int EOF = 0;
108
109    private static final int START_BLOCK_STATE = 1;
110
111    private static final int RAND_PART_A_STATE = 2;
112
113    private static final int RAND_PART_B_STATE = 3;
114
115    private static final int RAND_PART_C_STATE = 4;
116
117    private static final int NO_RAND_PART_A_STATE = 5;
118    private static final int NO_RAND_PART_B_STATE = 6;
119
120    private static final int NO_RAND_PART_C_STATE = 7;
121    private static boolean bsGetBit(final BitInputStream bin) throws IOException {
122        return bsR(bin, 1) != 0;
123    }
124    private static int bsGetInt(final BitInputStream bin) throws IOException {
125        return bsR(bin, 32);
126    }
127    private static char bsGetUByte(final BitInputStream bin) throws IOException {
128        return (char) bsR(bin, 8);
129    }
130    /**
131     * read bits from the input stream
132     * @param n the number of bits to read, must not exceed 32?
133     * @return the requested bits combined into an int
134     * @throws IOException
135     */
136    private static int bsR(final BitInputStream bin, final int n) throws IOException {
137        final long thech = bin.readBits(n);
138        if (thech < 0) {
139            throw new IOException("Unexpected end of stream");
140        }
141        return (int) thech;
142    }
143    private static void checkBounds(final int checkVal, final int limitExclusive, final String name)
144        throws IOException {
145        if (checkVal < 0) {
146            throw new IOException("Corrupted input, " + name + " value negative");
147        }
148        if (checkVal >= limitExclusive) {
149            throw new IOException("Corrupted input, " + name + " value too big");
150        }
151    }
152    /**
153     * Called by createHuffmanDecodingTables() exclusively.
154     */
155    private static void hbCreateDecodeTables(final int[] limit,
156                                             final int[] base, final int[] perm, final char[] length,
157                                             final int minLen, final int maxLen, final int alphaSize)
158        throws IOException {
159        for (int i = minLen, pp = 0; i <= maxLen; i++) {
160            for (int j = 0; j < alphaSize; j++) {
161                if (length[j] == i) {
162                    perm[pp++] = j;
163                }
164            }
165        }
166
167        for (int i = MAX_CODE_LEN; --i > 0;) {
168            base[i] = 0;
169            limit[i] = 0;
170        }
171
172        for (int i = 0; i < alphaSize; i++) {
173            final int l = length[i];
174            checkBounds(l, MAX_ALPHA_SIZE, "length");
175            base[l + 1]++;
176        }
177
178        for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
179            b += base[i];
180            base[i] = b;
181        }
182
183        for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
184            final int nb = base[i + 1];
185            vec += nb - b;
186            b = nb;
187            limit[i] = vec - 1;
188            vec <<= 1;
189        }
190
191        for (int i = minLen + 1; i <= maxLen; i++) {
192            base[i] = (limit[i - 1] + 1 << 1) - base[i];
193        }
194    }
195    /**
196     * Checks if the signature matches what is expected for a bzip2 file.
197     *
198     * @param signature
199     *            the bytes to check
200     * @param length
201     *            the number of bytes to check
202     * @return true, if this stream is a bzip2 compressed stream, false otherwise
203     *
204     * @since 1.1
205     */
206    public static boolean matches(final byte[] signature, final int length) {
207        return length >= 3 && signature[0] == 'B' &&
208                signature[1] == 'Z' && signature[2] == 'h';
209    }
210
211    /**
212     * Index of the last char in the block, so the block size == last + 1.
213     */
214    private int last;
215
216    /**
217     * Index in zptr[] of original string after sorting.
218     */
219    private int origPtr;
220    /**
221     * always: in the range 0 .. 9. The current block size is 100000 * this
222     * number.
223     */
224    private int blockSize100k;
225
226    // Variables used by setup* methods exclusively
227
228    private boolean blockRandomised;
229    private final CRC crc = new CRC();
230    private int nInUse;
231    private BitInputStream bin;
232    private final boolean decompressConcatenated;
233    private int currentState = START_BLOCK_STATE;
234    private int storedBlockCRC, storedCombinedCRC;
235    private int computedBlockCRC, computedCombinedCRC;
236    private int su_count;
237
238    private int su_ch2;
239
240    private int su_chPrev;
241
242    private int su_i2;
243
244    private int su_j2;
245
246    private int su_rNToGo;
247
248    private int su_rTPos;
249
250    private int su_tPos;
251
252    private char su_z;
253
254    /**
255     * All memory intensive stuff. This field is initialized by initBlock().
256     */
257    private BZip2CompressorInputStream.Data data;
258
259    /**
260     * Constructs a new BZip2CompressorInputStream which decompresses bytes
261     * read from the specified stream. This doesn't support decompressing
262     * concatenated .bz2 files.
263     *
264     * @param in the InputStream from which this object should be created
265     * @throws IOException
266     *             if the stream content is malformed or an I/O error occurs.
267     * @throws NullPointerException
268     *             if {@code in == null}
269     */
270    public BZip2CompressorInputStream(final InputStream in) throws IOException {
271        this(in, false);
272    }
273
274    /**
275     * Constructs a new BZip2CompressorInputStream which decompresses bytes
276     * read from the specified stream.
277     *
278     * @param in the InputStream from which this object should be created
279     * @param decompressConcatenated
280     *                     if true, decompress until the end of the input;
281     *                     if false, stop after the first .bz2 stream and
282     *                     leave the input position to point to the next
283     *                     byte after the .bz2 stream
284     *
285     * @throws IOException
286     *             if {@code in == null}, the stream content is malformed, or an I/O error occurs.
287     */
288    public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
289        this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in,
290            ByteOrder.BIG_ENDIAN);
291        this.decompressConcatenated = decompressConcatenated;
292
293        init(true);
294        initBlock();
295    }
296
297    @Override
298    public void close() throws IOException {
299        final BitInputStream inShadow = this.bin;
300        if (inShadow != null) {
301            try {
302                inShadow.close();
303            } finally {
304                this.data = null;
305                this.bin = null;
306            }
307        }
308    }
309
310    private boolean complete() throws IOException {
311        this.storedCombinedCRC = bsGetInt(bin);
312        this.currentState = EOF;
313        this.data = null;
314
315        if (this.storedCombinedCRC != this.computedCombinedCRC) {
316            throw new IOException("BZip2 CRC error");
317        }
318
319        // Look for the next .bz2 stream if decompressing
320        // concatenated files.
321        return !decompressConcatenated || !init(false);
322    }
323
324    /**
325     * Called by recvDecodingTables() exclusively.
326     */
327    private void createHuffmanDecodingTables(final int alphaSize,
328                                             final int nGroups) throws IOException {
329        final Data dataShadow = this.data;
330        final char[][] len = dataShadow.temp_charArray2d;
331        final int[] minLens = dataShadow.minLens;
332        final int[][] limit = dataShadow.limit;
333        final int[][] base = dataShadow.base;
334        final int[][] perm = dataShadow.perm;
335
336        for (int t = 0; t < nGroups; t++) {
337            int minLen = 32;
338            int maxLen = 0;
339            final char[] len_t = len[t];
340            for (int i = alphaSize; --i >= 0;) {
341                final char lent = len_t[i];
342                if (lent > maxLen) {
343                    maxLen = lent;
344                }
345                if (lent < minLen) {
346                    minLen = lent;
347                }
348            }
349            hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
350                                 maxLen, alphaSize);
351            minLens[t] = minLen;
352        }
353    }
354
355    private void endBlock() throws IOException {
356        this.computedBlockCRC = this.crc.getFinalCRC();
357
358        // A bad CRC is considered a fatal error.
359        if (this.storedBlockCRC != this.computedBlockCRC) {
360            // make next blocks readable without error
361            // (repair feature, not yet documented, not tested)
362            this.computedCombinedCRC = this.storedCombinedCRC << 1
363                | this.storedCombinedCRC >>> 31;
364            this.computedCombinedCRC ^= this.storedBlockCRC;
365
366            throw new IOException("BZip2 CRC error");
367        }
368
369        this.computedCombinedCRC = this.computedCombinedCRC << 1
370            | this.computedCombinedCRC >>> 31;
371        this.computedCombinedCRC ^= this.computedBlockCRC;
372    }
373
374    private void getAndMoveToFrontDecode() throws IOException {
375        final BitInputStream bin = this.bin;
376        this.origPtr = bsR(bin, 24);
377        recvDecodingTables();
378
379        final Data dataShadow = this.data;
380        final byte[] ll8 = dataShadow.ll8;
381        final int[] unzftab = dataShadow.unzftab;
382        final byte[] selector = dataShadow.selector;
383        final byte[] seqToUnseq = dataShadow.seqToUnseq;
384        final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
385        final int[] minLens = dataShadow.minLens;
386        final int[][] limit = dataShadow.limit;
387        final int[][] base = dataShadow.base;
388        final int[][] perm = dataShadow.perm;
389        final int limitLast = this.blockSize100k * 100000;
390
391        /*
392         * Setting up the unzftab entries here is not strictly necessary, but it
393         * does save having to do it later in a separate pass, and so saves a
394         * block's worth of cache misses.
395         */
396        for (int i = 256; --i >= 0;) {
397            yy[i] = (char) i;
398            unzftab[i] = 0;
399        }
400
401        int groupNo = 0;
402        int groupPos = G_SIZE - 1;
403        final int eob = this.nInUse + 1;
404        int nextSym = getAndMoveToFrontDecode0();
405        int lastShadow = -1;
406        int zt = selector[groupNo] & 0xff;
407        checkBounds(zt, N_GROUPS, "zt");
408        int[] base_zt = base[zt];
409        int[] limit_zt = limit[zt];
410        int[] perm_zt = perm[zt];
411        int minLens_zt = minLens[zt];
412
413        while (nextSym != eob) {
414            if (nextSym == RUNA || nextSym == RUNB) {
415                int s = -1;
416
417                for (int n = 1; true; n <<= 1) {
418                    if (nextSym == RUNA) {
419                        s += n;
420                    } else if (nextSym == RUNB) {
421                        s += n << 1;
422                    } else {
423                        break;
424                    }
425
426                    if (groupPos == 0) {
427                        groupPos = G_SIZE - 1;
428                        checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
429                        zt = selector[groupNo] & 0xff;
430                        checkBounds(zt, N_GROUPS, "zt");
431                        base_zt = base[zt];
432                        limit_zt = limit[zt];
433                        perm_zt = perm[zt];
434                        minLens_zt = minLens[zt];
435                    } else {
436                        groupPos--;
437                    }
438
439                    int zn = minLens_zt;
440                    checkBounds(zn, MAX_ALPHA_SIZE, "zn");
441                    int zvec = bsR(bin, zn);
442                    while(zvec > limit_zt[zn]) {
443                        checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
444                        zvec = zvec << 1 | bsR(bin, 1);
445                    }
446                    final int tmp = zvec - base_zt[zn];
447                    checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
448                    nextSym = perm_zt[tmp];
449                }
450                checkBounds(s, this.data.ll8.length, "s");
451
452                final int yy0 = yy[0];
453                checkBounds(yy0, 256, "yy");
454                final byte ch = seqToUnseq[yy0];
455                unzftab[ch & 0xff] += s + 1;
456
457                final int from = ++lastShadow;
458                lastShadow += s;
459                checkBounds(lastShadow, this.data.ll8.length, "lastShadow");
460                Arrays.fill(ll8, from, lastShadow + 1, ch);
461
462                if (lastShadow >= limitLast) {
463                    throw new IOException("Block overrun while expanding RLE in MTF, "
464                        + lastShadow + " exceeds " + limitLast);
465                }
466            } else {
467                if (++lastShadow >= limitLast) {
468                    throw new IOException("Block overrun in MTF, "
469                        + lastShadow + " exceeds " + limitLast);
470                }
471                checkBounds(nextSym, 256 + 1, "nextSym");
472
473                final char tmp = yy[nextSym - 1];
474                checkBounds(tmp, 256, "yy");
475                unzftab[seqToUnseq[tmp] & 0xff]++;
476                ll8[lastShadow] = seqToUnseq[tmp];
477
478                /*
479                 * This loop is hammered during decompression, hence avoid
480                 * native method call overhead of System.arraycopy for very
481                 * small ranges to copy.
482                 */
483                if (nextSym <= 16) {
484                    for (int j = nextSym - 1; j > 0;) {
485                        yy[j] = yy[--j];
486                    }
487                } else {
488                    System.arraycopy(yy, 0, yy, 1, nextSym - 1);
489                }
490
491                yy[0] = tmp;
492
493                if (groupPos == 0) {
494                    groupPos = G_SIZE - 1;
495                    checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
496                    zt = selector[groupNo] & 0xff;
497                    checkBounds(zt, N_GROUPS, "zt");
498                    base_zt = base[zt];
499                    limit_zt = limit[zt];
500                    perm_zt = perm[zt];
501                    minLens_zt = minLens[zt];
502                } else {
503                    groupPos--;
504                }
505
506                int zn = minLens_zt;
507                checkBounds(zn, MAX_ALPHA_SIZE, "zn");
508                int zvec = bsR(bin, zn);
509                while(zvec > limit_zt[zn]) {
510                    checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
511                    zvec = zvec << 1 | bsR(bin, 1);
512                }
513                final int idx = zvec - base_zt[zn];
514                checkBounds(idx, MAX_ALPHA_SIZE, "zvec");
515                nextSym = perm_zt[idx];
516            }
517        }
518
519        this.last = lastShadow;
520    }
521
522    private int getAndMoveToFrontDecode0() throws IOException {
523        final Data dataShadow = this.data;
524        final int zt = dataShadow.selector[0] & 0xff;
525        checkBounds(zt, N_GROUPS, "zt");
526        final int[] limit_zt = dataShadow.limit[zt];
527        int zn = dataShadow.minLens[zt];
528        checkBounds(zn, MAX_ALPHA_SIZE, "zn");
529        int zvec = bsR(bin, zn);
530        while (zvec > limit_zt[zn]) {
531            checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
532            zvec = zvec << 1 | bsR(bin, 1);
533        }
534        final int tmp = zvec - dataShadow.base[zt][zn];
535        checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
536
537        return dataShadow.perm[zt][tmp];
538    }
539
540    /**
541     * @since 1.17
542     */
543    @Override
544    public long getCompressedCount() {
545        return bin.getBytesRead();
546    }
547
548    private boolean init(final boolean isFirstStream) throws IOException {
549        if (null == bin) {
550            throw new IOException("No InputStream");
551        }
552
553        if (!isFirstStream) {
554            bin.clearBitCache();
555        }
556
557        final int magic0 = readNextByte(this.bin);
558        if (magic0 == -1 && !isFirstStream) {
559            return false;
560        }
561        final int magic1 = readNextByte(this.bin);
562        final int magic2 = readNextByte(this.bin);
563
564        if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
565            throw new IOException(isFirstStream
566                    ? "Stream is not in the BZip2 format"
567                    : "Garbage after a valid BZip2 stream");
568        }
569
570        final int blockSize = readNextByte(this.bin);
571        if (blockSize < '1' || blockSize > '9') {
572            throw new IOException("BZip2 block size is invalid");
573        }
574
575        this.blockSize100k = blockSize - '0';
576
577        this.computedCombinedCRC = 0;
578
579        return true;
580    }
581
582    private void initBlock() throws IOException {
583        final BitInputStream bin = this.bin;
584        char magic0;
585        char magic1;
586        char magic2;
587        char magic3;
588        char magic4;
589        char magic5;
590
591        while (true) {
592            // Get the block magic bytes.
593            magic0 = bsGetUByte(bin);
594            magic1 = bsGetUByte(bin);
595            magic2 = bsGetUByte(bin);
596            magic3 = bsGetUByte(bin);
597            magic4 = bsGetUByte(bin);
598            magic5 = bsGetUByte(bin);
599
600            // If isn't end of stream magic, break out of the loop.
601            if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
602                    || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
603                break;
604            }
605
606            // End of stream was reached. Check the combined CRC and
607            // advance to the next .bz2 stream if decoding concatenated
608            // streams.
609            if (complete()) {
610                return;
611            }
612        }
613
614        if (magic0 != 0x31 || // '1'
615            magic1 != 0x41 || // ')'
616            magic2 != 0x59 || // 'Y'
617            magic3 != 0x26 || // '&'
618            magic4 != 0x53 || // 'S'
619            magic5 != 0x59 // 'Y'
620            ) {
621            this.currentState = EOF;
622            throw new IOException("Bad block header");
623        }
624        this.storedBlockCRC = bsGetInt(bin);
625        this.blockRandomised = bsR(bin, 1) == 1;
626
627        /*
628         * Allocate data here instead in constructor, so we do not allocate
629         * it if the input file is empty.
630         */
631        if (this.data == null) {
632            this.data = new Data(this.blockSize100k);
633        }
634
635        // currBlockNo++;
636        getAndMoveToFrontDecode();
637
638        this.crc.initializeCRC();
639        this.currentState = START_BLOCK_STATE;
640    }
641
642    private void makeMaps() {
643        final boolean[] inUse = this.data.inUse;
644        final byte[] seqToUnseq = this.data.seqToUnseq;
645
646        int nInUseShadow = 0;
647
648        for (int i = 0; i < 256; i++) {
649            if (inUse[i]) {
650                seqToUnseq[nInUseShadow++] = (byte) i;
651            }
652        }
653
654        this.nInUse = nInUseShadow;
655    }
656
657    @Override
658    public int read() throws IOException {
659        if (this.bin != null) {
660            final int r = read0();
661            count(r < 0 ? -1 : 1);
662            return r;
663        }
664        throw new IOException("Stream closed");
665    }
666
667    /*
668     * (non-Javadoc)
669     *
670     * @see java.io.InputStream#read(byte[], int, int)
671     */
672    @Override
673    public int read(final byte[] dest, final int offs, final int len)
674        throws IOException {
675        if (offs < 0) {
676            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
677        }
678        if (len < 0) {
679            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
680        }
681        if (offs + len > dest.length) {
682            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
683                                                + len + ") > dest.length(" + dest.length + ").");
684        }
685        if (this.bin == null) {
686            throw new IOException("Stream closed");
687        }
688        if (len == 0) {
689            return 0;
690        }
691
692        final int hi = offs + len;
693        int destOffs = offs;
694        int b;
695        while (destOffs < hi && (b = read0()) >= 0) {
696            dest[destOffs++] = (byte) b;
697            count(1);
698        }
699
700        return destOffs == offs ? -1 : destOffs - offs;
701    }
702
703    private int read0() throws IOException {
704        switch (currentState) {
705        case EOF:
706            return -1;
707
708        case START_BLOCK_STATE:
709            return setupBlock();
710
711        case RAND_PART_A_STATE:
712            throw new IllegalStateException();
713
714        case RAND_PART_B_STATE:
715            return setupRandPartB();
716
717        case RAND_PART_C_STATE:
718            return setupRandPartC();
719
720        case NO_RAND_PART_A_STATE:
721            throw new IllegalStateException();
722
723        case NO_RAND_PART_B_STATE:
724            return setupNoRandPartB();
725
726        case NO_RAND_PART_C_STATE:
727            return setupNoRandPartC();
728
729        default:
730            throw new IllegalStateException();
731        }
732    }
733
734    private int readNextByte(final BitInputStream in) throws IOException {
735        final long b = in.readBits(8);
736        return (int) b;
737    }
738
739    private void recvDecodingTables() throws IOException {
740        final BitInputStream bin = this.bin;
741        final Data dataShadow = this.data;
742        final boolean[] inUse = dataShadow.inUse;
743        final byte[] pos = dataShadow.recvDecodingTables_pos;
744        final byte[] selector = dataShadow.selector;
745        final byte[] selectorMtf = dataShadow.selectorMtf;
746
747        int inUse16 = 0;
748
749        /* Receive the mapping table */
750        for (int i = 0; i < 16; i++) {
751            if (bsGetBit(bin)) {
752                inUse16 |= 1 << i;
753            }
754        }
755
756        Arrays.fill(inUse, false);
757        for (int i = 0; i < 16; i++) {
758            if ((inUse16 & 1 << i) != 0) {
759                final int i16 = i << 4;
760                for (int j = 0; j < 16; j++) {
761                    if (bsGetBit(bin)) {
762                        inUse[i16 + j] = true;
763                    }
764                }
765            }
766        }
767
768        makeMaps();
769        final int alphaSize = this.nInUse + 2;
770        /* Now the selectors */
771        final int nGroups = bsR(bin, 3);
772        final int selectors = bsR(bin, 15);
773        if (selectors < 0) {
774            throw new IOException("Corrupted input, nSelectors value negative");
775        }
776        checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize");
777        checkBounds(nGroups, N_GROUPS + 1, "nGroups");
778
779        // Don't fail on nSelectors overflowing boundaries but discard the values in overflow
780        // See https://gnu.wildebeest.org/blog/mjw/2019/08/02/bzip2-and-the-cve-that-wasnt/
781        // and https://sourceware.org/ml/bzip2-devel/2019-q3/msg00007.html
782
783        for (int i = 0; i < selectors; i++) {
784            int j = 0;
785            while (bsGetBit(bin)) {
786                j++;
787            }
788            if (i < MAX_SELECTORS) {
789                selectorMtf[i] = (byte) j;
790            }
791        }
792        final int nSelectors = Math.min(selectors, MAX_SELECTORS);
793
794        /* Undo the MTF values for the selectors. */
795        for (int v = nGroups; --v >= 0;) {
796            pos[v] = (byte) v;
797        }
798
799        for (int i = 0; i < nSelectors; i++) {
800            int v = selectorMtf[i] & 0xff;
801            checkBounds(v, N_GROUPS, "selectorMtf");
802            final byte tmp = pos[v];
803            while (v > 0) {
804                // nearly all times v is zero, 4 in most other cases
805                pos[v] = pos[v - 1];
806                v--;
807            }
808            pos[0] = tmp;
809            selector[i] = tmp;
810        }
811
812        final char[][] len = dataShadow.temp_charArray2d;
813
814        /* Now the coding tables */
815        for (int t = 0; t < nGroups; t++) {
816            int curr = bsR(bin, 5);
817            final char[] len_t = len[t];
818            for (int i = 0; i < alphaSize; i++) {
819                while (bsGetBit(bin)) {
820                    curr += bsGetBit(bin) ? -1 : 1;
821                }
822                len_t[i] = (char) curr;
823            }
824        }
825
826        // finally create the Huffman tables
827        createHuffmanDecodingTables(alphaSize, nGroups);
828    }
829
830    private int setupBlock() throws IOException {
831        if (currentState == EOF || this.data == null) {
832            return -1;
833        }
834
835        final int[] cftab = this.data.cftab;
836        final int ttLen = this.last + 1;
837        final int[] tt = this.data.initTT(ttLen);
838        final byte[] ll8 = this.data.ll8;
839        cftab[0] = 0;
840        System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
841
842        for (int i = 1, c = cftab[0]; i <= 256; i++) {
843            c += cftab[i];
844            cftab[i] = c;
845        }
846
847        for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
848            final int tmp = cftab[ll8[i] & 0xff]++;
849            checkBounds(tmp, ttLen, "tt index");
850            tt[tmp] = i;
851        }
852
853        if (this.origPtr < 0 || this.origPtr >= tt.length) {
854            throw new IOException("Stream corrupted");
855        }
856
857        this.su_tPos = tt[this.origPtr];
858        this.su_count = 0;
859        this.su_i2 = 0;
860        this.su_ch2 = 256; /* not a char and not EOF */
861
862        if (this.blockRandomised) {
863            this.su_rNToGo = 0;
864            this.su_rTPos = 0;
865            return setupRandPartA();
866        }
867        return setupNoRandPartA();
868    }
869
870    private int setupNoRandPartA() throws IOException {
871        if (this.su_i2 <= this.last) {
872            this.su_chPrev = this.su_ch2;
873            final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
874            this.su_ch2 = su_ch2Shadow;
875            checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
876            this.su_tPos = this.data.tt[this.su_tPos];
877            this.su_i2++;
878            this.currentState = NO_RAND_PART_B_STATE;
879            this.crc.updateCRC(su_ch2Shadow);
880            return su_ch2Shadow;
881        }
882        this.currentState = NO_RAND_PART_A_STATE;
883        endBlock();
884        initBlock();
885        return setupBlock();
886    }
887
888    private int setupNoRandPartB() throws IOException {
889        if (this.su_ch2 != this.su_chPrev) {
890            this.su_count = 1;
891            return setupNoRandPartA();
892        }
893        if (++this.su_count >= 4) {
894            checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos");
895            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
896            this.su_tPos = this.data.tt[this.su_tPos];
897            this.su_j2 = 0;
898            return setupNoRandPartC();
899        }
900        return setupNoRandPartA();
901    }
902
903    private int setupNoRandPartC() throws IOException {
904        if (this.su_j2 < this.su_z) {
905            final int su_ch2Shadow = this.su_ch2;
906            this.crc.updateCRC(su_ch2Shadow);
907            this.su_j2++;
908            this.currentState = NO_RAND_PART_C_STATE;
909            return su_ch2Shadow;
910        }
911        this.su_i2++;
912        this.su_count = 0;
913        return setupNoRandPartA();
914    }
915
916    private int setupRandPartA() throws IOException {
917        if (this.su_i2 <= this.last) {
918            this.su_chPrev = this.su_ch2;
919            int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
920            checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
921            this.su_tPos = this.data.tt[this.su_tPos];
922            if (this.su_rNToGo == 0) {
923                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
924                if (++this.su_rTPos == 512) {
925                    this.su_rTPos = 0;
926                }
927            } else {
928                this.su_rNToGo--;
929            }
930            this.su_ch2 = su_ch2Shadow ^= this.su_rNToGo == 1 ? 1 : 0;
931            this.su_i2++;
932            this.currentState = RAND_PART_B_STATE;
933            this.crc.updateCRC(su_ch2Shadow);
934            return su_ch2Shadow;
935        }
936        endBlock();
937        initBlock();
938        return setupBlock();
939    }
940
941    private int setupRandPartB() throws IOException {
942        if (this.su_ch2 != this.su_chPrev) {
943            this.currentState = RAND_PART_A_STATE;
944            this.su_count = 1;
945            return setupRandPartA();
946        }
947        if (++this.su_count < 4) {
948            this.currentState = RAND_PART_A_STATE;
949            return setupRandPartA();
950        }
951        this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
952        checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
953        this.su_tPos = this.data.tt[this.su_tPos];
954        if (this.su_rNToGo == 0) {
955            this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
956            if (++this.su_rTPos == 512) {
957                this.su_rTPos = 0;
958            }
959        } else {
960            this.su_rNToGo--;
961        }
962        this.su_j2 = 0;
963        this.currentState = RAND_PART_C_STATE;
964        if (this.su_rNToGo == 1) {
965            this.su_z ^= 1;
966        }
967        return setupRandPartC();
968    }
969
970    private int setupRandPartC() throws IOException {
971        if (this.su_j2 < this.su_z) {
972            this.crc.updateCRC(this.su_ch2);
973            this.su_j2++;
974            return this.su_ch2;
975        }
976        this.currentState = RAND_PART_A_STATE;
977        this.su_i2++;
978        this.su_count = 0;
979        return setupRandPartA();
980    }
981}