001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.harmony.pack200;
018
019import java.io.IOException;
020import java.io.OutputStream;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Map;
026import java.util.stream.IntStream;
027
028/**
029 * Abstract superclass for a set of bands
030 */
031public abstract class BandSet {
032
033    /**
034     * Results obtained by trying different Codecs to encode a band
035     */
036    public class BandAnalysisResults {
037
038        // The number of Codecs tried so far
039        private int numCodecsTried;
040
041        // The number of bytes saved by using betterCodec instead of the default codec
042        private int saved;
043
044        // Extra metadata to pass to the segment header (to be appended to the
045        // band_headers band)
046        private int[] extraMetadata;
047
048        // The results of encoding the band with betterCodec
049        private byte[] encodedBand;
050
051        // The best Codec found so far, or should be null if the default is the
052        // best so far
053        private Codec betterCodec;
054
055    }
056    /**
057     * BandData represents information about a band, e.g. largest value etc and is used in the heuristics that calculate
058     * whether an alternative Codec could make the encoded band smaller.
059     */
060    public class BandData {
061
062        private final int[] band;
063        private int smallest = Integer.MAX_VALUE;
064        private int largest = Integer.MIN_VALUE;
065        private int smallestDelta;
066        private int largestDelta;
067
068        private int deltaIsAscending;
069        private int smallDeltaCount;
070
071        private double averageAbsoluteDelta;
072        private double averageAbsoluteValue;
073
074        private Map<Integer, Integer> distinctValues;
075
076        /**
077         * Create a new instance of BandData. The band is then analysed.
078         *
079         * @param band - the band of integers
080         */
081        public BandData(final int[] band) {
082            this.band = band;
083            final Integer one = Integer.valueOf(1);
084            for (int i = 0; i < band.length; i++) {
085                if (band[i] < smallest) {
086                    smallest = band[i];
087                }
088                if (band[i] > largest) {
089                    largest = band[i];
090                }
091                if (i != 0) {
092                    final int delta = band[i] - band[i - 1];
093                    if (delta < smallestDelta) {
094                        smallestDelta = delta;
095                    }
096                    if (delta > largestDelta) {
097                        largestDelta = delta;
098                    }
099                    if (delta >= 0) {
100                        deltaIsAscending++;
101                    }
102                    averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1);
103                    if (Math.abs(delta) < 256) {
104                        smallDeltaCount++;
105                    }
106                } else {
107                    smallestDelta = band[0];
108                    largestDelta = band[0];
109                }
110                averageAbsoluteValue += (double) Math.abs(band[i]) / (double) band.length;
111                if (effort > 3) { // do calculations needed to consider population codec
112                    if (distinctValues == null) {
113                        distinctValues = new HashMap<>();
114                    }
115                    final Integer value = Integer.valueOf(band[i]);
116                    Integer count = distinctValues.get(value);
117                    if (count == null) {
118                        count = one;
119                    } else {
120                        count = Integer.valueOf(count.intValue() + 1);
121                    }
122                    distinctValues.put(value, count);
123                }
124            }
125        }
126
127        /**
128         * Returns true if any band elements are negative.
129         *
130         * @return true if any band elements are negative.
131         */
132        public boolean anyNegatives() {
133            return smallest < 0;
134        }
135
136        /**
137         * Returns true if the band deltas are mainly positive (heuristic).
138         *
139         * @return true if the band deltas are mainly positive (heuristic).
140         */
141        public boolean mainlyPositiveDeltas() {
142            // Note: the value below has been tuned - please test carefully if changing it
143            return (float) deltaIsAscending / (float) band.length > 0.95F;
144        }
145
146        /**
147         * Returns true if the deltas between adjacent band elements are mainly small (heuristic).
148         *
149         * @return true if the deltas between adjacent band elements are mainly small (heuristic).
150         */
151        public boolean mainlySmallDeltas() {
152            // Note: the value below has been tuned - please test carefully if changing it
153            return (float) smallDeltaCount / (float) band.length > 0.7F;
154        }
155
156        /**
157         * Returns the total number of distinct values found in the band.
158         *
159         * @return the total number of distinct values found in the band.
160         */
161        public int numDistinctValues() {
162            if (distinctValues == null) {
163                return band.length;
164            }
165            return distinctValues.size();
166        }
167
168        /**
169         * Returns true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic).
170         *
171         * @return true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic).
172         */
173        public boolean wellCorrelated() {
174            // Note: the value below has been tuned - please test carefully if changing it
175            return averageAbsoluteDelta * 3.1 < averageAbsoluteValue;
176        }
177
178    }
179
180    // Minimum size of band for each effort level where we consider alternative codecs
181    // Note: these values have been tuned - please test carefully if changing them
182    private static final int[] effortThresholds = {0, 0, 1000, 500, 100, 100, 100, 100, 100, 0};
183
184    protected final SegmentHeader segmentHeader;
185    final int effort;
186
187    private long[] canonicalLargest;
188
189    private long[] canonicalSmallest;
190
191    /**
192     * Create a new BandSet
193     *
194     * @param effort - the packing effort to be used (must be 1-9)
195     * @param header - the segment header
196     */
197    public BandSet(final int effort, final SegmentHeader header) {
198        this.effort = effort;
199        this.segmentHeader = header;
200    }
201
202    private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec)
203        throws Pack200Exception {
204
205        final BandAnalysisResults results = new BandAnalysisResults();
206
207        if (canonicalLargest == null) {
208            canonicalLargest = new long[116];
209            canonicalSmallest = new long[116];
210            for (int i = 1; i < canonicalLargest.length; i++) {
211                canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest();
212                canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest();
213            }
214        }
215        final BandData bandData = new BandData(band);
216
217        // Check that there is a reasonable saving to be made
218        final byte[] encoded = defaultCodec.encode(band);
219        results.encodedBand = encoded;
220
221        // Note: these values have been tuned - please test carefully if changing them
222        if (encoded.length <= band.length + 23 - 2 * effort) { // TODO: tweak
223            return results;
224        }
225
226        // Check if we can use BYTE1 as that's a 1:1 mapping if we can
227        if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) {
228            results.encodedBand = Codec.BYTE1.encode(band);
229            results.betterCodec = Codec.BYTE1;
230            return results;
231        }
232
233        // Consider a population codec (but can't be nested)
234        if (effort > 3 && !name.equals("POPULATION")) {
235            final int numDistinctValues = bandData.numDistinctValues();
236            final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length;
237
238            // Note: these values have been tuned - please test carefully if changing them
239            if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02
240                || effort > 6 && distinctValuesAsProportion < 0.04) { // TODO: tweak
241                encodeWithPopulationCodec(name, band, defaultCodec, bandData, results);
242                if (timeToStop(results)) {
243                    return results;
244                }
245            }
246        }
247
248        final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>();
249
250        // See if the deltas are mainly small increments
251        if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) {
252            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2);
253        }
254
255        if (bandData.wellCorrelated()) { // Try delta encodings
256            if (bandData.mainlyPositiveDeltas()) {
257                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
258                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
259                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
260                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
261                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
262                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
263                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
264                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
265                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
266            } else {
267                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
268                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
269                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
270                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
271                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
272                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
273                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
274            }
275        } else if (bandData.anyNegatives()) {
276            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
277            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
278            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
279            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
280            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
281            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
282            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
283        } else {
284            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
285            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
286            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
287            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
288            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
289            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
290            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
291            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
292            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
293        }
294        if (name.equalsIgnoreCase("cpint")) {
295            System.out.print("");
296        }
297
298        for (final BHSDCodec[] family : codecFamiliesToTry) {
299            tryCodecs(name, band, defaultCodec, bandData, results, encoded, family);
300            if (timeToStop(results)) {
301                break;
302            }
303        }
304
305        return results;
306    }
307
308    /**
309     * Converts a list of ConstantPoolEntrys to an int[] array of their indices
310     *
311     * @param list conversion source.
312     * @return conversion result.
313     */
314    protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) {
315        final int[] array = new int[list.size()];
316        for (int i = 0; i < array.length; i++) {
317            array[i] = list.get(i).getIndex();
318            if (array[i] < 0) {
319                throw new IllegalArgumentException("Index should be > 0");
320            }
321        }
322        return array;
323    }
324
325    /**
326     * Converts a list of ConstantPoolEntrys or nulls to an int[] array of their indices +1 (or 0 for nulls)
327     *
328     * @param list conversion source.
329     * @return conversion result.
330     */
331    protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) {
332        final int[] array = new int[list.size()];
333        for (int j = 0; j < array.length; j++) {
334            final ConstantPoolEntry cpEntry = list.get(j);
335            array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1;
336            if (cpEntry != null && cpEntry.getIndex() < 0) {
337                throw new IllegalArgumentException("Index should be > 0");
338            }
339        }
340        return array;
341    }
342
343    /**
344     * Encode a band of integers. The default codec may be used, but other Codecs are considered if effort is greater
345     * than 1.
346     *
347     * @param name - name of the band (used for debugging)
348     * @param ints - the band
349     * @param defaultCodec - the default Codec
350     * @return the encoded band
351     * @throws Pack200Exception TODO
352     */
353    public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec)
354        throws Pack200Exception {
355        byte[] encodedBand = null;
356        // Useful for debugging
357//        if (ints.length > 0) {
358//            System.out.println("encoding " + name + " " + ints.length);
359//        }
360        if (effort > 1 && ints.length >= effortThresholds[effort]) {
361            final BandAnalysisResults results = analyseBand(name, ints, defaultCodec);
362            final Codec betterCodec = results.betterCodec;
363            encodedBand = results.encodedBand;
364            if (betterCodec != null) {
365                if (betterCodec instanceof BHSDCodec) {
366                    final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec);
367                    int specifier = specifierBand[0];
368                    if (specifierBand.length > 1) {
369                        for (int i = 1; i < specifierBand.length; i++) {
370                            segmentHeader.appendBandCodingSpecifier(specifierBand[i]);
371                        }
372                    }
373                    if (defaultCodec.isSigned()) {
374                        specifier = -1 - specifier;
375                    } else {
376                        specifier = specifier + defaultCodec.getL();
377                    }
378                    final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
379                    final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
380                    System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
381                    System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
382                    return band;
383                }
384                if (betterCodec instanceof PopulationCodec) {
385                    IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier);
386                    return encodedBand;
387                }
388                if (betterCodec instanceof RunCodec) {
389
390                }
391            }
392        }
393
394        // If we get here then we've decided to use the default codec.
395        if (ints.length > 0) {
396            if (encodedBand == null) {
397                encodedBand = defaultCodec.encode(ints);
398            }
399            final int first = ints[0];
400            if (defaultCodec.getB() != 1) {
401                if (defaultCodec.isSigned() && first >= -256 && first <= -1) {
402                    final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec);
403                    final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
404                    final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
405                    System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
406                    System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
407                    return band;
408                }
409                if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) {
410                    final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL();
411                    final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
412                    final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
413                    System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
414                    System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
415                    return band;
416                }
417            }
418            return encodedBand;
419        }
420        return new byte[0];
421    }
422
423    /**
424     * Encode a band of longs (values are split into their high and low 32 bits and then encoded as two separate bands
425     *
426     * @param name - name of the band (for debugging purposes)
427     * @param flags - the band
428     * @param loCodec - Codec for the low 32-bits band
429     * @param hiCodec - Codec for the high 32-bits band
430     * @param haveHiFlags - ignores the high band if true as all values would be zero
431     * @return the encoded band
432     * @throws Pack200Exception TODO
433     */
434    protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec,
435        final BHSDCodec hiCodec, final boolean haveHiFlags) throws Pack200Exception {
436        if (!haveHiFlags) {
437            final int[] loBits = new int[flags.length];
438            Arrays.setAll(loBits, i -> (int) flags[i]);
439            return encodeBandInt(name, loBits, loCodec);
440        }
441        final int[] hiBits = new int[flags.length];
442        final int[] loBits = new int[flags.length];
443        for (int i = 0; i < flags.length; i++) {
444            final long l = flags[i];
445            hiBits[i] = (int) (l >> 32);
446            loBits[i] = (int) l;
447        }
448        final byte[] hi = encodeBandInt(name, hiBits, hiCodec);
449        final byte[] lo = encodeBandInt(name, loBits, loCodec);
450        final byte[] total = new byte[hi.length + lo.length];
451        System.arraycopy(hi, 0, total, 0, hi.length);
452        System.arraycopy(lo, 0, total, hi.length + 1, lo.length);
453        return total;
454    }
455
456// This could be useful if further enhancements are done but is not currently used
457//
458//    private void encodeWithRunCodec(String name, int[] band, int index,
459//            BHSDCodec defaultCodec, BandData bandData,
460//            BandAnalysisResults results) throws Pack200Exception {
461//        int[] firstBand = new int[index];
462//        int[] secondBand = new int[band.length - index];
463//        System.arraycopy(band, 0, firstBand, 0, index);
464//        System.arraycopy(band, index, secondBand, 0, secondBand.length);
465//        BandAnalysisResults firstResults = analyseBand(name + "A", firstBand, defaultCodec);
466//        BandAnalysisResults secondResults = analyseBand(name + "B", secondBand, defaultCodec);
467//        int specifier = 117;
468//        byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
469//        int totalLength = firstResults.encodedBand.length + secondResults.encodedBand.length + specifierEncoded.length + 4; // TODO actual
470//        if (totalLength < results.encodedBand.length) {
471//            System.out.println("using run codec");
472//            results.saved += results.encodedBand.length - totalLength;
473//            byte[] encodedBand = new byte[specifierEncoded.length + firstResults.encodedBand.length + secondResults.encodedBand.length];
474//            System.arraycopy(specifierEncoded, 0, encodedBand, 0, specifierEncoded.length);
475//            System.arraycopy(firstResults.encodedBand, 0, encodedBand, specifierEncoded.length, firstResults.encodedBand.length);
476//            System.arraycopy(secondResults.encodedBand, 0, encodedBand, specifierEncoded.length + firstResults.encodedBand.length, secondResults.encodedBand.length);
477//            results.encodedBand = encodedBand;
478//            results.betterCodec = new RunCodec(index, firstResults.betterCodec, secondResults.betterCodec);
479//        }
480//    }
481
482    protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec,
483        final BHSDCodec hiCodec, final boolean haveHiFlags) throws Pack200Exception {
484        return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags);
485    }
486
487    /**
488     * Encode a single value with the given Codec
489     *
490     * @param value - the value to encode
491     * @param codec - Codec to use
492     * @return the encoded value
493     * @throws Pack200Exception TODO
494     */
495    public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception {
496        return codec.encode(value);
497    }
498
499    /**
500     * Encode a band without considering other Codecs
501     *
502     * @param band - the band
503     * @param codec - the Codec to use
504     * @return the encoded band
505     * @throws Pack200Exception TODO
506     */
507    public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception {
508        return codec.encode(band);
509    }
510
511    private void encodeWithPopulationCodec(final String name, final int[] band, final BHSDCodec defaultCodec,
512        final BandData bandData, final BandAnalysisResults results) throws Pack200Exception {
513        results.numCodecsTried += 3; // quite a bit more effort to try this codec
514        final Map<Integer, Integer> distinctValues = bandData.distinctValues;
515
516        final List<Integer> favored = new ArrayList<>();
517        distinctValues.forEach((k, v) -> {
518            if (v.intValue() > 2 || distinctValues.size() < 256) { // TODO: tweak
519                favored.add(k);
520            }
521        });
522
523        // Sort the favored list with the most commonly occurring first
524        if (distinctValues.size() > 255) {
525            favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0)));
526        }
527
528        final Map<Integer, Integer> favoredToIndex = new HashMap<>();
529        for (int i = 0; i < favored.size(); i++) {
530            favoredToIndex.put(favored.get(i), Integer.valueOf(i));
531        }
532
533        final IntList unfavoured = new IntList();
534        final int[] tokens = new int[band.length];
535        for (int i = 0; i < band.length; i++) {
536            final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i]));
537            if (favouredIndex == null) {
538                tokens[i] = 0;
539                unfavoured.add(band[i]);
540            } else {
541                tokens[i] = favouredIndex.intValue() + 1;
542            }
543        }
544        favored.add(favored.get(favored.size() - 1)); // repeat last value
545        final int[] favouredBand = integerListToArray(favored);
546        final int[] unfavouredBand = unfavoured.toArray();
547
548        // Analyse the three bands to get the best codec
549        final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec);
550        final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec);
551
552        int tdefL = 0;
553        int l = 0;
554        Codec tokenCodec = null;
555        byte[] tokensEncoded;
556        final int k = favored.size() - 1;
557        if (k < 256) {
558            tdefL = 1;
559            tokensEncoded = Codec.BYTE1.encode(tokens);
560        } else {
561            final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec);
562            tokenCodec = tokenResults.betterCodec;
563            tokensEncoded = tokenResults.encodedBand;
564            if (tokenCodec == null) {
565                tokenCodec = defaultCodec;
566            }
567            l = ((BHSDCodec) tokenCodec).getL();
568            final int h = ((BHSDCodec) tokenCodec).getH();
569            final int s = ((BHSDCodec) tokenCodec).getS();
570            final int b = ((BHSDCodec) tokenCodec).getB();
571            final int d = ((BHSDCodec) tokenCodec).isDelta() ? 1 : 0;
572            if (s == 0 && d == 0) {
573                boolean canUseTDefL = true;
574                if (b > 1) {
575                    final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h);
576                    if (oneLowerB.largest() >= k) {
577                        canUseTDefL = false;
578                    }
579                }
580                if (canUseTDefL) {
581                    switch (l) {
582                    case 4:
583                        tdefL = 1;
584                        break;
585                    case 8:
586                        tdefL = 2;
587                        break;
588                    case 16:
589                        tdefL = 3;
590                        break;
591                    case 32:
592                        tdefL = 4;
593                        break;
594                    case 64:
595                        tdefL = 5;
596                        break;
597                    case 128:
598                        tdefL = 6;
599                        break;
600                    case 192:
601                        tdefL = 7;
602                        break;
603                    case 224:
604                        tdefL = 8;
605                        break;
606                    case 240:
607                        tdefL = 9;
608                        break;
609                    case 248:
610                        tdefL = 10;
611                        break;
612                    case 252:
613                        tdefL = 11;
614                        break;
615                    }
616                }
617            }
618        }
619
620        final byte[] favouredEncoded = favouredResults.encodedBand;
621        final byte[] unfavouredEncoded = unfavouredResults.encodedBand;
622        final Codec favouredCodec = favouredResults.betterCodec;
623        final Codec unfavouredCodec = unfavouredResults.betterCodec;
624
625        int specifier = 141 + (favouredCodec == null ? 1 : 0) + 4 * tdefL + (unfavouredCodec == null ? 2 : 0);
626        final IntList extraBandMetadata = new IntList(3);
627        if (favouredCodec != null) {
628            IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add);
629        }
630        if (tdefL == 0) {
631            IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add);
632        }
633        if (unfavouredCodec != null) {
634            IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add);
635        }
636        final int[] extraMetadata = extraBandMetadata.toArray();
637        final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata);
638        if (defaultCodec.isSigned()) {
639            specifier = -1 - specifier;
640        } else {
641            specifier = specifier + defaultCodec.getL();
642        }
643        final byte[] firstValueEncoded = defaultCodec.encode(new int[] {specifier});
644        final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length
645            + unfavouredEncoded.length;
646
647        if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) {
648            results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length);
649            final byte[] encodedBand = new byte[totalBandLength];
650            System.arraycopy(firstValueEncoded, 0, encodedBand, 0, firstValueEncoded.length);
651            System.arraycopy(favouredEncoded, 0, encodedBand, firstValueEncoded.length, favouredEncoded.length);
652            System.arraycopy(tokensEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length,
653                tokensEncoded.length);
654            System.arraycopy(unfavouredEncoded, 0, encodedBand,
655                firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length, unfavouredEncoded.length);
656            results.encodedBand = encodedBand;
657            results.extraMetadata = extraMetadata;
658            if (l != 0) {
659                results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec);
660            } else {
661                results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec);
662            }
663        }
664    }
665
666    /*
667     * Flatten a 2-dimension array into a 1-dimension array
668     */
669    private long[] flatten(final long[][] flags) {
670        int totalSize = 0;
671        for (final long[] flag : flags) {
672            totalSize += flag.length;
673        }
674        final long[] flatArray = new long[totalSize];
675        int index = 0;
676        for (final long[] flag : flags) {
677            for (final long element : flag) {
678                flatArray[index] = element;
679                index++;
680            }
681        }
682        return flatArray;
683    }
684
685    /**
686     * Converts a list of Integers to an int[] array.
687     *
688     * @param integerList conversion source.
689     * @return conversion result.
690     */
691    protected int[] integerListToArray(final List<Integer> integerList) {
692        return integerList.stream().mapToInt(Integer::intValue).toArray();
693    }
694
695    /**
696     * Converts a list of Longs to an long[] array.
697     *
698     * @param longList conversion source.
699     * @return conversion result.
700     */
701    protected long[] longListToArray(final List<Long> longList) {
702        return longList.stream().mapToLong(Long::longValue).toArray();
703    }
704
705    /**
706     * Write the packed set of bands to the given output stream
707     *
708     * @param out TODO
709     * @throws IOException If an I/O error occurs.
710     * @throws Pack200Exception TODO
711     */
712    public abstract void pack(OutputStream out) throws IOException, Pack200Exception;
713
714    private boolean timeToStop(final BandAnalysisResults results) {
715        // if tried more than effort number of codecs for this band then return
716        // Note: these values have been tuned - please test carefully if changing them
717        if (effort > 6) {
718            return results.numCodecsTried >= effort * 2;
719        }
720        return results.numCodecsTried >= effort;
721        // May want to also check how much we've saved if performance needs improving, e.g. saved more than effort*2 %
722        // || (float) results.saved/(float) results.encodedBand.length > (float) effort * 2/100;
723    }
724
725    private void tryCodecs(final String name, final int[] band, final BHSDCodec defaultCodec, final BandData bandData,
726        final BandAnalysisResults results, final byte[] encoded, final BHSDCodec[] potentialCodecs)
727        throws Pack200Exception {
728        for (final BHSDCodec potential : potentialCodecs) {
729            if (potential.equals(defaultCodec)) {
730                return; // don't try codecs with greater cardinality in the same 'family' as the default codec as there
731                        // won't be any savings
732            }
733            if (potential.isDelta()) {
734                if (potential.largest() >= bandData.largestDelta && potential.smallest() <= bandData.smallestDelta
735                    && potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) {
736                    // TODO: can represent some negative deltas with overflow
737                    final byte[] encoded2 = potential.encode(band);
738                    results.numCodecsTried++;
739                    final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
740                    final int saved = encoded.length - encoded2.length - specifierEncoded.length;
741                    if (saved > results.saved) {
742                        results.betterCodec = potential;
743                        results.encodedBand = encoded2;
744                        results.saved = saved;
745                    }
746                }
747            } else if (potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) {
748                final byte[] encoded2 = potential.encode(band);
749                results.numCodecsTried++;
750                final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
751                final int saved = encoded.length - encoded2.length - specifierEncoded.length;
752                if (saved > results.saved) {
753                    results.betterCodec = potential;
754                    results.encodedBand = encoded2;
755                    results.saved = saved;
756                }
757            }
758            if (timeToStop(results)) {
759                return;
760            }
761        }
762    }
763
764}