/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.compress.harmony.pack200;

import java.io.IOException;
import java.io.InputStream;

/**
 * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
 *
 * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are
 * used in the implementation of many bands; but there are a variety of other ones, and indeed the specification assumes
 * that other combinations of values can result in more specific and efficient formats. There is also a sequence of
 * canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical number
 * (see {@link CodecEncoding#getCodec(int, InputStream, Codec)}).
 */
public abstract class Codec {

    /**
     * BCI5 = (5,4): Used for storing branching information in bytecode.
     */
    public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);

    /**
     * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
     */
    public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);

    /**
     * BYTE1 = (1,256): Used for storing plain bytes.
     */
    public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);

    /**
     * CHAR3 = (3,128): Used for storing text (UTF-8) strings. NB This isn't quite the same as UTF-8, but has similar
     * properties; ASCII characters &lt; 127 are stored in a single byte.
     */
    public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);

    /**
     * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed
     * values.
     */
    public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);

    /**
     * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed
     * values, but where most of them are expected to be non-negative.
     */
    public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);

    /**
     * SIGNED5 = (5,64,1): Used for small signed values.
     */
    public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);

    /**
     * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned
     * values.
     */
    public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);

    /**
     * UNSIGNED5 = (5,64): Used for small unsigned values.
     */
    public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);

    /**
     * Band-length bookkeeping that is reset to zero at the start of {@link #decodeInts(int, InputStream)}.
     *
     * NOTE(review): this class never assigns any other value; presumably subclasses record the length of the band
     * they decoded last here - confirm against the subclasses before relying on it.
     */
    public int lastBandLength;

    /**
     * Decode a sequence of bytes from the given input stream, returning the value as an int. Note that this method can
     * only be applied for non-delta encodings.
     *
     * @param in the input stream to read from
     * @return the value as an int
     * @throws IOException if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if the encoding is a delta encoding
     */
    public abstract int decode(InputStream in) throws IOException, Pack200Exception;

    /**
     * Decode a sequence of bytes from the given input stream, returning the value as an int. If this encoding is a
     * delta encoding (d=1) then the previous value must be passed in as a parameter. If it is a non-delta encoding,
     * then it does not matter what value is passed in, so it makes sense for the value to be passed in by default using
     * code similar to:
     *
     * <pre>
     * long last = 0;
     * while (condition) {
     *     last = codec.decode(in, last);
     *     // do something with last
     * }
     * </pre>
     *
     * @param in the input stream to read from
     * @param last the previous value read, which must be supplied if the codec is a delta encoding
     * @return the value as an int
     * @throws IOException if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
     */
    public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;

    /**
     * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases,
     * since some codecs (such as {@link PopulationCodec}) only work when the number of values to be read is known.
     *
     * Each value is decoded with {@link #decode(InputStream, long)}, passing the previously decoded value (initially
     * zero) as {@code last}. {@link #lastBandLength} is reset to zero before anything else happens.
     *
     * @param n the number of values to decode; must not be negative
     * @param in the input stream to read from
     * @return an array of {@code int} values corresponding to values decoded
     * @throws IOException if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if {@code n} is negative, or if there is a problem decoding the value or that the
     *         value is invalid
     */
    public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
        lastBandLength = 0;
        // A negative count would otherwise escape as an unchecked NegativeArraySizeException.
        if (n < 0) {
            throw new Pack200Exception("Cannot decode a negative number of values: " + n);
        }
        final int[] result = new int[n];
        int last = 0;
        for (int i = 0; i < n; i++) {
            last = decode(in, last);
            result[i] = last;
        }
        return result;
    }

    /**
     * Decodes a sequence of {@code n} values from {@code in}.
     *
     * The returned array has {@code n + 1} elements: {@code firstValue} followed by the {@code n} decoded values.
     * {@code firstValue} is also used as the {@code last} argument when decoding the first value from the stream.
     *
     * @param n the number of values to decode; must not be negative and must be less than
     *        {@link Integer#MAX_VALUE} so that {@code n + 1} does not overflow
     * @param in the input stream to read from
     * @param firstValue the first value in the band if it has already been read
     * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value
     *         in the array.
     * @throws IOException if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if {@code n} is out of range, or if there is a problem decoding the value or that the
     *         value is invalid
     */
    public int[] decodeInts(final int n, final InputStream in, final int firstValue)
        throws IOException, Pack200Exception {
        // n == -1 used to fail with ArrayIndexOutOfBoundsException; other negative counts and the
        // overflowing n + 1 used to fail with NegativeArraySizeException. Report all of them uniformly.
        if (n < 0 || n == Integer.MAX_VALUE) {
            throw new Pack200Exception("Cannot decode an invalid number of values: " + n);
        }
        final int length = n + 1;
        final int[] result = new int[length];
        result[0] = firstValue;
        int last = firstValue;
        for (int i = 1; i < length; i++) {
            last = decode(in, last);
            result[i] = last;
        }
        return result;
    }

    /**
     * Encode a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
     *
     * @param value the value to encode
     * @return the encoded bytes
     * @throws Pack200Exception if there is a problem encoding the value
     */
    public abstract byte[] encode(int value) throws Pack200Exception;

    /**
     * Encode a single value into a sequence of bytes.
     *
     * @param value the value to encode
     * @param last the previous value encoded (for delta encodings)
     * @return the encoded bytes
     * @throws Pack200Exception if there is a problem encoding the value
     */
    public abstract byte[] encode(int value, int last) throws Pack200Exception;

    /**
     * Encode a sequence of integers into a byte array.
     *
     * Each element is encoded with {@link #encode(int, int)}, passing the preceding element (zero for the first
     * element) as {@code last}; the per-element results are concatenated in order.
     *
     * @param ints the values to encode
     * @return byte[] encoded bytes
     * @throws Pack200Exception if there is a problem encoding any of the values
     */
    public byte[] encode(final int[] ints) throws Pack200Exception {
        // First pass: encode every value and learn the total size of the output.
        int total = 0;
        final byte[][] bytes = new byte[ints.length][];
        for (int i = 0; i < ints.length; i++) {
            bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
            total += bytes[i].length;
        }
        // Second pass: concatenate the per-value encodings into a single array.
        final byte[] encoded = new byte[total];
        int index = 0;
        for (final byte[] element : bytes) {
            System.arraycopy(element, 0, encoded, index, element.length);
            index += element.length;
        }
        return encoded;
    }
}