001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.snappy;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.PushbackInputStream;
024import java.util.Arrays;
025
026import org.apache.commons.compress.compressors.CompressorInputStream;
027import org.apache.commons.compress.utils.BoundedInputStream;
028import org.apache.commons.compress.utils.IOUtils;
029
030/**
031 * CompressorInputStream for the framing Snappy format.
032 *
033 * <p>Based on the "spec" in the version "Last revised: 2013-10-25"</p>
034 *
035 * @see <a href="http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt">Snappy framing format description</a>
036 * @since 1.7
037 */
038public class FramedSnappyCompressorInputStream extends CompressorInputStream {
039    /**
040     * package private for tests only.
041     */
042    static final long MASK_OFFSET = 0xa282ead8L;
043
044    private static final int STREAM_IDENTIFIER_TYPE = 0xff;
045    private static final int COMPRESSED_CHUNK_TYPE = 0;
046    private static final int UNCOMPRESSED_CHUNK_TYPE = 1;
047    private static final int PADDING_CHUNK_TYPE = 0xfe;
048    private static final int MIN_UNSKIPPABLE_TYPE = 2;
049    private static final int MAX_UNSKIPPABLE_TYPE = 0x7f;
050    private static final int MAX_SKIPPABLE_TYPE = 0xfd;
051
052    private static final byte[] SZ_SIGNATURE = new byte[] {
053        (byte) STREAM_IDENTIFIER_TYPE, // tag
054        6, 0, 0, // length
055        's', 'N', 'a', 'P', 'p', 'Y'
056    };
057
058    /** The underlying stream to read compressed data from */
059    private final PushbackInputStream in;
060
061    private SnappyCompressorInputStream currentCompressedChunk;
062
063    // used in no-arg read method
064    private final byte[] oneByte = new byte[1];
065
066    private boolean endReached, inUncompressedChunk;
067
068    private int uncompressedBytesRemaining;
069    private long expectedChecksum = -1;
070    private final PureJavaCrc32C checksum = new PureJavaCrc32C();
071
072    /**
073     * Constructs a new input stream that decompresses snappy-framed-compressed data
074     * from the specified input stream.
075     * @param in  the InputStream from which to read the compressed data
076     * @throws IOException if reading fails
077     */
078    public FramedSnappyCompressorInputStream(InputStream in) throws IOException {
079        this.in = new PushbackInputStream(in, 1);
080        readStreamIdentifier();
081    }
082
083    /** {@inheritDoc} */
084    @Override
085    public int read() throws IOException {
086        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
087    }
088
089    /** {@inheritDoc} */
090    @Override
091    public void close() throws IOException {
092        if (currentCompressedChunk != null) {
093            currentCompressedChunk.close();
094            currentCompressedChunk = null;
095        }
096        in.close();
097    }
098
099    /** {@inheritDoc} */
100    @Override
101    public int read(byte[] b, int off, int len) throws IOException {
102        int read = readOnce(b, off, len);
103        if (read == -1) {
104            readNextBlock();
105            if (endReached) {
106                return -1;
107            }
108            read = readOnce(b, off, len);
109        }
110        return read;
111    }
112
113    /** {@inheritDoc} */
114    @Override
115    public int available() throws IOException {
116        if (inUncompressedChunk) {
117            return Math.min(uncompressedBytesRemaining,
118                            in.available());
119        } else if (currentCompressedChunk != null) {
120            return currentCompressedChunk.available();
121        }
122        return 0;
123    }
124
125    /**
126     * Read from the current chunk into the given array.
127     *
128     * @return -1 if there is no current chunk or the number of bytes
129     * read from the current chunk (which may be -1 if the end of the
130     * chunk is reached).
131     */
132    private int readOnce(byte[] b, int off, int len) throws IOException {
133        int read = -1;
134        if (inUncompressedChunk) {
135            int amount = Math.min(uncompressedBytesRemaining, len);
136            if (amount == 0) {
137                return -1;
138            }
139            read = in.read(b, off, amount);
140            if (read != -1) {
141                uncompressedBytesRemaining -= read;
142                count(read);
143            }
144        } else if (currentCompressedChunk != null) {
145            long before = currentCompressedChunk.getBytesRead();
146            read = currentCompressedChunk.read(b, off, len);
147            if (read == -1) {
148                currentCompressedChunk.close();
149                currentCompressedChunk = null;
150            } else {
151                count(currentCompressedChunk.getBytesRead() - before);
152            }
153        }
154        if (read > 0) {
155            checksum.update(b, off, read);
156        }
157        return read;
158    }
159
160    private void readNextBlock() throws IOException {
161        verifyLastChecksumAndReset();
162        inUncompressedChunk = false;
163        int type = readOneByte();
164        if (type == -1) {
165            endReached = true;
166        } else if (type == STREAM_IDENTIFIER_TYPE) {
167            in.unread(type);
168            pushedBackBytes(1);
169            readStreamIdentifier();
170            readNextBlock();
171        } else if (type == PADDING_CHUNK_TYPE
172                   || (type > MAX_UNSKIPPABLE_TYPE && type <= MAX_SKIPPABLE_TYPE)) {
173            skipBlock();
174            readNextBlock();
175        } else if (type >= MIN_UNSKIPPABLE_TYPE && type <= MAX_UNSKIPPABLE_TYPE) {
176            throw new IOException("unskippable chunk with type " + type
177                                  + " (hex " + Integer.toHexString(type) + ")"
178                                  + " detected.");
179        } else if (type == UNCOMPRESSED_CHUNK_TYPE) {
180            inUncompressedChunk = true;
181            uncompressedBytesRemaining = readSize() - 4 /* CRC */;
182            expectedChecksum = unmask(readCrc());
183        } else if (type == COMPRESSED_CHUNK_TYPE) {
184            long size = readSize() - 4 /* CRC */;
185            expectedChecksum = unmask(readCrc());
186            currentCompressedChunk =
187                new SnappyCompressorInputStream(new BoundedInputStream(in, size));
188            // constructor reads uncompressed size
189            count(currentCompressedChunk.getBytesRead());
190        } else {
191            // impossible as all potential byte values have been covered
192            throw new IOException("unknown chunk type " + type
193                                  + " detected.");
194        }
195    }
196
197    private long readCrc() throws IOException {
198        byte[] b = new byte[4];
199        int read = IOUtils.readFully(in, b);
200        count(read);
201        if (read != 4) {
202            throw new IOException("premature end of stream");
203        }
204        long crc = 0;
205        for (int i = 0; i < 4; i++) {
206            crc |= (b[i] & 0xFFL) << (8 * i);
207        }
208        return crc;
209    }
210
211    static long unmask(long x) {
212        // ugly, maybe we should just have used ints and deal with the
213        // overflow
214        x -= MASK_OFFSET;
215        x &= 0xffffFFFFL;
216        return ((x >> 17) | (x << 15)) & 0xffffFFFFL;
217    }
218
219    private int readSize() throws IOException {
220        int b = 0;
221        int sz = 0;
222        for (int i = 0; i < 3; i++) {
223            b = readOneByte();
224            if (b == -1) {
225                throw new IOException("premature end of stream");
226            }
227            sz |= (b << (i * 8));
228        }
229        return sz;
230    }
231
232    private void skipBlock() throws IOException {
233        int size = readSize();
234        long read = IOUtils.skip(in, size);
235        count(read);
236        if (read != size) {
237            throw new IOException("premature end of stream");
238        }
239    }
240
241    private void readStreamIdentifier() throws IOException {
242        byte[] b = new byte[10];
243        int read = IOUtils.readFully(in, b);
244        count(read);
245        if (10 != read || !matches(b, 10)) {
246            throw new IOException("Not a framed Snappy stream");
247        }
248    }
249
250    private int readOneByte() throws IOException {
251        int b = in.read();
252        if (b != -1) {
253            count(1);
254            return b & 0xFF;
255        }
256        return -1;
257    }
258
259    private void verifyLastChecksumAndReset() throws IOException {
260        if (expectedChecksum >= 0 && expectedChecksum != checksum.getValue()) {
261            throw new IOException("Checksum verification failed");
262        }
263        expectedChecksum = -1;
264        checksum.reset();
265    }
266
267    /**
268     * Checks if the signature matches what is expected for a .sz file.
269     *
270     * <p>.sz files start with a chunk with tag 0xff and content sNaPpY.</p>
271     * 
272     * @param signature the bytes to check
273     * @param length    the number of bytes to check
274     * @return          true if this is a .sz stream, false otherwise
275     */
276    public static boolean matches(byte[] signature, int length) {
277
278        if (length < SZ_SIGNATURE.length) {
279            return false;
280        }
281
282        byte[] shortenedSig = signature;
283        if (signature.length > SZ_SIGNATURE.length) {
284            shortenedSig = new byte[SZ_SIGNATURE.length];
285            System.arraycopy(signature, 0, shortenedSig, 0, SZ_SIGNATURE.length);
286        }
287
288        return Arrays.equals(shortenedSig, SZ_SIGNATURE);
289    }
290
291}