001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.arj;
019
020import java.io.ByteArrayInputStream;
021import java.io.ByteArrayOutputStream;
022import java.io.DataInputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.util.ArrayList;
027import java.util.zip.CRC32;
028
029import org.apache.commons.compress.archivers.ArchiveEntry;
030import org.apache.commons.compress.archivers.ArchiveException;
031import org.apache.commons.compress.archivers.ArchiveInputStream;
032import org.apache.commons.compress.utils.BoundedInputStream;
033import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
034import org.apache.commons.compress.utils.Charsets;
035import org.apache.commons.compress.utils.IOUtils;
036
037/**
038 * Implements the "arj" archive format as an InputStream.
039 * <p>
040 * <a href="https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt">Reference 1</a>
041 * <br>
042 * <a href="http://www.fileformat.info/format/arj/corion.htm">Reference 2</a>
043 * @NotThreadSafe
044 * @since 1.6
045 */
046public class ArjArchiveInputStream extends ArchiveInputStream {
047    private static final int ARJ_MAGIC_1 = 0x60;
048    private static final int ARJ_MAGIC_2 = 0xEA;
049    private final DataInputStream in;
050    private final String charsetName;
051    private final MainHeader mainHeader;
052    private LocalFileHeader currentLocalFileHeader;
053    private InputStream currentInputStream;
054
055    /**
056     * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in.
057     * @param inputStream the underlying stream, whose ownership is taken
058     * @param charsetName the charset used for file names and comments
059     *   in the archive. May be {@code null} to use the platform default.
060     * @throws ArchiveException if an exception occurs while reading
061     */
062    public ArjArchiveInputStream(final InputStream inputStream,
063            final String charsetName) throws ArchiveException {
064        in = new DataInputStream(inputStream);
065        this.charsetName = charsetName;
066        try {
067            mainHeader = readMainHeader();
068            if ((mainHeader.arjFlags & MainHeader.Flags.GARBLED) != 0) {
069                throw new ArchiveException("Encrypted ARJ files are unsupported");
070            }
071            if ((mainHeader.arjFlags & MainHeader.Flags.VOLUME) != 0) {
072                throw new ArchiveException("Multi-volume ARJ files are unsupported");
073            }
074        } catch (final IOException ioException) {
075            throw new ArchiveException(ioException.getMessage(), ioException);
076        }
077    }
078
079    /**
080     * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in,
081     * and using the CP437 character encoding.
082     * @param inputStream the underlying stream, whose ownership is taken
083     * @throws ArchiveException if an exception occurs while reading
084     */
085    public ArjArchiveInputStream(final InputStream inputStream)
086            throws ArchiveException {
087        this(inputStream, "CP437");
088    }
089
090    @Override
091    public void close() throws IOException {
092        in.close();
093    }
094
095    private int read8(final DataInputStream dataIn) throws IOException {
096        final int value = dataIn.readUnsignedByte();
097        count(1);
098        return value;
099    }
100
101    private int read16(final DataInputStream dataIn) throws IOException {
102        final int value = dataIn.readUnsignedShort();
103        count(2);
104        return Integer.reverseBytes(value) >>> 16;
105    }
106
107    private int read32(final DataInputStream dataIn) throws IOException {
108        final int value = dataIn.readInt();
109        count(4);
110        return Integer.reverseBytes(value);
111    }
112
113    private String readString(final DataInputStream dataIn) throws IOException {
114        try (final ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
115            int nextByte;
116            while ((nextByte = dataIn.readUnsignedByte()) != 0) {
117                buffer.write(nextByte);
118            }
119            return buffer.toString(Charsets.toCharset(charsetName).name());
120        }
121    }
122
123    private byte[] readRange(final InputStream in, final int len)
124        throws IOException {
125        final byte[] b = IOUtils.readRange(in, len);
126        count(b.length);
127        if (b.length < len) {
128            throw new EOFException();
129        }
130        return b;
131    }
132
133    private byte[] readHeader() throws IOException {
134        boolean found = false;
135        byte[] basicHeaderBytes = null;
136        do {
137            int first = 0;
138            int second = read8(in);
139            do {
140                first = second;
141                second = read8(in);
142            } while (first != ARJ_MAGIC_1 && second != ARJ_MAGIC_2);
143            final int basicHeaderSize = read16(in);
144            if (basicHeaderSize == 0) {
145                // end of archive
146                return null;
147            }
148            if (basicHeaderSize <= 2600) {
149                basicHeaderBytes = readRange(in, basicHeaderSize);
150                final long basicHeaderCrc32 = read32(in) & 0xFFFFFFFFL;
151                final CRC32 crc32 = new CRC32();
152                crc32.update(basicHeaderBytes);
153                if (basicHeaderCrc32 == crc32.getValue()) {
154                    found = true;
155                }
156            }
157        } while (!found);
158        return basicHeaderBytes;
159    }
160
161    private MainHeader readMainHeader() throws IOException {
162        final byte[] basicHeaderBytes = readHeader();
163        if (basicHeaderBytes == null) {
164            throw new IOException("Archive ends without any headers");
165        }
166        final DataInputStream basicHeader = new DataInputStream(
167                new ByteArrayInputStream(basicHeaderBytes));
168
169        final int firstHeaderSize = basicHeader.readUnsignedByte();
170        final byte[] firstHeaderBytes = readRange(basicHeader, firstHeaderSize - 1);
171        pushedBackBytes(firstHeaderBytes.length);
172
173        final DataInputStream firstHeader = new DataInputStream(
174                new ByteArrayInputStream(firstHeaderBytes));
175
176        final MainHeader hdr = new MainHeader();
177        hdr.archiverVersionNumber = firstHeader.readUnsignedByte();
178        hdr.minVersionToExtract = firstHeader.readUnsignedByte();
179        hdr.hostOS = firstHeader.readUnsignedByte();
180        hdr.arjFlags = firstHeader.readUnsignedByte();
181        hdr.securityVersion = firstHeader.readUnsignedByte();
182        hdr.fileType = firstHeader.readUnsignedByte();
183        hdr.reserved = firstHeader.readUnsignedByte();
184        hdr.dateTimeCreated = read32(firstHeader);
185        hdr.dateTimeModified = read32(firstHeader);
186        hdr.archiveSize = 0xffffFFFFL & read32(firstHeader);
187        hdr.securityEnvelopeFilePosition = read32(firstHeader);
188        hdr.fileSpecPosition = read16(firstHeader);
189        hdr.securityEnvelopeLength = read16(firstHeader);
190        pushedBackBytes(20); // count has already counted them via readRange
191        hdr.encryptionVersion = firstHeader.readUnsignedByte();
192        hdr.lastChapter = firstHeader.readUnsignedByte();
193
194        if (firstHeaderSize >= 33) {
195            hdr.arjProtectionFactor = firstHeader.readUnsignedByte();
196            hdr.arjFlags2 = firstHeader.readUnsignedByte();
197            firstHeader.readUnsignedByte();
198            firstHeader.readUnsignedByte();
199        }
200
201        hdr.name = readString(basicHeader);
202        hdr.comment = readString(basicHeader);
203
204        final  int extendedHeaderSize = read16(in);
205        if (extendedHeaderSize > 0) {
206            hdr.extendedHeaderBytes = readRange(in, extendedHeaderSize);
207            final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in);
208            final CRC32 crc32 = new CRC32();
209            crc32.update(hdr.extendedHeaderBytes);
210            if (extendedHeaderCrc32 != crc32.getValue()) {
211                throw new IOException("Extended header CRC32 verification failure");
212            }
213        }
214
215        return hdr;
216    }
217
218    private LocalFileHeader readLocalFileHeader() throws IOException {
219        final byte[] basicHeaderBytes = readHeader();
220        if (basicHeaderBytes == null) {
221            return null;
222        }
223        try (final DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes))) {
224
225            final int firstHeaderSize = basicHeader.readUnsignedByte();
226            final byte[] firstHeaderBytes = readRange(basicHeader, firstHeaderSize - 1);
227            pushedBackBytes(firstHeaderBytes.length);
228            try (final DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes))) {
229
230                final LocalFileHeader localFileHeader = new LocalFileHeader();
231                localFileHeader.archiverVersionNumber = firstHeader.readUnsignedByte();
232                localFileHeader.minVersionToExtract = firstHeader.readUnsignedByte();
233                localFileHeader.hostOS = firstHeader.readUnsignedByte();
234                localFileHeader.arjFlags = firstHeader.readUnsignedByte();
235                localFileHeader.method = firstHeader.readUnsignedByte();
236                localFileHeader.fileType = firstHeader.readUnsignedByte();
237                localFileHeader.reserved = firstHeader.readUnsignedByte();
238                localFileHeader.dateTimeModified = read32(firstHeader);
239                localFileHeader.compressedSize = 0xffffFFFFL & read32(firstHeader);
240                localFileHeader.originalSize = 0xffffFFFFL & read32(firstHeader);
241                localFileHeader.originalCrc32 = 0xffffFFFFL & read32(firstHeader);
242                localFileHeader.fileSpecPosition = read16(firstHeader);
243                localFileHeader.fileAccessMode = read16(firstHeader);
244                pushedBackBytes(20);
245                localFileHeader.firstChapter = firstHeader.readUnsignedByte();
246                localFileHeader.lastChapter = firstHeader.readUnsignedByte();
247
248                readExtraData(firstHeaderSize, firstHeader, localFileHeader);
249
250                localFileHeader.name = readString(basicHeader);
251                localFileHeader.comment = readString(basicHeader);
252
253                final ArrayList<byte[]> extendedHeaders = new ArrayList<>();
254                int extendedHeaderSize;
255                while ((extendedHeaderSize = read16(in)) > 0) {
256                    final byte[] extendedHeaderBytes = readRange(in, extendedHeaderSize);
257                    final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in);
258                    final CRC32 crc32 = new CRC32();
259                    crc32.update(extendedHeaderBytes);
260                    if (extendedHeaderCrc32 != crc32.getValue()) {
261                        throw new IOException("Extended header CRC32 verification failure");
262                    }
263                    extendedHeaders.add(extendedHeaderBytes);
264                }
265                localFileHeader.extendedHeaders = extendedHeaders.toArray(new byte[0][]);
266
267                return localFileHeader;
268            }
269        }
270    }
271
272    private void readExtraData(final int firstHeaderSize, final DataInputStream firstHeader,
273                               final LocalFileHeader localFileHeader) throws IOException {
274        if (firstHeaderSize >= 33) {
275            localFileHeader.extendedFilePosition = read32(firstHeader);
276            if (firstHeaderSize >= 45) {
277                localFileHeader.dateTimeAccessed = read32(firstHeader);
278                localFileHeader.dateTimeCreated = read32(firstHeader);
279                localFileHeader.originalSizeEvenForVolumes = read32(firstHeader);
280                pushedBackBytes(12);
281            }
282            pushedBackBytes(4);
283        }
284    }
285
286    /**
287     * Checks if the signature matches what is expected for an arj file.
288     *
289     * @param signature
290     *            the bytes to check
291     * @param length
292     *            the number of bytes to check
293     * @return true, if this stream is an arj archive stream, false otherwise
294     */
295    public static boolean matches(final byte[] signature, final int length) {
296        return length >= 2 &&
297                (0xff & signature[0]) == ARJ_MAGIC_1 &&
298                (0xff & signature[1]) == ARJ_MAGIC_2;
299    }
300
301    /**
302     * Gets the archive's recorded name.
303     * @return the archive's name
304     */
305    public String getArchiveName() {
306        return mainHeader.name;
307    }
308
309    /**
310     * Gets the archive's comment.
311     * @return the archive's comment
312     */
313    public String getArchiveComment() {
314        return mainHeader.comment;
315    }
316
317    @Override
318    public ArjArchiveEntry getNextEntry() throws IOException {
319        if (currentInputStream != null) {
320            // return value ignored as IOUtils.skip ensures the stream is drained completely
321            IOUtils.skip(currentInputStream, Long.MAX_VALUE);
322            currentInputStream.close();
323            currentLocalFileHeader = null;
324            currentInputStream = null;
325        }
326
327        currentLocalFileHeader = readLocalFileHeader();
328        if (currentLocalFileHeader != null) {
329            currentInputStream = new BoundedInputStream(in, currentLocalFileHeader.compressedSize);
330            if (currentLocalFileHeader.method == LocalFileHeader.Methods.STORED) {
331                currentInputStream = new CRC32VerifyingInputStream(currentInputStream,
332                        currentLocalFileHeader.originalSize, currentLocalFileHeader.originalCrc32);
333            }
334            return new ArjArchiveEntry(currentLocalFileHeader);
335        }
336        currentInputStream = null;
337        return null;
338    }
339
340    @Override
341    public boolean canReadEntryData(final ArchiveEntry ae) {
342        return ae instanceof ArjArchiveEntry
343            && ((ArjArchiveEntry) ae).getMethod() == LocalFileHeader.Methods.STORED;
344    }
345
346    @Override
347    public int read(final byte[] b, final int off, final int len) throws IOException {
348        if (len == 0) {
349            return 0;
350        }
351        if (currentLocalFileHeader == null) {
352            throw new IllegalStateException("No current arj entry");
353        }
354        if (currentLocalFileHeader.method != LocalFileHeader.Methods.STORED) {
355            throw new IOException("Unsupported compression method " + currentLocalFileHeader.method);
356        }
357        return currentInputStream.read(b, off, len);
358    }
359}