/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
018package org.apache.commons.compress.archivers.tar;
019
020import java.io.ByteArrayOutputStream;
021import java.io.Closeable;
022import java.io.File;
023import java.io.IOException;
024import java.io.InputStream;
025import java.nio.ByteBuffer;
026import java.nio.channels.SeekableByteChannel;
027import java.nio.file.Files;
028import java.nio.file.Path;
029import java.util.ArrayList;
030import java.util.HashMap;
031import java.util.LinkedList;
032import java.util.List;
033import java.util.Map;
034
035import org.apache.commons.compress.archivers.zip.ZipEncoding;
036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
037import org.apache.commons.compress.utils.ArchiveUtils;
038import org.apache.commons.compress.utils.BoundedArchiveInputStream;
039import org.apache.commons.compress.utils.BoundedInputStream;
040import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
041import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
042
043/**
044 * The TarFile provides random access to UNIX archives.
045 * @since 1.21
046 */
047public class TarFile implements Closeable {
048
049    private static final int SMALL_BUFFER_SIZE = 256;
050
051    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];
052
053    private final SeekableByteChannel archive;
054
055    /**
056     * The encoding of the tar file
057     */
058    private final ZipEncoding zipEncoding;
059
060    private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();
061
062    private final int blockSize;
063
064    private final boolean lenient;
065
066    private final int recordSize;
067
068    private final ByteBuffer recordBuffer;
069
070    // the global sparse headers, this is only used in PAX Format 0.X
071    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();
072
073    private boolean hasHitEOF;
074
075    /**
076     * The meta-data about the current entry
077     */
078    private TarArchiveEntry currEntry;
079
080    // the global PAX header
081    private Map<String, String> globalPaxHeaders = new HashMap<>();
082
083    private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();
084
085    /**
086     * Constructor for TarFile.
087     *
088     * @param content the content to use
089     * @throws IOException when reading the tar archive fails
090     */
091    public TarFile(final byte[] content) throws IOException {
092        this(new SeekableInMemoryByteChannel(content));
093    }
094
095    /**
096     * Constructor for TarFile.
097     *
098     * @param content  the content to use
099     * @param encoding the encoding to use
100     * @throws IOException when reading the tar archive fails
101     */
102    public TarFile(final byte[] content, final String encoding) throws IOException {
103        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
104    }
105
106    /**
107     * Constructor for TarFile.
108     *
109     * @param content the content to use
110     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
111     *                ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
112     *                exception instead.
113     * @throws IOException when reading the tar archive fails
114     */
115    public TarFile(final byte[] content, final boolean lenient) throws IOException {
116        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
117    }
118
119    /**
120     * Constructor for TarFile.
121     *
122     * @param archive the file of the archive to use
123     * @throws IOException when reading the tar archive fails
124     */
125    public TarFile(final File archive) throws IOException {
126        this(archive.toPath());
127    }
128
129    /**
130     * Constructor for TarFile.
131     *
132     * @param archive  the file of the archive to use
133     * @param encoding the encoding to use
134     * @throws IOException when reading the tar archive fails
135     */
136    public TarFile(final File archive, final String encoding) throws IOException {
137        this(archive.toPath(), encoding);
138    }
139
140    /**
141     * Constructor for TarFile.
142     *
143     * @param archive the file of the archive to use
144     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
145     *                ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
146     *                exception instead.
147     * @throws IOException when reading the tar archive fails
148     */
149    public TarFile(final File archive, final boolean lenient) throws IOException {
150        this(archive.toPath(), lenient);
151    }
152
153    /**
154     * Constructor for TarFile.
155     *
156     * @param archivePath the path of the archive to use
157     * @throws IOException when reading the tar archive fails
158     */
159    public TarFile(final Path archivePath) throws IOException {
160        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
161    }
162
163    /**
164     * Constructor for TarFile.
165     *
166     * @param archivePath the path of the archive to use
167     * @param encoding    the encoding to use
168     * @throws IOException when reading the tar archive fails
169     */
170    public TarFile(final Path archivePath, final String encoding) throws IOException {
171        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
172    }
173
174    /**
175     * Constructor for TarFile.
176     *
177     * @param archivePath the path of the archive to use
178     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be
179     *                    ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
180     *                    exception instead.
181     * @throws IOException when reading the tar archive fails
182     */
183    public TarFile(final Path archivePath, final boolean lenient) throws IOException {
184        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
185    }
186
187    /**
188     * Constructor for TarFile.
189     *
190     * @param content the content to use
191     * @throws IOException when reading the tar archive fails
192     */
193    public TarFile(final SeekableByteChannel content) throws IOException {
194        this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
195    }
196
197    /**
198     * Constructor for TarFile.
199     *
200     * @param archive    the seekable byte channel to use
201     * @param blockSize  the blocks size to use
202     * @param recordSize the record size to use
203     * @param encoding   the encoding to use
204     * @param lenient    when set to true illegal values for group/userid, mode, device numbers and timestamp will be
205     *                   ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
206     *                   exception instead.
207     * @throws IOException when reading the tar archive fails
208     */
209    public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient) throws IOException {
210        this.archive = archive;
211        this.hasHitEOF = false;
212        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
213        this.recordSize = recordSize;
214        this.recordBuffer = ByteBuffer.allocate(this.recordSize);
215        this.blockSize = blockSize;
216        this.lenient = lenient;
217
218        TarArchiveEntry entry;
219        while ((entry = getNextTarEntry()) != null) {
220            entries.add(entry);
221        }
222    }
223
224    /**
225     * Get the next entry in this tar archive. This will skip
226     * to the end of the current entry, if there is one, and
227     * place the position of the channel at the header of the
228     * next entry, and read the header and instantiate a new
229     * TarEntry from the header bytes and return that entry.
230     * If there are no more entries in the archive, null will
231     * be returned to indicate that the end of the archive has
232     * been reached.
233     *
234     * @return The next TarEntry in the archive, or null if there is no next entry.
235     * @throws IOException when reading the next TarEntry fails
236     */
237    private TarArchiveEntry getNextTarEntry() throws IOException {
238        if (isAtEOF()) {
239            return null;
240        }
241
242        if (currEntry != null) {
243            // Skip to the end of the entry
244            repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
245            throwExceptionIfPositionIsNotInArchive();
246            skipRecordPadding();
247        }
248
249        final ByteBuffer headerBuf = getRecord();
250        if (null == headerBuf) {
251            /* hit EOF */
252            currEntry = null;
253            return null;
254        }
255
256        try {
257            final long position = archive.position();
258            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
259        } catch (final IllegalArgumentException e) {
260            throw new IOException("Error detected parsing the header", e);
261        }
262
263        if (currEntry.isGNULongLinkEntry()) {
264            final byte[] longLinkData = getLongNameData();
265            if (longLinkData == null) {
266                // Bugzilla: 40334
267                // Malformed tar file - long link entry name not followed by
268                // entry
269                return null;
270            }
271            currEntry.setLinkName(zipEncoding.decode(longLinkData));
272        }
273
274        if (currEntry.isGNULongNameEntry()) {
275            final byte[] longNameData = getLongNameData();
276            if (longNameData == null) {
277                // Bugzilla: 40334
278                // Malformed tar file - long entry name not followed by
279                // entry
280                return null;
281            }
282
283            // COMPRESS-509 : the name of directories should end with '/'
284            final String name = zipEncoding.decode(longNameData);
285            currEntry.setName(name);
286            if (currEntry.isDirectory() && !name.endsWith("/")) {
287                currEntry.setName(name + "/");
288            }
289        }
290
291        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
292            readGlobalPaxHeaders();
293        }
294
295        try {
296            if (currEntry.isPaxHeader()) { // Process Pax headers
297                paxHeaders();
298            } else if (!globalPaxHeaders.isEmpty()) {
299                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
300            }
301        } catch (NumberFormatException e) {
302            throw new IOException("Error detected parsing the pax header", e);
303        }
304
305        if (currEntry.isOldGNUSparse()) { // Process sparse files
306            readOldGNUSparse();
307        }
308
309        return currEntry;
310    }
311
312    /**
313     * Adds the sparse chunks from the current entry to the sparse chunks,
314     * including any additional sparse entries following the current entry.
315     *
316     * @throws IOException when reading the sparse entry fails
317     */
318    private void readOldGNUSparse() throws IOException {
319        if (currEntry.isExtended()) {
320            TarArchiveSparseEntry entry;
321            do {
322                final ByteBuffer headerBuf = getRecord();
323                if (headerBuf == null) {
324                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
325                }
326                entry = new TarArchiveSparseEntry(headerBuf.array());
327                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
328                currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
329            } while (entry.isExtended());
330        }
331
332        // sparse headers are all done reading, we need to build
333        // sparse input streams using these sparse headers
334        buildSparseInputStreams();
335    }
336
337    /**
338     * Build the input streams consisting of all-zero input streams and non-zero input streams.
339     * When reading from the non-zero input streams, the data is actually read from the original input stream.
340     * The size of each input stream is introduced by the sparse headers.
341     *
342     * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the
343     *        0 size input streams because they are meaningless.
344     */
345    private void buildSparseInputStreams() throws IOException {
346        final List<InputStream> streams = new ArrayList<>();
347
348        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
349
350        // Stream doesn't need to be closed at all as it doesn't use any resources
351        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
352        // logical offset into the extracted entry
353        long offset = 0;
354        long numberOfZeroBytesInSparseEntry = 0;
355        for (TarArchiveStructSparse sparseHeader : sparseHeaders) {
356            final long zeroBlockSize = sparseHeader.getOffset() - offset;
357            if (zeroBlockSize < 0) {
358                // sparse header says to move backwards inside of the extracted entry
359                throw new IOException("Corrupted struct sparse detected");
360            }
361
362            // only store the zero block if it is not empty
363            if (zeroBlockSize > 0) {
364                streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
365                numberOfZeroBytesInSparseEntry += zeroBlockSize;
366            }
367
368            // only store the input streams with non-zero size
369            if (sparseHeader.getNumbytes() > 0) {
370                final long start =
371                    currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
372                if (start + sparseHeader.getNumbytes() < start) {
373                    // possible integer overflow
374                    throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
375                }
376                streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
377            }
378
379            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
380        }
381
382        sparseInputStreams.put(currEntry.getName(), streams);
383    }
384
385    /**
386     * Update the current entry with the read pax headers
387     * @param headers Headers read from the pax header
388     * @param sparseHeaders Sparse headers read from pax header
389     */
390    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
391        throws IOException {
392        currEntry.updateEntryFromPaxHeaders(headers);
393        currEntry.setSparseHeaders(sparseHeaders);
394    }
395
396    /**
397     * <p>
398     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
399     * may appear multi times, and they look like:
400     * <pre>
401     * GNU.sparse.size=size
402     * GNU.sparse.numblocks=numblocks
403     * repeat numblocks times
404     *   GNU.sparse.offset=offset
405     *   GNU.sparse.numbytes=numbytes
406     * end repeat
407     * </pre>
408     *
409     * <p>
410     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
411     * <pre>
412     * GNU.sparse.map
413     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
414     * </pre>
415     *
416     * <p>
417     * For PAX Format 1.X:
418     * <br>
419     * The sparse map itself is stored in the file data block, preceding the actual file data.
420     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
421     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
422     * giving the offset and size of the data block it describes.
423     * @throws IOException
424     */
425    private void paxHeaders() throws IOException {
426        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
427        final Map<String, String> headers;
428        try (final InputStream input = getInputStream(currEntry)) {
429            headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
430        }
431
432        // for 0.1 PAX Headers
433        if (headers.containsKey("GNU.sparse.map")) {
434            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get("GNU.sparse.map")));
435        }
436        getNextTarEntry(); // Get the actual file entry
437        if (currEntry == null) {
438            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
439        }
440        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
441
442        // for 1.0 PAX Format, the sparse map is stored in the file data block
443        if (currEntry.isPaxGNU1XSparse()) {
444            try (final InputStream input = getInputStream(currEntry)) {
445                sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
446            }
447            currEntry.setSparseHeaders(sparseHeaders);
448            // data of the entry is after the pax gnu entry. So we need to update the data position once again
449            currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
450        }
451
452        // sparse headers are all done reading, we need to build
453        // sparse input streams using these sparse headers
454        buildSparseInputStreams();
455    }
456
457    private void readGlobalPaxHeaders() throws IOException {
458        try (InputStream input = getInputStream(currEntry)) {
459            globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders,
460                currEntry.getSize());
461        }
462        getNextTarEntry(); // Get the actual file entry
463
464        if (currEntry == null) {
465            throw new IOException("Error detected parsing the pax header");
466        }
467    }
468
469    /**
470     * Get the next entry in this tar archive as longname data.
471     *
472     * @return The next entry in the archive as longname data, or null.
473     * @throws IOException on error
474     */
475    private byte[] getLongNameData() throws IOException {
476        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
477        int length;
478        try (final InputStream in = getInputStream(currEntry)) {
479            while ((length = in.read(smallBuf)) >= 0) {
480                longName.write(smallBuf, 0, length);
481            }
482        }
483        getNextTarEntry();
484        if (currEntry == null) {
485            // Bugzilla: 40334
486            // Malformed tar file - long entry name not followed by entry
487            return null;
488        }
489        byte[] longNameData = longName.toByteArray();
490        // remove trailing null terminator(s)
491        length = longNameData.length;
492        while (length > 0 && longNameData[length - 1] == 0) {
493            --length;
494        }
495        if (length != longNameData.length) {
496            final byte[] l = new byte[length];
497            System.arraycopy(longNameData, 0, l, 0, length);
498            longNameData = l;
499        }
500        return longNameData;
501    }
502
503    /**
504     * The last record block should be written at the full size, so skip any
505     * additional space used to fill a record after an entry
506     *
507     * @throws IOException when skipping the padding of the record fails
508     */
509    private void skipRecordPadding() throws IOException {
510        if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
511            final long numRecords = (currEntry.getSize() / recordSize) + 1;
512            final long padding = (numRecords * recordSize) - currEntry.getSize();
513            repositionForwardBy(padding);
514            throwExceptionIfPositionIsNotInArchive();
515        }
516    }
517
518    private void repositionForwardTo(final long newPosition) throws IOException {
519        final long currPosition = archive.position();
520        if (newPosition < currPosition) {
521            throw new IOException("trying to move backwards inside of the archive");
522        }
523        archive.position(newPosition);
524    }
525
526    private void repositionForwardBy(final long offset) throws IOException {
527        repositionForwardTo(archive.position() + offset);
528    }
529
530    /**
531     * Checks if the current position of the SeekableByteChannel is in the archive.
532     * @throws IOException If the position is not in the archive
533     */
534    private void throwExceptionIfPositionIsNotInArchive() throws IOException {
535        if (archive.size() < archive.position()) {
536            throw new IOException("Truncated TAR archive");
537        }
538    }
539
540    /**
541     * Get the next record in this tar archive. This will skip
542     * over any remaining data in the current entry, if there
543     * is one, and place the input stream at the header of the
544     * next entry.
545     *
546     * <p>If there are no more entries in the archive, null will be
547     * returned to indicate that the end of the archive has been
548     * reached.  At the same time the {@code hasHitEOF} marker will be
549     * set to true.</p>
550     *
551     * @return The next TarEntry in the archive, or null if there is no next entry.
552     * @throws IOException when reading the next TarEntry fails
553     */
554    private ByteBuffer getRecord() throws IOException {
555        ByteBuffer headerBuf = readRecord();
556        setAtEOF(isEOFRecord(headerBuf));
557        if (isAtEOF() && headerBuf != null) {
558            // Consume rest
559            tryToConsumeSecondEOFRecord();
560            consumeRemainderOfLastBlock();
561            headerBuf = null;
562        }
563        return headerBuf;
564    }
565
566    /**
567     * Tries to read the next record resetting the position in the
568     * archive if it is not a EOF record.
569     *
570     * <p>This is meant to protect against cases where a tar
571     * implementation has written only one EOF record when two are
572     * expected. Actually this won't help since a non-conforming
573     * implementation likely won't fill full blocks consisting of - by
574     * default - ten records either so we probably have already read
575     * beyond the archive anyway.</p>
576     *
577     * @throws IOException if reading the record of resetting the position in the archive fails
578     */
579    private void tryToConsumeSecondEOFRecord() throws IOException {
580        boolean shouldReset = true;
581        try {
582            shouldReset = !isEOFRecord(readRecord());
583        } finally {
584            if (shouldReset) {
585                archive.position(archive.position() - recordSize);
586            }
587        }
588    }
589
590    /**
591     * This method is invoked once the end of the archive is hit, it
592     * tries to consume the remaining bytes under the assumption that
593     * the tool creating this archive has padded the last block.
594     */
595    private void consumeRemainderOfLastBlock() throws IOException {
596        final long bytesReadOfLastBlock = archive.position() % blockSize;
597        if (bytesReadOfLastBlock > 0) {
598            repositionForwardBy(blockSize - bytesReadOfLastBlock);
599        }
600    }
601
602    /**
603     * Read a record from the input stream and return the data.
604     *
605     * @return The record data or null if EOF has been hit.
606     * @throws IOException if reading from the archive fails
607     */
608    private ByteBuffer readRecord() throws IOException {
609        recordBuffer.rewind();
610        final int readNow = archive.read(recordBuffer);
611        if (readNow != recordSize) {
612            return null;
613        }
614        return recordBuffer;
615    }
616
617    /**
618     * Get all TAR Archive Entries from the TarFile
619     *
620     * @return All entries from the tar file
621     */
622    public List<TarArchiveEntry> getEntries() {
623        return new ArrayList<>(entries);
624    }
625
626    private boolean isEOFRecord(final ByteBuffer headerBuf) {
627        return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
628    }
629
630    protected final boolean isAtEOF() {
631        return hasHitEOF;
632    }
633
634    protected final void setAtEOF(final boolean b) {
635        hasHitEOF = b;
636    }
637
638    private boolean isDirectory() {
639        return currEntry != null && currEntry.isDirectory();
640    }
641
642    /**
643     * Gets the input stream for the provided Tar Archive Entry.
644     * @param entry Entry to get the input stream from
645     * @return Input stream of the provided entry
646     * @throws IOException Corrupted TAR archive. Can't read entry.
647     */
648    public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
649        try {
650            return new BoundedTarEntryInputStream(entry, archive);
651        } catch (RuntimeException ex) {
652            throw new IOException("Corrupted TAR archive. Can't read entry", ex);
653        }
654    }
655
656    @Override
657    public void close() throws IOException {
658        archive.close();
659    }
660
661    private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {
662
663        private final SeekableByteChannel channel;
664
665        private final TarArchiveEntry entry;
666
667        private long entryOffset;
668
669        private int currentSparseInputStreamIndex;
670
671        BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
672            super(entry.getDataOffset(), entry.getRealSize());
673            if (channel.size() - entry.getSize() < entry.getDataOffset()) {
674                throw new IOException("entry size exceeds archive size");
675            }
676            this.entry = entry;
677            this.channel = channel;
678        }
679
680        @Override
681        protected int read(final long pos, final ByteBuffer buf) throws IOException {
682            if (entryOffset >= entry.getRealSize()) {
683                return -1;
684            }
685
686            final int totalRead;
687            if (entry.isSparse()) {
688                totalRead = readSparse(entryOffset, buf, buf.limit());
689            } else {
690                totalRead = readArchive(pos, buf);
691            }
692
693            if (totalRead == -1) {
694                if (buf.array().length > 0) {
695                    throw new IOException("Truncated TAR archive");
696                }
697                setAtEOF(true);
698            } else {
699                entryOffset += totalRead;
700                buf.flip();
701            }
702            return totalRead;
703        }
704
705        private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
706            // if there are no actual input streams, just read from the original archive
707            final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
708            if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
709                return readArchive(entry.getDataOffset() + pos, buf);
710            }
711
712            if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
713                return -1;
714            }
715
716            final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
717            final byte[] bufArray = new byte[numToRead];
718            final int readLen = currentInputStream.read(bufArray);
719            if (readLen != -1) {
720                buf.put(bufArray, 0, readLen);
721            }
722
723            // if the current input stream is the last input stream,
724            // just return the number of bytes read from current input stream
725            if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
726                return readLen;
727            }
728
729            // if EOF of current input stream is meet, open a new input stream and recursively call read
730            if (readLen == -1) {
731                currentSparseInputStreamIndex++;
732                return readSparse(pos, buf, numToRead);
733            }
734
735            // if the rest data of current input stream is not long enough, open a new input stream
736            // and recursively call read
737            if (readLen < numToRead) {
738                currentSparseInputStreamIndex++;
739                final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
740                if (readLenOfNext == -1) {
741                    return readLen;
742                }
743
744                return readLen + readLenOfNext;
745            }
746
747            // if the rest data of current input stream is enough(which means readLen == len), just return readLen
748            return readLen;
749        }
750
751        private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
752            channel.position(pos);
753            return channel.read(buf);
754        }
755    }
756}