001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.activemq.store.kahadb.disk.journal;
018
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.zip.Adler32;
import java.util.zip.Checksum;

import org.apache.activemq.store.kahadb.disk.util.LinkedNode;
import org.apache.activemq.store.kahadb.disk.util.LinkedNodeList;
import org.apache.activemq.store.kahadb.disk.util.Sequence;
import org.apache.activemq.util.ByteSequence;
import org.apache.activemq.util.DataByteArrayInputStream;
import org.apache.activemq.util.DataByteArrayOutputStream;
import org.apache.activemq.util.IOHelper;
import org.apache.activemq.util.RecoverableRandomAccessFile;
import org.apache.activemq.util.ThreadPoolUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
060
061/**
062 * Manages DataFiles
063 */
064public class Journal {
    // System property that selects the caller-buffering appender implementation
    // instead of the default DataFileAppender (read once at class load).
    public static final String CALLER_BUFFER_APPENDER = "org.apache.kahadb.journal.CALLER_BUFFER_APPENDER";
    public static final boolean callerBufferAppender = Boolean.parseBoolean(System.getProperty(CALLER_BUFFER_APPENDER, "false"));

    // Chunk size used by the CHUNKED_ZEROS preallocation strategy.
    private static final int PREALLOC_CHUNK_SIZE = 1024*1024;

    // ITEM_HEAD_SPACE = length + type+ reserved space + SOR
    public static final int RECORD_HEAD_SPACE = 4 + 1;

    // Record type discriminators stored in the one-byte type field of each record header.
    public static final byte USER_RECORD_TYPE = 1;
    public static final byte BATCH_CONTROL_RECORD_TYPE = 2;
    // Batch Control Item holds a 4 byte size of the batch and a 8 byte checksum of the batch.
    public static final byte[] BATCH_CONTROL_RECORD_MAGIC = bytes("WRITE BATCH");
    public static final int BATCH_CONTROL_RECORD_SIZE = RECORD_HEAD_SPACE + BATCH_CONTROL_RECORD_MAGIC.length + 4 + 8;
    public static final byte[] BATCH_CONTROL_RECORD_HEADER = createBatchControlRecordHeader();
    public static final byte[] EMPTY_BATCH_CONTROL_RECORD = createEmptyBatchControlRecordHeader();
    // End-of-file marker: a 4 byte sentinel int followed by a single terminator byte.
    public static final int EOF_INT = ByteBuffer.wrap(new byte[]{'-', 'q', 'M', 'a'}).getInt();
    public static final byte EOF_EOT = '4';
    public static final byte[] EOF_RECORD = createEofBatchAndLocationRecord();

    // Single-threaded daemon scheduler created in start(); runs the periodic
    // cleanup task and async journal file preallocation.
    private ScheduledExecutorService scheduler;
086    // tackle corruption when checksum is disabled or corrupt with zeros, minimize data loss
087    public void corruptRecoveryLocation(Location recoveryPosition) throws IOException {
088        DataFile dataFile = getDataFile(recoveryPosition);
089        // with corruption on recovery we have no faith in the content - slip to the next batch record or eof
090        DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile);
091        try {
092            RandomAccessFile randomAccessFile = reader.getRaf().getRaf();
093            randomAccessFile.seek(recoveryPosition.getOffset() + 1);
094            byte[] data = new byte[getWriteBatchSize()];
095            ByteSequence bs = new ByteSequence(data, 0, randomAccessFile.read(data));
096            int nextOffset = 0;
097            if (findNextBatchRecord(bs, randomAccessFile) >= 0) {
098                nextOffset = Math.toIntExact(randomAccessFile.getFilePointer() - bs.remaining());
099            } else {
100                nextOffset = Math.toIntExact(randomAccessFile.length());
101            }
102            Sequence sequence = new Sequence(recoveryPosition.getOffset(), nextOffset - 1);
103            LOG.warn("Corrupt journal records found in '{}' between offsets: {}", dataFile.getFile(), sequence);
104
105            // skip corruption on getNextLocation
106            recoveryPosition.setOffset(nextOffset);
107            recoveryPosition.setSize(-1);
108
109            dataFile.corruptedBlocks.add(sequence);
110        } catch (IOException e) {
111        } finally {
112            accessorPool.closeDataFileAccessor(reader);
113        }
114    }
115
116    public DataFileAccessorPool getAccessorPool() {
117        return accessorPool;
118    }
119
120    public void allowIOResumption() {
121        if (appender instanceof DataFileAppender) {
122            DataFileAppender dataFileAppender = (DataFileAppender)appender;
123            dataFileAppender.shutdown = false;
124        }
125    }
126
127    public void setCleanupInterval(long cleanupInterval) {
128        this.cleanupInterval = cleanupInterval;
129    }
130
131    public long getCleanupInterval() {
132        return cleanupInterval;
133    }
134
    /** How journal file space is preallocated on disk. */
    public enum PreallocationStrategy {
        // only EOF markers are written at head and tail; the OS allocates lazily
        SPARSE_FILE,
        // kernel-level copy from a preallocated template file (see createJournalTemplateFile)
        OS_KERNEL_COPY,
        // write a full maxFileLength buffer of zeros in one go
        ZEROS,
        // write zeros in PREALLOC_CHUNK_SIZE chunks
        CHUNKED_ZEROS;
    }
141
    /** When (if at all) journal files are preallocated. */
    public enum PreallocationScope {
        // preallocate each journal file as it is created
        ENTIRE_JOURNAL,
        // preallocate the *next* journal file ahead of time on the scheduler thread
        ENTIRE_JOURNAL_ASYNC,
        // no preallocation
        NONE;
    }
147
    /** Disk sync (fsync) policy for journal writes. */
    public enum JournalDiskSyncStrategy {
        ALWAYS,
        PERIODIC,
        NEVER;
    }
153
154    private static byte[] createBatchControlRecordHeader() {
155        try (DataByteArrayOutputStream os = new DataByteArrayOutputStream();) {
156            os.writeInt(BATCH_CONTROL_RECORD_SIZE);
157            os.writeByte(BATCH_CONTROL_RECORD_TYPE);
158            os.write(BATCH_CONTROL_RECORD_MAGIC);
159            ByteSequence sequence = os.toByteSequence();
160            sequence.compact();
161            return sequence.getData();
162        } catch (IOException e) {
163            throw new RuntimeException("Could not create batch control record header.", e);
164        }
165    }
166
167    private static byte[] createEmptyBatchControlRecordHeader() {
168        try (DataByteArrayOutputStream os = new DataByteArrayOutputStream();) {
169            os.writeInt(BATCH_CONTROL_RECORD_SIZE);
170            os.writeByte(BATCH_CONTROL_RECORD_TYPE);
171            os.write(BATCH_CONTROL_RECORD_MAGIC);
172            os.writeInt(0);
173            os.writeLong(0l);
174            ByteSequence sequence = os.toByteSequence();
175            sequence.compact();
176            return sequence.getData();
177        } catch (IOException e) {
178            throw new RuntimeException("Could not create empty batch control record header.", e);
179        }
180    }
181
182    private static byte[] createEofBatchAndLocationRecord() {
183        try (DataByteArrayOutputStream os = new DataByteArrayOutputStream();) {
184            os.writeInt(EOF_INT);
185            os.writeByte(EOF_EOT);
186            ByteSequence sequence = os.toByteSequence();
187            sequence.compact();
188            return sequence.getData();
189        } catch (IOException e) {
190            throw new RuntimeException("Could not create eof header.", e);
191        }
192    }
193
    public static final String DEFAULT_DIRECTORY = ".";
    public static final String DEFAULT_ARCHIVE_DIRECTORY = "data-archive";
    public static final String DEFAULT_FILE_PREFIX = "db-";
    public static final String DEFAULT_FILE_SUFFIX = ".log";
    public static final int DEFAULT_MAX_FILE_LENGTH = 1024 * 1024 * 32;
    public static final int DEFAULT_CLEANUP_INTERVAL = 1000 * 30;
    public static final int DEFAULT_MAX_WRITE_BATCH_SIZE = 1024 * 1024 * 4;

    private static final Logger LOG = LoggerFactory.getLogger(Journal.class);

    // writes that have been queued to the appender but not yet forced to disk
    protected final Map<WriteKey, WriteCommand> inflightWrites = new ConcurrentHashMap<WriteKey, WriteCommand>();

    protected File directory = new File(DEFAULT_DIRECTORY);
    protected File directoryArchive;
    private boolean directoryArchiveOverridden = false;

    protected String filePrefix = DEFAULT_FILE_PREFIX;
    protected String fileSuffix = DEFAULT_FILE_SUFFIX;
    protected boolean started;

    protected int maxFileLength = DEFAULT_MAX_FILE_LENGTH;
    protected int writeBatchSize = DEFAULT_MAX_WRITE_BATCH_SIZE;

    protected FileAppender appender;
    protected DataFileAccessorPool accessorPool;

    // views of the data files: by id, by File, and as an ordered linked list;
    // access is guarded by synchronizing on currentDataFile
    protected Map<Integer, DataFile> fileMap = new HashMap<Integer, DataFile>();
    protected Map<File, DataFile> fileByFileMap = new LinkedHashMap<File, DataFile>();
    protected LinkedNodeList<DataFile> dataFiles = new LinkedNodeList<DataFile>();

    protected final AtomicReference<Location> lastAppendLocation = new AtomicReference<Location>();
    protected ScheduledFuture cleanupTask;
    // total on-disk size of the journal, maintained incrementally
    protected AtomicLong totalLength = new AtomicLong();
    protected boolean archiveDataLogs;
    private ReplicationTarget replicationTarget;
    protected boolean checksum;
    protected boolean checkForCorruptionOnStartup;
    protected boolean enableAsyncDiskSync = true;
    // next data file id to hand out; guarded by dataFileIdLock
    private int nextDataFileId = 1;
    private Object dataFileIdLock = new Object();
    // the file currently being appended to; also serves as the lock for the file maps above
    private final AtomicReference<DataFile> currentDataFile = new AtomicReference<>(null);
    // file preallocated ahead of time by preAllocateNextDataFileTask (ENTIRE_JOURNAL_ASYNC)
    private volatile DataFile nextDataFile;

    protected PreallocationScope preallocationScope = PreallocationScope.ENTIRE_JOURNAL;
    protected PreallocationStrategy preallocationStrategy = PreallocationStrategy.SPARSE_FILE;
    private File osKernelCopyTemplateFile = null;
    private ByteBuffer preAllocateDirectBuffer = null;
    private long cleanupInterval = DEFAULT_CLEANUP_INTERVAL;

    protected JournalDiskSyncStrategy journalDiskSyncStrategy = JournalDiskSyncStrategy.ALWAYS;

    /** Callback invoked when a data file is removed from the journal. */
    public interface DataFileRemovedListener {
        void fileRemoved(DataFile datafile);
    }

    private DataFileRemovedListener dataFileRemovedListener;
250
    /**
     * Starts the journal: scans the directory for existing data files, links
     * them in id order, optionally runs corruption checks, prepares the
     * preallocation strategy, initializes the current write file and schedules
     * the periodic cleanup task. Idempotent - a second call is a no-op.
     *
     * @throws IOException on failure to open or recover the journal files
     */
    public synchronized void start() throws IOException {
        if (started) {
            return;
        }

        long start = System.currentTimeMillis();
        accessorPool = new DataFileAccessorPool(this);
        started = true;

        appender = callerBufferAppender ? new CallerBufferingDataFileAppender(this) : new DataFileAppender(this);

        // collect candidate journal files by prefix/suffix match
        File[] files = directory.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String n) {
                return dir.equals(directory) && n.startsWith(filePrefix) && n.endsWith(fileSuffix);
            }
        });

        if (files != null) {
            for (File file : files) {
                try {
                    String n = file.getName();
                    // the numeric data file id sits between the prefix and suffix
                    String numStr = n.substring(filePrefix.length(), n.length()-fileSuffix.length());
                    int num = Integer.parseInt(numStr);
                    DataFile dataFile = new DataFile(file, num);
                    fileMap.put(dataFile.getDataFileId(), dataFile);
                    totalLength.addAndGet(dataFile.getLength());
                } catch (NumberFormatException e) {
                    // Ignore file that do not match the pattern.
                }
            }

            // Sort the list so that we can link the DataFiles together in the
            // right order.
            LinkedList<DataFile> l = new LinkedList<>(fileMap.values());
            Collections.sort(l);
            for (DataFile df : l) {
                if (df.getLength() == 0) {
                    // possibly the result of a previous failed write
                    LOG.info("ignoring zero length, partially initialised journal data file: " + df);
                    continue;
                } else if (l.getLast().equals(df) && isUnusedPreallocated(df)) {
                    // the tail file was preallocated but never written to - skip it
                    continue;
                }
                dataFiles.addLast(df);
                fileByFileMap.put(df.getFile(), df);

                if( isCheckForCorruptionOnStartup() ) {
                    lastAppendLocation.set(recoveryCheck(df));
                }
            }
        }

        // set up preallocation resources for the configured strategy
        if (preallocationScope != PreallocationScope.NONE) {
            switch (preallocationStrategy) {
                case SPARSE_FILE:
                    break;
                case OS_KERNEL_COPY: {
                    osKernelCopyTemplateFile = createJournalTemplateFile();
                }
                break;
                case CHUNKED_ZEROS: {
                    preAllocateDirectBuffer = allocateDirectBuffer(PREALLOC_CHUNK_SIZE);
                }
                break;
                case ZEROS: {
                    preAllocateDirectBuffer = allocateDirectBuffer(getMaxFileLength());
                }
                break;
            }
        }
        // daemon scheduler for cleanup and async preallocation
        scheduler = Executors.newScheduledThreadPool(1, new ThreadFactory() {
            @Override
            public Thread newThread(Runnable r) {
                Thread schedulerThread = new Thread(r);
                schedulerThread.setName("ActiveMQ Journal Scheduled executor");
                schedulerThread.setDaemon(true);
                return schedulerThread;
            }
        });

        // init current write file
        if (dataFiles.isEmpty()) {
            nextDataFileId = 1;
            rotateWriteFile();
        } else {
            currentDataFile.set(dataFiles.getTail());
            nextDataFileId = currentDataFile.get().dataFileId + 1;
        }

        // if corruption checks were disabled above, still recover the append
        // position of the tail file
        if( lastAppendLocation.get()==null ) {
            DataFile df = dataFiles.getTail();
            lastAppendLocation.set(recoveryCheck(df));
        }

        // ensure we don't report unused space of last journal file in size metric
        int lastFileLength = dataFiles.getTail().getLength();
        if (totalLength.get() > lastFileLength && lastAppendLocation.get().getOffset() > 0) {
            totalLength.addAndGet(lastAppendLocation.get().getOffset() - lastFileLength);
        }

        cleanupTask = scheduler.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                cleanup();
            }
        }, cleanupInterval, cleanupInterval, TimeUnit.MILLISECONDS);

        long end = System.currentTimeMillis();
        LOG.trace("Startup took: "+(end-start)+" ms");
    }
362
363    private ByteBuffer allocateDirectBuffer(int size) {
364        ByteBuffer buffer = ByteBuffer.allocateDirect(size);
365        buffer.put(EOF_RECORD);
366        return buffer;
367    }
368
369    public void preallocateEntireJournalDataFile(RecoverableRandomAccessFile file) {
370
371        if (PreallocationScope.NONE != preallocationScope) {
372
373            try {
374                if (PreallocationStrategy.OS_KERNEL_COPY == preallocationStrategy) {
375                    doPreallocationKernelCopy(file);
376                } else if (PreallocationStrategy.ZEROS == preallocationStrategy) {
377                    doPreallocationZeros(file);
378                } else if (PreallocationStrategy.CHUNKED_ZEROS == preallocationStrategy) {
379                    doPreallocationChunkedZeros(file);
380                } else {
381                    doPreallocationSparseFile(file);
382                }
383            } catch (Throwable continueWithNoPrealloc) {
384                // error on preallocation is non fatal, and we don't want to leak the journal handle
385                LOG.error("cound not preallocate journal data file", continueWithNoPrealloc);
386            }
387        }
388    }
389
390    private void doPreallocationSparseFile(RecoverableRandomAccessFile file) {
391        final ByteBuffer journalEof = ByteBuffer.wrap(EOF_RECORD);
392        try {
393            FileChannel channel = file.getChannel();
394            channel.position(0);
395            channel.write(journalEof);
396            channel.position(maxFileLength - 5);
397            journalEof.rewind();
398            channel.write(journalEof);
399            channel.force(false);
400            channel.position(0);
401        } catch (ClosedByInterruptException ignored) {
402            LOG.trace("Could not preallocate journal file with sparse file", ignored);
403        } catch (IOException e) {
404            LOG.error("Could not preallocate journal file with sparse file", e);
405        }
406    }
407
408    private void doPreallocationZeros(RecoverableRandomAccessFile file) {
409        preAllocateDirectBuffer.rewind();
410        try {
411            FileChannel channel = file.getChannel();
412            channel.write(preAllocateDirectBuffer);
413            channel.force(false);
414            channel.position(0);
415        } catch (ClosedByInterruptException ignored) {
416            LOG.trace("Could not preallocate journal file with zeros", ignored);
417        } catch (IOException e) {
418            LOG.error("Could not preallocate journal file with zeros", e);
419        }
420    }
421
422    private void doPreallocationKernelCopy(RecoverableRandomAccessFile file) {
423        try (RandomAccessFile templateRaf = new RandomAccessFile(osKernelCopyTemplateFile, "rw");){
424            templateRaf.getChannel().transferTo(0, getMaxFileLength(), file.getChannel());
425        } catch (ClosedByInterruptException ignored) {
426            LOG.trace("Could not preallocate journal file with kernel copy", ignored);
427        } catch (FileNotFoundException e) {
428            LOG.error("Could not find the template file on disk at " + osKernelCopyTemplateFile.getAbsolutePath(), e);
429        } catch (IOException e) {
430            LOG.error("Could not transfer the template file to journal, transferFile=" + osKernelCopyTemplateFile.getAbsolutePath(), e);
431        }
432    }
433
434    private File createJournalTemplateFile() {
435        String fileName = "db-log.template";
436        File rc = new File(directory, fileName);
437        try (RandomAccessFile templateRaf = new RandomAccessFile(rc, "rw");) {
438            templateRaf.getChannel().write(ByteBuffer.wrap(EOF_RECORD));
439            templateRaf.setLength(maxFileLength);
440            templateRaf.getChannel().force(true);
441        } catch (FileNotFoundException e) {
442            LOG.error("Could not find the template file on disk at " + osKernelCopyTemplateFile.getAbsolutePath(), e);
443        } catch (IOException e) {
444            LOG.error("Could not transfer the template file to journal, transferFile=" + osKernelCopyTemplateFile.getAbsolutePath(), e);
445        }
446        return rc;
447    }
448
    /**
     * Chunked-zeros preallocation: repeatedly writes the shared
     * PREALLOC_CHUNK_SIZE direct buffer until maxFileLength bytes have been
     * written, then forces the file and rewinds to position 0.
     * <p>
     * The shared buffer's limit is mutated here (shrunk for the final partial
     * chunk), so it is restored to full capacity on entry.
     */
    private void doPreallocationChunkedZeros(RecoverableRandomAccessFile file) {
        // restore full capacity in case a previous call shrank the limit
        preAllocateDirectBuffer.limit(preAllocateDirectBuffer.capacity());
        preAllocateDirectBuffer.rewind();
        try {
            FileChannel channel = file.getChannel();

            int remLen = maxFileLength;
            while (remLen > 0) {
                if (remLen < preAllocateDirectBuffer.remaining()) {
                    // last chunk: only write the remaining byte count
                    preAllocateDirectBuffer.limit(remLen);
                }
                int writeLen = channel.write(preAllocateDirectBuffer);
                remLen -= writeLen;
                // rewind resets position only; a shrunken limit stays in
                // effect, which is safe as remLen only decreases
                preAllocateDirectBuffer.rewind();
            }

            channel.force(false);
            channel.position(0);
        } catch (ClosedByInterruptException ignored) {
            LOG.trace("Could not preallocate journal file with zeros", ignored);
        } catch (IOException e) {
            LOG.error("Could not preallocate journal file with zeros! Will continue without preallocation", e);
        }
    }
473
474    private static byte[] bytes(String string) {
475        try {
476            return string.getBytes("UTF-8");
477        } catch (UnsupportedEncodingException e) {
478            throw new RuntimeException(e);
479        }
480    }
481
482    public boolean isUnusedPreallocated(DataFile dataFile) throws IOException {
483        if (preallocationScope == PreallocationScope.ENTIRE_JOURNAL_ASYNC) {
484            DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile);
485            try {
486                byte[] firstFewBytes = new byte[BATCH_CONTROL_RECORD_HEADER.length];
487                reader.readFully(0, firstFewBytes);
488                ByteSequence bs = new ByteSequence(firstFewBytes);
489                return bs.startsWith(EOF_RECORD);
490            } catch (Exception ignored) {
491            } finally {
492                accessorPool.closeDataFileAccessor(reader);
493            }
494        }
495        return false;
496    }
497
    /**
     * Scans the given data file from offset 0, batch by batch, to find the
     * last valid append position. Corrupt ranges are recorded in
     * {@code dataFile.corruptedBlocks} and skipped when a later batch header
     * can be found. Finally trims the file's accounted length (and the journal
     * total) down to the recovered offset.
     *
     * @return the location (file id + offset) where appending may resume
     */
    protected Location recoveryCheck(DataFile dataFile) throws IOException {
        Location location = new Location();
        location.setDataFileId(dataFile.getDataFileId());
        location.setOffset(0);

        DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile);
        try {
            RandomAccessFile randomAccessFile = reader.getRaf().getRaf();
            randomAccessFile.seek(0);
            final long totalFileLength = randomAccessFile.length();
            byte[] data = new byte[getWriteBatchSize()];
            ByteSequence bs = new ByteSequence(data, 0, randomAccessFile.read(data));

            while (true) {
                // checkBatchRecord: >0 = valid batch of that payload size,
                // 0 = eof record, <0 = corruption (various causes)
                int size = checkBatchRecord(bs, randomAccessFile);
                if (size > 0 && location.getOffset() + BATCH_CONTROL_RECORD_SIZE + size <= totalFileLength) {
                    // advance past control record plus payload
                    location.setOffset(location.getOffset() + BATCH_CONTROL_RECORD_SIZE + size);
                } else if (size == 0 && location.getOffset() + EOF_RECORD.length + size <= totalFileLength) {
                    // eof batch record
                    break;
                } else  {
                    // track corruption and skip if possible
                    Sequence sequence = new Sequence(location.getOffset());
                    if (findNextBatchRecord(bs, randomAccessFile) >= 0) {
                        // resume at the next batch header found after the corruption
                        int nextOffset = Math.toIntExact(randomAccessFile.getFilePointer() - bs.remaining());
                        sequence.setLast(nextOffset - 1);
                        dataFile.corruptedBlocks.add(sequence);
                        LOG.warn("Corrupt journal records found in '{}' between offsets: {}", dataFile.getFile(), sequence);
                        location.setOffset(nextOffset);
                    } else {
                        // corruption to eof, don't loose track of this corruption, don't truncate
                        sequence.setLast(Math.toIntExact(randomAccessFile.getFilePointer()));
                        dataFile.corruptedBlocks.add(sequence);
                        LOG.warn("Corrupt journal records found in '{}' from offset: {} to EOF", dataFile.getFile(), sequence);
                        break;
                    }
                }
            }

        } catch (IOException e) {
            LOG.trace("exception on recovery check of: " + dataFile + ", at " + location, e);
        } finally {
            accessorPool.closeDataFileAccessor(reader);
        }

        // shrink the accounted file length (and journal total) to the recovered offset
        int existingLen = dataFile.getLength();
        dataFile.setLength(location.getOffset());
        if (existingLen > dataFile.getLength()) {
            totalLength.addAndGet(dataFile.getLength() - existingLen);
        }
        return location;
    }
550
    /**
     * Scans forward through {@code bs} (refilling it from {@code reader} as
     * needed) for the next occurrence of BATCH_CONTROL_RECORD_HEADER.
     * On success, positions {@code bs} at the header start and returns the
     * (non-negative) index within the buffer; returns -1 if EOF is reached
     * without finding one.
     */
    private int findNextBatchRecord(ByteSequence bs, RandomAccessFile reader) throws IOException {
        final ByteSequence header = new ByteSequence(BATCH_CONTROL_RECORD_HEADER);
        int pos = 0;
        while (true) {
            pos = bs.indexOf(header, 0);
            if (pos >= 0) {
                bs.setOffset(bs.offset + pos);
                return pos;
            } else {
                // need to load the next data chunck in..
                if (bs.length != bs.data.length) {
                    // If we had a short read then we were at EOF
                    return -1;
                }
                // keep the last header-length bytes so a header straddling the
                // chunk boundary is still found after the refill
                // NOTE(review): relies on ByteSequence.reset() compacting the
                // remaining bytes to the front of the buffer - verify in ByteSequence
                bs.setOffset(bs.length - BATCH_CONTROL_RECORD_HEADER.length);
                bs.reset();
                bs.setLength(bs.length + reader.read(bs.data, bs.length, bs.data.length - BATCH_CONTROL_RECORD_HEADER.length));
            }
        }
    }
571
    /**
     * Validates the batch record at the current position of {@code bs},
     * consuming it (and refilling from {@code reader} as needed).
     *
     * @return the batch payload size (&gt; 0) when the record is valid;
     *         0 for an EOF record;
     *         -1 if the control header bytes do not match;
     *         -2 if the declared size is negative or implausibly large;
     *         -3 if the payload runs past the end of the file;
     *         -4 if the Adler32 checksum does not match
     */
    private int checkBatchRecord(ByteSequence bs, RandomAccessFile reader) throws IOException {
        ensureAvailable(bs, reader, EOF_RECORD.length);
        if (bs.startsWith(EOF_RECORD)) {
            return 0; // eof
        }
        ensureAvailable(bs, reader, BATCH_CONTROL_RECORD_SIZE);
        try (DataByteArrayInputStream controlIs = new DataByteArrayInputStream(bs)) {

            // Assert that it's a batch record.
            for (int i = 0; i < BATCH_CONTROL_RECORD_HEADER.length; i++) {
                if (controlIs.readByte() != BATCH_CONTROL_RECORD_HEADER[i]) {
                    return -1;
                }
            }

            // declared payload size; guard against negative or overflow-prone values
            int size = controlIs.readInt();
            if (size < 0 || size > Integer.MAX_VALUE - (BATCH_CONTROL_RECORD_SIZE + EOF_RECORD.length)) {
                return -2;
            }

            // checksum only verified when enabled and a checksum was recorded
            long expectedChecksum = controlIs.readLong();
            Checksum checksum = null;
            if (isChecksum() && expectedChecksum > 0) {
                checksum = new Adler32();
            }

            // revert to bs to consume data
            bs.setOffset(controlIs.position());
            int toRead = size;
            while (toRead > 0) {
                if (bs.remaining() >= toRead) {
                    // rest of the payload is already buffered
                    if (checksum != null) {
                        checksum.update(bs.getData(), bs.getOffset(), toRead);
                    }
                    bs.setOffset(bs.offset + toRead);
                    toRead = 0;
                } else {
                    if (bs.length != bs.data.length) {
                        // buffer exhausted
                        return  -3;
                    }

                    // consume what is buffered, then refill the whole buffer
                    toRead -= bs.remaining();
                    if (checksum != null) {
                        checksum.update(bs.getData(), bs.getOffset(), bs.remaining());
                    }
                    bs.setLength(reader.read(bs.data));
                    bs.setOffset(0);
                }
            }
            if (checksum != null && expectedChecksum != checksum.getValue()) {
                return -4;
            }

            return size;
        }
    }
629
630    private void ensureAvailable(ByteSequence bs, RandomAccessFile reader, int required) throws IOException {
631        if (bs.remaining() < required) {
632            bs.reset();
633            int read = reader.read(bs.data, bs.length, bs.data.length - bs.length);
634            if (read < 0) {
635                if (bs.remaining() == 0) {
636                    throw new EOFException("request for " + required + " bytes reached EOF");
637                }
638            }
639            bs.setLength(bs.length + read);
640        }
641    }
642
643    void addToTotalLength(int size) {
644        totalLength.addAndGet(size);
645    }
646
647    public long length() {
648        return totalLength.get();
649    }
650
    /**
     * Makes a new data file the current write file: consumes the asynchronously
     * preallocated {@code nextDataFile} if one is ready, otherwise allocates
     * one inline; then registers it in the file maps and, under
     * ENTIRE_JOURNAL_ASYNC, kicks off preallocation of the following file.
     * <p>
     * Lock order: dataFileIdLock (file id/nextDataFile) then currentDataFile
     * (file maps and list).
     */
    public void rotateWriteFile() throws IOException {
       synchronized (dataFileIdLock) {
            DataFile dataFile = nextDataFile;
            if (dataFile == null) {
                // no preallocated file ready - allocate synchronously
                dataFile = newDataFile();
            }
            synchronized (currentDataFile) {
                fileMap.put(dataFile.getDataFileId(), dataFile);
                fileByFileMap.put(dataFile.getFile(), dataFile);
                dataFiles.addLast(dataFile);
                currentDataFile.set(dataFile);
            }
            nextDataFile = null;
        }
        if (PreallocationScope.ENTIRE_JOURNAL_ASYNC == preallocationScope) {
            // proactively allocate the next file off the write path
            preAllocateNextDataFileFuture = scheduler.submit(preAllocateNextDataFileTask);
        }
    }
669
    // Scheduler task that preallocates the next data file ahead of rotation
    // (ENTIRE_JOURNAL_ASYNC); allocation failure is logged and retried lazily
    // by rotateWriteFile, which falls back to synchronous allocation.
    private Runnable preAllocateNextDataFileTask = new Runnable() {
        @Override
        public void run() {
            if (nextDataFile == null) {
                synchronized (dataFileIdLock){
                    try {
                        nextDataFile = newDataFile();
                    } catch (IOException e) {
                        LOG.warn("Failed to proactively allocate data file", e);
                    }
                }
            }
        }
    };

    // Handle to the in-flight preallocation task; cancelled on close/delete.
    private volatile Future preAllocateNextDataFileFuture;
686
687    private DataFile newDataFile() throws IOException {
688        int nextNum = nextDataFileId++;
689        File file = getFile(nextNum);
690        DataFile nextWriteFile = new DataFile(file, nextNum);
691        preallocateEntireJournalDataFile(nextWriteFile.appendRandomAccessFile());
692        return nextWriteFile;
693    }
694
695
    /**
     * Reserves a new data file id and registers the file in the journal's maps,
     * linking it just BEFORE the current tail so it does not become the active
     * write file. Lock order matches rotateWriteFile: dataFileIdLock then
     * currentDataFile.
     *
     * @return the newly reserved data file
     */
    public DataFile reserveDataFile() {
        synchronized (dataFileIdLock) {
            int nextNum = nextDataFileId++;
            File file = getFile(nextNum);
            DataFile reservedDataFile = new DataFile(file, nextNum);
            synchronized (currentDataFile) {
                fileMap.put(reservedDataFile.getDataFileId(), reservedDataFile);
                fileByFileMap.put(file, reservedDataFile);
                if (dataFiles.isEmpty()) {
                    dataFiles.addLast(reservedDataFile);
                } else {
                    // keep the tail (current write file) in place
                    dataFiles.getTail().linkBefore(reservedDataFile);
                }
            }
            return reservedDataFile;
        }
    }
713
714    public File getFile(int nextNum) {
715        String fileName = filePrefix + nextNum + fileSuffix;
716        File file = new File(directory, fileName);
717        return file;
718    }
719
720    DataFile getDataFile(Location item) throws IOException {
721        Integer key = Integer.valueOf(item.getDataFileId());
722        DataFile dataFile = null;
723        synchronized (currentDataFile) {
724            dataFile = fileMap.get(key);
725        }
726        if (dataFile == null) {
727            LOG.error("Looking for key " + key + " but not found in fileMap: " + fileMap);
728            throw new IOException("Could not locate data file " + getFile(item.getDataFileId()));
729        }
730        return dataFile;
731    }
732
    /**
     * Closes the journal: cancels background tasks, shuts down the scheduler,
     * closes the accessor pool and appender, then clears the in-memory file
     * bookkeeping. A no-op when the journal was never started.
     */
    public void close() throws IOException {
        synchronized (this) {
            if (!started) {
                return;
            }
            // Stop the periodic cleanup and any in-flight preallocation first so
            // they cannot race with the teardown below.
            cleanupTask.cancel(true);
            if (preAllocateNextDataFileFuture != null) {
                preAllocateNextDataFileFuture.cancel(true);
            }
            ThreadPoolUtils.shutdownGraceful(scheduler, 4000);
            accessorPool.close();
        }
        // the appender can be calling back to the journal, blocking a close, so
        // close it outside the journal lock - AMQ-5620
        appender.close();
        synchronized (currentDataFile) {
            fileMap.clear();
            fileByFileMap.clear();
            dataFiles.clear();
            lastAppendLocation.set(null);
            started = false;
        }
    }
755
756    public synchronized void cleanup() {
757        if (accessorPool != null) {
758            accessorPool.disposeUnused();
759        }
760    }
761
    /**
     * Deletes every journal data file on disk and resets the in-memory state so
     * the journal can be reused; the accessor pool and appender are recreated.
     *
     * @return true only if every data file was successfully deleted
     */
    public synchronized boolean delete() throws IOException {

        // Close all open file handles...
        appender.close();
        accessorPool.close();

        boolean result = true;
        for (Iterator<DataFile> i = fileMap.values().iterator(); i.hasNext();) {
            DataFile dataFile = i.next();
            result &= dataFile.delete();
        }

        // Also cancel any pending preallocation and discard the proactively
        // pre-allocated next file, if one exists.
        if (preAllocateNextDataFileFuture != null) {
            preAllocateNextDataFileFuture.cancel(true);
        }
        synchronized (dataFileIdLock) {
            if (nextDataFile != null) {
                nextDataFile.delete();
                nextDataFile = null;
            }
        }

        totalLength.set(0);
        synchronized (currentDataFile) {
            fileMap.clear();
            fileByFileMap.clear();
            lastAppendLocation.set(null);
            dataFiles = new LinkedNodeList<DataFile>();
        }
        // reopen open file handles...
        accessorPool = new DataFileAccessorPool(this);
        appender = new DataFileAppender(this);
        return result;
    }
796
    /**
     * Unlinks and physically removes (deletes or archives) the given data files.
     * Ids at or after the current append file are skipped because they may still
     * receive writes.
     *
     * NOTE(review): assumes lastAppendLocation is non-null here, i.e. the
     * journal has been started - confirm callers guarantee this.
     */
    public void removeDataFiles(Set<Integer> files) throws IOException {
        for (Integer key : files) {
            // Can't remove the data file (or subsequent files) that is currently being written to.
            if (key >= lastAppendLocation.get().getDataFileId()) {
                continue;
            }
            DataFile dataFile = null;
            synchronized (currentDataFile) {
                // Unlink from the bookkeeping structures under the lock...
                dataFile = fileMap.remove(key);
                if (dataFile != null) {
                    fileByFileMap.remove(dataFile.getFile());
                    dataFile.unlink();
                }
            }
            // ...but do the slow disk delete/archive outside it.
            if (dataFile != null) {
                forceRemoveDataFile(dataFile);
            }
        }
    }
816
817    private void forceRemoveDataFile(DataFile dataFile) throws IOException {
818        accessorPool.disposeDataFileAccessors(dataFile);
819        totalLength.addAndGet(-dataFile.getLength());
820        if (archiveDataLogs) {
821            File directoryArchive = getDirectoryArchive();
822            if (directoryArchive.exists()) {
823                LOG.debug("Archive directory exists: {}", directoryArchive);
824            } else {
825                if (directoryArchive.isAbsolute())
826                if (LOG.isDebugEnabled()) {
827                    LOG.debug("Archive directory [{}] does not exist - creating it now",
828                            directoryArchive.getAbsolutePath());
829                }
830                IOHelper.mkdirs(directoryArchive);
831            }
832            LOG.debug("Moving data file {} to {} ", dataFile, directoryArchive.getCanonicalPath());
833            dataFile.move(directoryArchive);
834            LOG.debug("Successfully moved data file");
835        } else {
836            LOG.debug("Deleting data file: {}", dataFile);
837            if (dataFile.delete()) {
838                LOG.debug("Discarded data file: {}", dataFile);
839            } else {
840                LOG.warn("Failed to discard data file : {}", dataFile.getFile());
841            }
842        }
843        if (dataFileRemovedListener != null) {
844            dataFileRemovedListener.fileRemoved(dataFile);
845        }
846    }
847
848    /**
849     * @return the maxFileLength
850     */
851    public int getMaxFileLength() {
852        return maxFileLength;
853    }
854
855    /**
856     * @param maxFileLength the maxFileLength to set
857     */
858    public void setMaxFileLength(int maxFileLength) {
859        this.maxFileLength = maxFileLength;
860    }
861
862    @Override
863    public String toString() {
864        return directory.toString();
865    }
866
867    public Location getNextLocation(Location location) throws IOException, IllegalStateException {
868        return getNextLocation(location, null);
869    }
870
    /**
     * Scans forward from {@code location} (or from the head of the journal when
     * null) and returns the location of the next user record, skipping EOF
     * markers and known-corrupted ranges. Returns null when the end of the
     * journal (or {@code limit}, when non-null) is reached.
     */
    public Location getNextLocation(Location location, Location limit) throws IOException, IllegalStateException {
        Location cur = null;
        while (true) {
            if (cur == null) {
                if (location == null) {
                    // First call with no starting point: begin at the head file, offset 0.
                    DataFile head = null;
                    synchronized (currentDataFile) {
                        head = dataFiles.getHead();
                    }
                    if (head == null) {
                        return null;
                    }
                    cur = new Location();
                    cur.setDataFileId(head.getDataFileId());
                    cur.setOffset(0);
                } else {
                    // Set to the next offset..
                    if (location.getSize() == -1) {
                        // Size unknown: re-read details at the same offset first.
                        cur = new Location(location);
                    } else {
                        cur = new Location(location);
                        cur.setOffset(location.getOffset() + location.getSize());
                    }
                }
            } else {
                // Subsequent iterations: advance past the record just examined.
                cur.setOffset(cur.getOffset() + cur.getSize());
            }

            DataFile dataFile = getDataFile(cur);

            // Did it go into the next file??
            if (dataFile.getLength() <= cur.getOffset()) {
                synchronized (currentDataFile) {
                    dataFile = dataFile.getNext();
                }
                if (dataFile == null) {
                    return null;
                } else {
                    cur.setDataFileId(dataFile.getDataFileId().intValue());
                    cur.setOffset(0);
                    if (limit != null && cur.compareTo(limit) >= 0) {
                        LOG.trace("reached limit: {} at: {}", limit, cur);
                        return null;
                    }
                }
            }

            // Load in location size and type.
            DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile);
            try {
                reader.readLocationDetails(cur);
            } catch (EOFException eof) {
                LOG.trace("EOF on next: " + location + ", cur: " + cur);
                throw eof;
            } finally {
                accessorPool.closeDataFileAccessor(reader);
            }

            Sequence corruptedRange = dataFile.corruptedBlocks.get(cur.getOffset());
            if (corruptedRange != null) {
                // skip corruption
                cur.setSize((int) corruptedRange.range());
            } else if (cur.getSize() == EOF_INT && cur.getType() == EOF_EOT ||
                    (cur.getType() == 0 && cur.getSize() == 0)) {
                // eof - jump to next datafile
                // EOF_INT and EOF_EOT replace 0,0 - we need to react to both for
                // replay of existing journals
                // possibly journal is larger than maxFileLength after config change
                cur.setSize(EOF_RECORD.length);
                cur.setOffset(Math.max(maxFileLength, dataFile.getLength()));
            } else if (cur.getType() == USER_RECORD_TYPE) {
                // Only return user records.
                return cur;
            }
        }
    }
947
948    public ByteSequence read(Location location) throws IOException, IllegalStateException {
949        DataFile dataFile = getDataFile(location);
950        DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile);
951        ByteSequence rc = null;
952        try {
953            rc = reader.readRecord(location);
954        } finally {
955            accessorPool.closeDataFileAccessor(reader);
956        }
957        return rc;
958    }
959
960    public Location write(ByteSequence data, boolean sync) throws IOException, IllegalStateException {
961        Location loc = appender.storeItem(data, Location.USER_TYPE, sync);
962        return loc;
963    }
964
965    public Location write(ByteSequence data, Runnable onComplete) throws IOException, IllegalStateException {
966        Location loc = appender.storeItem(data, Location.USER_TYPE, onComplete);
967        return loc;
968    }
969
970    public void update(Location location, ByteSequence data, boolean sync) throws IOException {
971        DataFile dataFile = getDataFile(location);
972        DataFileAccessor updater = accessorPool.openDataFileAccessor(dataFile);
973        try {
974            updater.updateRecord(location, data, sync);
975        } finally {
976            accessorPool.closeDataFileAccessor(updater);
977        }
978    }
979
980    public PreallocationStrategy getPreallocationStrategy() {
981        return preallocationStrategy;
982    }
983
984    public void setPreallocationStrategy(PreallocationStrategy preallocationStrategy) {
985        this.preallocationStrategy = preallocationStrategy;
986    }
987
988    public PreallocationScope getPreallocationScope() {
989        return preallocationScope;
990    }
991
992    public void setPreallocationScope(PreallocationScope preallocationScope) {
993        this.preallocationScope = preallocationScope;
994    }
995
996    public File getDirectory() {
997        return directory;
998    }
999
1000    public void setDirectory(File directory) {
1001        this.directory = directory;
1002    }
1003
1004    public String getFilePrefix() {
1005        return filePrefix;
1006    }
1007
1008    public void setFilePrefix(String filePrefix) {
1009        this.filePrefix = filePrefix;
1010    }
1011
1012    public Map<WriteKey, WriteCommand> getInflightWrites() {
1013        return inflightWrites;
1014    }
1015
1016    public Location getLastAppendLocation() {
1017        return lastAppendLocation.get();
1018    }
1019
1020    public void setLastAppendLocation(Location lastSyncedLocation) {
1021        this.lastAppendLocation.set(lastSyncedLocation);
1022    }
1023
1024    public File getDirectoryArchive() {
1025        if (!directoryArchiveOverridden && (directoryArchive == null)) {
1026            // create the directoryArchive relative to the journal location
1027            directoryArchive = new File(directory.getAbsolutePath() +
1028                    File.separator + DEFAULT_ARCHIVE_DIRECTORY);
1029        }
1030        return directoryArchive;
1031    }
1032
1033    public void setDirectoryArchive(File directoryArchive) {
1034        directoryArchiveOverridden = true;
1035        this.directoryArchive = directoryArchive;
1036    }
1037
1038    public boolean isArchiveDataLogs() {
1039        return archiveDataLogs;
1040    }
1041
1042    public void setArchiveDataLogs(boolean archiveDataLogs) {
1043        this.archiveDataLogs = archiveDataLogs;
1044    }
1045
1046    public DataFile getDataFileById(int dataFileId) {
1047        synchronized (currentDataFile) {
1048            return fileMap.get(Integer.valueOf(dataFileId));
1049        }
1050    }
1051
    /**
     * Returns the data file currently being appended to, first rotating to a new
     * file when an upcoming write of {@code capacity} bytes would exceed
     * maxFileLength.
     */
    public DataFile getCurrentDataFile(int capacity) throws IOException {
        //First just acquire the currentDataFile lock and return if no rotation needed
        synchronized (currentDataFile) {
            if (currentDataFile.get().getLength() + capacity < maxFileLength) {
                return currentDataFile.get();
            }
        }

        //AMQ-6545 - if rotation needed, acquire dataFileIdLock first to prevent deadlocks
        //then re-check if rotation is needed
        synchronized (dataFileIdLock) {
            synchronized (currentDataFile) {
                // Re-check: another thread may have rotated between the two locks.
                if (currentDataFile.get().getLength() + capacity >= maxFileLength) {
                    rotateWriteFile();
                }
                return currentDataFile.get();
            }
        }
    }
1071
1072    public Integer getCurrentDataFileId() {
1073        synchronized (currentDataFile) {
1074            return currentDataFile.get().getDataFileId();
1075        }
1076    }
1077
1078    /**
1079     * Get a set of files - only valid after start()
1080     *
1081     * @return files currently being used
1082     */
1083    public Set<File> getFiles() {
1084        synchronized (currentDataFile) {
1085            return fileByFileMap.keySet();
1086        }
1087    }
1088
1089    public Map<Integer, DataFile> getFileMap() {
1090        synchronized (currentDataFile) {
1091            return new TreeMap<Integer, DataFile>(fileMap);
1092        }
1093    }
1094
1095    public long getDiskSize() {
1096        return totalLength.get();
1097    }
1098
1099    public void setReplicationTarget(ReplicationTarget replicationTarget) {
1100        this.replicationTarget = replicationTarget;
1101    }
1102
1103    public ReplicationTarget getReplicationTarget() {
1104        return replicationTarget;
1105    }
1106
1107    public String getFileSuffix() {
1108        return fileSuffix;
1109    }
1110
1111    public void setFileSuffix(String fileSuffix) {
1112        this.fileSuffix = fileSuffix;
1113    }
1114
1115    public boolean isChecksum() {
1116        return checksum;
1117    }
1118
1119    public void setChecksum(boolean checksumWrites) {
1120        this.checksum = checksumWrites;
1121    }
1122
1123    public boolean isCheckForCorruptionOnStartup() {
1124        return checkForCorruptionOnStartup;
1125    }
1126
1127    public void setCheckForCorruptionOnStartup(boolean checkForCorruptionOnStartup) {
1128        this.checkForCorruptionOnStartup = checkForCorruptionOnStartup;
1129    }
1130
1131    public void setWriteBatchSize(int writeBatchSize) {
1132        this.writeBatchSize = writeBatchSize;
1133    }
1134
1135    public int getWriteBatchSize() {
1136        return writeBatchSize;
1137    }
1138
1139    public void setSizeAccumulator(AtomicLong storeSizeAccumulator) {
1140       this.totalLength = storeSizeAccumulator;
1141    }
1142
1143    public void setEnableAsyncDiskSync(boolean val) {
1144        this.enableAsyncDiskSync = val;
1145    }
1146
1147    public boolean isEnableAsyncDiskSync() {
1148        return enableAsyncDiskSync;
1149    }
1150
1151    public JournalDiskSyncStrategy getJournalDiskSyncStrategy() {
1152        return journalDiskSyncStrategy;
1153    }
1154
1155    public void setJournalDiskSyncStrategy(JournalDiskSyncStrategy journalDiskSyncStrategy) {
1156        this.journalDiskSyncStrategy = journalDiskSyncStrategy;
1157    }
1158
1159    public boolean isJournalDiskSyncPeriodic() {
1160        return JournalDiskSyncStrategy.PERIODIC.equals(journalDiskSyncStrategy);
1161    }
1162
1163    public void setDataFileRemovedListener(DataFileRemovedListener dataFileRemovedListener) {
1164        this.dataFileRemovedListener = dataFileRemovedListener;
1165    }
1166
1167    public static class WriteCommand extends LinkedNode<WriteCommand> {
1168        public final Location location;
1169        public final ByteSequence data;
1170        final boolean sync;
1171        public final Runnable onComplete;
1172
1173        public WriteCommand(Location location, ByteSequence data, boolean sync) {
1174            this.location = location;
1175            this.data = data;
1176            this.sync = sync;
1177            this.onComplete = null;
1178        }
1179
1180        public WriteCommand(Location location, ByteSequence data, Runnable onComplete) {
1181            this.location = location;
1182            this.data = data;
1183            this.onComplete = onComplete;
1184            this.sync = false;
1185        }
1186    }
1187
1188    public static class WriteKey {
1189        private final int file;
1190        private final long offset;
1191        private final int hash;
1192
1193        public WriteKey(Location item) {
1194            file = item.getDataFileId();
1195            offset = item.getOffset();
1196            // TODO: see if we can build a better hash
1197            hash = (int)(file ^ offset);
1198        }
1199
1200        @Override
1201        public int hashCode() {
1202            return hash;
1203        }
1204
1205        @Override
1206        public boolean equals(Object obj) {
1207            if (obj instanceof WriteKey) {
1208                WriteKey di = (WriteKey)obj;
1209                return di.file == file && di.offset == offset;
1210            }
1211            return false;
1212        }
1213    }
1214}