/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.activemq.store.kahadb.disk.journal;

import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.zip.Adler32;
import java.util.zip.Checksum;
java.util.zip.Checksum; 048 049import org.apache.activemq.store.kahadb.disk.util.LinkedNode; 050import org.apache.activemq.store.kahadb.disk.util.LinkedNodeList; 051import org.apache.activemq.store.kahadb.disk.util.Sequence; 052import org.apache.activemq.util.ByteSequence; 053import org.apache.activemq.util.DataByteArrayInputStream; 054import org.apache.activemq.util.DataByteArrayOutputStream; 055import org.apache.activemq.util.IOHelper; 056import org.apache.activemq.util.RecoverableRandomAccessFile; 057import org.apache.activemq.util.ThreadPoolUtils; 058import org.slf4j.Logger; 059import org.slf4j.LoggerFactory; 060 061/** 062 * Manages DataFiles 063 */ 064public class Journal { 065 public static final String CALLER_BUFFER_APPENDER = "org.apache.kahadb.journal.CALLER_BUFFER_APPENDER"; 066 public static final boolean callerBufferAppender = Boolean.parseBoolean(System.getProperty(CALLER_BUFFER_APPENDER, "false")); 067 068 private static final int PREALLOC_CHUNK_SIZE = 1024*1024; 069 070 // ITEM_HEAD_SPACE = length + type+ reserved space + SOR 071 public static final int RECORD_HEAD_SPACE = 4 + 1; 072 073 public static final byte USER_RECORD_TYPE = 1; 074 public static final byte BATCH_CONTROL_RECORD_TYPE = 2; 075 // Batch Control Item holds a 4 byte size of the batch and a 8 byte checksum of the batch. 
076 public static final byte[] BATCH_CONTROL_RECORD_MAGIC = bytes("WRITE BATCH"); 077 public static final int BATCH_CONTROL_RECORD_SIZE = RECORD_HEAD_SPACE + BATCH_CONTROL_RECORD_MAGIC.length + 4 + 8; 078 public static final byte[] BATCH_CONTROL_RECORD_HEADER = createBatchControlRecordHeader(); 079 public static final byte[] EMPTY_BATCH_CONTROL_RECORD = createEmptyBatchControlRecordHeader(); 080 public static final int EOF_INT = ByteBuffer.wrap(new byte[]{'-', 'q', 'M', 'a'}).getInt(); 081 public static final byte EOF_EOT = '4'; 082 public static final byte[] EOF_RECORD = createEofBatchAndLocationRecord(); 083 084 private ScheduledExecutorService scheduler; 085 086 // tackle corruption when checksum is disabled or corrupt with zeros, minimize data loss 087 public void corruptRecoveryLocation(Location recoveryPosition) throws IOException { 088 DataFile dataFile = getDataFile(recoveryPosition); 089 // with corruption on recovery we have no faith in the content - slip to the next batch record or eof 090 DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile); 091 try { 092 RandomAccessFile randomAccessFile = reader.getRaf().getRaf(); 093 randomAccessFile.seek(recoveryPosition.getOffset() + 1); 094 byte[] data = new byte[getWriteBatchSize()]; 095 ByteSequence bs = new ByteSequence(data, 0, randomAccessFile.read(data)); 096 int nextOffset = 0; 097 if (findNextBatchRecord(bs, randomAccessFile) >= 0) { 098 nextOffset = Math.toIntExact(randomAccessFile.getFilePointer() - bs.remaining()); 099 } else { 100 nextOffset = Math.toIntExact(randomAccessFile.length()); 101 } 102 Sequence sequence = new Sequence(recoveryPosition.getOffset(), nextOffset - 1); 103 LOG.warn("Corrupt journal records found in '{}' between offsets: {}", dataFile.getFile(), sequence); 104 105 // skip corruption on getNextLocation 106 recoveryPosition.setOffset(nextOffset); 107 recoveryPosition.setSize(-1); 108 109 dataFile.corruptedBlocks.add(sequence); 110 } catch (IOException e) { 111 } 
finally { 112 accessorPool.closeDataFileAccessor(reader); 113 } 114 } 115 116 public DataFileAccessorPool getAccessorPool() { 117 return accessorPool; 118 } 119 120 public void allowIOResumption() { 121 if (appender instanceof DataFileAppender) { 122 DataFileAppender dataFileAppender = (DataFileAppender)appender; 123 dataFileAppender.shutdown = false; 124 } 125 } 126 127 public void setCleanupInterval(long cleanupInterval) { 128 this.cleanupInterval = cleanupInterval; 129 } 130 131 public long getCleanupInterval() { 132 return cleanupInterval; 133 } 134 135 public enum PreallocationStrategy { 136 SPARSE_FILE, 137 OS_KERNEL_COPY, 138 ZEROS, 139 CHUNKED_ZEROS; 140 } 141 142 public enum PreallocationScope { 143 ENTIRE_JOURNAL, 144 ENTIRE_JOURNAL_ASYNC, 145 NONE; 146 } 147 148 public enum JournalDiskSyncStrategy { 149 ALWAYS, 150 PERIODIC, 151 NEVER; 152 } 153 154 private static byte[] createBatchControlRecordHeader() { 155 try (DataByteArrayOutputStream os = new DataByteArrayOutputStream();) { 156 os.writeInt(BATCH_CONTROL_RECORD_SIZE); 157 os.writeByte(BATCH_CONTROL_RECORD_TYPE); 158 os.write(BATCH_CONTROL_RECORD_MAGIC); 159 ByteSequence sequence = os.toByteSequence(); 160 sequence.compact(); 161 return sequence.getData(); 162 } catch (IOException e) { 163 throw new RuntimeException("Could not create batch control record header.", e); 164 } 165 } 166 167 private static byte[] createEmptyBatchControlRecordHeader() { 168 try (DataByteArrayOutputStream os = new DataByteArrayOutputStream();) { 169 os.writeInt(BATCH_CONTROL_RECORD_SIZE); 170 os.writeByte(BATCH_CONTROL_RECORD_TYPE); 171 os.write(BATCH_CONTROL_RECORD_MAGIC); 172 os.writeInt(0); 173 os.writeLong(0l); 174 ByteSequence sequence = os.toByteSequence(); 175 sequence.compact(); 176 return sequence.getData(); 177 } catch (IOException e) { 178 throw new RuntimeException("Could not create empty batch control record header.", e); 179 } 180 } 181 182 private static byte[] createEofBatchAndLocationRecord() { 183 try 
(DataByteArrayOutputStream os = new DataByteArrayOutputStream();) { 184 os.writeInt(EOF_INT); 185 os.writeByte(EOF_EOT); 186 ByteSequence sequence = os.toByteSequence(); 187 sequence.compact(); 188 return sequence.getData(); 189 } catch (IOException e) { 190 throw new RuntimeException("Could not create eof header.", e); 191 } 192 } 193 194 public static final String DEFAULT_DIRECTORY = "."; 195 public static final String DEFAULT_ARCHIVE_DIRECTORY = "data-archive"; 196 public static final String DEFAULT_FILE_PREFIX = "db-"; 197 public static final String DEFAULT_FILE_SUFFIX = ".log"; 198 public static final int DEFAULT_MAX_FILE_LENGTH = 1024 * 1024 * 32; 199 public static final int DEFAULT_CLEANUP_INTERVAL = 1000 * 30; 200 public static final int DEFAULT_MAX_WRITE_BATCH_SIZE = 1024 * 1024 * 4; 201 202 private static final Logger LOG = LoggerFactory.getLogger(Journal.class); 203 204 protected final Map<WriteKey, WriteCommand> inflightWrites = new ConcurrentHashMap<WriteKey, WriteCommand>(); 205 206 protected File directory = new File(DEFAULT_DIRECTORY); 207 protected File directoryArchive; 208 private boolean directoryArchiveOverridden = false; 209 210 protected String filePrefix = DEFAULT_FILE_PREFIX; 211 protected String fileSuffix = DEFAULT_FILE_SUFFIX; 212 protected boolean started; 213 214 protected int maxFileLength = DEFAULT_MAX_FILE_LENGTH; 215 protected int writeBatchSize = DEFAULT_MAX_WRITE_BATCH_SIZE; 216 217 protected FileAppender appender; 218 protected DataFileAccessorPool accessorPool; 219 220 protected Map<Integer, DataFile> fileMap = new HashMap<Integer, DataFile>(); 221 protected Map<File, DataFile> fileByFileMap = new LinkedHashMap<File, DataFile>(); 222 protected LinkedNodeList<DataFile> dataFiles = new LinkedNodeList<DataFile>(); 223 224 protected final AtomicReference<Location> lastAppendLocation = new AtomicReference<Location>(); 225 protected ScheduledFuture cleanupTask; 226 protected AtomicLong totalLength = new AtomicLong(); 227 protected 
boolean archiveDataLogs; 228 private ReplicationTarget replicationTarget; 229 protected boolean checksum; 230 protected boolean checkForCorruptionOnStartup; 231 protected boolean enableAsyncDiskSync = true; 232 private int nextDataFileId = 1; 233 private Object dataFileIdLock = new Object(); 234 private final AtomicReference<DataFile> currentDataFile = new AtomicReference<>(null); 235 private volatile DataFile nextDataFile; 236 237 protected PreallocationScope preallocationScope = PreallocationScope.ENTIRE_JOURNAL; 238 protected PreallocationStrategy preallocationStrategy = PreallocationStrategy.SPARSE_FILE; 239 private File osKernelCopyTemplateFile = null; 240 private ByteBuffer preAllocateDirectBuffer = null; 241 private long cleanupInterval = DEFAULT_CLEANUP_INTERVAL; 242 243 protected JournalDiskSyncStrategy journalDiskSyncStrategy = JournalDiskSyncStrategy.ALWAYS; 244 245 public interface DataFileRemovedListener { 246 void fileRemoved(DataFile datafile); 247 } 248 249 private DataFileRemovedListener dataFileRemovedListener; 250 251 public synchronized void start() throws IOException { 252 if (started) { 253 return; 254 } 255 256 long start = System.currentTimeMillis(); 257 accessorPool = new DataFileAccessorPool(this); 258 started = true; 259 260 appender = callerBufferAppender ? 
new CallerBufferingDataFileAppender(this) : new DataFileAppender(this); 261 262 File[] files = directory.listFiles(new FilenameFilter() { 263 @Override 264 public boolean accept(File dir, String n) { 265 return dir.equals(directory) && n.startsWith(filePrefix) && n.endsWith(fileSuffix); 266 } 267 }); 268 269 if (files != null) { 270 for (File file : files) { 271 try { 272 String n = file.getName(); 273 String numStr = n.substring(filePrefix.length(), n.length()-fileSuffix.length()); 274 int num = Integer.parseInt(numStr); 275 DataFile dataFile = new DataFile(file, num); 276 fileMap.put(dataFile.getDataFileId(), dataFile); 277 totalLength.addAndGet(dataFile.getLength()); 278 } catch (NumberFormatException e) { 279 // Ignore file that do not match the pattern. 280 } 281 } 282 283 // Sort the list so that we can link the DataFiles together in the 284 // right order. 285 LinkedList<DataFile> l = new LinkedList<>(fileMap.values()); 286 Collections.sort(l); 287 for (DataFile df : l) { 288 if (df.getLength() == 0) { 289 // possibly the result of a previous failed write 290 LOG.info("ignoring zero length, partially initialised journal data file: " + df); 291 continue; 292 } else if (l.getLast().equals(df) && isUnusedPreallocated(df)) { 293 continue; 294 } 295 dataFiles.addLast(df); 296 fileByFileMap.put(df.getFile(), df); 297 298 if( isCheckForCorruptionOnStartup() ) { 299 lastAppendLocation.set(recoveryCheck(df)); 300 } 301 } 302 } 303 304 if (preallocationScope != PreallocationScope.NONE) { 305 switch (preallocationStrategy) { 306 case SPARSE_FILE: 307 break; 308 case OS_KERNEL_COPY: { 309 osKernelCopyTemplateFile = createJournalTemplateFile(); 310 } 311 break; 312 case CHUNKED_ZEROS: { 313 preAllocateDirectBuffer = allocateDirectBuffer(PREALLOC_CHUNK_SIZE); 314 } 315 break; 316 case ZEROS: { 317 preAllocateDirectBuffer = allocateDirectBuffer(getMaxFileLength()); 318 } 319 break; 320 } 321 } 322 scheduler = Executors.newScheduledThreadPool(1, new ThreadFactory() { 323 
@Override 324 public Thread newThread(Runnable r) { 325 Thread schedulerThread = new Thread(r); 326 schedulerThread.setName("ActiveMQ Journal Scheduled executor"); 327 schedulerThread.setDaemon(true); 328 return schedulerThread; 329 } 330 }); 331 332 // init current write file 333 if (dataFiles.isEmpty()) { 334 nextDataFileId = 1; 335 rotateWriteFile(); 336 } else { 337 currentDataFile.set(dataFiles.getTail()); 338 nextDataFileId = currentDataFile.get().dataFileId + 1; 339 } 340 341 if( lastAppendLocation.get()==null ) { 342 DataFile df = dataFiles.getTail(); 343 lastAppendLocation.set(recoveryCheck(df)); 344 } 345 346 // ensure we don't report unused space of last journal file in size metric 347 int lastFileLength = dataFiles.getTail().getLength(); 348 if (totalLength.get() > lastFileLength && lastAppendLocation.get().getOffset() > 0) { 349 totalLength.addAndGet(lastAppendLocation.get().getOffset() - lastFileLength); 350 } 351 352 cleanupTask = scheduler.scheduleAtFixedRate(new Runnable() { 353 @Override 354 public void run() { 355 cleanup(); 356 } 357 }, cleanupInterval, cleanupInterval, TimeUnit.MILLISECONDS); 358 359 long end = System.currentTimeMillis(); 360 LOG.trace("Startup took: "+(end-start)+" ms"); 361 } 362 363 private ByteBuffer allocateDirectBuffer(int size) { 364 ByteBuffer buffer = ByteBuffer.allocateDirect(size); 365 buffer.put(EOF_RECORD); 366 return buffer; 367 } 368 369 public void preallocateEntireJournalDataFile(RecoverableRandomAccessFile file) { 370 371 if (PreallocationScope.NONE != preallocationScope) { 372 373 try { 374 if (PreallocationStrategy.OS_KERNEL_COPY == preallocationStrategy) { 375 doPreallocationKernelCopy(file); 376 } else if (PreallocationStrategy.ZEROS == preallocationStrategy) { 377 doPreallocationZeros(file); 378 } else if (PreallocationStrategy.CHUNKED_ZEROS == preallocationStrategy) { 379 doPreallocationChunkedZeros(file); 380 } else { 381 doPreallocationSparseFile(file); 382 } 383 } catch (Throwable 
continueWithNoPrealloc) { 384 // error on preallocation is non fatal, and we don't want to leak the journal handle 385 LOG.error("cound not preallocate journal data file", continueWithNoPrealloc); 386 } 387 } 388 } 389 390 private void doPreallocationSparseFile(RecoverableRandomAccessFile file) { 391 final ByteBuffer journalEof = ByteBuffer.wrap(EOF_RECORD); 392 try { 393 FileChannel channel = file.getChannel(); 394 channel.position(0); 395 channel.write(journalEof); 396 channel.position(maxFileLength - 5); 397 journalEof.rewind(); 398 channel.write(journalEof); 399 channel.force(false); 400 channel.position(0); 401 } catch (ClosedByInterruptException ignored) { 402 LOG.trace("Could not preallocate journal file with sparse file", ignored); 403 } catch (IOException e) { 404 LOG.error("Could not preallocate journal file with sparse file", e); 405 } 406 } 407 408 private void doPreallocationZeros(RecoverableRandomAccessFile file) { 409 preAllocateDirectBuffer.rewind(); 410 try { 411 FileChannel channel = file.getChannel(); 412 channel.write(preAllocateDirectBuffer); 413 channel.force(false); 414 channel.position(0); 415 } catch (ClosedByInterruptException ignored) { 416 LOG.trace("Could not preallocate journal file with zeros", ignored); 417 } catch (IOException e) { 418 LOG.error("Could not preallocate journal file with zeros", e); 419 } 420 } 421 422 private void doPreallocationKernelCopy(RecoverableRandomAccessFile file) { 423 try (RandomAccessFile templateRaf = new RandomAccessFile(osKernelCopyTemplateFile, "rw");){ 424 templateRaf.getChannel().transferTo(0, getMaxFileLength(), file.getChannel()); 425 } catch (ClosedByInterruptException ignored) { 426 LOG.trace("Could not preallocate journal file with kernel copy", ignored); 427 } catch (FileNotFoundException e) { 428 LOG.error("Could not find the template file on disk at " + osKernelCopyTemplateFile.getAbsolutePath(), e); 429 } catch (IOException e) { 430 LOG.error("Could not transfer the template file to 
journal, transferFile=" + osKernelCopyTemplateFile.getAbsolutePath(), e); 431 } 432 } 433 434 private File createJournalTemplateFile() { 435 String fileName = "db-log.template"; 436 File rc = new File(directory, fileName); 437 try (RandomAccessFile templateRaf = new RandomAccessFile(rc, "rw");) { 438 templateRaf.getChannel().write(ByteBuffer.wrap(EOF_RECORD)); 439 templateRaf.setLength(maxFileLength); 440 templateRaf.getChannel().force(true); 441 } catch (FileNotFoundException e) { 442 LOG.error("Could not find the template file on disk at " + osKernelCopyTemplateFile.getAbsolutePath(), e); 443 } catch (IOException e) { 444 LOG.error("Could not transfer the template file to journal, transferFile=" + osKernelCopyTemplateFile.getAbsolutePath(), e); 445 } 446 return rc; 447 } 448 449 private void doPreallocationChunkedZeros(RecoverableRandomAccessFile file) { 450 preAllocateDirectBuffer.limit(preAllocateDirectBuffer.capacity()); 451 preAllocateDirectBuffer.rewind(); 452 try { 453 FileChannel channel = file.getChannel(); 454 455 int remLen = maxFileLength; 456 while (remLen > 0) { 457 if (remLen < preAllocateDirectBuffer.remaining()) { 458 preAllocateDirectBuffer.limit(remLen); 459 } 460 int writeLen = channel.write(preAllocateDirectBuffer); 461 remLen -= writeLen; 462 preAllocateDirectBuffer.rewind(); 463 } 464 465 channel.force(false); 466 channel.position(0); 467 } catch (ClosedByInterruptException ignored) { 468 LOG.trace("Could not preallocate journal file with zeros", ignored); 469 } catch (IOException e) { 470 LOG.error("Could not preallocate journal file with zeros! 
Will continue without preallocation", e); 471 } 472 } 473 474 private static byte[] bytes(String string) { 475 try { 476 return string.getBytes("UTF-8"); 477 } catch (UnsupportedEncodingException e) { 478 throw new RuntimeException(e); 479 } 480 } 481 482 public boolean isUnusedPreallocated(DataFile dataFile) throws IOException { 483 if (preallocationScope == PreallocationScope.ENTIRE_JOURNAL_ASYNC) { 484 DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile); 485 try { 486 byte[] firstFewBytes = new byte[BATCH_CONTROL_RECORD_HEADER.length]; 487 reader.readFully(0, firstFewBytes); 488 ByteSequence bs = new ByteSequence(firstFewBytes); 489 return bs.startsWith(EOF_RECORD); 490 } catch (Exception ignored) { 491 } finally { 492 accessorPool.closeDataFileAccessor(reader); 493 } 494 } 495 return false; 496 } 497 498 protected Location recoveryCheck(DataFile dataFile) throws IOException { 499 Location location = new Location(); 500 location.setDataFileId(dataFile.getDataFileId()); 501 location.setOffset(0); 502 503 DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile); 504 try { 505 RandomAccessFile randomAccessFile = reader.getRaf().getRaf(); 506 randomAccessFile.seek(0); 507 final long totalFileLength = randomAccessFile.length(); 508 byte[] data = new byte[getWriteBatchSize()]; 509 ByteSequence bs = new ByteSequence(data, 0, randomAccessFile.read(data)); 510 511 while (true) { 512 int size = checkBatchRecord(bs, randomAccessFile); 513 if (size > 0 && location.getOffset() + BATCH_CONTROL_RECORD_SIZE + size <= totalFileLength) { 514 location.setOffset(location.getOffset() + BATCH_CONTROL_RECORD_SIZE + size); 515 } else if (size == 0 && location.getOffset() + EOF_RECORD.length + size <= totalFileLength) { 516 // eof batch record 517 break; 518 } else { 519 // track corruption and skip if possible 520 Sequence sequence = new Sequence(location.getOffset()); 521 if (findNextBatchRecord(bs, randomAccessFile) >= 0) { 522 int nextOffset = 
Math.toIntExact(randomAccessFile.getFilePointer() - bs.remaining()); 523 sequence.setLast(nextOffset - 1); 524 dataFile.corruptedBlocks.add(sequence); 525 LOG.warn("Corrupt journal records found in '{}' between offsets: {}", dataFile.getFile(), sequence); 526 location.setOffset(nextOffset); 527 } else { 528 // corruption to eof, don't loose track of this corruption, don't truncate 529 sequence.setLast(Math.toIntExact(randomAccessFile.getFilePointer())); 530 dataFile.corruptedBlocks.add(sequence); 531 LOG.warn("Corrupt journal records found in '{}' from offset: {} to EOF", dataFile.getFile(), sequence); 532 break; 533 } 534 } 535 } 536 537 } catch (IOException e) { 538 LOG.trace("exception on recovery check of: " + dataFile + ", at " + location, e); 539 } finally { 540 accessorPool.closeDataFileAccessor(reader); 541 } 542 543 int existingLen = dataFile.getLength(); 544 dataFile.setLength(location.getOffset()); 545 if (existingLen > dataFile.getLength()) { 546 totalLength.addAndGet(dataFile.getLength() - existingLen); 547 } 548 return location; 549 } 550 551 private int findNextBatchRecord(ByteSequence bs, RandomAccessFile reader) throws IOException { 552 final ByteSequence header = new ByteSequence(BATCH_CONTROL_RECORD_HEADER); 553 int pos = 0; 554 while (true) { 555 pos = bs.indexOf(header, 0); 556 if (pos >= 0) { 557 bs.setOffset(bs.offset + pos); 558 return pos; 559 } else { 560 // need to load the next data chunck in.. 
561 if (bs.length != bs.data.length) { 562 // If we had a short read then we were at EOF 563 return -1; 564 } 565 bs.setOffset(bs.length - BATCH_CONTROL_RECORD_HEADER.length); 566 bs.reset(); 567 bs.setLength(bs.length + reader.read(bs.data, bs.length, bs.data.length - BATCH_CONTROL_RECORD_HEADER.length)); 568 } 569 } 570 } 571 572 private int checkBatchRecord(ByteSequence bs, RandomAccessFile reader) throws IOException { 573 ensureAvailable(bs, reader, EOF_RECORD.length); 574 if (bs.startsWith(EOF_RECORD)) { 575 return 0; // eof 576 } 577 ensureAvailable(bs, reader, BATCH_CONTROL_RECORD_SIZE); 578 try (DataByteArrayInputStream controlIs = new DataByteArrayInputStream(bs)) { 579 580 // Assert that it's a batch record. 581 for (int i = 0; i < BATCH_CONTROL_RECORD_HEADER.length; i++) { 582 if (controlIs.readByte() != BATCH_CONTROL_RECORD_HEADER[i]) { 583 return -1; 584 } 585 } 586 587 int size = controlIs.readInt(); 588 if (size < 0 || size > Integer.MAX_VALUE - (BATCH_CONTROL_RECORD_SIZE + EOF_RECORD.length)) { 589 return -2; 590 } 591 592 long expectedChecksum = controlIs.readLong(); 593 Checksum checksum = null; 594 if (isChecksum() && expectedChecksum > 0) { 595 checksum = new Adler32(); 596 } 597 598 // revert to bs to consume data 599 bs.setOffset(controlIs.position()); 600 int toRead = size; 601 while (toRead > 0) { 602 if (bs.remaining() >= toRead) { 603 if (checksum != null) { 604 checksum.update(bs.getData(), bs.getOffset(), toRead); 605 } 606 bs.setOffset(bs.offset + toRead); 607 toRead = 0; 608 } else { 609 if (bs.length != bs.data.length) { 610 // buffer exhausted 611 return -3; 612 } 613 614 toRead -= bs.remaining(); 615 if (checksum != null) { 616 checksum.update(bs.getData(), bs.getOffset(), bs.remaining()); 617 } 618 bs.setLength(reader.read(bs.data)); 619 bs.setOffset(0); 620 } 621 } 622 if (checksum != null && expectedChecksum != checksum.getValue()) { 623 return -4; 624 } 625 626 return size; 627 } 628 } 629 630 private void 
ensureAvailable(ByteSequence bs, RandomAccessFile reader, int required) throws IOException { 631 if (bs.remaining() < required) { 632 bs.reset(); 633 int read = reader.read(bs.data, bs.length, bs.data.length - bs.length); 634 if (read < 0) { 635 if (bs.remaining() == 0) { 636 throw new EOFException("request for " + required + " bytes reached EOF"); 637 } 638 } 639 bs.setLength(bs.length + read); 640 } 641 } 642 643 void addToTotalLength(int size) { 644 totalLength.addAndGet(size); 645 } 646 647 public long length() { 648 return totalLength.get(); 649 } 650 651 public void rotateWriteFile() throws IOException { 652 synchronized (dataFileIdLock) { 653 DataFile dataFile = nextDataFile; 654 if (dataFile == null) { 655 dataFile = newDataFile(); 656 } 657 synchronized (currentDataFile) { 658 fileMap.put(dataFile.getDataFileId(), dataFile); 659 fileByFileMap.put(dataFile.getFile(), dataFile); 660 dataFiles.addLast(dataFile); 661 currentDataFile.set(dataFile); 662 } 663 nextDataFile = null; 664 } 665 if (PreallocationScope.ENTIRE_JOURNAL_ASYNC == preallocationScope) { 666 preAllocateNextDataFileFuture = scheduler.submit(preAllocateNextDataFileTask); 667 } 668 } 669 670 private Runnable preAllocateNextDataFileTask = new Runnable() { 671 @Override 672 public void run() { 673 if (nextDataFile == null) { 674 synchronized (dataFileIdLock){ 675 try { 676 nextDataFile = newDataFile(); 677 } catch (IOException e) { 678 LOG.warn("Failed to proactively allocate data file", e); 679 } 680 } 681 } 682 } 683 }; 684 685 private volatile Future preAllocateNextDataFileFuture; 686 687 private DataFile newDataFile() throws IOException { 688 int nextNum = nextDataFileId++; 689 File file = getFile(nextNum); 690 DataFile nextWriteFile = new DataFile(file, nextNum); 691 preallocateEntireJournalDataFile(nextWriteFile.appendRandomAccessFile()); 692 return nextWriteFile; 693 } 694 695 696 public DataFile reserveDataFile() { 697 synchronized (dataFileIdLock) { 698 int nextNum = nextDataFileId++; 699 
File file = getFile(nextNum); 700 DataFile reservedDataFile = new DataFile(file, nextNum); 701 synchronized (currentDataFile) { 702 fileMap.put(reservedDataFile.getDataFileId(), reservedDataFile); 703 fileByFileMap.put(file, reservedDataFile); 704 if (dataFiles.isEmpty()) { 705 dataFiles.addLast(reservedDataFile); 706 } else { 707 dataFiles.getTail().linkBefore(reservedDataFile); 708 } 709 } 710 return reservedDataFile; 711 } 712 } 713 714 public File getFile(int nextNum) { 715 String fileName = filePrefix + nextNum + fileSuffix; 716 File file = new File(directory, fileName); 717 return file; 718 } 719 720 DataFile getDataFile(Location item) throws IOException { 721 Integer key = Integer.valueOf(item.getDataFileId()); 722 DataFile dataFile = null; 723 synchronized (currentDataFile) { 724 dataFile = fileMap.get(key); 725 } 726 if (dataFile == null) { 727 LOG.error("Looking for key " + key + " but not found in fileMap: " + fileMap); 728 throw new IOException("Could not locate data file " + getFile(item.getDataFileId())); 729 } 730 return dataFile; 731 } 732 733 public void close() throws IOException { 734 synchronized (this) { 735 if (!started) { 736 return; 737 } 738 cleanupTask.cancel(true); 739 if (preAllocateNextDataFileFuture != null) { 740 preAllocateNextDataFileFuture.cancel(true); 741 } 742 ThreadPoolUtils.shutdownGraceful(scheduler, 4000); 743 accessorPool.close(); 744 } 745 // the appender can be calling back to to the journal blocking a close AMQ-5620 746 appender.close(); 747 synchronized (currentDataFile) { 748 fileMap.clear(); 749 fileByFileMap.clear(); 750 dataFiles.clear(); 751 lastAppendLocation.set(null); 752 started = false; 753 } 754 } 755 756 public synchronized void cleanup() { 757 if (accessorPool != null) { 758 accessorPool.disposeUnused(); 759 } 760 } 761 762 public synchronized boolean delete() throws IOException { 763 764 // Close all open file handles... 
765 appender.close(); 766 accessorPool.close(); 767 768 boolean result = true; 769 for (Iterator<DataFile> i = fileMap.values().iterator(); i.hasNext();) { 770 DataFile dataFile = i.next(); 771 result &= dataFile.delete(); 772 } 773 774 if (preAllocateNextDataFileFuture != null) { 775 preAllocateNextDataFileFuture.cancel(true); 776 } 777 synchronized (dataFileIdLock) { 778 if (nextDataFile != null) { 779 nextDataFile.delete(); 780 nextDataFile = null; 781 } 782 } 783 784 totalLength.set(0); 785 synchronized (currentDataFile) { 786 fileMap.clear(); 787 fileByFileMap.clear(); 788 lastAppendLocation.set(null); 789 dataFiles = new LinkedNodeList<DataFile>(); 790 } 791 // reopen open file handles... 792 accessorPool = new DataFileAccessorPool(this); 793 appender = new DataFileAppender(this); 794 return result; 795 } 796 797 public void removeDataFiles(Set<Integer> files) throws IOException { 798 for (Integer key : files) { 799 // Can't remove the data file (or subsequent files) that is currently being written to. 
800 if (key >= lastAppendLocation.get().getDataFileId()) { 801 continue; 802 } 803 DataFile dataFile = null; 804 synchronized (currentDataFile) { 805 dataFile = fileMap.remove(key); 806 if (dataFile != null) { 807 fileByFileMap.remove(dataFile.getFile()); 808 dataFile.unlink(); 809 } 810 } 811 if (dataFile != null) { 812 forceRemoveDataFile(dataFile); 813 } 814 } 815 } 816 817 private void forceRemoveDataFile(DataFile dataFile) throws IOException { 818 accessorPool.disposeDataFileAccessors(dataFile); 819 totalLength.addAndGet(-dataFile.getLength()); 820 if (archiveDataLogs) { 821 File directoryArchive = getDirectoryArchive(); 822 if (directoryArchive.exists()) { 823 LOG.debug("Archive directory exists: {}", directoryArchive); 824 } else { 825 if (directoryArchive.isAbsolute()) 826 if (LOG.isDebugEnabled()) { 827 LOG.debug("Archive directory [{}] does not exist - creating it now", 828 directoryArchive.getAbsolutePath()); 829 } 830 IOHelper.mkdirs(directoryArchive); 831 } 832 LOG.debug("Moving data file {} to {} ", dataFile, directoryArchive.getCanonicalPath()); 833 dataFile.move(directoryArchive); 834 LOG.debug("Successfully moved data file"); 835 } else { 836 LOG.debug("Deleting data file: {}", dataFile); 837 if (dataFile.delete()) { 838 LOG.debug("Discarded data file: {}", dataFile); 839 } else { 840 LOG.warn("Failed to discard data file : {}", dataFile.getFile()); 841 } 842 } 843 if (dataFileRemovedListener != null) { 844 dataFileRemovedListener.fileRemoved(dataFile); 845 } 846 } 847 848 /** 849 * @return the maxFileLength 850 */ 851 public int getMaxFileLength() { 852 return maxFileLength; 853 } 854 855 /** 856 * @param maxFileLength the maxFileLength to set 857 */ 858 public void setMaxFileLength(int maxFileLength) { 859 this.maxFileLength = maxFileLength; 860 } 861 862 @Override 863 public String toString() { 864 return directory.toString(); 865 } 866 867 public Location getNextLocation(Location location) throws IOException, IllegalStateException { 868 return 
getNextLocation(location, null); 869 } 870 871 public Location getNextLocation(Location location, Location limit) throws IOException, IllegalStateException { 872 Location cur = null; 873 while (true) { 874 if (cur == null) { 875 if (location == null) { 876 DataFile head = null; 877 synchronized (currentDataFile) { 878 head = dataFiles.getHead(); 879 } 880 if (head == null) { 881 return null; 882 } 883 cur = new Location(); 884 cur.setDataFileId(head.getDataFileId()); 885 cur.setOffset(0); 886 } else { 887 // Set to the next offset.. 888 if (location.getSize() == -1) { 889 cur = new Location(location); 890 } else { 891 cur = new Location(location); 892 cur.setOffset(location.getOffset() + location.getSize()); 893 } 894 } 895 } else { 896 cur.setOffset(cur.getOffset() + cur.getSize()); 897 } 898 899 DataFile dataFile = getDataFile(cur); 900 901 // Did it go into the next file?? 902 if (dataFile.getLength() <= cur.getOffset()) { 903 synchronized (currentDataFile) { 904 dataFile = dataFile.getNext(); 905 } 906 if (dataFile == null) { 907 return null; 908 } else { 909 cur.setDataFileId(dataFile.getDataFileId().intValue()); 910 cur.setOffset(0); 911 if (limit != null && cur.compareTo(limit) >= 0) { 912 LOG.trace("reached limit: {} at: {}", limit, cur); 913 return null; 914 } 915 } 916 } 917 918 // Load in location size and type. 
919 DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile); 920 try { 921 reader.readLocationDetails(cur); 922 } catch (EOFException eof) { 923 LOG.trace("EOF on next: " + location + ", cur: " + cur); 924 throw eof; 925 } finally { 926 accessorPool.closeDataFileAccessor(reader); 927 } 928 929 Sequence corruptedRange = dataFile.corruptedBlocks.get(cur.getOffset()); 930 if (corruptedRange != null) { 931 // skip corruption 932 cur.setSize((int) corruptedRange.range()); 933 } else if (cur.getSize() == EOF_INT && cur.getType() == EOF_EOT || 934 (cur.getType() == 0 && cur.getSize() == 0)) { 935 // eof - jump to next datafile 936 // EOF_INT and EOF_EOT replace 0,0 - we need to react to both for 937 // replay of existing journals 938 // possibly journal is larger than maxFileLength after config change 939 cur.setSize(EOF_RECORD.length); 940 cur.setOffset(Math.max(maxFileLength, dataFile.getLength())); 941 } else if (cur.getType() == USER_RECORD_TYPE) { 942 // Only return user records. 
943 return cur; 944 } 945 } 946 } 947 948 public ByteSequence read(Location location) throws IOException, IllegalStateException { 949 DataFile dataFile = getDataFile(location); 950 DataFileAccessor reader = accessorPool.openDataFileAccessor(dataFile); 951 ByteSequence rc = null; 952 try { 953 rc = reader.readRecord(location); 954 } finally { 955 accessorPool.closeDataFileAccessor(reader); 956 } 957 return rc; 958 } 959 960 public Location write(ByteSequence data, boolean sync) throws IOException, IllegalStateException { 961 Location loc = appender.storeItem(data, Location.USER_TYPE, sync); 962 return loc; 963 } 964 965 public Location write(ByteSequence data, Runnable onComplete) throws IOException, IllegalStateException { 966 Location loc = appender.storeItem(data, Location.USER_TYPE, onComplete); 967 return loc; 968 } 969 970 public void update(Location location, ByteSequence data, boolean sync) throws IOException { 971 DataFile dataFile = getDataFile(location); 972 DataFileAccessor updater = accessorPool.openDataFileAccessor(dataFile); 973 try { 974 updater.updateRecord(location, data, sync); 975 } finally { 976 accessorPool.closeDataFileAccessor(updater); 977 } 978 } 979 980 public PreallocationStrategy getPreallocationStrategy() { 981 return preallocationStrategy; 982 } 983 984 public void setPreallocationStrategy(PreallocationStrategy preallocationStrategy) { 985 this.preallocationStrategy = preallocationStrategy; 986 } 987 988 public PreallocationScope getPreallocationScope() { 989 return preallocationScope; 990 } 991 992 public void setPreallocationScope(PreallocationScope preallocationScope) { 993 this.preallocationScope = preallocationScope; 994 } 995 996 public File getDirectory() { 997 return directory; 998 } 999 1000 public void setDirectory(File directory) { 1001 this.directory = directory; 1002 } 1003 1004 public String getFilePrefix() { 1005 return filePrefix; 1006 } 1007 1008 public void setFilePrefix(String filePrefix) { 1009 this.filePrefix = 
filePrefix; 1010 } 1011 1012 public Map<WriteKey, WriteCommand> getInflightWrites() { 1013 return inflightWrites; 1014 } 1015 1016 public Location getLastAppendLocation() { 1017 return lastAppendLocation.get(); 1018 } 1019 1020 public void setLastAppendLocation(Location lastSyncedLocation) { 1021 this.lastAppendLocation.set(lastSyncedLocation); 1022 } 1023 1024 public File getDirectoryArchive() { 1025 if (!directoryArchiveOverridden && (directoryArchive == null)) { 1026 // create the directoryArchive relative to the journal location 1027 directoryArchive = new File(directory.getAbsolutePath() + 1028 File.separator + DEFAULT_ARCHIVE_DIRECTORY); 1029 } 1030 return directoryArchive; 1031 } 1032 1033 public void setDirectoryArchive(File directoryArchive) { 1034 directoryArchiveOverridden = true; 1035 this.directoryArchive = directoryArchive; 1036 } 1037 1038 public boolean isArchiveDataLogs() { 1039 return archiveDataLogs; 1040 } 1041 1042 public void setArchiveDataLogs(boolean archiveDataLogs) { 1043 this.archiveDataLogs = archiveDataLogs; 1044 } 1045 1046 public DataFile getDataFileById(int dataFileId) { 1047 synchronized (currentDataFile) { 1048 return fileMap.get(Integer.valueOf(dataFileId)); 1049 } 1050 } 1051 1052 public DataFile getCurrentDataFile(int capacity) throws IOException { 1053 //First just acquire the currentDataFile lock and return if no rotation needed 1054 synchronized (currentDataFile) { 1055 if (currentDataFile.get().getLength() + capacity < maxFileLength) { 1056 return currentDataFile.get(); 1057 } 1058 } 1059 1060 //AMQ-6545 - if rotation needed, acquire dataFileIdLock first to prevent deadlocks 1061 //then re-check if rotation is needed 1062 synchronized (dataFileIdLock) { 1063 synchronized (currentDataFile) { 1064 if (currentDataFile.get().getLength() + capacity >= maxFileLength) { 1065 rotateWriteFile(); 1066 } 1067 return currentDataFile.get(); 1068 } 1069 } 1070 } 1071 1072 public Integer getCurrentDataFileId() { 1073 synchronized 
(currentDataFile) { 1074 return currentDataFile.get().getDataFileId(); 1075 } 1076 } 1077 1078 /** 1079 * Get a set of files - only valid after start() 1080 * 1081 * @return files currently being used 1082 */ 1083 public Set<File> getFiles() { 1084 synchronized (currentDataFile) { 1085 return fileByFileMap.keySet(); 1086 } 1087 } 1088 1089 public Map<Integer, DataFile> getFileMap() { 1090 synchronized (currentDataFile) { 1091 return new TreeMap<Integer, DataFile>(fileMap); 1092 } 1093 } 1094 1095 public long getDiskSize() { 1096 return totalLength.get(); 1097 } 1098 1099 public void setReplicationTarget(ReplicationTarget replicationTarget) { 1100 this.replicationTarget = replicationTarget; 1101 } 1102 1103 public ReplicationTarget getReplicationTarget() { 1104 return replicationTarget; 1105 } 1106 1107 public String getFileSuffix() { 1108 return fileSuffix; 1109 } 1110 1111 public void setFileSuffix(String fileSuffix) { 1112 this.fileSuffix = fileSuffix; 1113 } 1114 1115 public boolean isChecksum() { 1116 return checksum; 1117 } 1118 1119 public void setChecksum(boolean checksumWrites) { 1120 this.checksum = checksumWrites; 1121 } 1122 1123 public boolean isCheckForCorruptionOnStartup() { 1124 return checkForCorruptionOnStartup; 1125 } 1126 1127 public void setCheckForCorruptionOnStartup(boolean checkForCorruptionOnStartup) { 1128 this.checkForCorruptionOnStartup = checkForCorruptionOnStartup; 1129 } 1130 1131 public void setWriteBatchSize(int writeBatchSize) { 1132 this.writeBatchSize = writeBatchSize; 1133 } 1134 1135 public int getWriteBatchSize() { 1136 return writeBatchSize; 1137 } 1138 1139 public void setSizeAccumulator(AtomicLong storeSizeAccumulator) { 1140 this.totalLength = storeSizeAccumulator; 1141 } 1142 1143 public void setEnableAsyncDiskSync(boolean val) { 1144 this.enableAsyncDiskSync = val; 1145 } 1146 1147 public boolean isEnableAsyncDiskSync() { 1148 return enableAsyncDiskSync; 1149 } 1150 1151 public JournalDiskSyncStrategy 
getJournalDiskSyncStrategy() { 1152 return journalDiskSyncStrategy; 1153 } 1154 1155 public void setJournalDiskSyncStrategy(JournalDiskSyncStrategy journalDiskSyncStrategy) { 1156 this.journalDiskSyncStrategy = journalDiskSyncStrategy; 1157 } 1158 1159 public boolean isJournalDiskSyncPeriodic() { 1160 return JournalDiskSyncStrategy.PERIODIC.equals(journalDiskSyncStrategy); 1161 } 1162 1163 public void setDataFileRemovedListener(DataFileRemovedListener dataFileRemovedListener) { 1164 this.dataFileRemovedListener = dataFileRemovedListener; 1165 } 1166 1167 public static class WriteCommand extends LinkedNode<WriteCommand> { 1168 public final Location location; 1169 public final ByteSequence data; 1170 final boolean sync; 1171 public final Runnable onComplete; 1172 1173 public WriteCommand(Location location, ByteSequence data, boolean sync) { 1174 this.location = location; 1175 this.data = data; 1176 this.sync = sync; 1177 this.onComplete = null; 1178 } 1179 1180 public WriteCommand(Location location, ByteSequence data, Runnable onComplete) { 1181 this.location = location; 1182 this.data = data; 1183 this.onComplete = onComplete; 1184 this.sync = false; 1185 } 1186 } 1187 1188 public static class WriteKey { 1189 private final int file; 1190 private final long offset; 1191 private final int hash; 1192 1193 public WriteKey(Location item) { 1194 file = item.getDataFileId(); 1195 offset = item.getOffset(); 1196 // TODO: see if we can build a better hash 1197 hash = (int)(file ^ offset); 1198 } 1199 1200 @Override 1201 public int hashCode() { 1202 return hash; 1203 } 1204 1205 @Override 1206 public boolean equals(Object obj) { 1207 if (obj instanceof WriteKey) { 1208 WriteKey di = (WriteKey)obj; 1209 return di.file == file && di.offset == offset; 1210 } 1211 return false; 1212 } 1213 } 1214}