/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.component.file;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

import org.apache.camel.AsyncCallback;
import org.apache.camel.Exchange;
import org.apache.camel.Processor;
import org.apache.camel.ShutdownRunningTask;
import org.apache.camel.impl.ScheduledBatchPollingConsumer;
import org.apache.camel.spi.UriParam;
import org.apache.camel.util.CastUtils;
import org.apache.camel.util.ObjectHelper;
import org.apache.camel.util.StopWatch;
import org.apache.camel.util.TimeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for file consumers.
 * <p/>
 * Each poll gathers candidate files via {@link #pollDirectory(String, java.util.List, int)},
 * turns them into exchanges, optionally sorts/shuffles them and then processes them as a batch.
 * Files accepted by {@link #isValidFile(GenericFile, boolean, java.util.List)} are registered in the
 * endpoint's in-progress repository and are removed from it again when they are not (or no longer)
 * being processed.
 */
public abstract class GenericFileConsumer<T> extends ScheduledBatchPollingConsumer {
    protected final Logger log = LoggerFactory.getLogger(getClass());
    protected GenericFileEndpoint<T> endpoint;
    protected GenericFileOperations<T> operations;
    protected volatile boolean loggedIn;
    // cached result of the file name expression; reset at the start of every poll
    protected String fileExpressionResult;
    protected volatile ShutdownRunningTask shutdownRunningTask;
    // number of exchanges still waiting to be processed in the current batch
    protected volatile int pendingExchanges;
    protected Processor customProcessor;
    @UriParam
    protected boolean eagerLimitMaxMessagesPerPoll = true;
    // whether the process strategy has been prepared; reset to false when the consumer stops
    protected volatile boolean prepareOnStartup;

    /**
     * Creates the consumer.
     *
     * @param endpoint   the endpoint this consumer belongs to
     * @param processor  the processor which receives the exchanges
     * @param operations the file operations to use
     */
    public GenericFileConsumer(GenericFileEndpoint<T> endpoint, Processor processor, GenericFileOperations<T> operations) {
        super(endpoint, processor);
        this.endpoint = endpoint;
        this.operations = operations;
    }

    /**
     * Gets the custom processor, if any.
     *
     * @return the custom processor, or <tt>null</tt> if none has been set
     */
    public Processor getCustomProcessor() {
        return customProcessor;
    }

    /**
     * Use a custom processor to process the exchange.
     * <p/>
     * Only set this if you need to do custom processing, instead of the regular processing.
     * <p/>
     * This is for example used to browse file endpoints by leveraging the file consumer to poll
     * the directory to gather the list of exchanges. But to avoid processing the files regularly
     * we can use a custom processor.
     *
     * @param processor a custom processor
     */
    public void setCustomProcessor(Processor processor) {
        this.customProcessor = processor;
    }

    /**
     * Whether the maxMessagesPerPoll limit is applied eagerly, i.e. already while gathering files
     * (see {@link #canPollMoreFiles(java.util.List)}), instead of after the files have been sorted.
     *
     * @return <tt>true</tt> if the limit is applied eagerly
     */
    public boolean isEagerLimitMaxMessagesPerPoll() {
        return eagerLimitMaxMessagesPerPoll;
    }

    /**
     * Sets whether the maxMessagesPerPoll limit is applied eagerly.
     *
     * @param eagerLimitMaxMessagesPerPoll <tt>true</tt> to limit while gathering files,
     *                                     <tt>false</tt> to limit after sorting
     */
    public void setEagerLimitMaxMessagesPerPoll(boolean eagerLimitMaxMessagesPerPoll) {
        this.eagerLimitMaxMessagesPerPoll = eagerLimitMaxMessagesPerPoll;
    }

    /**
     * Poll for files
     *
     * @return the value returned by {@link #processBatch(java.util.Queue)}, or <tt>0</tt> if the
     *         pre poll check returned <tt>false</tt>
     * @throws Exception is re-thrown if gathering the files failed; files gathered so far are
     *                   removed from the in-progress repository first
     */
    protected int poll() throws Exception {
        // must prepare on startup the very first time
        if (!prepareOnStartup) {
            // prepare on startup
            endpoint.getGenericFileProcessStrategy().prepareOnStartup(operations, endpoint);
            prepareOnStartup = true;
        }

        // must reset for each poll
        fileExpressionResult = null;
        shutdownRunningTask = null;
        pendingExchanges = 0;

        // before we poll is there anything we need to check?
        // such as are we connected to the FTP Server still?
        if (!prePollCheck()) {
            log.debug("Skipping poll as pre poll check returned false");
            return 0;
        }

        // gather list of files to process
        List<GenericFile<T>> files = new ArrayList<GenericFile<T>>();
        String name = endpoint.getConfiguration().getDirectory();

        // time how long it takes to poll
        StopWatch stop = new StopWatch();
        boolean limitHit;
        try {
            limitHit = !pollDirectory(name, files, 0);
        } catch (Exception e) {
            // during poll directory we add files to the in progress repository, in case of any exception thrown after this work
            // we must then drain the in progress files before rethrowing the exception
            log.debug("Error occurred during poll directory: {} due {}. Removing {} files marked as in-progress.",
                    new Object[]{name, e.getMessage(), files.size()});
            removeExcessiveInProgressFiles(files);
            throw e;
        }

        long delta = stop.stop();
        if (log.isDebugEnabled()) {
            log.debug("Took {} to poll: {}", TimeUtils.printDuration(delta), name);
        }

        // log if we hit the limit
        if (limitHit) {
            log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll);
        }

        // sort files using file comparator if provided
        if (endpoint.getSorter() != null) {
            Collections.sort(files, endpoint.getSorter());
        }

        // sort using built-in sorters so we can use expressions
        // use a linked list so we can dequeue the exchanges
        LinkedList<Exchange> exchanges = new LinkedList<Exchange>();
        for (GenericFile<T> file : files) {
            Exchange exchange = endpoint.createExchange(file);
            endpoint.configureExchange(exchange);
            endpoint.configureMessage(file, exchange.getIn());
            exchanges.add(exchange);
        }
        // sort files using exchange comparator if provided
        if (endpoint.getSortBy() != null) {
            Collections.sort(exchanges, endpoint.getSortBy());
        }
        if (endpoint.isShuffle()) {
            Collections.shuffle(exchanges);
        }

        // use a queue for the exchanges
        Deque<Exchange> q = exchanges;

        // we are not eager limiting, but we have configured a limit, so cut the list of files
        if (!eagerLimitMaxMessagesPerPoll && maxMessagesPerPoll > 0) {
            if (files.size() > maxMessagesPerPoll) {
                log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll);
                // must first remove excessive files from the in progress repository
                removeExcessiveInProgressFiles(q, maxMessagesPerPoll);
            }
        }

        // consume files one by one
        int total = exchanges.size();
        if (total > 0) {
            log.debug("Total {} files to consume", total);
        }

        int polledMessages = processBatch(CastUtils.cast(q));

        postPollCheck(polledMessages);

        return polledMessages;
    }

    /**
     * Processes the queued exchanges one by one, at most maxMessagesPerPoll of them (when that
     * limit is positive) and only for as long as the batch is allowed to continue.
     * <p/>
     * Exchanges left in the queue afterwards are removed from the in-progress repository.
     * <p/>
     * Note: the queue is cast to a {@link Deque} when draining, so the given queue must
     * implement {@link Deque} (as the {@link LinkedList} built by {@link #poll()} does).
     *
     * @param exchanges the exchanges to process; each element must be an {@link Exchange}
     * @return the size of the queue on entry (before any limiting) minus the number of exchanges
     *         that were attempted but not started
     */
    public int processBatch(Queue<Object> exchanges) {
        int total = exchanges.size();
        int answer = total;

        // limit if needed
        if (maxMessagesPerPoll > 0 && total > maxMessagesPerPoll) {
            log.debug("Limiting to maximum messages to poll {} as there was {} messages in this poll.", maxMessagesPerPoll, total);
            total = maxMessagesPerPoll;
        }

        for (int index = 0; index < total && isBatchAllowed(); index++) {
            // only loop if we are started (allowed to run)
            // use poll to remove the head so it does not consume memory even after we have processed it
            Exchange exchange = (Exchange) exchanges.poll();
            // add current index and total as properties
            exchange.setProperty(Exchange.BATCH_INDEX, index);
            exchange.setProperty(Exchange.BATCH_SIZE, total);
            exchange.setProperty(Exchange.BATCH_COMPLETE, index == total - 1);

            // update pending number of exchanges
            pendingExchanges = total - index - 1;

            // process the current exchange
            boolean started;
            if (customProcessor != null) {
                // use a custom processor
                started = customProcessExchange(exchange, customProcessor);
            } else {
                // process the exchange regular
                started = processExchange(exchange);
            }

            // if we did not start process the file then decrement the counter
            if (!started) {
                answer--;
            }
        }

        // drain any in progress files as we are done with this batch
        removeExcessiveInProgressFiles(CastUtils.cast((Deque<?>) exchanges, Exchange.class), 0);

        return answer;
    }

    /**
     * Drain any in progress files as we are done with this batch
     * <p/>
     * Exchanges are removed from the tail of the deque until at most <tt>limit</tt> remain, and the
     * file of each removed exchange is removed from the in-progress repository. An exchange that
     * carries no file property is removed from the deque but otherwise skipped.
     *
     * @param exchanges the exchanges
     * @param limit     the limit
     */
    protected void removeExcessiveInProgressFiles(Deque<Exchange> exchanges, int limit) {
        // remove the file from the in progress list in case the batch was limited by max messages per poll
        while (exchanges.size() > limit) {
            // must remove last
            Exchange exchange = exchanges.removeLast();
            GenericFile<?> file = exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE, GenericFile.class);
            // guard against exchanges without the file property, so one such exchange
            // does not abort draining the remaining ones
            if (file != null) {
                String key = file.getAbsoluteFilePath();
                endpoint.getInProgressRepository().remove(key);
            }
        }
    }

    /**
     * Drain any in progress files as we are done with the files
     *
     * @param files the files
     */
    protected void removeExcessiveInProgressFiles(List<GenericFile<T>> files) {
        for (GenericFile<T> file : files) {
            String key = file.getAbsoluteFilePath();
            endpoint.getInProgressRepository().remove(key);
        }
    }

    /**
     * Whether or not we can continue polling for more files
     *
     * @param fileList the current list of gathered files
     * @return <tt>true</tt> to continue, <tt>false</tt> to stop due to hitting the maxMessagesPerPoll limit
     */
    public boolean canPollMoreFiles(List<?> fileList) {
        // at this point we should not limit if we are not eager
        if (!eagerLimitMaxMessagesPerPoll) {
            return true;
        }

        if (maxMessagesPerPoll <= 0) {
            // no limitation
            return true;
        }

        // then only poll if we haven't reached the max limit
        return fileList.size() < maxMessagesPerPoll;
    }

    /**
     * Override if required. Perform some checks (and perhaps actions) before we poll.
     *
     * @return <tt>true</tt> to poll, <tt>false</tt> to skip this poll.
     */
    protected boolean prePollCheck() throws Exception {
        return true;
    }

    /**
     * Override if required. Perform some checks (and perhaps actions) after we have polled.
     *
     * @param polledMessages number of polled messages
     */
    protected void postPollCheck(int polledMessages) {
        // noop
    }

    /**
     * Polls the given directory for files to process
     *
     * @param fileName current directory or file
     * @param fileList current list of files gathered
     * @param depth    the current depth of the directory (will start from 0)
     * @return whether or not to continue polling, <tt>false</tt> means the maxMessagesPerPoll limit has been hit
     */
    protected abstract boolean pollDirectory(String fileName, List<GenericFile<T>> fileList, int depth);

    /**
     * Sets the operations to be used.
     * <p/>
     * Can be used to set a fresh operations in case of recovery attempts
     *
     * @param operations the operations
     */
    public void setOperations(GenericFileOperations<T> operations) {
        this.operations = operations;
    }

    /**
     * Whether to ignore if the file cannot be retrieved.
     * <p/>
     * By default an {@link GenericFileOperationFailedException} is thrown if the file cannot be retrieved.
     * <p/>
     * This method allows to suppress this and just ignore that.
     *
     * @param name     the file name
     * @param exchange the exchange
     * @param cause    optional exception occurred during retrieving file
     * @return <tt>true</tt> to ignore, <tt>false</tt> is the default.
     */
    protected boolean ignoreCannotRetrieveFile(String name, Exchange exchange, Exception cause) {
        return false;
    }

    /**
     * Processes the exchange
     *
     * @param exchange the exchange
     * @return <tt>true</tt> if the file was started to be processed, <tt>false</tt> if the file was not started
     *         to be processed, for some reason (not found, or aborted etc)
     */
    protected boolean processExchange(final Exchange exchange) {
        GenericFile<T> file = getExchangeFileProperty(exchange);
        log.trace("Processing file: {}", file);

        // must extract the absolute name before the begin strategy as the file could potentially be pre moved
        // and then the file name would be changed
        String absoluteFileName = file.getAbsoluteFilePath();

        // check if we can begin processing the file
        final GenericFileProcessStrategy<T> processStrategy = endpoint.getGenericFileProcessStrategy();

        Exception beginCause = null;
        boolean begin = false;
        try {
            begin = processStrategy.begin(operations, endpoint, exchange, file);
        } catch (Exception e) {
            // an exception from begin is treated like begin returning false; it is reported below
            beginCause = e;
        }

        if (!begin) {
            // no something was wrong, so we need to abort and remove the file from the in progress list
            Exception abortCause = null;
            log.debug("{} cannot begin processing file: {}", endpoint, file);
            try {
                // abort
                processStrategy.abort(operations, endpoint, exchange, file);
            } catch (Exception e) {
                abortCause = e;
            } finally {
                // begin returned false, so remove file from the in progress list as its no longer in progress
                endpoint.getInProgressRepository().remove(absoluteFileName);
            }
            if (beginCause != null) {
                String msg = endpoint + " cannot begin processing file: " + file + " due to: " + beginCause.getMessage();
                handleException(msg, beginCause);
            }
            if (abortCause != null) {
                String msg2 = endpoint + " cannot abort processing file: " + file + " due to: " + abortCause.getMessage();
                handleException(msg2, abortCause);
            }
            return false;
        }

        // must use file from exchange as it can be updated due the
        // preMoveNamePrefix/preMoveNamePostfix options
        final GenericFile<T> target = getExchangeFileProperty(exchange);
        // must use full name when downloading so we have the correct path
        final String name = target.getAbsoluteFilePath();
        try {

            if (isRetrieveFile()) {
                // retrieve the file using the stream
                log.trace("Retrieving file: {} from: {}", name, endpoint);

                // retrieve the file and check it was a success
                boolean retrieved;
                Exception cause = null;
                try {
                    retrieved = operations.retrieveFile(name, exchange);
                } catch (Exception e) {
                    retrieved = false;
                    cause = e;
                }

                if (!retrieved) {
                    if (ignoreCannotRetrieveFile(name, exchange, cause)) {
                        log.trace("Cannot retrieve file {} maybe it does not exists. Ignoring.", name);
                        // remove file from the in progress list as we could not retrieve it, but should ignore
                        endpoint.getInProgressRepository().remove(absoluteFileName);
                        return false;
                    } else {
                        // throw exception to handle the problem with retrieving the file
                        // then if the method return false or throws an exception is handled the same in here
                        // as in both cases an exception is being thrown
                        // (instanceof is false for null, so no separate null check is needed)
                        if (cause instanceof GenericFileOperationFailedException) {
                            throw cause;
                        } else {
                            throw new GenericFileOperationFailedException("Cannot retrieve file: " + file + " from: " + endpoint, cause);
                        }
                    }
                }

                log.trace("Retrieved file: {} from: {}", name, endpoint);
            } else {
                log.trace("Skipped retrieval of file: {} from: {}", name, endpoint);
                exchange.getIn().setBody(null);
            }

            // register on completion callback that does the completion strategies
            // (for instance to move the file after we have processed it)
            exchange.addOnCompletion(new GenericFileOnCompletion<T>(endpoint, operations, target, absoluteFileName));

            log.debug("About to process file: {} using exchange: {}", target, exchange);

            if (endpoint.isSynchronous()) {
                // process synchronously
                getProcessor().process(exchange);
            } else {
                // process the exchange using the async consumer to support async routing engine
                // which can be supported by this file consumer as all the done work is
                // provided in the GenericFileOnCompletion
                getAsyncProcessor().process(exchange, new AsyncCallback() {
                    public void done(boolean doneSync) {
                        // noop
                        if (log.isTraceEnabled()) {
                            log.trace("Done processing file: {} {}", target, doneSync ? "synchronously" : "asynchronously");
                        }
                    }
                });
            }

        } catch (Exception e) {
            // remove file from the in progress list due to failure
            // (cannot be in finally block due to GenericFileOnCompletion will remove it
            // from in progress when it takes over and processes the file, which may happen
            // by another thread at a later time. So its only safe to remove it if there was an exception)
            endpoint.getInProgressRepository().remove(absoluteFileName);

            String msg = "Error processing file " + file + " due to " + e.getMessage();
            handleException(msg, e);
        }

        return true;
    }

    /**
     * Override if required. Files are retrieved / returns true by default
     *
     * @return <tt>true</tt> to retrieve files, <tt>false</tt> to skip retrieval of files.
     */
    protected boolean isRetrieveFile() {
        return true;
    }

    /**
     * Processes the exchange using a custom processor.
     * <p/>
     * The file is always removed from the in-progress repository afterwards, whether or not the
     * processor failed. A failure is passed to the exception handler.
     *
     * @param exchange  the exchange
     * @param processor the custom processor
     * @return <tt>true</tt> (this implementation has no other return value)
     */
    protected boolean customProcessExchange(final Exchange exchange, final Processor processor) {
        GenericFile<T> file = getExchangeFileProperty(exchange);
        log.trace("Custom processing file: {}", file);

        // must extract the absolute name before the begin strategy as the file could potentially be pre moved
        // and then the file name would be changed
        String absoluteFileName = file.getAbsoluteFilePath();

        try {
            // process using the custom processor
            processor.process(exchange);
        } catch (Exception e) {
            if (log.isDebugEnabled()) {
                log.debug(endpoint + " error custom processing: " + file + " due to: " + e.getMessage() + ". This exception will be ignored.", e);
            }
            handleException(e);
        } finally {
            // always remove file from the in progress list as its no longer in progress
            // use the original file name that was used to add it to the repository
            // as the name can be different when using preMove option
            endpoint.getInProgressRepository().remove(absoluteFileName);
        }

        return true;
    }

    /**
     * Strategy for validating if the given remote file should be included or not
     * <p/>
     * A regular file that passes all checks is added to the in-progress repository as a side
     * effect; the result of that add operation is the return value.
     *
     * @param file        the file
     * @param isDirectory whether the file is a directory or a file
     * @param files       files in the directory
     * @return <tt>true</tt> to include the file, <tt>false</tt> to skip it
     */
    protected boolean isValidFile(GenericFile<T> file, boolean isDirectory, List<T> files) {
        String absoluteFilePath = file.getAbsoluteFilePath();

        if (!isMatched(file, isDirectory, files)) {
            log.trace("File did not match. Will skip this file: {}", file);
            return false;
        }

        // directory is always valid
        if (isDirectory) {
            return true;
        }

        // check if file is already in progress
        if (endpoint.getInProgressRepository().contains(absoluteFilePath)) {
            if (log.isTraceEnabled()) {
                log.trace("Skipping as file is already in progress: {}", file.getFileName());
            }
            return false;
        }

        // if its a file then check we have the file in the idempotent registry already
        if (endpoint.isIdempotent()) {
            // use absolute file path as default key, but evaluate if an expression key was configured
            String key = file.getAbsoluteFilePath();
            if (endpoint.getIdempotentKey() != null) {
                Exchange dummy = endpoint.createExchange(file);
                key = endpoint.getIdempotentKey().evaluate(dummy, String.class);
            }
            if (key != null && endpoint.getIdempotentRepository().contains(key)) {
                log.trace("This consumer is idempotent and the file has been consumed before matching idempotentKey: {}. Will skip this file: {}", key, file);
                return false;
            }
        }

        // okay so final step is to be able to add atomic as in-progress, so we are the
        // only thread processing this file
        return endpoint.getInProgressRepository().add(absoluteFilePath);
    }

    /**
     * Strategy to perform file matching based on endpoint configuration.
     * <p/>
     * Will always return <tt>false</tt> for certain files/folders:
     * <ul>
     *   <li>Starting with a dot</li>
     *   <li>lock files</li>
     * </ul>
     * And then <tt>true</tt> for directories.
     *
     * @param file        the file
     * @param isDirectory whether the file is a directory or a file
     * @param files       files in the directory
     * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
     */
    protected boolean isMatched(GenericFile<T> file, boolean isDirectory, List<T> files) {
        String name = file.getFileNameOnly();

        // folders/names starting with dot is always skipped (eg. ".", ".camel", ".camelLock")
        if (name.startsWith(".")) {
            return false;
        }

        // lock files should be skipped
        if (name.endsWith(FileComponent.DEFAULT_LOCK_FILE_POSTFIX)) {
            return false;
        }

        if (endpoint.getFilter() != null) {
            if (!endpoint.getFilter().accept(file)) {
                return false;
            }
        }

        if (endpoint.getAntFilter() != null) {
            if (!endpoint.getAntFilter().accept(file)) {
                return false;
            }
        }

        // directories are regarded as matched if filter accepted them
        if (isDirectory) {
            return true;
        }

        if (ObjectHelper.isNotEmpty(endpoint.getExclude())) {
            if (name.matches(endpoint.getExclude())) {
                return false;
            }
        }

        if (ObjectHelper.isNotEmpty(endpoint.getInclude())) {
            if (!name.matches(endpoint.getInclude())) {
                return false;
            }
        }

        // use file expression for a simple dynamic file filter
        if (endpoint.getFileName() != null) {
            fileExpressionResult = evaluateFileExpression();
            if (fileExpressionResult != null) {
                if (!name.equals(fileExpressionResult)) {
                    return false;
                }
            }
        }

        // if done file name is enabled, then the file is only valid if a done file exists
        if (endpoint.getDoneFileName() != null) {
            // done file must be in same path as the file
            String doneFileName = endpoint.createDoneFileName(file.getAbsoluteFilePath());
            ObjectHelper.notEmpty(doneFileName, "doneFileName", endpoint);

            // is it a done file name?
            if (endpoint.isDoneFile(file.getFileNameOnly())) {
                log.trace("Skipping done file: {}", file);
                return false;
            }

            if (!isMatched(file, doneFileName, files)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Strategy to perform file matching based on endpoint configuration in terms of done file name.
     *
     * @param file         the file
     * @param doneFileName the done file name (without any paths)
     * @param files        files in the directory
     * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
     */
    protected abstract boolean isMatched(GenericFile<T> file, String doneFileName, List<T> files);

    /**
     * Is the given file already in progress.
     * <p/>
     * Note: this check is implemented by adding the file to the in-progress repository, so a file
     * that was not in progress is registered as in progress as a side effect.
     *
     * @param file the file
     * @return <tt>true</tt> if the file is already in progress
     * @deprecated no longer in use, use {@link org.apache.camel.component.file.GenericFileEndpoint#getInProgressRepository()} instead.
     */
    @Deprecated
    protected boolean isInProgress(GenericFile<T> file) {
        String key = file.getAbsoluteFilePath();
        // must use add, to have operation as atomic
        return !endpoint.getInProgressRepository().add(key);
    }

    /**
     * Evaluates the endpoint's file name expression, caching the result in
     * {@link #fileExpressionResult} so it is only evaluated again once that field has been reset.
     *
     * @return the cached or freshly evaluated result, or <tt>null</tt> if nothing is cached and
     *         no file name expression is configured
     */
    protected String evaluateFileExpression() {
        if (fileExpressionResult == null && endpoint.getFileName() != null) {
            // create a dummy exchange as Exchange is needed for expression evaluation
            Exchange dummy = endpoint.createExchange();
            fileExpressionResult = endpoint.getFileName().evaluate(dummy, String.class);
        }
        return fileExpressionResult;
    }

    /**
     * Reads the file stored on the exchange under {@link FileComponent#FILE_EXCHANGE_FILE}.
     *
     * @param exchange the exchange
     * @return the file property value, cast unchecked to the consumer's file type
     */
    @SuppressWarnings("unchecked")
    private GenericFile<T> getExchangeFileProperty(Exchange exchange) {
        return (GenericFile<T>) exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE);
    }

    @Override
    protected void doStart() throws Exception {
        super.doStart();
    }

    @Override
    protected void doStop() throws Exception {
        // force the process strategy to be prepared again on the first poll after a restart
        prepareOnStartup = false;
        super.doStop();
    }
}