001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.component.file; 018 019import java.util.ArrayList; 020import java.util.Collections; 021import java.util.Deque; 022import java.util.LinkedList; 023import java.util.List; 024import java.util.Queue; 025 026import org.apache.camel.AsyncCallback; 027import org.apache.camel.Exchange; 028import org.apache.camel.Processor; 029import org.apache.camel.ShutdownRunningTask; 030import org.apache.camel.impl.ScheduledBatchPollingConsumer; 031import org.apache.camel.spi.UriParam; 032import org.apache.camel.util.CastUtils; 033import org.apache.camel.util.ObjectHelper; 034import org.apache.camel.util.StopWatch; 035import org.apache.camel.util.TimeUtils; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038 039/** 040 * Base class for file consumers. 041 */ 042public abstract class GenericFileConsumer<T> extends ScheduledBatchPollingConsumer { 043 protected final Logger log = LoggerFactory.getLogger(getClass()); 044 protected GenericFileEndpoint<T> endpoint; 045 protected GenericFileOperations<T> operations; 046 protected volatile boolean loggedIn; 047 protected String fileExpressionResult; 048 protected volatile ShutdownRunningTask shutdownRunningTask; 049 protected volatile int pendingExchanges; 050 protected Processor customProcessor; 051 @UriParam 052 protected boolean eagerLimitMaxMessagesPerPoll = true; 053 protected volatile boolean prepareOnStartup; 054 055 public GenericFileConsumer(GenericFileEndpoint<T> endpoint, Processor processor, GenericFileOperations<T> operations) { 056 super(endpoint, processor); 057 this.endpoint = endpoint; 058 this.operations = operations; 059 } 060 061 public Processor getCustomProcessor() { 062 return customProcessor; 063 } 064 065 /** 066 * Use a custom processor to process the exchange. 067 * <p/> 068 * Only set this if you need to do custom processing, instead of the regular processing. 069 * <p/> 070 * This is for example used to browse file endpoints by leveraging the file consumer to poll 071 * the directory to gather the list of exchanges. But to avoid processing the files regularly 072 * we can use a custom processor. 073 * 074 * @param processor a custom processor 075 */ 076 public void setCustomProcessor(Processor processor) { 077 this.customProcessor = processor; 078 } 079 080 public boolean isEagerLimitMaxMessagesPerPoll() { 081 return eagerLimitMaxMessagesPerPoll; 082 } 083 084 public void setEagerLimitMaxMessagesPerPoll(boolean eagerLimitMaxMessagesPerPoll) { 085 this.eagerLimitMaxMessagesPerPoll = eagerLimitMaxMessagesPerPoll; 086 } 087 088 /** 089 * Poll for files 090 */ 091 protected int poll() throws Exception { 092 // must prepare on startup the very first time 093 if (!prepareOnStartup) { 094 // prepare on startup 095 endpoint.getGenericFileProcessStrategy().prepareOnStartup(operations, endpoint); 096 prepareOnStartup = true; 097 } 098 099 // must reset for each poll 100 fileExpressionResult = null; 101 shutdownRunningTask = null; 102 pendingExchanges = 0; 103 104 // before we poll is there anything we need to check? 105 // such as are we connected to the FTP Server still? 106 if (!prePollCheck()) { 107 log.debug("Skipping poll as pre poll check returned false"); 108 return 0; 109 } 110 111 // gather list of files to process 112 List<GenericFile<T>> files = new ArrayList<GenericFile<T>>(); 113 String name = endpoint.getConfiguration().getDirectory(); 114 115 // time how long time it takes to poll 116 StopWatch stop = new StopWatch(); 117 boolean limitHit; 118 try { 119 limitHit = !pollDirectory(name, files, 0); 120 } catch (Exception e) { 121 // during poll directory we add files to the in progress repository, in case of any exception thrown after this work 122 // we must then drain the in progress files before rethrowing the exception 123 log.debug("Error occurred during poll directory: " + name + " due " + e.getMessage() + ". Removing " + files.size() + " files marked as in-progress."); 124 removeExcessiveInProgressFiles(files); 125 throw e; 126 } 127 128 long delta = stop.stop(); 129 if (log.isDebugEnabled()) { 130 log.debug("Took {} to poll: {}", TimeUtils.printDuration(delta), name); 131 } 132 133 // log if we hit the limit 134 if (limitHit) { 135 log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll); 136 } 137 138 // sort files using file comparator if provided 139 if (endpoint.getSorter() != null) { 140 Collections.sort(files, endpoint.getSorter()); 141 } 142 143 // sort using build in sorters so we can use expressions 144 // use a linked list so we can dequeue the exchanges 145 LinkedList<Exchange> exchanges = new LinkedList<Exchange>(); 146 for (GenericFile<T> file : files) { 147 Exchange exchange = endpoint.createExchange(file); 148 endpoint.configureExchange(exchange); 149 endpoint.configureMessage(file, exchange.getIn()); 150 exchanges.add(exchange); 151 } 152 // sort files using exchange comparator if provided 153 if (endpoint.getSortBy() != null) { 154 Collections.sort(exchanges, endpoint.getSortBy()); 155 } 156 157 // use a queue for the exchanges 158 Deque<Exchange> q = exchanges; 159 160 // we are not eager limiting, but we have configured a limit, so cut the list of files 161 if (!eagerLimitMaxMessagesPerPoll && maxMessagesPerPoll > 0) { 162 if (files.size() > maxMessagesPerPoll) { 163 log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll); 164 // must first remove excessive files from the in progress repository 165 removeExcessiveInProgressFiles(q, maxMessagesPerPoll); 166 } 167 } 168 169 // consume files one by one 170 int total = exchanges.size(); 171 if (total > 0) { 172 log.debug("Total {} files to consume", total); 173 } 174 175 int polledMessages = processBatch(CastUtils.cast(q)); 176 177 postPollCheck(polledMessages); 178 179 return polledMessages; 180 } 181 182 public int processBatch(Queue<Object> exchanges) { 183 int total = exchanges.size(); 184 int answer = total; 185 186 // limit if needed 187 if (maxMessagesPerPoll > 0 && total > maxMessagesPerPoll) { 188 log.debug("Limiting to maximum messages to poll {} as there was {} messages in this poll.", maxMessagesPerPoll, total); 189 total = maxMessagesPerPoll; 190 } 191 192 for (int index = 0; index < total && isBatchAllowed(); index++) { 193 // only loop if we are started (allowed to run) 194 // use poll to remove the head so it does not consume memory even after we have processed it 195 Exchange exchange = (Exchange) exchanges.poll(); 196 // add current index and total as properties 197 exchange.setProperty(Exchange.BATCH_INDEX, index); 198 exchange.setProperty(Exchange.BATCH_SIZE, total); 199 exchange.setProperty(Exchange.BATCH_COMPLETE, index == total - 1); 200 201 // update pending number of exchanges 202 pendingExchanges = total - index - 1; 203 204 // process the current exchange 205 boolean started; 206 if (customProcessor != null) { 207 // use a custom processor 208 started = customProcessExchange(exchange, customProcessor); 209 } else { 210 // process the exchange regular 211 started = processExchange(exchange); 212 } 213 214 // if we did not start process the file then decrement the counter 215 if (!started) { 216 answer--; 217 } 218 } 219 220 // drain any in progress files as we are done with this batch 221 removeExcessiveInProgressFiles(CastUtils.cast((Deque<?>) exchanges, Exchange.class), 0); 222 223 return answer; 224 } 225 226 /** 227 * Drain any in progress files as we are done with this batch 228 * 229 * @param exchanges the exchanges 230 * @param limit the limit 231 */ 232 protected void removeExcessiveInProgressFiles(Deque<Exchange> exchanges, int limit) { 233 // remove the file from the in progress list in case the batch was limited by max messages per poll 234 while (exchanges.size() > limit) { 235 // must remove last 236 Exchange exchange = exchanges.removeLast(); 237 GenericFile<?> file = exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE, GenericFile.class); 238 String key = file.getAbsoluteFilePath(); 239 endpoint.getInProgressRepository().remove(key); 240 } 241 } 242 243 /** 244 * Drain any in progress files as we are done with the files 245 * 246 * @param files the files 247 */ 248 protected void removeExcessiveInProgressFiles(List<GenericFile<T>> files) { 249 for (GenericFile file : files) { 250 String key = file.getAbsoluteFilePath(); 251 endpoint.getInProgressRepository().remove(key); 252 } 253 } 254 255 /** 256 * Whether or not we can continue polling for more files 257 * 258 * @param fileList the current list of gathered files 259 * @return <tt>true</tt> to continue, <tt>false</tt> to stop due hitting maxMessagesPerPoll limit 260 */ 261 public boolean canPollMoreFiles(List<?> fileList) { 262 // at this point we should not limit if we are not eager 263 if (!eagerLimitMaxMessagesPerPoll) { 264 return true; 265 } 266 267 if (maxMessagesPerPoll <= 0) { 268 // no limitation 269 return true; 270 } 271 272 // then only poll if we haven't reached the max limit 273 return fileList.size() < maxMessagesPerPoll; 274 } 275 276 /** 277 * Override if required. Perform some checks (and perhaps actions) before we poll. 278 * 279 * @return <tt>true</tt> to poll, <tt>false</tt> to skip this poll. 280 */ 281 protected boolean prePollCheck() throws Exception { 282 return true; 283 } 284 285 /** 286 * Override if required. Perform some checks (and perhaps actions) after we have polled. 287 * 288 * @param polledMessages number of polled messages 289 */ 290 protected void postPollCheck(int polledMessages) { 291 // noop 292 } 293 294 /** 295 * Polls the given directory for files to process 296 * 297 * @param fileName current directory or file 298 * @param fileList current list of files gathered 299 * @param depth the current depth of the directory (will start from 0) 300 * @return whether or not to continue polling, <tt>false</tt> means the maxMessagesPerPoll limit has been hit 301 */ 302 protected abstract boolean pollDirectory(String fileName, List<GenericFile<T>> fileList, int depth); 303 304 /** 305 * Sets the operations to be used. 306 * <p/> 307 * Can be used to set a fresh operations in case of recovery attempts 308 * 309 * @param operations the operations 310 */ 311 public void setOperations(GenericFileOperations<T> operations) { 312 this.operations = operations; 313 } 314 315 /** 316 * Whether to ignore if the file cannot be retrieved. 317 * <p/> 318 * By default an {@link GenericFileOperationFailedException} is thrown if the file cannot be retrieved. 319 * <p/> 320 * This method allows to suppress this and just ignore that. 321 * 322 * @param name the file name 323 * @param exchange the exchange 324 * @param cause optional exception occurred during retrieving file 325 * @return <tt>true</tt> to ignore, <tt>false</tt> is the default. 326 */ 327 protected boolean ignoreCannotRetrieveFile(String name, Exchange exchange, Exception cause) { 328 return false; 329 } 330 331 /** 332 * Processes the exchange 333 * 334 * @param exchange the exchange 335 * @return <tt>true</tt> if the file was started to be processed, <tt>false</tt> if the file was not started 336 * to be processed, for some reason (not found, or aborted etc) 337 */ 338 protected boolean processExchange(final Exchange exchange) { 339 GenericFile<T> file = getExchangeFileProperty(exchange); 340 log.trace("Processing file: {}", file); 341 342 // must extract the absolute name before the begin strategy as the file could potentially be pre moved 343 // and then the file name would be changed 344 String absoluteFileName = file.getAbsoluteFilePath(); 345 346 // check if we can begin processing the file 347 final GenericFileProcessStrategy<T> processStrategy = endpoint.getGenericFileProcessStrategy(); 348 349 Exception beginCause = null; 350 boolean begin = false; 351 try { 352 begin = processStrategy.begin(operations, endpoint, exchange, file); 353 } catch (Exception e) { 354 beginCause = e; 355 } 356 357 if (!begin) { 358 // no something was wrong, so we need to abort and remove the file from the in progress list 359 Exception abortCause = null; 360 log.debug("{} cannot begin processing file: {}", endpoint, file); 361 try { 362 // abort 363 processStrategy.abort(operations, endpoint, exchange, file); 364 } catch (Exception e) { 365 abortCause = e; 366 } finally { 367 // begin returned false, so remove file from the in progress list as its no longer in progress 368 endpoint.getInProgressRepository().remove(absoluteFileName); 369 } 370 if (beginCause != null) { 371 String msg = endpoint + " cannot begin processing file: " + file + " due to: " + beginCause.getMessage(); 372 handleException(msg, beginCause); 373 } 374 if (abortCause != null) { 375 String msg2 = endpoint + " cannot abort processing file: " + file + " due to: " + abortCause.getMessage(); 376 handleException(msg2, abortCause); 377 } 378 return false; 379 } 380 381 // must use file from exchange as it can be updated due the 382 // preMoveNamePrefix/preMoveNamePostfix options 383 final GenericFile<T> target = getExchangeFileProperty(exchange); 384 // must use full name when downloading so we have the correct path 385 final String name = target.getAbsoluteFilePath(); 386 try { 387 388 if (isRetrieveFile()) { 389 // retrieve the file using the stream 390 log.trace("Retrieving file: {} from: {}", name, endpoint); 391 392 // retrieve the file and check it was a success 393 boolean retrieved; 394 Exception cause = null; 395 try { 396 retrieved = operations.retrieveFile(name, exchange); 397 } catch (Exception e) { 398 retrieved = false; 399 cause = e; 400 } 401 402 if (!retrieved) { 403 if (ignoreCannotRetrieveFile(name, exchange, cause)) { 404 log.trace("Cannot retrieve file {} maybe it does not exists. Ignoring.", name); 405 // remove file from the in progress list as we could not retrieve it, but should ignore 406 endpoint.getInProgressRepository().remove(absoluteFileName); 407 return false; 408 } else { 409 // throw exception to handle the problem with retrieving the file 410 // then if the method return false or throws an exception is handled the same in here 411 // as in both cases an exception is being thrown 412 if (cause != null && cause instanceof GenericFileOperationFailedException) { 413 throw cause; 414 } else { 415 throw new GenericFileOperationFailedException("Cannot retrieve file: " + file + " from: " + endpoint, cause); 416 } 417 } 418 } 419 420 log.trace("Retrieved file: {} from: {}", name, endpoint); 421 } else { 422 log.trace("Skipped retrieval of file: {} from: {}", name, endpoint); 423 exchange.getIn().setBody(null); 424 } 425 426 // register on completion callback that does the completion strategies 427 // (for instance to move the file after we have processed it) 428 exchange.addOnCompletion(new GenericFileOnCompletion<T>(endpoint, operations, target, absoluteFileName)); 429 430 log.debug("About to process file: {} using exchange: {}", target, exchange); 431 432 if (endpoint.isSynchronous()) { 433 // process synchronously 434 getProcessor().process(exchange); 435 } else { 436 // process the exchange using the async consumer to support async routing engine 437 // which can be supported by this file consumer as all the done work is 438 // provided in the GenericFileOnCompletion 439 getAsyncProcessor().process(exchange, new AsyncCallback() { 440 public void done(boolean doneSync) { 441 // noop 442 if (log.isTraceEnabled()) { 443 log.trace("Done processing file: {} {}", target, doneSync ? "synchronously" : "asynchronously"); 444 } 445 } 446 }); 447 } 448 449 } catch (Exception e) { 450 // remove file from the in progress list due to failure 451 // (cannot be in finally block due to GenericFileOnCompletion will remove it 452 // from in progress when it takes over and processes the file, which may happen 453 // by another thread at a later time. So its only safe to remove it if there was an exception) 454 endpoint.getInProgressRepository().remove(absoluteFileName); 455 456 String msg = "Error processing file " + file + " due to " + e.getMessage(); 457 handleException(msg, e); 458 } 459 460 return true; 461 } 462 463 /** 464 * Override if required. Files are retrieved / returns true by default 465 * 466 * @return <tt>true</tt> to retrieve files, <tt>false</tt> to skip retrieval of files. 467 */ 468 protected boolean isRetrieveFile() { 469 return true; 470 } 471 472 /** 473 * Processes the exchange using a custom processor. 474 * 475 * @param exchange the exchange 476 * @param processor the custom processor 477 */ 478 protected boolean customProcessExchange(final Exchange exchange, final Processor processor) { 479 GenericFile<T> file = getExchangeFileProperty(exchange); 480 log.trace("Custom processing file: {}", file); 481 482 // must extract the absolute name before the begin strategy as the file could potentially be pre moved 483 // and then the file name would be changed 484 String absoluteFileName = file.getAbsoluteFilePath(); 485 486 try { 487 // process using the custom processor 488 processor.process(exchange); 489 } catch (Exception e) { 490 if (log.isDebugEnabled()) { 491 log.debug(endpoint + " error custom processing: " + file + " due to: " + e.getMessage() + ". This exception will be ignored.", e); 492 } 493 handleException(e); 494 } finally { 495 // always remove file from the in progress list as its no longer in progress 496 // use the original file name that was used to add it to the repository 497 // as the name can be different when using preMove option 498 endpoint.getInProgressRepository().remove(absoluteFileName); 499 } 500 501 return true; 502 } 503 504 /** 505 * Strategy for validating if the given remote file should be included or not 506 * 507 * @param file the file 508 * @param isDirectory whether the file is a directory or a file 509 * @param files files in the directory 510 * @return <tt>true</tt> to include the file, <tt>false</tt> to skip it 511 */ 512 protected boolean isValidFile(GenericFile<T> file, boolean isDirectory, List<T> files) { 513 String absoluteFilePath = file.getAbsoluteFilePath(); 514 515 if (!isMatched(file, isDirectory, files)) { 516 log.trace("File did not match. Will skip this file: {}", file); 517 return false; 518 } 519 520 // directory is always valid 521 if (isDirectory) { 522 return true; 523 } 524 525 // check if file is already in progress 526 if (endpoint.getInProgressRepository().contains(absoluteFilePath)) { 527 if (log.isTraceEnabled()) { 528 log.trace("Skipping as file is already in progress: {}", file.getFileName()); 529 } 530 return false; 531 } 532 533 // if its a file then check we have the file in the idempotent registry already 534 if (endpoint.isIdempotent()) { 535 // use absolute file path as default key, but evaluate if an expression key was configured 536 String key = file.getAbsoluteFilePath(); 537 if (endpoint.getIdempotentKey() != null) { 538 Exchange dummy = endpoint.createExchange(file); 539 key = endpoint.getIdempotentKey().evaluate(dummy, String.class); 540 } 541 if (key != null && endpoint.getIdempotentRepository().contains(key)) { 542 log.trace("This consumer is idempotent and the file has been consumed before matching idempotentKey: {}. Will skip this file: {}", key, file); 543 return false; 544 } 545 } 546 547 // okay so final step is to be able to add atomic as in-progress, so we are the 548 // only thread processing this file 549 return endpoint.getInProgressRepository().add(absoluteFilePath); 550 } 551 552 /** 553 * Strategy to perform file matching based on endpoint configuration. 554 * <p/> 555 * Will always return <tt>false</tt> for certain files/folders: 556 * <ul> 557 * <li>Starting with a dot</li> 558 * <li>lock files</li> 559 * </ul> 560 * And then <tt>true</tt> for directories. 561 * 562 * @param file the file 563 * @param isDirectory whether the file is a directory or a file 564 * @param files files in the directory 565 * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not 566 */ 567 protected boolean isMatched(GenericFile<T> file, boolean isDirectory, List<T> files) { 568 String name = file.getFileNameOnly(); 569 570 // folders/names starting with dot is always skipped (eg. ".", ".camel", ".camelLock") 571 if (name.startsWith(".")) { 572 return false; 573 } 574 575 // lock files should be skipped 576 if (name.endsWith(FileComponent.DEFAULT_LOCK_FILE_POSTFIX)) { 577 return false; 578 } 579 580 if (endpoint.getFilter() != null) { 581 if (!endpoint.getFilter().accept(file)) { 582 return false; 583 } 584 } 585 586 if (endpoint.getAntFilter() != null) { 587 if (!endpoint.getAntFilter().accept(file)) { 588 return false; 589 } 590 } 591 592 // directories are regarded as matched if filter accepted them 593 if (isDirectory) { 594 return true; 595 } 596 597 if (ObjectHelper.isNotEmpty(endpoint.getExclude())) { 598 if (name.matches(endpoint.getExclude())) { 599 return false; 600 } 601 } 602 603 if (ObjectHelper.isNotEmpty(endpoint.getInclude())) { 604 if (!name.matches(endpoint.getInclude())) { 605 return false; 606 } 607 } 608 609 // use file expression for a simple dynamic file filter 610 if (endpoint.getFileName() != null) { 611 fileExpressionResult = evaluateFileExpression(); 612 if (fileExpressionResult != null) { 613 if (!name.equals(fileExpressionResult)) { 614 return false; 615 } 616 } 617 } 618 619 // if done file name is enabled, then the file is only valid if a done file exists 620 if (endpoint.getDoneFileName() != null) { 621 // done file must be in same path as the file 622 String doneFileName = endpoint.createDoneFileName(file.getAbsoluteFilePath()); 623 ObjectHelper.notEmpty(doneFileName, "doneFileName", endpoint); 624 625 // is it a done file name? 626 if (endpoint.isDoneFile(file.getFileNameOnly())) { 627 log.trace("Skipping done file: {}", file); 628 return false; 629 } 630 631 if (!isMatched(file, doneFileName, files)) { 632 return false; 633 } 634 } 635 636 return true; 637 } 638 639 /** 640 * Strategy to perform file matching based on endpoint configuration in terms of done file name. 641 * 642 * @param file the file 643 * @param doneFileName the done file name (without any paths) 644 * @param files files in the directory 645 * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not 646 */ 647 protected abstract boolean isMatched(GenericFile<T> file, String doneFileName, List<T> files); 648 649 /** 650 * Is the given file already in progress. 651 * 652 * @param file the file 653 * @return <tt>true</tt> if the file is already in progress 654 * @deprecated no longer in use, use {@link org.apache.camel.component.file.GenericFileEndpoint#getInProgressRepository()} instead. 655 */ 656 @Deprecated 657 protected boolean isInProgress(GenericFile<T> file) { 658 String key = file.getAbsoluteFilePath(); 659 // must use add, to have operation as atomic 660 return !endpoint.getInProgressRepository().add(key); 661 } 662 663 protected String evaluateFileExpression() { 664 if (fileExpressionResult == null && endpoint.getFileName() != null) { 665 // create a dummy exchange as Exchange is needed for expression evaluation 666 Exchange dummy = endpoint.createExchange(); 667 fileExpressionResult = endpoint.getFileName().evaluate(dummy, String.class); 668 } 669 return fileExpressionResult; 670 } 671 672 @SuppressWarnings("unchecked") 673 private GenericFile<T> getExchangeFileProperty(Exchange exchange) { 674 return (GenericFile<T>) exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE); 675 } 676 677 @Override 678 protected void doStart() throws Exception { 679 super.doStart(); 680 } 681 682 @Override 683 protected void doStop() throws Exception { 684 prepareOnStartup = false; 685 super.doStop(); 686 } 687}