001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.component.file;
018
019import java.util.ArrayList;
020import java.util.Collections;
021import java.util.Deque;
022import java.util.LinkedList;
023import java.util.List;
024import java.util.Queue;
025
026import org.apache.camel.AsyncCallback;
027import org.apache.camel.Exchange;
028import org.apache.camel.Processor;
029import org.apache.camel.ShutdownRunningTask;
030import org.apache.camel.impl.ScheduledBatchPollingConsumer;
031import org.apache.camel.spi.UriParam;
032import org.apache.camel.util.CastUtils;
033import org.apache.camel.util.ObjectHelper;
034import org.apache.camel.util.StopWatch;
035import org.apache.camel.util.TimeUtils;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039/**
040 * Base class for file consumers.
041 */
042public abstract class GenericFileConsumer<T> extends ScheduledBatchPollingConsumer {
043    protected final Logger log = LoggerFactory.getLogger(getClass());
044    protected GenericFileEndpoint<T> endpoint;
045    protected GenericFileOperations<T> operations;
046    protected volatile boolean loggedIn;
047    protected String fileExpressionResult;
048    protected volatile ShutdownRunningTask shutdownRunningTask;
049    protected volatile int pendingExchanges;
050    protected Processor customProcessor;
051    @UriParam
052    protected boolean eagerLimitMaxMessagesPerPoll = true;
053    protected volatile boolean prepareOnStartup;
054
055    public GenericFileConsumer(GenericFileEndpoint<T> endpoint, Processor processor, GenericFileOperations<T> operations) {
056        super(endpoint, processor);
057        this.endpoint = endpoint;
058        this.operations = operations;
059    }
060
061    public Processor getCustomProcessor() {
062        return customProcessor;
063    }
064
065    /**
066     * Use a custom processor to process the exchange.
067     * <p/>
068     * Only set this if you need to do custom processing, instead of the regular processing.
069     * <p/>
070     * This is for example used to browse file endpoints by leveraging the file consumer to poll
071     * the directory to gather the list of exchanges. But to avoid processing the files regularly
072     * we can use a custom processor.
073     *
074     * @param processor a custom processor
075     */
076    public void setCustomProcessor(Processor processor) {
077        this.customProcessor = processor;
078    }
079
080    public boolean isEagerLimitMaxMessagesPerPoll() {
081        return eagerLimitMaxMessagesPerPoll;
082    }
083
084    public void setEagerLimitMaxMessagesPerPoll(boolean eagerLimitMaxMessagesPerPoll) {
085        this.eagerLimitMaxMessagesPerPoll = eagerLimitMaxMessagesPerPoll;
086    }
087
088    /**
089     * Poll for files
090     */
091    protected int poll() throws Exception {
092        // must prepare on startup the very first time
093        if (!prepareOnStartup) {
094            // prepare on startup
095            endpoint.getGenericFileProcessStrategy().prepareOnStartup(operations, endpoint);
096            prepareOnStartup = true;
097        }
098
099        // must reset for each poll
100        fileExpressionResult = null;
101        shutdownRunningTask = null;
102        pendingExchanges = 0;
103
104        // before we poll is there anything we need to check?
105        // such as are we connected to the FTP Server still?
106        if (!prePollCheck()) {
107            log.debug("Skipping poll as pre poll check returned false");
108            return 0;
109        }
110
111        // gather list of files to process
112        List<GenericFile<T>> files = new ArrayList<GenericFile<T>>();
113        String name = endpoint.getConfiguration().getDirectory();
114
115        // time how long time it takes to poll
116        StopWatch stop = new StopWatch();
117        boolean limitHit;
118        try {
119            limitHit = !pollDirectory(name, files, 0);
120        } catch (Exception e) {
121            // during poll directory we add files to the in progress repository, in case of any exception thrown after this work
122            // we must then drain the in progress files before rethrowing the exception
123            log.debug("Error occurred during poll directory: " + name + " due " + e.getMessage() + ". Removing " + files.size() + " files marked as in-progress.");
124            removeExcessiveInProgressFiles(files);
125            throw e;
126        }
127
128        long delta = stop.stop();
129        if (log.isDebugEnabled()) {
130            log.debug("Took {} to poll: {}", TimeUtils.printDuration(delta), name);
131        }
132
133        // log if we hit the limit
134        if (limitHit) {
135            log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll);
136        }
137
138        // sort files using file comparator if provided
139        if (endpoint.getSorter() != null) {
140            Collections.sort(files, endpoint.getSorter());
141        }
142
143        // sort using build in sorters so we can use expressions
144        // use a linked list so we can dequeue the exchanges
145        LinkedList<Exchange> exchanges = new LinkedList<Exchange>();
146        for (GenericFile<T> file : files) {
147            Exchange exchange = endpoint.createExchange(file);
148            endpoint.configureExchange(exchange);
149            endpoint.configureMessage(file, exchange.getIn());
150            exchanges.add(exchange);
151        }
152        // sort files using exchange comparator if provided
153        if (endpoint.getSortBy() != null) {
154            Collections.sort(exchanges, endpoint.getSortBy());
155        }
156        if (endpoint.isShuffle()) {
157            Collections.shuffle(exchanges);
158        }
159
160        // use a queue for the exchanges
161        Deque<Exchange> q = exchanges;
162
163        // we are not eager limiting, but we have configured a limit, so cut the list of files
164        if (!eagerLimitMaxMessagesPerPoll && maxMessagesPerPoll > 0) {
165            if (files.size() > maxMessagesPerPoll) {
166                log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll);
167                // must first remove excessive files from the in progress repository
168                removeExcessiveInProgressFiles(q, maxMessagesPerPoll);
169            }
170        }
171
172        // consume files one by one
173        int total = exchanges.size();
174        if (total > 0) {
175            log.debug("Total {} files to consume", total);
176        }
177
178        int polledMessages = processBatch(CastUtils.cast(q));
179
180        postPollCheck(polledMessages);
181
182        return polledMessages;
183    }
184
185    public int processBatch(Queue<Object> exchanges) {
186        int total = exchanges.size();
187        int answer = total;
188
189        // limit if needed
190        if (maxMessagesPerPoll > 0 && total > maxMessagesPerPoll) {
191            log.debug("Limiting to maximum messages to poll {} as there was {} messages in this poll.", maxMessagesPerPoll, total);
192            total = maxMessagesPerPoll;
193        }
194
195        for (int index = 0; index < total && isBatchAllowed(); index++) {
196            // only loop if we are started (allowed to run)
197            // use poll to remove the head so it does not consume memory even after we have processed it
198            Exchange exchange = (Exchange) exchanges.poll();
199            // add current index and total as properties
200            exchange.setProperty(Exchange.BATCH_INDEX, index);
201            exchange.setProperty(Exchange.BATCH_SIZE, total);
202            exchange.setProperty(Exchange.BATCH_COMPLETE, index == total - 1);
203
204            // update pending number of exchanges
205            pendingExchanges = total - index - 1;
206
207            // process the current exchange
208            boolean started;
209            if (customProcessor != null) {
210                // use a custom processor
211                started = customProcessExchange(exchange, customProcessor);
212            } else {
213                // process the exchange regular
214                started = processExchange(exchange);
215            }
216
217            // if we did not start process the file then decrement the counter
218            if (!started) {
219                answer--;
220            }
221        }
222
223        // drain any in progress files as we are done with this batch
224        removeExcessiveInProgressFiles(CastUtils.cast((Deque<?>) exchanges, Exchange.class), 0);
225
226        return answer;
227    }
228
229    /**
230     * Drain any in progress files as we are done with this batch
231     *
232     * @param exchanges  the exchanges
233     * @param limit      the limit
234     */
235    protected void removeExcessiveInProgressFiles(Deque<Exchange> exchanges, int limit) {
236        // remove the file from the in progress list in case the batch was limited by max messages per poll
237        while (exchanges.size() > limit) {
238            // must remove last
239            Exchange exchange = exchanges.removeLast();
240            GenericFile<?> file = exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE, GenericFile.class);
241            String key = file.getAbsoluteFilePath();
242            endpoint.getInProgressRepository().remove(key);
243        }
244    }
245
246    /**
247     * Drain any in progress files as we are done with the files
248     *
249     * @param files  the files
250     */
251    protected void removeExcessiveInProgressFiles(List<GenericFile<T>> files) {
252        for (GenericFile file : files) {
253            String key = file.getAbsoluteFilePath();
254            endpoint.getInProgressRepository().remove(key);
255        }
256    }
257
258    /**
259     * Whether or not we can continue polling for more files
260     *
261     * @param fileList  the current list of gathered files
262     * @return <tt>true</tt> to continue, <tt>false</tt> to stop due hitting maxMessagesPerPoll limit
263     */
264    public boolean canPollMoreFiles(List<?> fileList) {
265        // at this point we should not limit if we are not eager
266        if (!eagerLimitMaxMessagesPerPoll) {
267            return true;
268        }
269
270        if (maxMessagesPerPoll <= 0) {
271            // no limitation
272            return true;
273        }
274
275        // then only poll if we haven't reached the max limit
276        return fileList.size() < maxMessagesPerPoll;
277    }
278
279    /**
280     * Override if required. Perform some checks (and perhaps actions) before we poll.
281     *
282     * @return <tt>true</tt> to poll, <tt>false</tt> to skip this poll.
283     */
284    protected boolean prePollCheck() throws Exception {
285        return true;
286    }
287
288    /**
289     * Override if required. Perform some checks (and perhaps actions) after we have polled.
290     *
291     * @param polledMessages number of polled messages
292     */
293    protected void postPollCheck(int polledMessages) {
294        // noop
295    }
296
297    /**
298     * Polls the given directory for files to process
299     *
300     * @param fileName current directory or file
301     * @param fileList current list of files gathered
302     * @param depth the current depth of the directory (will start from 0)
303     * @return whether or not to continue polling, <tt>false</tt> means the maxMessagesPerPoll limit has been hit
304     */
305    protected abstract boolean pollDirectory(String fileName, List<GenericFile<T>> fileList, int depth);
306
307    /**
308     * Sets the operations to be used.
309     * <p/>
310     * Can be used to set a fresh operations in case of recovery attempts
311     *
312     * @param operations the operations
313     */
314    public void setOperations(GenericFileOperations<T> operations) {
315        this.operations = operations;
316    }
317
318    /**
319     * Whether to ignore if the file cannot be retrieved.
320     * <p/>
321     * By default an {@link GenericFileOperationFailedException} is thrown if the file cannot be retrieved.
322     * <p/>
323     * This method allows to suppress this and just ignore that.
324     *
325     * @param name        the file name
326     * @param exchange    the exchange
327     * @param cause       optional exception occurred during retrieving file
328     * @return <tt>true</tt> to ignore, <tt>false</tt> is the default.
329     */
330    protected boolean ignoreCannotRetrieveFile(String name, Exchange exchange, Exception cause) {
331        return false;
332    }
333
334    /**
335     * Processes the exchange
336     *
337     * @param exchange the exchange
338     * @return <tt>true</tt> if the file was started to be processed, <tt>false</tt> if the file was not started
339     * to be processed, for some reason (not found, or aborted etc)
340     */
341    protected boolean processExchange(final Exchange exchange) {
342        GenericFile<T> file = getExchangeFileProperty(exchange);
343        log.trace("Processing file: {}", file);
344
345        // must extract the absolute name before the begin strategy as the file could potentially be pre moved
346        // and then the file name would be changed
347        String absoluteFileName = file.getAbsoluteFilePath();
348
349        // check if we can begin processing the file
350        final GenericFileProcessStrategy<T> processStrategy = endpoint.getGenericFileProcessStrategy();
351
352        Exception beginCause = null;
353        boolean begin = false;
354        try {
355            begin = processStrategy.begin(operations, endpoint, exchange, file);
356        } catch (Exception e) {
357            beginCause = e;
358        }
359
360        if (!begin) {
361            // no something was wrong, so we need to abort and remove the file from the in progress list
362            Exception abortCause = null;
363            log.debug("{} cannot begin processing file: {}", endpoint, file);
364            try {
365                // abort
366                processStrategy.abort(operations, endpoint, exchange, file);
367            } catch (Exception e) {
368                abortCause = e;
369            } finally {
370                // begin returned false, so remove file from the in progress list as its no longer in progress
371                endpoint.getInProgressRepository().remove(absoluteFileName);
372            }
373            if (beginCause != null) {
374                String msg = endpoint + " cannot begin processing file: " + file + " due to: " + beginCause.getMessage();
375                handleException(msg, beginCause);
376            }
377            if (abortCause != null) {
378                String msg2 = endpoint + " cannot abort processing file: " + file + " due to: " + abortCause.getMessage();
379                handleException(msg2, abortCause);
380            }
381            return false;
382        }
383
384        // must use file from exchange as it can be updated due the
385        // preMoveNamePrefix/preMoveNamePostfix options
386        final GenericFile<T> target = getExchangeFileProperty(exchange);
387        // must use full name when downloading so we have the correct path
388        final String name = target.getAbsoluteFilePath();
389        try {
390            
391            if (isRetrieveFile()) {
392                // retrieve the file using the stream
393                log.trace("Retrieving file: {} from: {}", name, endpoint);
394    
395                // retrieve the file and check it was a success
396                boolean retrieved;
397                Exception cause = null;
398                try {
399                    retrieved = operations.retrieveFile(name, exchange);
400                } catch (Exception e) {
401                    retrieved = false;
402                    cause = e;
403                }
404
405                if (!retrieved) {
406                    if (ignoreCannotRetrieveFile(name, exchange, cause)) {
407                        log.trace("Cannot retrieve file {} maybe it does not exists. Ignoring.", name);
408                        // remove file from the in progress list as we could not retrieve it, but should ignore
409                        endpoint.getInProgressRepository().remove(absoluteFileName);
410                        return false;
411                    } else {
412                        // throw exception to handle the problem with retrieving the file
413                        // then if the method return false or throws an exception is handled the same in here
414                        // as in both cases an exception is being thrown
415                        if (cause != null && cause instanceof GenericFileOperationFailedException) {
416                            throw cause;
417                        } else {
418                            throw new GenericFileOperationFailedException("Cannot retrieve file: " + file + " from: " + endpoint, cause);
419                        }
420                    }
421                }
422    
423                log.trace("Retrieved file: {} from: {}", name, endpoint);                
424            } else {
425                log.trace("Skipped retrieval of file: {} from: {}", name, endpoint);
426                exchange.getIn().setBody(null);
427            }
428
429            // register on completion callback that does the completion strategies
430            // (for instance to move the file after we have processed it)
431            exchange.addOnCompletion(new GenericFileOnCompletion<T>(endpoint, operations, target, absoluteFileName));
432
433            log.debug("About to process file: {} using exchange: {}", target, exchange);
434
435            if (endpoint.isSynchronous()) {
436                // process synchronously
437                getProcessor().process(exchange);
438            } else {
439                // process the exchange using the async consumer to support async routing engine
440                // which can be supported by this file consumer as all the done work is
441                // provided in the GenericFileOnCompletion
442                getAsyncProcessor().process(exchange, new AsyncCallback() {
443                    public void done(boolean doneSync) {
444                        // noop
445                        if (log.isTraceEnabled()) {
446                            log.trace("Done processing file: {} {}", target, doneSync ? "synchronously" : "asynchronously");
447                        }
448                    }
449                });
450            }
451
452        } catch (Exception e) {
453            // remove file from the in progress list due to failure
454            // (cannot be in finally block due to GenericFileOnCompletion will remove it
455            // from in progress when it takes over and processes the file, which may happen
456            // by another thread at a later time. So its only safe to remove it if there was an exception)
457            endpoint.getInProgressRepository().remove(absoluteFileName);
458
459            String msg = "Error processing file " + file + " due to " + e.getMessage();
460            handleException(msg, e);
461        }
462
463        return true;
464    }
465
466    /**
467     * Override if required.  Files are retrieved / returns true by default
468     *
469     * @return <tt>true</tt> to retrieve files, <tt>false</tt> to skip retrieval of files.
470     */
471    protected boolean isRetrieveFile() {
472        return true;
473    }
474
475    /**
476     * Processes the exchange using a custom processor.
477     *
478     * @param exchange the exchange
479     * @param processor the custom processor
480     */
481    protected boolean customProcessExchange(final Exchange exchange, final Processor processor) {
482        GenericFile<T> file = getExchangeFileProperty(exchange);
483        log.trace("Custom processing file: {}", file);
484
485        // must extract the absolute name before the begin strategy as the file could potentially be pre moved
486        // and then the file name would be changed
487        String absoluteFileName = file.getAbsoluteFilePath();
488
489        try {
490            // process using the custom processor
491            processor.process(exchange);
492        } catch (Exception e) {
493            if (log.isDebugEnabled()) {
494                log.debug(endpoint + " error custom processing: " + file + " due to: " + e.getMessage() + ". This exception will be ignored.", e);
495            }
496            handleException(e);
497        } finally {
498            // always remove file from the in progress list as its no longer in progress
499            // use the original file name that was used to add it to the repository
500            // as the name can be different when using preMove option
501            endpoint.getInProgressRepository().remove(absoluteFileName);
502        }
503
504        return true;
505    }
506
507    /**
508     * Strategy for validating if the given remote file should be included or not
509     *
510     * @param file        the file
511     * @param isDirectory whether the file is a directory or a file
512     * @param files       files in the directory
513     * @return <tt>true</tt> to include the file, <tt>false</tt> to skip it
514     */
515    protected boolean isValidFile(GenericFile<T> file, boolean isDirectory, List<T> files) {
516        String absoluteFilePath = file.getAbsoluteFilePath();
517
518        if (!isMatched(file, isDirectory, files)) {
519            log.trace("File did not match. Will skip this file: {}", file);
520            return false;
521        }
522
523        // directory is always valid
524        if (isDirectory) {
525            return true;
526        }
527
528        // check if file is already in progress
529        if (endpoint.getInProgressRepository().contains(absoluteFilePath)) {
530            if (log.isTraceEnabled()) {
531                log.trace("Skipping as file is already in progress: {}", file.getFileName());
532            }
533            return false;
534        }
535
536        // if its a file then check we have the file in the idempotent registry already
537        if (endpoint.isIdempotent()) {
538            // use absolute file path as default key, but evaluate if an expression key was configured
539            String key = file.getAbsoluteFilePath();
540            if (endpoint.getIdempotentKey() != null) {
541                Exchange dummy = endpoint.createExchange(file);
542                key = endpoint.getIdempotentKey().evaluate(dummy, String.class);
543            }
544            if (key != null && endpoint.getIdempotentRepository().contains(key)) {
545                log.trace("This consumer is idempotent and the file has been consumed before matching idempotentKey: {}. Will skip this file: {}", key, file);
546                return false;
547            }
548        }
549
550        // okay so final step is to be able to add atomic as in-progress, so we are the
551        // only thread processing this file
552        return endpoint.getInProgressRepository().add(absoluteFilePath);
553    }
554
555    /**
556     * Strategy to perform file matching based on endpoint configuration.
557     * <p/>
558     * Will always return <tt>false</tt> for certain files/folders:
559     * <ul>
560     * <li>Starting with a dot</li>
561     * <li>lock files</li>
562     * </ul>
563     * And then <tt>true</tt> for directories.
564     *
565     * @param file        the file
566     * @param isDirectory whether the file is a directory or a file
567     * @param files       files in the directory
568     * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
569     */
570    protected boolean isMatched(GenericFile<T> file, boolean isDirectory, List<T> files) {
571        String name = file.getFileNameOnly();
572
573        // folders/names starting with dot is always skipped (eg. ".", ".camel", ".camelLock")
574        if (name.startsWith(".")) {
575            return false;
576        }
577
578        // lock files should be skipped
579        if (name.endsWith(FileComponent.DEFAULT_LOCK_FILE_POSTFIX)) {
580            return false;
581        }
582
583        if (endpoint.getFilter() != null) {
584            if (!endpoint.getFilter().accept(file)) {
585                return false;
586            }
587        }
588
589        if (endpoint.getAntFilter() != null) {
590            if (!endpoint.getAntFilter().accept(file)) {
591                return false;
592            }
593        }
594
595        // directories are regarded as matched if filter accepted them
596        if (isDirectory) {
597            return true;
598        }
599
600        if (ObjectHelper.isNotEmpty(endpoint.getExclude())) {
601            if (name.matches(endpoint.getExclude())) {
602                return false;
603            }
604        }
605
606        if (ObjectHelper.isNotEmpty(endpoint.getInclude())) {
607            if (!name.matches(endpoint.getInclude())) {
608                return false;
609            }
610        }
611
612        // use file expression for a simple dynamic file filter
613        if (endpoint.getFileName() != null) {
614            fileExpressionResult = evaluateFileExpression();
615            if (fileExpressionResult != null) {
616                if (!name.equals(fileExpressionResult)) {
617                    return false;
618                }
619            }
620        }
621
622        // if done file name is enabled, then the file is only valid if a done file exists
623        if (endpoint.getDoneFileName() != null) {
624            // done file must be in same path as the file
625            String doneFileName = endpoint.createDoneFileName(file.getAbsoluteFilePath());
626            ObjectHelper.notEmpty(doneFileName, "doneFileName", endpoint);
627
628            // is it a done file name?
629            if (endpoint.isDoneFile(file.getFileNameOnly())) {
630                log.trace("Skipping done file: {}", file);
631                return false;
632            }
633
634            if (!isMatched(file, doneFileName, files)) {
635                return false;
636            }
637        }
638
639        return true;
640    }
641
642    /**
643     * Strategy to perform file matching based on endpoint configuration in terms of done file name.
644     *
645     * @param file         the file
646     * @param doneFileName the done file name (without any paths)
647     * @param files        files in the directory
648     * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
649     */
650    protected abstract boolean isMatched(GenericFile<T> file, String doneFileName, List<T> files);
651
652    /**
653     * Is the given file already in progress.
654     *
655     * @param file the file
656     * @return <tt>true</tt> if the file is already in progress
657     * @deprecated no longer in use, use {@link org.apache.camel.component.file.GenericFileEndpoint#getInProgressRepository()} instead.
658     */
659    @Deprecated
660    protected boolean isInProgress(GenericFile<T> file) {
661        String key = file.getAbsoluteFilePath();
662        // must use add, to have operation as atomic
663        return !endpoint.getInProgressRepository().add(key);
664    }
665
666    protected String evaluateFileExpression() {
667        if (fileExpressionResult == null && endpoint.getFileName() != null) {
668            // create a dummy exchange as Exchange is needed for expression evaluation
669            Exchange dummy = endpoint.createExchange();
670            fileExpressionResult = endpoint.getFileName().evaluate(dummy, String.class);
671        }
672        return fileExpressionResult;
673    }
674
675    @SuppressWarnings("unchecked")
676    private GenericFile<T> getExchangeFileProperty(Exchange exchange) {
677        return (GenericFile<T>) exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE);
678    }
679
680    @Override
681    protected void doStart() throws Exception {
682        super.doStart();
683    }
684
685    @Override
686    protected void doStop() throws Exception {
687        prepareOnStartup = false;
688        super.doStop();
689    }
690}