001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.component.file;
018
019import java.util.ArrayList;
020import java.util.Collections;
021import java.util.Deque;
022import java.util.LinkedList;
023import java.util.List;
024import java.util.Queue;
025
026import org.apache.camel.AsyncCallback;
027import org.apache.camel.Exchange;
028import org.apache.camel.Processor;
029import org.apache.camel.ShutdownRunningTask;
030import org.apache.camel.impl.ScheduledBatchPollingConsumer;
031import org.apache.camel.spi.UriParam;
032import org.apache.camel.util.CastUtils;
033import org.apache.camel.util.ObjectHelper;
034import org.apache.camel.util.StopWatch;
035import org.apache.camel.util.TimeUtils;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039/**
040 * Base class for file consumers.
041 */
042public abstract class GenericFileConsumer<T> extends ScheduledBatchPollingConsumer {
043    protected final Logger log = LoggerFactory.getLogger(getClass());
044    protected GenericFileEndpoint<T> endpoint;
045    protected GenericFileOperations<T> operations;
046    protected volatile boolean loggedIn;
047    protected String fileExpressionResult;
048    protected volatile ShutdownRunningTask shutdownRunningTask;
049    protected volatile int pendingExchanges;
050    protected Processor customProcessor;
051    @UriParam
052    protected boolean eagerLimitMaxMessagesPerPoll = true;
053    protected volatile boolean prepareOnStartup;
054
055    public GenericFileConsumer(GenericFileEndpoint<T> endpoint, Processor processor, GenericFileOperations<T> operations) {
056        super(endpoint, processor);
057        this.endpoint = endpoint;
058        this.operations = operations;
059    }
060
061    public Processor getCustomProcessor() {
062        return customProcessor;
063    }
064
065    /**
066     * Use a custom processor to process the exchange.
067     * <p/>
068     * Only set this if you need to do custom processing, instead of the regular processing.
069     * <p/>
070     * This is for example used to browse file endpoints by leveraging the file consumer to poll
071     * the directory to gather the list of exchanges. But to avoid processing the files regularly
072     * we can use a custom processor.
073     *
074     * @param processor a custom processor
075     */
076    public void setCustomProcessor(Processor processor) {
077        this.customProcessor = processor;
078    }
079
080    public boolean isEagerLimitMaxMessagesPerPoll() {
081        return eagerLimitMaxMessagesPerPoll;
082    }
083
084    public void setEagerLimitMaxMessagesPerPoll(boolean eagerLimitMaxMessagesPerPoll) {
085        this.eagerLimitMaxMessagesPerPoll = eagerLimitMaxMessagesPerPoll;
086    }
087
088    /**
089     * Poll for files
090     */
091    protected int poll() throws Exception {
092        // must prepare on startup the very first time
093        if (!prepareOnStartup) {
094            // prepare on startup
095            endpoint.getGenericFileProcessStrategy().prepareOnStartup(operations, endpoint);
096            prepareOnStartup = true;
097        }
098
099        // must reset for each poll
100        fileExpressionResult = null;
101        shutdownRunningTask = null;
102        pendingExchanges = 0;
103
104        // before we poll is there anything we need to check?
105        // such as are we connected to the FTP Server still?
106        if (!prePollCheck()) {
107            log.debug("Skipping poll as pre poll check returned false");
108            return 0;
109        }
110
111        // gather list of files to process
112        List<GenericFile<T>> files = new ArrayList<GenericFile<T>>();
113        String name = endpoint.getConfiguration().getDirectory();
114
115        // time how long time it takes to poll
116        StopWatch stop = new StopWatch();
117        boolean limitHit;
118        try {
119            limitHit = !pollDirectory(name, files, 0);
120        } catch (Exception e) {
121            // during poll directory we add files to the in progress repository, in case of any exception thrown after this work
122            // we must then drain the in progress files before rethrowing the exception
123            log.debug("Error occurred during poll directory: " + name + " due " + e.getMessage() + ". Removing " + files.size() + " files marked as in-progress.");
124            removeExcessiveInProgressFiles(files);
125            throw e;
126        }
127
128        long delta = stop.stop();
129        if (log.isDebugEnabled()) {
130            log.debug("Took {} to poll: {}", TimeUtils.printDuration(delta), name);
131        }
132
133        // log if we hit the limit
134        if (limitHit) {
135            log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll);
136        }
137
138        // sort files using file comparator if provided
139        if (endpoint.getSorter() != null) {
140            Collections.sort(files, endpoint.getSorter());
141        }
142
143        // sort using build in sorters so we can use expressions
144        // use a linked list so we can dequeue the exchanges
145        LinkedList<Exchange> exchanges = new LinkedList<Exchange>();
146        for (GenericFile<T> file : files) {
147            Exchange exchange = endpoint.createExchange(file);
148            endpoint.configureExchange(exchange);
149            endpoint.configureMessage(file, exchange.getIn());
150            exchanges.add(exchange);
151        }
152        // sort files using exchange comparator if provided
153        if (endpoint.getSortBy() != null) {
154            Collections.sort(exchanges, endpoint.getSortBy());
155        }
156
157        // use a queue for the exchanges
158        Deque<Exchange> q = exchanges;
159
160        // we are not eager limiting, but we have configured a limit, so cut the list of files
161        if (!eagerLimitMaxMessagesPerPoll && maxMessagesPerPoll > 0) {
162            if (files.size() > maxMessagesPerPoll) {
163                log.debug("Limiting maximum messages to poll at {} files as there was more messages in this poll.", maxMessagesPerPoll);
164                // must first remove excessive files from the in progress repository
165                removeExcessiveInProgressFiles(q, maxMessagesPerPoll);
166            }
167        }
168
169        // consume files one by one
170        int total = exchanges.size();
171        if (total > 0) {
172            log.debug("Total {} files to consume", total);
173        }
174
175        int polledMessages = processBatch(CastUtils.cast(q));
176
177        postPollCheck(polledMessages);
178
179        return polledMessages;
180    }
181
182    public int processBatch(Queue<Object> exchanges) {
183        int total = exchanges.size();
184        int answer = total;
185
186        // limit if needed
187        if (maxMessagesPerPoll > 0 && total > maxMessagesPerPoll) {
188            log.debug("Limiting to maximum messages to poll {} as there was {} messages in this poll.", maxMessagesPerPoll, total);
189            total = maxMessagesPerPoll;
190        }
191
192        for (int index = 0; index < total && isBatchAllowed(); index++) {
193            // only loop if we are started (allowed to run)
194            // use poll to remove the head so it does not consume memory even after we have processed it
195            Exchange exchange = (Exchange) exchanges.poll();
196            // add current index and total as properties
197            exchange.setProperty(Exchange.BATCH_INDEX, index);
198            exchange.setProperty(Exchange.BATCH_SIZE, total);
199            exchange.setProperty(Exchange.BATCH_COMPLETE, index == total - 1);
200
201            // update pending number of exchanges
202            pendingExchanges = total - index - 1;
203
204            // process the current exchange
205            boolean started;
206            if (customProcessor != null) {
207                // use a custom processor
208                started = customProcessExchange(exchange, customProcessor);
209            } else {
210                // process the exchange regular
211                started = processExchange(exchange);
212            }
213
214            // if we did not start process the file then decrement the counter
215            if (!started) {
216                answer--;
217            }
218        }
219
220        // drain any in progress files as we are done with this batch
221        removeExcessiveInProgressFiles(CastUtils.cast((Deque<?>) exchanges, Exchange.class), 0);
222
223        return answer;
224    }
225
226    /**
227     * Drain any in progress files as we are done with this batch
228     *
229     * @param exchanges  the exchanges
230     * @param limit      the limit
231     */
232    protected void removeExcessiveInProgressFiles(Deque<Exchange> exchanges, int limit) {
233        // remove the file from the in progress list in case the batch was limited by max messages per poll
234        while (exchanges.size() > limit) {
235            // must remove last
236            Exchange exchange = exchanges.removeLast();
237            GenericFile<?> file = exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE, GenericFile.class);
238            String key = file.getAbsoluteFilePath();
239            endpoint.getInProgressRepository().remove(key);
240        }
241    }
242
243    /**
244     * Drain any in progress files as we are done with the files
245     *
246     * @param files  the files
247     */
248    protected void removeExcessiveInProgressFiles(List<GenericFile<T>> files) {
249        for (GenericFile file : files) {
250            String key = file.getAbsoluteFilePath();
251            endpoint.getInProgressRepository().remove(key);
252        }
253    }
254
255    /**
256     * Whether or not we can continue polling for more files
257     *
258     * @param fileList  the current list of gathered files
259     * @return <tt>true</tt> to continue, <tt>false</tt> to stop due hitting maxMessagesPerPoll limit
260     */
261    public boolean canPollMoreFiles(List<?> fileList) {
262        // at this point we should not limit if we are not eager
263        if (!eagerLimitMaxMessagesPerPoll) {
264            return true;
265        }
266
267        if (maxMessagesPerPoll <= 0) {
268            // no limitation
269            return true;
270        }
271
272        // then only poll if we haven't reached the max limit
273        return fileList.size() < maxMessagesPerPoll;
274    }
275
276    /**
277     * Override if required. Perform some checks (and perhaps actions) before we poll.
278     *
279     * @return <tt>true</tt> to poll, <tt>false</tt> to skip this poll.
280     */
281    protected boolean prePollCheck() throws Exception {
282        return true;
283    }
284
285    /**
286     * Override if required. Perform some checks (and perhaps actions) after we have polled.
287     *
288     * @param polledMessages number of polled messages
289     */
290    protected void postPollCheck(int polledMessages) {
291        // noop
292    }
293
294    /**
295     * Polls the given directory for files to process
296     *
297     * @param fileName current directory or file
298     * @param fileList current list of files gathered
299     * @param depth the current depth of the directory (will start from 0)
300     * @return whether or not to continue polling, <tt>false</tt> means the maxMessagesPerPoll limit has been hit
301     */
302    protected abstract boolean pollDirectory(String fileName, List<GenericFile<T>> fileList, int depth);
303
304    /**
305     * Sets the operations to be used.
306     * <p/>
307     * Can be used to set a fresh operations in case of recovery attempts
308     *
309     * @param operations the operations
310     */
311    public void setOperations(GenericFileOperations<T> operations) {
312        this.operations = operations;
313    }
314
315    /**
316     * Whether to ignore if the file cannot be retrieved.
317     * <p/>
318     * By default an {@link GenericFileOperationFailedException} is thrown if the file cannot be retrieved.
319     * <p/>
320     * This method allows to suppress this and just ignore that.
321     *
322     * @param name        the file name
323     * @param exchange    the exchange
324     * @param cause       optional exception occurred during retrieving file
325     * @return <tt>true</tt> to ignore, <tt>false</tt> is the default.
326     */
327    protected boolean ignoreCannotRetrieveFile(String name, Exchange exchange, Exception cause) {
328        return false;
329    }
330
331    /**
332     * Processes the exchange
333     *
334     * @param exchange the exchange
335     * @return <tt>true</tt> if the file was started to be processed, <tt>false</tt> if the file was not started
336     * to be processed, for some reason (not found, or aborted etc)
337     */
338    protected boolean processExchange(final Exchange exchange) {
339        GenericFile<T> file = getExchangeFileProperty(exchange);
340        log.trace("Processing file: {}", file);
341
342        // must extract the absolute name before the begin strategy as the file could potentially be pre moved
343        // and then the file name would be changed
344        String absoluteFileName = file.getAbsoluteFilePath();
345
346        // check if we can begin processing the file
347        final GenericFileProcessStrategy<T> processStrategy = endpoint.getGenericFileProcessStrategy();
348
349        Exception beginCause = null;
350        boolean begin = false;
351        try {
352            begin = processStrategy.begin(operations, endpoint, exchange, file);
353        } catch (Exception e) {
354            beginCause = e;
355        }
356
357        if (!begin) {
358            // no something was wrong, so we need to abort and remove the file from the in progress list
359            Exception abortCause = null;
360            log.debug("{} cannot begin processing file: {}", endpoint, file);
361            try {
362                // abort
363                processStrategy.abort(operations, endpoint, exchange, file);
364            } catch (Exception e) {
365                abortCause = e;
366            } finally {
367                // begin returned false, so remove file from the in progress list as its no longer in progress
368                endpoint.getInProgressRepository().remove(absoluteFileName);
369            }
370            if (beginCause != null) {
371                String msg = endpoint + " cannot begin processing file: " + file + " due to: " + beginCause.getMessage();
372                handleException(msg, beginCause);
373            }
374            if (abortCause != null) {
375                String msg2 = endpoint + " cannot abort processing file: " + file + " due to: " + abortCause.getMessage();
376                handleException(msg2, abortCause);
377            }
378            return false;
379        }
380
381        // must use file from exchange as it can be updated due the
382        // preMoveNamePrefix/preMoveNamePostfix options
383        final GenericFile<T> target = getExchangeFileProperty(exchange);
384        // must use full name when downloading so we have the correct path
385        final String name = target.getAbsoluteFilePath();
386        try {
387            
388            if (isRetrieveFile()) {
389                // retrieve the file using the stream
390                log.trace("Retrieving file: {} from: {}", name, endpoint);
391    
392                // retrieve the file and check it was a success
393                boolean retrieved;
394                Exception cause = null;
395                try {
396                    retrieved = operations.retrieveFile(name, exchange);
397                } catch (Exception e) {
398                    retrieved = false;
399                    cause = e;
400                }
401
402                if (!retrieved) {
403                    if (ignoreCannotRetrieveFile(name, exchange, cause)) {
404                        log.trace("Cannot retrieve file {} maybe it does not exists. Ignoring.", name);
405                        // remove file from the in progress list as we could not retrieve it, but should ignore
406                        endpoint.getInProgressRepository().remove(absoluteFileName);
407                        return false;
408                    } else {
409                        // throw exception to handle the problem with retrieving the file
410                        // then if the method return false or throws an exception is handled the same in here
411                        // as in both cases an exception is being thrown
412                        if (cause != null && cause instanceof GenericFileOperationFailedException) {
413                            throw cause;
414                        } else {
415                            throw new GenericFileOperationFailedException("Cannot retrieve file: " + file + " from: " + endpoint, cause);
416                        }
417                    }
418                }
419    
420                log.trace("Retrieved file: {} from: {}", name, endpoint);                
421            } else {
422                log.trace("Skipped retrieval of file: {} from: {}", name, endpoint);
423                exchange.getIn().setBody(null);
424            }
425
426            // register on completion callback that does the completion strategies
427            // (for instance to move the file after we have processed it)
428            exchange.addOnCompletion(new GenericFileOnCompletion<T>(endpoint, operations, target, absoluteFileName));
429
430            log.debug("About to process file: {} using exchange: {}", target, exchange);
431
432            if (endpoint.isSynchronous()) {
433                // process synchronously
434                getProcessor().process(exchange);
435            } else {
436                // process the exchange using the async consumer to support async routing engine
437                // which can be supported by this file consumer as all the done work is
438                // provided in the GenericFileOnCompletion
439                getAsyncProcessor().process(exchange, new AsyncCallback() {
440                    public void done(boolean doneSync) {
441                        // noop
442                        if (log.isTraceEnabled()) {
443                            log.trace("Done processing file: {} {}", target, doneSync ? "synchronously" : "asynchronously");
444                        }
445                    }
446                });
447            }
448
449        } catch (Exception e) {
450            // remove file from the in progress list due to failure
451            // (cannot be in finally block due to GenericFileOnCompletion will remove it
452            // from in progress when it takes over and processes the file, which may happen
453            // by another thread at a later time. So its only safe to remove it if there was an exception)
454            endpoint.getInProgressRepository().remove(absoluteFileName);
455
456            String msg = "Error processing file " + file + " due to " + e.getMessage();
457            handleException(msg, e);
458        }
459
460        return true;
461    }
462
463    /**
464     * Override if required.  Files are retrieved / returns true by default
465     *
466     * @return <tt>true</tt> to retrieve files, <tt>false</tt> to skip retrieval of files.
467     */
468    protected boolean isRetrieveFile() {
469        return true;
470    }
471
472    /**
473     * Processes the exchange using a custom processor.
474     *
475     * @param exchange the exchange
476     * @param processor the custom processor
477     */
478    protected boolean customProcessExchange(final Exchange exchange, final Processor processor) {
479        GenericFile<T> file = getExchangeFileProperty(exchange);
480        log.trace("Custom processing file: {}", file);
481
482        // must extract the absolute name before the begin strategy as the file could potentially be pre moved
483        // and then the file name would be changed
484        String absoluteFileName = file.getAbsoluteFilePath();
485
486        try {
487            // process using the custom processor
488            processor.process(exchange);
489        } catch (Exception e) {
490            if (log.isDebugEnabled()) {
491                log.debug(endpoint + " error custom processing: " + file + " due to: " + e.getMessage() + ". This exception will be ignored.", e);
492            }
493            handleException(e);
494        } finally {
495            // always remove file from the in progress list as its no longer in progress
496            // use the original file name that was used to add it to the repository
497            // as the name can be different when using preMove option
498            endpoint.getInProgressRepository().remove(absoluteFileName);
499        }
500
501        return true;
502    }
503
504    /**
505     * Strategy for validating if the given remote file should be included or not
506     *
507     * @param file        the file
508     * @param isDirectory whether the file is a directory or a file
509     * @param files       files in the directory
510     * @return <tt>true</tt> to include the file, <tt>false</tt> to skip it
511     */
512    protected boolean isValidFile(GenericFile<T> file, boolean isDirectory, List<T> files) {
513        String absoluteFilePath = file.getAbsoluteFilePath();
514
515        if (!isMatched(file, isDirectory, files)) {
516            log.trace("File did not match. Will skip this file: {}", file);
517            return false;
518        }
519
520        // directory is always valid
521        if (isDirectory) {
522            return true;
523        }
524
525        // check if file is already in progress
526        if (endpoint.getInProgressRepository().contains(absoluteFilePath)) {
527            if (log.isTraceEnabled()) {
528                log.trace("Skipping as file is already in progress: {}", file.getFileName());
529            }
530            return false;
531        }
532
533        // if its a file then check we have the file in the idempotent registry already
534        if (endpoint.isIdempotent()) {
535            // use absolute file path as default key, but evaluate if an expression key was configured
536            String key = file.getAbsoluteFilePath();
537            if (endpoint.getIdempotentKey() != null) {
538                Exchange dummy = endpoint.createExchange(file);
539                key = endpoint.getIdempotentKey().evaluate(dummy, String.class);
540            }
541            if (key != null && endpoint.getIdempotentRepository().contains(key)) {
542                log.trace("This consumer is idempotent and the file has been consumed before matching idempotentKey: {}. Will skip this file: {}", key, file);
543                return false;
544            }
545        }
546
547        // okay so final step is to be able to add atomic as in-progress, so we are the
548        // only thread processing this file
549        return endpoint.getInProgressRepository().add(absoluteFilePath);
550    }
551
552    /**
553     * Strategy to perform file matching based on endpoint configuration.
554     * <p/>
555     * Will always return <tt>false</tt> for certain files/folders:
556     * <ul>
557     * <li>Starting with a dot</li>
558     * <li>lock files</li>
559     * </ul>
560     * And then <tt>true</tt> for directories.
561     *
562     * @param file        the file
563     * @param isDirectory whether the file is a directory or a file
564     * @param files       files in the directory
565     * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
566     */
567    protected boolean isMatched(GenericFile<T> file, boolean isDirectory, List<T> files) {
568        String name = file.getFileNameOnly();
569
570        // folders/names starting with dot is always skipped (eg. ".", ".camel", ".camelLock")
571        if (name.startsWith(".")) {
572            return false;
573        }
574
575        // lock files should be skipped
576        if (name.endsWith(FileComponent.DEFAULT_LOCK_FILE_POSTFIX)) {
577            return false;
578        }
579
580        if (endpoint.getFilter() != null) {
581            if (!endpoint.getFilter().accept(file)) {
582                return false;
583            }
584        }
585
586        if (endpoint.getAntFilter() != null) {
587            if (!endpoint.getAntFilter().accept(file)) {
588                return false;
589            }
590        }
591
592        // directories are regarded as matched if filter accepted them
593        if (isDirectory) {
594            return true;
595        }
596
597        if (ObjectHelper.isNotEmpty(endpoint.getExclude())) {
598            if (name.matches(endpoint.getExclude())) {
599                return false;
600            }
601        }
602
603        if (ObjectHelper.isNotEmpty(endpoint.getInclude())) {
604            if (!name.matches(endpoint.getInclude())) {
605                return false;
606            }
607        }
608
609        // use file expression for a simple dynamic file filter
610        if (endpoint.getFileName() != null) {
611            fileExpressionResult = evaluateFileExpression();
612            if (fileExpressionResult != null) {
613                if (!name.equals(fileExpressionResult)) {
614                    return false;
615                }
616            }
617        }
618
619        // if done file name is enabled, then the file is only valid if a done file exists
620        if (endpoint.getDoneFileName() != null) {
621            // done file must be in same path as the file
622            String doneFileName = endpoint.createDoneFileName(file.getAbsoluteFilePath());
623            ObjectHelper.notEmpty(doneFileName, "doneFileName", endpoint);
624
625            // is it a done file name?
626            if (endpoint.isDoneFile(file.getFileNameOnly())) {
627                log.trace("Skipping done file: {}", file);
628                return false;
629            }
630
631            if (!isMatched(file, doneFileName, files)) {
632                return false;
633            }
634        }
635
636        return true;
637    }
638
639    /**
640     * Strategy to perform file matching based on endpoint configuration in terms of done file name.
641     *
642     * @param file         the file
643     * @param doneFileName the done file name (without any paths)
644     * @param files        files in the directory
645     * @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
646     */
647    protected abstract boolean isMatched(GenericFile<T> file, String doneFileName, List<T> files);
648
649    /**
650     * Is the given file already in progress.
651     *
652     * @param file the file
653     * @return <tt>true</tt> if the file is already in progress
654     * @deprecated no longer in use, use {@link org.apache.camel.component.file.GenericFileEndpoint#getInProgressRepository()} instead.
655     */
656    @Deprecated
657    protected boolean isInProgress(GenericFile<T> file) {
658        String key = file.getAbsoluteFilePath();
659        // must use add, to have operation as atomic
660        return !endpoint.getInProgressRepository().add(key);
661    }
662
663    protected String evaluateFileExpression() {
664        if (fileExpressionResult == null && endpoint.getFileName() != null) {
665            // create a dummy exchange as Exchange is needed for expression evaluation
666            Exchange dummy = endpoint.createExchange();
667            fileExpressionResult = endpoint.getFileName().evaluate(dummy, String.class);
668        }
669        return fileExpressionResult;
670    }
671
672    @SuppressWarnings("unchecked")
673    private GenericFile<T> getExchangeFileProperty(Exchange exchange) {
674        return (GenericFile<T>) exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE);
675    }
676
677    @Override
678    protected void doStart() throws Exception {
679        super.doStart();
680    }
681
682    @Override
683    protected void doStop() throws Exception {
684        prepareOnStartup = false;
685        super.doStop();
686    }
687}