/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.google.common.io.Closeables;
import java.io.Closeable;
import java.net.URI;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.vectorizer.DocumentProcessor;
import org.junit.Test;

/**
 * Round-trip test for {@code DocumentProcessor.tokenizeDocuments}: writes a small
 * {@code Text -> Text} sequence file of documents, runs the tokenizer over it with
 * {@link StandardAnalyzer}, and checks the {@code Text -> StringTuple} output.
 */
@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
public class DocumentProcessorTest
extends MahoutTestCase {

    /**
     * Tokenizes two documents and verifies that the single output file contains,
     * in input order, each document id paired with its expected tokens.
     *
     * <p>The expected tokens are lower-case and omit "A", "for", "the" and "and"
     * (presumably dropped as stop words by the analyzer).
     *
     * @throws Exception if writing the input, running the tokenizer, or reading the output fails
     */
    @Test
    public void testTokenizeDocuments() throws Exception {
        Configuration configuration = getConfiguration();
        Path input = new Path(getTestTempDirPath(), "inputDir");
        Path output = new Path(getTestTempDirPath(), "outputDir");
        FileSystem fs = FileSystem.get(input.toUri(), configuration);

        String documentId1 = "123";
        String documentId2 = "456";

        // Write the two input documents as (id, text) records.
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, configuration, input, Text.class, Text.class);
        try {
            String text1 = "A test for the document processor";
            writer.append(new Text(documentId1), new Text(text1));
            String text2 = "and another one";
            writer.append(new Text(documentId2), new Text(text2));
        } finally {
            // Do not swallow close failures: an incompletely written input would invalidate the test.
            Closeables.close(writer, false);
        }

        DocumentProcessor.tokenizeDocuments(input, StandardAnalyzer.class, output, configuration);

        // Exactly one data file is expected once log/CRC files are filtered out.
        FileStatus[] statuses = fs.listStatus(output, PathFilters.logsCRCFilter());
        assertEquals(1, statuses.length);
        Path filePath = statuses[0].getPath();

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, configuration);
        try {
            // asSubclass gives a checked narrowing instead of a raw Class cast.
            Text key = ClassUtils.instantiateAs(reader.getKeyClass().asSubclass(Text.class), Text.class);
            StringTuple value =
                ClassUtils.instantiateAs(reader.getValueClass().asSubclass(StringTuple.class), StringTuple.class);

            assertTrue("missing record for first document", reader.next(key, value));
            assertEquals(documentId1, key.toString());
            assertEquals(Arrays.asList("test", "document", "processor"), value.getEntries());

            assertTrue("missing record for second document", reader.next(key, value));
            assertEquals(documentId2, key.toString());
            assertEquals(Arrays.asList("another", "one"), value.getEntries());
        } finally {
            // The reader was previously never closed; release it even when an assertion fails.
            // Close errors are swallowed so they cannot mask the assertion failure.
            Closeables.close(reader, true);
        }
    }
}

