/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import java.io.Closeable;
import java.io.IOException;
import java.net.URI;
import java.util.LinkedList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.vectorizer.DictionaryVectorizerTest;
import org.apache.mahout.vectorizer.RandomDocumentGenerator;
import org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles;
import org.junit.Test;

/**
 * Tests for {@link SparseVectorsFromSequenceFiles}.
 *
 * <p>Each test writes a small {@code Text -> Text} sequence file of documents, runs the
 * vectorizer tool over it through {@link ToolRunner}, and validates the vectors written
 * under the {@code tf-vectors} and {@code tfidf-vectors} output directories.
 */
@ThreadLeakScope(value=ThreadLeakScope.Scope.NONE)
public class SparseVectorsFromSequenceFilesTest extends MahoutTestCase {
    /** Number of random documents generated by {@link #setupDocs()}. */
    private static final int NUM_DOCS = 100;

    /** Fixed corpus shared by the pruning tests. */
    private static final String[] PRUNING_DOCS = {"a b c", "a a a a a b", "a a a a a c"};

    /** Configuration used to write the input and to read back the output vectors. */
    private Configuration conf;

    /** Location of the input sequence file written by {@link #writeDocs(String[])}. */
    private Path inputPath;

    /**
     * Writes {@link #NUM_DOCS} randomly generated documents as the test input.
     *
     * @throws IOException if the input sequence file cannot be written
     */
    private void setupDocs() throws IOException {
        RandomDocumentGenerator gen = new RandomDocumentGenerator();
        String[] docs = new String[NUM_DOCS];
        for (int i = 0; i < docs.length; ++i) {
            docs[i] = gen.getRandomDocument();
        }
        this.writeDocs(docs);
    }

    /**
     * Initializes {@link #conf} and {@link #inputPath}, then writes the given documents to
     * the input sequence file, keyed by {@code "Document::ID::" + index}.
     *
     * @param docs document texts, written in array order
     * @throws IOException if the sequence file cannot be created, written or closed
     */
    private void writeDocs(String[] docs) throws IOException {
        this.conf = this.getConfiguration();
        this.inputPath = this.getTestTempFilePath("documents/docs.file");
        // Resolve the file system from the path itself so every test writes to the
        // file system that actually owns the temp path.
        FileSystem fs = FileSystem.get(this.inputPath.toUri(), this.conf);
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, this.conf, this.inputPath, Text.class, Text.class);
        try {
            for (int i = 0; i < docs.length; ++i) {
                writer.append(new Text("Document::ID::" + i), new Text(docs[i]));
            }
        } finally {
            // swallowIOException=false: a failure to close the writer fails the test.
            Closeables.close(writer, false);
        }
    }

    @Test
    public void testCreateTermFrequencyVectors() throws Exception {
        this.setupDocs();
        this.runTest(false, false, false, -1.0, NUM_DOCS);
    }

    @Test
    public void testCreateTermFrequencyVectorsNam() throws Exception {
        this.setupDocs();
        this.runTest(false, false, true, -1.0, NUM_DOCS);
    }

    @Test
    public void testCreateTermFrequencyVectorsSeq() throws Exception {
        this.setupDocs();
        this.runTest(false, true, false, -1.0, NUM_DOCS);
    }

    @Test
    public void testCreateTermFrequencyVectorsSeqNam() throws Exception {
        this.setupDocs();
        this.runTest(false, true, true, -1.0, NUM_DOCS);
    }

    /**
     * Runs the tool with {@code --maxDFSigma 2.0} and the default weighting over
     * {@link #PRUNING_DOCS}, then checks the vectors under {@code tfidf-vectors}.
     */
    @Test
    public void testPruning() throws Exception {
        this.writeDocs(PRUNING_DOCS);
        Path outPath = this.runTest(false, false, false, 2.0, PRUNING_DOCS.length);
        this.assertPrunedVectors(new Path(outPath, "tfidf-vectors"), PRUNING_DOCS.length);
    }

    /**
     * Runs the tool with {@code --maxDFSigma 2.0} and {@code --weight tf} over
     * {@link #PRUNING_DOCS}, then checks the vectors under {@code tf-vectors}.
     */
    @Test
    public void testPruningTF() throws Exception {
        this.writeDocs(PRUNING_DOCS);
        Path outPath = this.runTest(true, false, false, 2.0, PRUNING_DOCS.length);
        this.assertPrunedVectors(new Path(outPath, "tf-vectors"), PRUNING_DOCS.length);
    }

    /**
     * Reads every vector under {@code vectorsPath} and asserts the shape expected for
     * {@link #PRUNING_DOCS}: one vector per document, each of size 2, with 2, 1 and 1
     * non-default elements respectively.
     *
     * @param vectorsPath directory holding the part files to read
     * @param numDocs expected number of vectors
     */
    private void assertPrunedVectors(Path vectorsPath, int numDocs) throws Exception {
        int count = 0;
        Vector[] res = new Vector[numDocs];
        for (VectorWritable value : new SequenceFileDirValueIterable<VectorWritable>(
                vectorsPath, PathType.LIST, PathFilters.partFilter(), null, true, this.conf)) {
            Vector v = value.get();
            System.out.println(v);
            assertEquals(2, v.size());
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
            // if the job produced more vectors than there are documents.
            assertTrue("more vectors than documents in " + vectorsPath, count < numDocs);
            res[count] = v;
            ++count;
        }
        assertEquals(numDocs, count);
        assertEquals(2, res[0].getNumNondefaultElements());
        assertEquals(1, res[1].getNumNondefaultElements());
        assertEquals(1, res[2].getNumNondefaultElements());
    }

    /**
     * Runs {@link SparseVectorsFromSequenceFiles} over {@link #inputPath} and validates the
     * output with {@link DictionaryVectorizerTest#validateVectors}. {@link #conf} and
     * {@link #inputPath} must already be set, i.e. the input must have been written first.
     *
     * @param tfWeighting pass {@code --weight tf}; when set, {@code tfidf-vectors} is not validated
     * @param sequential pass {@code -seq}
     * @param named pass {@code -nv}
     * @param maxDFSigma value for {@code --maxDFSigma}; the option is omitted when negative
     * @param numDocs number of documents in the input, forwarded to the validation
     * @return the output directory the tool wrote to
     */
    private Path runTest(boolean tfWeighting, boolean sequential, boolean named, double maxDFSigma, int numDocs) throws Exception {
        Path outputPath = this.getTestTempFilePath("output");
        // Parameterized so that toArray(String[]) yields String[]; on a raw list it
        // erases to Object[] and the assignment below does not compile.
        LinkedList<String> argList = Lists.newLinkedList();
        argList.add("-i");
        argList.add(this.inputPath.toString());
        argList.add("-o");
        argList.add(outputPath.toString());
        if (sequential) {
            argList.add("-seq");
        }
        if (named) {
            argList.add("-nv");
        }
        if (maxDFSigma >= 0.0) {
            argList.add("--maxDFSigma");
            argList.add(String.valueOf(maxDFSigma));
        }
        if (tfWeighting) {
            argList.add("--weight");
            argList.add("tf");
        }
        String[] args = argList.toArray(new String[argList.size()]);
        ToolRunner.run(this.getConfiguration(), new SparseVectorsFromSequenceFiles(), args);
        Path tfVectors = new Path(outputPath, "tf-vectors");
        Path tfidfVectors = new Path(outputPath, "tfidf-vectors");
        DictionaryVectorizerTest.validateVectors(this.conf, numDocs, tfVectors, sequential, named);
        if (!tfWeighting) {
            DictionaryVectorizerTest.validateVectors(this.conf, numDocs, tfidfVectors, sequential, named);
        }
        return outputPath;
    }
}

