/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.classifier.df.data;

import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Random;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.classifier.df.data.Data;
import org.apache.mahout.classifier.df.data.DataLoader;
import org.apache.mahout.classifier.df.data.Dataset;
import org.apache.mahout.classifier.df.data.DescriptorUtils;
import org.apache.mahout.classifier.df.data.Instance;
import org.apache.mahout.classifier.df.data.Utils;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.junit.Test;

/**
 * Tests for {@link DataLoader}: generating a {@link Dataset} from a descriptor and loading
 * {@link Data} from either an in-memory array of comma-separated strings or a file.
 *
 * <p>Each test runs twice: once with the regression flag set to {@code false} and once with it
 * set to {@code true}.
 */
public final class DataLoaderTest
extends MahoutTestCase {
    /** Absolute tolerance used when comparing attribute values. */
    private static final double TOLERANCE = 1.0E-6;

    /**
     * Threshold compared against {@code rng.nextDouble()} to decide whether a row gets a missing
     * value. {@link Random#nextDouble()} is specified to return values in {@code [0.0, 1.0)}, so
     * with a threshold of {@code 0.0} no missing values are expected to be generated.
     * NOTE(review): this looks deliberately disabled; confirm the loader handles {@code '?'}
     * tokens before raising it.
     */
    private static final double MISSING_VALUE_RATE = 0.0;

    private Random rng;

    @Override
    public void setUp() throws Exception {
        super.setUp();
        this.rng = RandomUtils.getRandom();
    }

    /** Loads data from a string array and checks both the loaded values and the dataset. */
    @Test
    public void testLoadDataWithDescriptor() throws Exception {
        int nbAttributes = 10;
        int datasize = 100;

        String descriptor = Utils.randomDescriptor(this.rng, nbAttributes);
        Dataset.Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);

        // first pass: regression flag off
        double[][] data = Utils.randomDoubles(this.rng, descriptor, false, datasize);
        Collection<Integer> missings = Lists.newArrayList();
        String[] sData = this.prepareData(data, attrs, missings);
        Dataset dataset = DataLoader.generateDataset(descriptor, false, sData);
        Data loaded = DataLoader.loadData(dataset, sData);

        testLoadedData(data, attrs, missings, loaded);
        testLoadedDataset(data, attrs, missings, loaded);

        // second pass: regression flag on
        data = Utils.randomDoubles(this.rng, descriptor, true, datasize);
        missings = Lists.newArrayList();
        sData = this.prepareData(data, attrs, missings);
        dataset = DataLoader.generateDataset(descriptor, true, sData);
        loaded = DataLoader.loadData(dataset, sData);

        testLoadedData(data, attrs, missings, loaded);
        testLoadedDataset(data, attrs, missings, loaded);
    }

    /** Checks that generating a dataset twice from the same input yields equal datasets. */
    @Test
    public void testGenerateDataset() throws Exception {
        int nbAttributes = 10;
        int datasize = 100;

        String descriptor = Utils.randomDescriptor(this.rng, nbAttributes);
        Dataset.Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);

        // first pass: regression flag off
        double[][] data = Utils.randomDoubles(this.rng, descriptor, false, datasize);
        Collection<Integer> missings = Lists.newArrayList();
        String[] sData = this.prepareData(data, attrs, missings);
        Dataset expected = DataLoader.generateDataset(descriptor, false, sData);
        Dataset dataset = DataLoader.generateDataset(descriptor, false, sData);
        assertEquals(expected, dataset);

        // second pass: regression flag on
        data = Utils.randomDoubles(this.rng, descriptor, true, datasize);
        missings = Lists.newArrayList();
        sData = this.prepareData(data, attrs, missings);
        expected = DataLoader.generateDataset(descriptor, true, sData);
        dataset = DataLoader.generateDataset(descriptor, true, sData);
        assertEquals(expected, dataset);
    }

    /**
     * Converts each row of {@code data} into a string of comma-terminated values.
     *
     * <p>When a row is selected to carry a missing value, its index is added to
     * {@code missings} and one randomly chosen non-ignored attribute is written as {@code '?'}.
     * See {@link #MISSING_VALUE_RATE} for how often that happens.
     *
     * @param data     source values, one row per instance
     * @param attrs    attribute types, one per column
     * @param missings receives the indexes of rows that were given a missing value
     * @return one string per row of {@code data}
     */
    private String[] prepareData(double[][] data, Dataset.Attribute[] attrs, Collection<Integer> missings) {
        int nbAttributes = attrs.length;
        String[] sData = new String[data.length];

        for (int index = 0; index < data.length; ++index) {
            int missingAttr = -1;
            if (this.rng.nextDouble() < MISSING_VALUE_RATE) {
                missings.add(index);
                // pick a random attribute, retrying until it is not an ignored one
                do {
                    missingAttr = this.rng.nextInt(nbAttributes);
                } while (attrs[missingAttr].isIgnored());
            }

            StringBuilder builder = new StringBuilder();
            for (int attr = 0; attr < nbAttributes; ++attr) {
                if (attr == missingAttr) {
                    builder.append('?');
                } else {
                    builder.append(data[index][attr]);
                }
                // every value, including the last one, is followed by a comma
                builder.append(',');
            }
            sData[index] = builder.toString();
        }
        return sData;
    }

    /**
     * Asserts that {@code loaded} contains the values of {@code data}, skipping rows listed in
     * {@code missings} and columns whose attribute is ignored.
     *
     * @param data     source values
     * @param attrs    attribute types, one per column of {@code data}
     * @param missings indexes of rows of {@code data} that are skipped
     * @param loaded   the data to check
     */
    static void testLoadedData(double[][] data, Dataset.Attribute[] attrs, Collection<Integer> missings, Data loaded) {
        int nbAttributes = attrs.length;
        assertEquals("number of instance", data.length - missings.size(), loaded.size());

        // lind indexes 'loaded', which has no entry for the skipped rows
        int lind = 0;
        for (int index = 0; index < data.length; ++index) {
            if (missings.contains(index)) {
                continue;
            }
            double[] vector = data[index];
            Instance instance = loaded.get(lind);

            // aId indexes the instance's attributes, which exclude ignored columns
            int aId = 0;
            for (int attr = 0; attr < nbAttributes; ++attr) {
                Dataset.Attribute attribute = attrs[attr];
                if (attribute.isIgnored()) {
                    continue;
                }
                if (attribute.isNumerical()) {
                    assertEquals(vector[attr], instance.get(aId), TOLERANCE);
                    ++aId;
                } else if (attribute.isCategorical()) {
                    checkCategorical(data, missings, loaded, attr, aId, vector[attr], instance.get(aId));
                    ++aId;
                } else if (attribute.isLabel()) {
                    // the label is compared directly when the dataset reports it as numerical,
                    // otherwise it is checked like a categorical attribute
                    if (loaded.getDataset().isNumerical(aId)) {
                        assertEquals(vector[attr], instance.get(aId), TOLERANCE);
                    } else {
                        checkCategorical(data, missings, loaded, attr, aId, vector[attr], instance.get(aId));
                    }
                    ++aId;
                }
            }
            ++lind;
        }
    }

    /**
     * Asserts that the dataset attached to {@code loaded} agrees with {@code attrs}: non-label
     * attributes must report the same numerical flag, and categorical (or non-numerical label)
     * values must equal what {@code valueOf} returns for the source value's string form.
     *
     * @param data     source values
     * @param attrs    attribute types, one per column of {@code data}
     * @param missings indexes of rows of {@code data} that are skipped
     * @param loaded   the data whose dataset is checked
     */
    static void testLoadedDataset(double[][] data, Dataset.Attribute[] attrs, Collection<Integer> missings, Data loaded) {
        int nbAttributes = attrs.length;

        int iId = 0;
        for (int index = 0; index < data.length; ++index) {
            if (missings.contains(index)) {
                continue;
            }
            Instance instance = loaded.get(iId++);

            int aId = 0;
            for (int attr = 0; attr < nbAttributes; ++attr) {
                Dataset.Attribute attribute = attrs[attr];
                if (attribute.isIgnored()) {
                    continue;
                }

                boolean compareEncodedValue;
                if (attribute.isLabel()) {
                    compareEncodedValue = !loaded.getDataset().isNumerical(aId);
                } else {
                    assertEquals(attribute.isNumerical(), loaded.getDataset().isNumerical(aId));
                    compareEncodedValue = attribute.isCategorical();
                }

                if (compareEncodedValue) {
                    double nValue = instance.get(aId);
                    String oValue = Double.toString(data[index][attr]);
                    assertEquals(loaded.getDataset().valueOf(aId, oValue), nValue, TOLERANCE);
                }
                ++aId;
            }
        }
    }

    /** Writes the data to a test file, loads it back and checks the loaded values. */
    @Test
    public void testLoadDataFromFile() throws Exception {
        int nbAttributes = 10;
        int datasize = 100;

        String descriptor = Utils.randomDescriptor(this.rng, nbAttributes);
        Dataset.Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);

        // first pass: regression flag off
        double[][] source = Utils.randomDoubles(this.rng, descriptor, false, datasize);
        Collection<Integer> missings = Lists.newArrayList();
        String[] sData = this.prepareData(source, attrs, missings);
        Dataset dataset = DataLoader.generateDataset(descriptor, false, sData);

        Path dataPath = Utils.writeDataToTestFile(sData);
        FileSystem fs = dataPath.getFileSystem(this.getConfiguration());
        Data loaded = DataLoader.loadData(dataset, fs, dataPath);

        testLoadedData(source, attrs, missings, loaded);

        // second pass: regression flag on
        source = Utils.randomDoubles(this.rng, descriptor, true, datasize);
        missings = Lists.newArrayList();
        sData = this.prepareData(source, attrs, missings);
        dataset = DataLoader.generateDataset(descriptor, true, sData);

        dataPath = Utils.writeDataToTestFile(sData);
        fs = dataPath.getFileSystem(this.getConfiguration());
        loaded = DataLoader.loadData(dataset, fs, dataPath);

        testLoadedData(source, attrs, missings, loaded);
    }

    /**
     * Checks that a dataset generated from a file equals the dataset generated from the same
     * rows held in memory.
     */
    @Test
    public void testGenerateDatasetFromFile() throws Exception {
        int nbAttributes = 10;
        int datasize = 100;

        String descriptor = Utils.randomDescriptor(this.rng, nbAttributes);
        Dataset.Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);

        // first pass: regression flag off
        double[][] source = Utils.randomDoubles(this.rng, descriptor, false, datasize);
        Collection<Integer> missings = Lists.newArrayList();
        String[] sData = this.prepareData(source, attrs, missings);
        Dataset expected = DataLoader.generateDataset(descriptor, false, sData);

        Path path = Utils.writeDataToTestFile(sData);
        FileSystem fs = path.getFileSystem(this.getConfiguration());
        Dataset dataset = DataLoader.generateDataset(descriptor, false, fs, path);

        assertEquals(expected, dataset);

        // second pass: regression flag on, matching the other tests in this class
        // (this pass previously passed 'false' and so repeated the first pass)
        source = Utils.randomDoubles(this.rng, descriptor, true, datasize);
        missings = Lists.newArrayList();
        sData = this.prepareData(source, attrs, missings);
        expected = DataLoader.generateDataset(descriptor, true, sData);

        path = Utils.writeDataToTestFile(sData);
        fs = path.getFileSystem(this.getConfiguration());
        dataset = DataLoader.generateDataset(descriptor, true, fs, path);

        assertEquals(expected, dataset);
    }

    /**
     * Asserts that the mapping between source and loaded values is consistent for one attribute:
     * every non-skipped row whose source value equals {@code oValue} must hold {@code nValue} in
     * {@code loaded}, and every other non-skipped row must hold a different value.
     *
     * @param source   source values
     * @param missings indexes of rows of {@code source} that are skipped
     * @param loaded   the loaded data
     * @param attr     column index in {@code source}
     * @param aId      attribute index in the loaded instances
     * @param oValue   a source value of the attribute
     * @param nValue   the loaded value that corresponds to {@code oValue}
     */
    static void checkCategorical(double[][] source, Collection<Integer> missings, Data loaded, int attr, int aId, double oValue, double nValue) {
        // lind indexes 'loaded', which has no entry for the skipped rows
        int lind = 0;
        for (int index = 0; index < source.length; ++index) {
            if (missings.contains(index)) {
                continue;
            }
            double loadedValue = loaded.get(lind).get(aId);
            if (source[index][attr] == oValue) {
                assertEquals(nValue, loadedValue, TOLERANCE);
            } else {
                assertFalse(nValue == loadedValue);
            }
            ++lind;
        }
    }
}

