/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.clustering.stc;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.BitSetIterator;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntStack;
import com.google.common.base.Predicate;
import com.google.common.collect.Collections2;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import org.carrot2.clustering.stc.ClusterCandidate;
import org.carrot2.clustering.stc.GeneralizedSuffixTree;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.analysis.TokenTypeUtils;
import org.carrot2.text.clustering.IMonolingualClusteringAlgorithm;
import org.carrot2.text.clustering.MultilingualClustering;
import org.carrot2.text.linguistic.ILexicalData;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
import org.carrot2.text.preprocessing.pipeline.IPreprocessingPipeline;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.DoubleRange;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

@Bindable(prefix="STCClusteringAlgorithm", inherit={CommonAttributes.class})
@Label(value="STC Clustering")
public final class STCClusteringAlgorithm
extends ProcessingComponentBase
implements IClusteringAlgorithm {
    private static final String BASE_CLUSTERS = "Base clusters";
    private static final String MERGING_AND_OUTPUT = "Merging and output";
    @Processing
    @Input
    @Internal
    @Attribute(key="query", inherit=true)
    public String query = null;
    @Processing
    @Input
    @Required
    @Internal
    @Attribute(key="documents", inherit=true)
    public List<Document> documents;
    @Processing
    @Output
    @Internal
    @Attribute(key="clusters", inherit=true)
    public List<Cluster> clusters = null;
    @Processing
    @Input
    @Attribute
    @IntRange(min=2)
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Word filtering")
    public int ignoreWordIfInFewerDocs = 2;
    @Processing
    @Input
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Word filtering")
    public double ignoreWordIfInHigherDocsPercent = 0.9;
    @Processing
    @Input
    @Attribute
    @DoubleRange(min=0.0, max=10.0)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value="Base clusters")
    public double minBaseClusterScore = 2.0;
    @Processing
    @Input
    @Attribute
    @IntRange(min=2)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value="Base clusters")
    public int maxBaseClusters = 300;
    @Processing
    @Input
    @Attribute
    @IntRange(min=2, max=20)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value="Base clusters")
    public int minBaseClusterSize = 2;
    @Processing
    @Input
    @Attribute
    @IntRange(min=1)
    @Level(value=AttributeLevel.BASIC)
    @Group(value="Merging and output")
    public int maxClusters = 15;
    @Processing
    @Input
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value="Merging and output")
    public double mergeThreshold = 0.6;
    @Processing
    @Input
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value="Labels")
    public double maxPhraseOverlap = 0.6;
    @Processing
    @Input
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value="Labels")
    public double mostGeneralPhraseCoverage = 0.5;
    @Processing
    @Input
    @Attribute
    @IntRange(min=1)
    @Level(value=AttributeLevel.BASIC)
    @Group(value="Labels")
    public int maxDescPhraseLength = 4;
    @Processing
    @Input
    @Attribute
    @IntRange(min=1)
    @Level(value=AttributeLevel.BASIC)
    @Group(value="Labels")
    public int maxPhrases = 3;
    @Processing
    @Input
    @Attribute
    @DoubleRange(min=0.0)
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Base clusters")
    public double singleTermBoost = 0.5;
    @Processing
    @Input
    @Attribute
    @IntRange(min=1)
    @Level(value=AttributeLevel.BASIC)
    @Group(value="Base clusters")
    public int optimalPhraseLength = 3;
    @Processing
    @Input
    @Attribute
    @DoubleRange(min=0.5)
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Base clusters")
    public double optimalPhraseLengthDev = 2.0;
    @Processing
    @Input
    @Attribute
    @DoubleRange(min=0.0)
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Base clusters")
    public double documentCountBoost = 1.0;
    @Init
    @Input
    @Attribute
    @Internal
    @ImplementingClasses(classes={BasicPreprocessingPipeline.class}, strict=false)
    @Level(value=AttributeLevel.ADVANCED)
    public IPreprocessingPipeline preprocessingPipeline = new BasicPreprocessingPipeline();
    @Input
    @Processing
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Label(value="Size-Score sorting ratio")
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Clusters")
    public double scoreWeight = 1.0;
    @Input
    @Processing
    @Attribute
    @Label(value="Merge all stem-equivalent phrases when discovering base clusters")
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Clusters")
    public boolean mergeStemEquivalentBaseClusters = true;
    public final MultilingualClustering multilingualClustering = new MultilingualClustering();
    PreprocessingContext context;
    GeneralizedSuffixTree.SequenceBuilder sb;
    private static final Predicate<PhraseCandidate> notSelected = new Predicate<PhraseCandidate>(){

        public boolean apply(PhraseCandidate p) {
            return !p.selected;
        }
    };

    @Override
    public void process() throws ProcessingException {
        List<Document> originalDocuments = this.documents;
        this.clusters = this.multilingualClustering.process(this.documents, new IMonolingualClusteringAlgorithm(){

            @Override
            public List<Cluster> process(List<Document> documents, LanguageCode language) {
                STCClusteringAlgorithm.this.documents = documents;
                STCClusteringAlgorithm.this.cluster(language);
                return STCClusteringAlgorithm.this.clusters;
            }
        });
        this.documents = originalDocuments;
    }

    private void cluster(LanguageCode language) {
        this.clusters = new ArrayList<Cluster>();
        this.context = this.preprocessingPipeline.preprocess(this.documents, this.query, language);
        this.sb = new GeneralizedSuffixTree.SequenceBuilder();
        int[] tokenIndex = this.context.allTokens.wordIndex;
        short[] tokenType = this.context.allTokens.type;
        for (int i = 0; i < tokenIndex.length; ++i) {
            if (tokenIndex[i] == -1) {
                if ((tokenType[i] & 0xA00) == 0) continue;
                this.sb.endDocument();
                continue;
            }
            int s = i;
            while (tokenIndex[i + 1] != -1) {
                ++i;
            }
            int phraseLength = 1 + i - s;
            if (phraseLength < 1) continue;
            this.sb.addPhrase(tokenIndex, s, phraseLength);
        }
        this.sb.buildSuffixTree();
        List<ClusterCandidate> baseClusters = this.createBaseClusters(this.sb);
        ArrayList<ClusterCandidate> mergedClusters = this.createMergedClusters(baseClusters);
        this.postProcessing(mergedClusters);
    }

    @Override
    public void afterProcessing() {
        super.afterProcessing();
        this.context = null;
        this.sb = null;
    }

    private List<ClusterCandidate> createBaseClusters(GeneralizedSuffixTree.SequenceBuilder sb) {
        final ArrayList candidates = Lists.newArrayList();
        new GeneralizedSuffixTree.Visitor(sb, this.minBaseClusterSize){

            @Override
            protected void visit(int state, int cardinality, BitSet documents, IntStack path) {
                assert (cardinality >= STCClusteringAlgorithm.this.minBaseClusterSize);
                if (!STCClusteringAlgorithm.this.checkAcceptablePhrase(path)) {
                    return;
                }
                int effectivePhraseLen = STCClusteringAlgorithm.this.effectivePhraseLength(path);
                if (effectivePhraseLen == 0) {
                    return;
                }
                float score = STCClusteringAlgorithm.this.baseClusterScore(effectivePhraseLen, cardinality);
                candidates.add(new ClusterCandidate(path.toArray(), (BitSet)documents.clone(), cardinality, score));
            }
        }.visit();
        if (this.mergeStemEquivalentBaseClusters) {
            this.mergeStemEquivalentBaseClusters(sb, candidates);
        }
        int j = 0;
        int max = candidates.size();
        for (int i = 0; i < max; ++i) {
            ClusterCandidate cc = (ClusterCandidate)candidates.get(i);
            if (!((double)cc.score >= this.minBaseClusterScore)) continue;
            candidates.set(j++, cc);
        }
        candidates.subList(j, candidates.size()).clear();
        Collections.sort(candidates, new Comparator<ClusterCandidate>(){

            @Override
            public int compare(ClusterCandidate c1, ClusterCandidate c2) {
                return -Float.compare(c1.score, c2.score);
            }
        });
        j = 0;
        ILexicalData lexicalData = this.context.language.getLexicalData();
        int max2 = candidates.size();
        for (int i = 0; i < max2 && j < this.maxBaseClusters; ++i) {
            ClusterCandidate cc = (ClusterCandidate)candidates.get(i);
            assert (cc.phrases.size() == 1);
            if (lexicalData.isStopLabel(this.buildLabel(cc.phrases.get(0)))) continue;
            candidates.set(j++, cc);
        }
        if (j < candidates.size()) {
            candidates.subList(j, candidates.size()).clear();
            assert (candidates.size() == j);
        }
        return candidates;
    }

    private void mergeStemEquivalentBaseClusters(GeneralizedSuffixTree.SequenceBuilder sb, List<ClusterCandidate> candidates) {
        HashMap merged = Maps.newHashMap();
        int j = 0;
        int max = candidates.size();
        for (int i = 0; i < max; ++i) {
            ClusterCandidate cc = candidates.get(i);
            candidates.set(j, cc);
            assert (cc.phrases.size() == 1);
            int[] stemIndices = this.context.allWords.stemIndex;
            int[] phraseWords = cc.phrases.get(0);
            IntArrayList stemList = new IntArrayList(phraseWords.length);
            for (int seqIndex : phraseWords) {
                int termIndex = sb.input.get(seqIndex);
                stemList.add(stemIndices[termIndex]);
            }
            ClusterCandidate equivalent = (ClusterCandidate)merged.get(stemList);
            if (equivalent == null) {
                merged.put(stemList, cc);
                ++j;
                continue;
            }
            if (equivalent.cardinality < cc.cardinality) {
                equivalent.cardinality = cc.cardinality;
                equivalent.phrases.add(0, cc.phrases.get(0));
            } else {
                equivalent.phrases.add(cc.phrases.get(0));
            }
            equivalent.documents.or(cc.documents);
        }
        candidates.subList(j, candidates.size()).clear();
        IntStack scratch = new IntStack();
        for (ClusterCandidate cc : candidates) {
            if (cc.phrases.size() <= 1) continue;
            cc.cardinality = (int)cc.documents.cardinality();
            scratch.buffer = cc.phrases.get(0);
            scratch.elementsCount = scratch.buffer.length;
            cc.score = this.baseClusterScore(this.effectivePhraseLength(scratch), cc.cardinality);
            cc.phrases.subList(1, cc.phrases.size()).clear();
        }
    }

    private ArrayList<ClusterCandidate> createMergedClusters(List<ClusterCandidate> baseClusters) {
        int END = -1;
        IntStack neighborList = new IntStack();
        neighborList.push(-1);
        int[] neighbors = new int[baseClusters.size()];
        float m = (float)this.mergeThreshold;
        for (int i = 0; i < baseClusters.size(); ++i) {
            for (int j = i + 1; j < baseClusters.size(); ++j) {
                ClusterCandidate c1 = baseClusters.get(i);
                ClusterCandidate c2 = baseClusters.get(j);
                float a = c1.cardinality;
                float b = c2.cardinality;
                float c = BitSet.intersectionCount((BitSet)c1.documents, (BitSet)c2.documents);
                if (!(c / a > m) || !(c / b > m)) continue;
                neighborList.push(neighbors[i], j);
                neighbors[i] = neighborList.size() - 2;
                neighborList.push(neighbors[j], i);
                neighbors[j] = neighborList.size() - 2;
            }
        }
        int NO_INDEX = -1;
        int[] merged = new int[baseClusters.size()];
        Arrays.fill(merged, -1);
        ArrayList mergedClusters = Lists.newArrayListWithCapacity((int)baseClusters.size());
        IntStack stack = new IntStack(baseClusters.size());
        IntStack mergeList = new IntStack(baseClusters.size());
        int mergedIndex = 0;
        for (int v = 0; v < baseClusters.size(); ++v) {
            if (merged[v] != -1) continue;
            stack.push(v);
            while (stack.size() > 0) {
                int c = stack.pop();
                assert (merged[c] == -1 || merged[c] == mergedIndex);
                if (merged[c] == mergedIndex) continue;
                merged[c] = mergedIndex;
                mergeList.push(c);
                int i = neighbors[c];
                while (neighborList.get(i) != -1) {
                    int neighbor = neighborList.get(i + 1);
                    if (merged[neighbor] == -1) {
                        stack.push(neighbor);
                    } else assert (merged[neighbor] == mergedIndex);
                    i = neighborList.get(i);
                }
            }
            ++mergedIndex;
            mergedClusters.add(this.merge(mergeList, baseClusters));
            mergeList.clear();
        }
        Collections.sort(mergedClusters, new Comparator<ClusterCandidate>(){

            @Override
            public int compare(ClusterCandidate c1, ClusterCandidate c2) {
                if (c1.score < c2.score) {
                    return 1;
                }
                if (c1.score > c2.score) {
                    return -1;
                }
                if (c1.cardinality < c2.cardinality) {
                    return 1;
                }
                if (c1.cardinality > c2.cardinality) {
                    return -1;
                }
                return 0;
            }
        });
        if (mergedClusters.size() > this.maxClusters) {
            mergedClusters.subList(this.maxClusters, mergedClusters.size()).clear();
        }
        return mergedClusters;
    }

    private ClusterCandidate merge(IntStack mergeList, List<ClusterCandidate> baseClusters) {
        assert (mergeList.size() > 0);
        ClusterCandidate result = new ClusterCandidate();
        for (int i = 0; i < mergeList.size(); ++i) {
            ClusterCandidate cc = baseClusters.get(mergeList.get(i));
            result.documents.or(cc.documents);
            result.score += cc.score;
        }
        result.cardinality = (int)result.documents.cardinality();
        ArrayList<PhraseCandidate> phrases = new ArrayList<PhraseCandidate>(mergeList.size());
        for (int i = 0; i < mergeList.size(); ++i) {
            ClusterCandidate cc = baseClusters.get(mergeList.get(i));
            float coverage = (float)cc.cardinality / (float)result.cardinality;
            phrases.add(new PhraseCandidate(cc, coverage));
        }
        this.markSubSuperPhrases(phrases);
        Collections2.filter(phrases, notSelected).clear();
        this.markOverlappingPhrases(phrases);
        Collections2.filter(phrases, notSelected).clear();
        Collections.sort(phrases, new Comparator<PhraseCandidate>(){

            @Override
            public int compare(PhraseCandidate p1, PhraseCandidate p2) {
                if (p1.coverage < p2.coverage) {
                    return 1;
                }
                if (p1.coverage > p2.coverage) {
                    return -1;
                }
                return 0;
            }
        });
        int max = this.maxPhrases;
        for (PhraseCandidate p : phrases) {
            if (max-- <= 0) break;
            result.phrases.add(p.cluster.phrases.get(0));
        }
        return result;
    }

    private void markSubSuperPhrases(ArrayList<PhraseCandidate> phrases) {
        int i;
        int max = phrases.size();
        IntStack words = new IntStack(this.maxDescPhraseLength * phrases.size());
        IntStack offsets = new IntStack(phrases.size() * 2);
        for (PhraseCandidate p : phrases) {
            this.appendWords(words, offsets, p);
        }
        for (i = 0; i < max; ++i) {
            for (int j = 0; j < max; ++j) {
                int index;
                if (i == j || (index = STCClusteringAlgorithm.indexOf(words.buffer, offsets.get(2 * i), offsets.get(2 * i + 1), words.buffer, offsets.get(2 * j), offsets.get(2 * j + 1))) < 0) continue;
                phrases.get((int)i).mostGeneral = false;
                phrases.get((int)j).mostSpecific = false;
            }
        }
        for (i = 0; i < max; ++i) {
            PhraseCandidate a = phrases.get(i);
            if (!a.mostGeneral) continue;
            for (int j = 0; j < max; ++j) {
                int index;
                PhraseCandidate b = phrases.get(j);
                if (i == j || !b.mostSpecific || (index = STCClusteringAlgorithm.indexOf(words.buffer, offsets.get(2 * j), offsets.get(2 * j + 1), words.buffer, offsets.get(2 * i), offsets.get(2 * i + 1))) < 0 || !((double)(a.coverage - b.coverage) < this.mostGeneralPhraseCoverage)) continue;
                a.selected = false;
                j = max;
            }
        }
        for (PhraseCandidate p : phrases) {
            if (p.mostGeneral || p.mostSpecific) continue;
            p.selected = false;
        }
    }

    private void markOverlappingPhrases(ArrayList<PhraseCandidate> phrases) {
        int max = phrases.size();
        IntStack words = new IntStack(this.maxDescPhraseLength * phrases.size());
        IntStack offsets = new IntStack(phrases.size() * 2);
        for (PhraseCandidate p : phrases) {
            this.appendUniqueWords(words, offsets, p);
        }
        for (int i = 0; i < max; ++i) {
            for (int j = i + 1; j < max; ++j) {
                PhraseCandidate a = phrases.get(i);
                PhraseCandidate b = phrases.get(j);
                int a_words = offsets.get(2 * i + 1);
                int b_words = offsets.get(2 * j + 1);
                float intersection = STCClusteringAlgorithm.computeIntersection(words.buffer, offsets.get(2 * i), a_words, words.buffer, offsets.get(2 * j), b_words);
                if ((double)(intersection / (float)b_words) > this.maxPhraseOverlap && b.coverage < a.coverage) {
                    b.selected = false;
                }
                if (!((double)(intersection / (float)a_words) > this.maxPhraseOverlap) || !(a.coverage < b.coverage)) continue;
                a.selected = false;
            }
        }
    }

    static int computeIntersection(int[] a, int aPos, int aLength, int[] b, int bPos, int bLength) {
        int maxa = aPos + aLength;
        int maxb = bPos + bLength;
        int common = 0;
        while (aPos < maxa && bPos < maxb) {
            int ea = a[aPos];
            int eb = b[bPos];
            if (ea >= eb) {
                ++bPos;
            }
            if (ea <= eb) {
                ++aPos;
            }
            if (ea != eb) continue;
            ++common;
        }
        return common;
    }

    private void appendUniqueWords(IntStack words, IntStack offsets, PhraseCandidate p) {
        assert (p.cluster.phrases.size() == 1);
        int start = words.size();
        int[] phraseIndices = p.cluster.phrases.get(0);
        short[] tokenTypes = this.context.allWords.type;
        for (int i = 0; i < phraseIndices.length; i += 2) {
            for (int j = phraseIndices[i]; j <= phraseIndices[i + 1]; ++j) {
                int termIndex = this.sb.input.get(j);
                if (TokenTypeUtils.isCommon(tokenTypes[termIndex])) continue;
                words.push(termIndex);
            }
        }
        Arrays.sort(words.buffer, start, words.size());
        int j = start;
        for (int i = start + 1; i < words.size(); ++i) {
            if (words.buffer[j] == words.buffer[i]) continue;
            words.buffer[++j] = words.buffer[i];
        }
        words.elementsCount = j + 1;
        offsets.push(start, words.size() - start);
    }

    private void appendWords(IntStack words, IntStack offsets, PhraseCandidate p) {
        int start = words.size();
        int[] phraseIndices = p.cluster.phrases.get(0);
        short[] tokenTypes = this.context.allWords.type;
        for (int i = 0; i < phraseIndices.length; i += 2) {
            for (int j = phraseIndices[i]; j <= phraseIndices[i + 1]; ++j) {
                int termIndex = this.sb.input.get(j);
                if (TokenTypeUtils.isCommon(tokenTypes[termIndex])) continue;
                words.push(termIndex);
            }
        }
        offsets.push(start, words.size() - start);
    }

    private void postProcessing(List<ClusterCandidate> clusters) {
        BitSet all = new BitSet((long)this.documents.size());
        ArrayList docs = Lists.newArrayListWithCapacity((int)this.documents.size());
        ArrayList phrases = Lists.newArrayListWithCapacity((int)3);
        for (ClusterCandidate c : clusters) {
            Cluster c2 = new Cluster();
            c2.addPhrases(this.collectPhrases(phrases, c));
            c2.addDocuments(this.collectDocuments(docs, c.documents));
            c2.setScore(Double.valueOf(c.score));
            this.clusters.add(c2);
            all.or(c.documents);
            docs.clear();
            phrases.clear();
        }
        Collections.sort(this.clusters, Cluster.byReversedWeightedScoreAndSizeComparator(this.scoreWeight));
        Cluster.appendOtherTopics(this.documents, this.clusters);
    }

    private List<String> collectPhrases(List<String> l, ClusterCandidate c) {
        assert (l != null);
        for (int[] phraseIndexes : c.phrases) {
            l.add(this.buildLabel(phraseIndexes));
        }
        return l;
    }

    private List<Document> collectDocuments(List<Document> l, BitSet bitset) {
        if (l == null) {
            l = Lists.newArrayListWithCapacity((int)((int)bitset.cardinality()));
        }
        BitSetIterator i = bitset.iterator();
        int d = i.nextSetBit();
        while (d >= 0) {
            l.add(this.documents.get(d));
            d = i.nextSetBit();
        }
        return l;
    }

    private String buildLabel(int[] phraseIndices) {
        int termsCount = 0;
        for (int j = 0; j < phraseIndices.length; j += 2) {
            termsCount += phraseIndices[j + 1] - phraseIndices[j] + 1;
        }
        boolean[] stopwords = new boolean[termsCount];
        char[][] images = new char[termsCount][];
        short[] tokenTypes = this.context.allWords.type;
        int k = 0;
        for (int i = 0; i < phraseIndices.length; i += 2) {
            int j = phraseIndices[i];
            while (j <= phraseIndices[i + 1]) {
                int termIndex = this.sb.input.get(j);
                images[k] = this.context.allWords.image[termIndex];
                stopwords[k] = TokenTypeUtils.isCommon(tokenTypes[termIndex]);
                ++j;
                ++k;
            }
        }
        return LabelFormatter.format(images, stopwords, this.context.language.getLanguageCode().usesSpaceDelimiters());
    }

    private String toString(PhraseCandidate c) {
        return String.format(Locale.ENGLISH, "%3.2f %s %s %s %s", Float.valueOf(c.coverage), this.buildLabel(c.cluster.phrases.get(0)), c.selected ? "S" : "", c.mostGeneral ? "MG" : "", c.mostSpecific ? "MS" : "");
    }

    private String buildDebugLabel(int[] phraseIndices) {
        StringBuilder b = new StringBuilder();
        String sep = "";
        int k = 0;
        short[] tokenTypes = this.context.allWords.type;
        for (int i = 0; i < phraseIndices.length; i += 2) {
            int j = phraseIndices[i];
            while (j <= phraseIndices[i + 1]) {
                b.append(sep);
                int termIndex = this.sb.input.get(j);
                b.append(this.context.allWords.image[termIndex]);
                if (TokenTypeUtils.isCommon(tokenTypes[termIndex])) {
                    b.append("[S]");
                }
                sep = " ";
                ++j;
                ++k;
            }
            sep = "_";
        }
        return b.toString();
    }

    final boolean checkAcceptablePhrase(IntStack path) {
        int j;
        assert (path.size() > 0);
        short[] tokenTypes = this.context.allWords.type;
        int[] terms = this.sb.input.buffer;
        if (TokenTypeUtils.isCommon(tokenTypes[terms[path.get(0)]])) {
            return false;
        }
        int i = path.get(path.size() - 2);
        int k = j = path.get(path.size() - 1);
        while (i <= j && TokenTypeUtils.isCommon(tokenTypes[terms[j]])) {
            --j;
        }
        if (j < i) {
            return false;
        }
        if (j < k) {
            path.buffer[path.size() - 1] = j;
        }
        int termsCount = 0;
        for (j = 0; j < path.size(); j += 2) {
            termsCount += path.get(j + 1) - path.get(j) + 1;
        }
        return termsCount <= this.maxDescPhraseLength;
    }

    final int effectivePhraseLength(IntStack path) {
        int[] terms = this.sb.input.buffer;
        int lower = this.ignoreWordIfInFewerDocs;
        int upper = (int)(this.ignoreWordIfInHigherDocsPercent * (double)this.documents.size());
        int effectivePhraseLen = 0;
        for (int i = 0; i < path.size(); i += 2) {
            for (int j = path.get(i); j <= path.get(i + 1); ++j) {
                int docCount;
                int termIndex = terms[j];
                if (TokenTypeUtils.isCommon(this.context.allWords.type[termIndex]) || (docCount = this.context.allWords.tfByDocument[termIndex].length / 2) < lower || docCount > upper) continue;
                ++effectivePhraseLen;
            }
        }
        return effectivePhraseLen;
    }

    final float baseClusterScore(int phraseLength, int documentCount) {
        double boost;
        if (phraseLength == 1 && this.singleTermBoost > 0.0) {
            boost = this.singleTermBoost;
        } else {
            int tmp = phraseLength - this.optimalPhraseLength;
            boost = Math.exp((double)(-tmp * tmp) / (2.0 * this.optimalPhraseLengthDev * this.optimalPhraseLengthDev));
        }
        return (float)(boost * ((double)documentCount * this.documentCountBoost));
    }

    private static int indexOf(int[] source, int sourceOffset, int sourceCount, int[] target, int targetOffset, int targetCount) {
        if (targetCount == 0) {
            return 0;
        }
        int first = target[targetOffset];
        int max = sourceOffset + (sourceCount - targetCount);
        for (int i = sourceOffset; i <= max; ++i) {
            if (source[i] != first) {
                while (++i <= max && source[i] != first) {
                }
            }
            if (i > max) continue;
            int j = i + 1;
            int end = j + targetCount - 1;
            int k = targetOffset + 1;
            while (j < end && source[j] == target[k]) {
                ++j;
                ++k;
            }
            if (j != end) continue;
            return i - sourceOffset;
        }
        return -1;
    }

    private static final class PhraseCandidate {
        final ClusterCandidate cluster;
        final float coverage;
        boolean selected = true;
        boolean mostGeneral = true;
        boolean mostSpecific = true;

        PhraseCandidate(ClusterCandidate c, float coverage) {
            this.cluster = c;
            this.coverage = coverage;
        }
    }
}

