/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.io.IOException;
import org.apache.spark.ml.UnaryTransformer;
import org.apache.spark.ml.feature.RegexTokenizer$;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.ParamValidators$;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.DefaultParamsWritable$class;
import org.apache.spark.ml.util.Identifiable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable$class;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.util.matching.Regex;

@ScalaSignature(bytes="\u0006\u0001\u0005\u001dg\u0001B\u0001\u0003\u00015\u0011aBU3hKb$vn[3oSj,'O\u0003\u0002\u0004\t\u00059a-Z1ukJ,'BA\u0003\u0007\u0003\tiGN\u0003\u0002\b\u0011\u0005)1\u000f]1sW*\u0011\u0011BC\u0001\u0007CB\f7\r[3\u000b\u0003-\t1a\u001c:h\u0007\u0001\u00192\u0001\u0001\b+!\u0015y\u0001C\u0005\u000f)\u001b\u0005!\u0011BA\t\u0005\u0005A)f.\u0019:z)J\fgn\u001d4pe6,'\u000f\u0005\u0002\u001439\u0011AcF\u0007\u0002+)\ta#A\u0003tG\u0006d\u0017-\u0003\u0002\u0019+\u00051\u0001K]3eK\u001aL!AG\u000e\u0003\rM#(/\u001b8h\u0015\tAR\u0003E\u0002\u001eKIq!AH\u0012\u000f\u0005}\u0011S\"\u0001\u0011\u000b\u0005\u0005b\u0011A\u0002\u001fs_>$h(C\u0001\u0017\u0013\t!S#A\u0004qC\u000e\\\u0017mZ3\n\u0005\u0019:#aA*fc*\u0011A%\u0006\t\u0003S\u0001i\u0011A\u0001\t\u0003W9j\u0011\u0001\f\u0006\u0003[\u0011\tA!\u001e;jY&\u0011q\u0006\f\u0002\u0016\t\u00164\u0017-\u001e7u!\u0006\u0014\u0018-\\:Xe&$\u0018M\u00197f\u0011!\t\u0004A!b\u0001\n\u0003\u0012\u0014aA;jIV\t!\u0003K\u00021ii\u0002\"!\u000e\u001d\u000e\u0003YR!a\u000e\u0004\u0002\u0015\u0005tgn\u001c;bi&|g.\u0003\u0002:m\t)1+\u001b8dK\u0006\n1(A\u00032]Qr\u0003\u0007\u0003\u0005>\u0001\t\u0005\t\u0015!\u0003\u0013\u0003\u0011)\u0018\u000e\u001a\u0011)\u0007q\"$\bC\u0003A\u0001\u0011\u0005\u0011)\u0001\u0004=S:LGO\u0010\u000b\u0003Q\tCQ!M 
A\u0002IA3A\u0011\u001b;Q\ryDG\u000f\u0005\u0006\u0001\u0002!\tA\u0012\u000b\u0002Q!\u001aQ\t\u000e\u001e\t\u000f%\u0003!\u0019!C\u0001\u0015\u0006qQ.\u001b8U_.,g\u000eT3oORDW#A&\u0011\u00051{U\"A'\u000b\u00059#\u0011!\u00029be\u0006l\u0017B\u0001)N\u0005!Ie\u000e\u001e)be\u0006l\u0007f\u0001%5u!11\u000b\u0001Q\u0001\n-\u000bq\"\\5o)>\\WM\u001c'f]\u001e$\b\u000e\t\u0015\u0004%RR\u0004\"\u0002,\u0001\t\u00039\u0016!E:fi6Kg\u000eV8lK:dUM\\4uQR\u0011\u0001,W\u0007\u0002\u0001!)!,\u0016a\u00017\u0006)a/\u00197vKB\u0011A\u0003X\u0005\u0003;V\u00111!\u00138uQ\r)FG\u000f\u0005\u0006A\u0002!\t!Y\u0001\u0012O\u0016$X*\u001b8U_.,g\u000eT3oORDW#A.)\u0007}#$\bC\u0004e\u0001\t\u0007I\u0011A3\u0002\t\u001d\f\u0007o]\u000b\u0002MB\u0011AjZ\u0005\u0003Q6\u0013ABQ8pY\u0016\fg\u000eU1sC6D3a\u0019\u001b;\u0011\u0019Y\u0007\u0001)A\u0005M\u0006)q-\u00199tA!\u001a!\u000e\u000e\u001e\t\u000b9\u0004A\u0011A8\u0002\u000fM,GoR1qgR\u0011\u0001\f\u001d\u0005\u000656\u0004\r!\u001d\t\u0003)IL!a]\u000b\u0003\u000f\t{w\u000e\\3b]\"\u001aQ\u000e\u000e\u001e\t\u000bY\u0004A\u0011A<\u0002\u000f\u001d,GoR1qgV\t\u0011\u000fK\u0002viiBqA\u001f\u0001C\u0002\u0013\u000510A\u0004qCR$XM\u001d8\u0016\u0003q\u00042\u0001T?\u0013\u0013\tqXJA\u0003QCJ\fW\u000eK\u0002ziiBq!a\u0001\u0001A\u0003%A0\u0001\u0005qCR$XM\u001d8!Q\u0011\t\t\u0001\u000e\u001e\t\u000f\u0005%\u0001\u0001\"\u0001\u0002\f\u0005Q1/\u001a;QCR$XM\u001d8\u0015\u0007a\u000bi\u0001\u0003\u0004[\u0003\u000f\u0001\rA\u0005\u0015\u0005\u0003\u000f!$\b\u0003\u0004\u0002\u0014\u0001!\tAM\u0001\u000bO\u0016$\b+\u0019;uKJt\u0007\u0006BA\tiiB\u0001\"!\u0007\u0001\u0005\u0004%)!Z\u0001\fi>dun^3sG\u0006\u001cX\rK\u0003\u0002\u0018Q\ni\"\t\u0002\u0002 
\u0005)\u0011G\f\u001c/a!9\u00111\u0005\u0001!\u0002\u001b1\u0017\u0001\u0004;p\u0019><XM]2bg\u0016\u0004\u0003&BA\u0011i\u0005u\u0001bBA\u0015\u0001\u0011\u0005\u00111F\u0001\u000fg\u0016$Hk\u001c'po\u0016\u00148-Y:f)\rA\u0016Q\u0006\u0005\u00075\u0006\u001d\u0002\u0019A9)\u000b\u0005\u001dB'!\b\t\r\u0005M\u0002\u0001\"\u0001x\u000399W\r\u001e+p\u0019><XM]2bg\u0016DS!!\r5\u0003;Aq!!\u000f\u0001\t#\nY$A\nde\u0016\fG/\u001a+sC:\u001chm\u001c:n\rVt7-\u0006\u0002\u0002>A)A#a\u0010\u00139%\u0019\u0011\u0011I\u000b\u0003\u0013\u0019+hn\u0019;j_:\f\u0004bBA#\u0001\u0011E\u0013qI\u0001\u0012m\u0006d\u0017\u000eZ1uK&s\u0007/\u001e;UsB,G\u0003BA%\u0003\u001f\u00022\u0001FA&\u0013\r\ti%\u0006\u0002\u0005+:LG\u000f\u0003\u0005\u0002R\u0005\r\u0003\u0019AA*\u0003%Ig\u000e];u)f\u0004X\r\u0005\u0003\u0002V\u0005}SBAA,\u0015\u0011\tI&a\u0017\u0002\u000bQL\b/Z:\u000b\u0007\u0005uc!A\u0002tc2LA!!\u0019\u0002X\tAA)\u0019;b)f\u0004X\rC\u0004\u0002f\u0001!\t&a\u001a\u0002\u001d=,H\u000f];u\t\u0006$\u0018\rV=qKV\u0011\u00111\u000b\u0005\b\u0003W\u0002A\u0011IA7\u0003\u0011\u0019w\u000e]=\u0015\u0007!\ny\u0007\u0003\u0005\u0002r\u0005%\u0004\u0019AA:\u0003\u0015)\u0007\u0010\u001e:b!\ra\u0015QO\u0005\u0004\u0003oj%\u0001\u0003)be\u0006lW*\u00199)\u000b\u0005%D'a\u001f\"\u0005\u0005u\u0014!B\u0019/i9\n\u0004f\u0001\u00015u\u001d9\u00111\u0011\u0002\t\u0002\u0005\u0015\u0015A\u0004*fO\u0016DHk\\6f]&TXM\u001d\t\u0004S\u0005\u001deAB\u0001\u0003\u0011\u0003\tIi\u0005\u0005\u0002\b\u0006-\u0015\u0011SAL!\r!\u0012QR\u0005\u0004\u0003\u001f+\"AB!osJ+g\r\u0005\u0003,\u0003'C\u0013bAAKY\t)B)\u001a4bk2$\b+\u0019:b[N\u0014V-\u00193bE2,\u0007c\u0001\u000b\u0002\u001a&\u0019\u00111T\u000b\u0003\u0019M+'/[1mSj\f'\r\\3\t\u000f\u0001\u000b9\t\"\u0001\u0002 
R\u0011\u0011Q\u0011\u0005\t\u0003G\u000b9\t\"\u0011\u0002&\u0006!An\\1e)\rA\u0013q\u0015\u0005\b\u0003S\u000b\t\u000b1\u0001\u0013\u0003\u0011\u0001\u0018\r\u001e5)\u000b\u0005\u0005F'!\b\t\u0015\u0005=\u0016qQA\u0001\n\u0013\t\t,A\u0006sK\u0006$'+Z:pYZ,GCAAZ!\u0011\t),a0\u000e\u0005\u0005]&\u0002BA]\u0003w\u000bA\u0001\\1oO*\u0011\u0011QX\u0001\u0005U\u00064\u0018-\u0003\u0003\u0002B\u0006]&AB(cU\u0016\u001cG\u000fK\u0003\u0002\bR\ni\u0002K\u0003\u0002\u0002R\ni\u0002")
/*
 * Regex-based tokenizer: a UnaryTransformer from String to Seq<String>.
 *
 * Parameters and their defaults (registered in the constructor):
 *   minTokenLength = 1      tokens shorter than this are dropped
 *   gaps           = true   true: split the input on the pattern;
 *                           false: return the pattern's matches as tokens
 *   pattern        = "\\s+" regex used for tokenizing
 *   toLowercase    = true   lowercase the input before tokenizing
 *
 * This source is decompiler output of a Scala class; the anonymous
 * "new Serializable(...)" bodies are the decompiler's rendering of Scala
 * function objects together with the values they capture.
 */
public class RegexTokenizer
extends UnaryTransformer<String, Seq<String>, RegexTokenizer>
implements DefaultParamsWritable {
    private final String uid;
    private final IntParam minTokenLength;
    private final BooleanParam gaps;
    private final Param<String> pattern;
    private final BooleanParam toLowercase;

    /** Returns the reader supplied by the companion object {@code RegexTokenizer$}. */
    public static MLReader<RegexTokenizer> read() {
        return RegexTokenizer$.MODULE$.read();
    }

    /**
     * Loads an instance through the companion object {@code RegexTokenizer$}.
     *
     * @param string value forwarded unchanged to the companion's {@code load}
     */
    public static RegexTokenizer load(String string) {
        return RegexTokenizer$.MODULE$.load(string);
    }

    /** Returns the writer produced by the {@code DefaultParamsWritable} trait implementation. */
    @Override
    public MLWriter write() {
        return DefaultParamsWritable$class.write(this);
    }

    /**
     * Saves this instance by delegating to the {@code MLWritable} trait implementation.
     *
     * @param path destination forwarded unchanged to the trait implementation
     * @throws IOException declared by the overridden method
     */
    @Override
    public void save(String path) throws IOException {
        MLWritable$class.save(this, path);
    }

    /** Returns the identifier this instance was constructed with. */
    @Override
    public String uid() {
        return this.uid;
    }

    /** Param: minimum token length (>= 0); default 1. */
    public IntParam minTokenLength() {
        return this.minTokenLength;
    }

    /** Sets {@link #minTokenLength()} and returns this instance for chaining. */
    public RegexTokenizer setMinTokenLength(int value) {
        return (RegexTokenizer)this.set(this.minTokenLength(), BoxesRunTime.boxToInteger((int)value));
    }

    /** Returns the current value of {@link #minTokenLength()}. */
    public int getMinTokenLength() {
        return BoxesRunTime.unboxToInt((Object)this.$(this.minTokenLength()));
    }

    /** Param: whether the regex matches gaps ({@code true}) or tokens ({@code false}); default true. */
    public BooleanParam gaps() {
        return this.gaps;
    }

    /** Sets {@link #gaps()} and returns this instance for chaining. */
    public RegexTokenizer setGaps(boolean value) {
        return (RegexTokenizer)this.set(this.gaps(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    /** Returns the current value of {@link #gaps()}. */
    public boolean getGaps() {
        return BoxesRunTime.unboxToBoolean((Object)this.$(this.gaps()));
    }

    /** Param: regex pattern used for tokenizing; default {@code "\\s+"}. */
    public Param<String> pattern() {
        return this.pattern;
    }

    /** Sets {@link #pattern()} and returns this instance for chaining. */
    public RegexTokenizer setPattern(String value) {
        return (RegexTokenizer)this.set(this.pattern(), value);
    }

    /** Returns the current value of {@link #pattern()}. */
    public String getPattern() {
        return this.$(this.pattern());
    }

    /** Param: whether to convert all characters to lowercase before tokenizing; default true. */
    public final BooleanParam toLowercase() {
        return this.toLowercase;
    }

    /** Sets {@link #toLowercase()} and returns this instance for chaining. */
    public RegexTokenizer setToLowercase(boolean value) {
        return (RegexTokenizer)this.set(this.toLowercase(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    /** Returns the current value of {@link #toLowercase()}. */
    public boolean getToLowercase() {
        return BoxesRunTime.unboxToBoolean((Object)this.$(this.toLowercase()));
    }

    /**
     * Builds the String -> Seq<String> tokenizing function.
     *
     * <p>The regex and all parameter values are resolved once here and captured by
     * the returned function, instead of being rebuilt and re-read on every call.
     * Consequences: the pattern is turned into a {@code Regex} when this method is
     * called (not when the first string is processed), the returned function uses
     * the parameter values in force at creation time, and it does not hold a
     * reference to this transformer.
     *
     * @return function that optionally lowercases its input, tokenizes it by
     *         splitting on the pattern (gaps) or collecting its matches (tokens),
     *         and keeps tokens whose length is at least {@code minTokenLength}
     */
    @Override
    public Function1<String, Seq<String>> createTransformFunc() {
        Regex re = new StringOps(Predef$.MODULE$.augmentString(this.$(this.pattern()))).r();
        boolean lowercase = BoxesRunTime.unboxToBoolean((Object)this.$(this.toLowercase()));
        boolean matchGaps = BoxesRunTime.unboxToBoolean((Object)this.$(this.gaps()));
        int minTokenLen = BoxesRunTime.unboxToInt((Object)this.$(this.minTokenLength()));
        return new Serializable(re, lowercase, matchGaps, minTokenLen){
            public static final long serialVersionUID = 0L;
            private final Regex re$1;
            private final boolean lowercase$1;
            private final boolean matchGaps$1;
            private final int minTokenLen$1;

            public final Seq<String> apply(String originStr) {
                // NOTE(review): toLowerCase() without an explicit Locale follows the JVM
                // default locale; left unchanged so existing output is preserved.
                String str = this.lowercase$1 ? originStr.toLowerCase() : originStr;
                // gaps == true: the pattern separates tokens; otherwise each match is a token.
                Seq tokens = this.matchGaps$1 ? Predef$.MODULE$.refArrayOps((Object[])this.re$1.split((CharSequence)str)).toSeq() : this.re$1.findAllIn((CharSequence)str).toSeq();
                int minLength = this.minTokenLen$1;
                return (Seq)tokens.filter((Function1)new Serializable(this, minLength){
                    public static final long serialVersionUID = 0L;
                    private final int minLength$1;

                    public final boolean apply(String x$2) {
                        return x$2.length() >= this.minLength$1;
                    }
                    {
                        this.minLength$1 = minLength$1;
                    }
                });
            }
            {
                this.re$1 = re$1;
                this.lowercase$1 = lowercase$1;
                this.matchGaps$1 = matchGaps$1;
                this.minTokenLen$1 = minTokenLen$1;
            }
        };
    }

    /**
     * Requires the input column type to equal {@code StringType}.
     *
     * @param inputType type to check; on mismatch {@code Predef.require} is given the
     *        message "Input type must be string type but got &lt;inputType&gt;."
     */
    @Override
    public void validateInputType(DataType inputType) {
        StringType$ expected = StringType$.MODULE$;
        // Null-safe equality, equivalent to Scala's `inputType == StringType`.
        boolean isStringType = inputType == null ? expected == null : inputType.equals(expected);
        Predef$.MODULE$.require(isStringType, (Function0)new Serializable(this, inputType){
            public static final long serialVersionUID = 0L;
            private final DataType inputType$2;

            public final String apply() {
                return new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Input type must be string type but got ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.inputType$2}));
            }
            {
                this.inputType$2 = inputType$2;
            }
        });
    }

    /**
     * Returns the output column type: an {@code ArrayType} of {@code StringType}
     * constructed with {@code true} as its second argument (presumably
     * {@code containsNull} - confirm against the ArrayType API).
     */
    @Override
    public DataType outputDataType() {
        return new ArrayType((DataType)StringType$.MODULE$, true);
    }

    /**
     * Copies this instance via {@code defaultCopy}.
     *
     * @param extra parameter map forwarded to {@code defaultCopy}
     */
    @Override
    public RegexTokenizer copy(ParamMap extra) {
        return (RegexTokenizer)this.defaultCopy(extra);
    }

    /**
     * Creates a tokenizer with the given identifier, declares its four parameters
     * and registers their defaults (minTokenLength=1, gaps=true, pattern="\\s+",
     * toLowercase=true).
     *
     * @param uid identifier returned by {@link #uid()}
     */
    public RegexTokenizer(String uid) {
        this.uid = uid;
        // Trait initialisers emitted by the Scala compiler for the mixed-in traits.
        MLWritable$class.$init$(this);
        DefaultParamsWritable$class.$init$(this);
        this.minTokenLength = new IntParam((Identifiable)this, "minTokenLength", "minimum token length (>= 0)", (Function1<Object, Object>)ParamValidators$.MODULE$.gtEq(0.0));
        this.gaps = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens");
        this.pattern = new Param(this, "pattern", "regex pattern used for tokenizing");
        this.toLowercase = new BooleanParam(this, "toLowercase", "whether to convert all characters to lowercase before tokenizing.");
        this.setDefault((Seq<ParamPair<?>>)Predef$.MODULE$.wrapRefArray((Object[])new ParamPair[]{this.minTokenLength().$minus$greater(BoxesRunTime.boxToInteger((int)1)), this.gaps().$minus$greater(BoxesRunTime.boxToBoolean((boolean)true)), this.pattern().$minus$greater("\\s+"), this.toLowercase().$minus$greater(BoxesRunTime.boxToBoolean((boolean)true))}));
    }

    /** Creates a tokenizer with a random identifier generated from the prefix "regexTok". */
    public RegexTokenizer() {
        this(Identifiable$.MODULE$.randomUID("regexTok"));
    }
}

