/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.io.IOException;
import org.apache.spark.annotation.Experimental;
import org.apache.spark.ml.UnaryTransformer;
import org.apache.spark.ml.feature.RegexTokenizer$;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.ParamValidators$;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.DefaultParamsWritable$class;
import org.apache.spark.ml.util.Identifiable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable$class;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.util.matching.Regex;

@Experimental
@ScalaSignature(bytes="\u0006\u0001\u0005ee\u0001B\u0001\u0003\u00015\u0011aBU3hKb$vn[3oSj,'O\u0003\u0002\u0004\t\u00059a-Z1ukJ,'BA\u0003\u0007\u0003\tiGN\u0003\u0002\b\u0011\u0005)1\u000f]1sW*\u0011\u0011BC\u0001\u0007CB\f7\r[3\u000b\u0003-\t1a\u001c:h\u0007\u0001\u00192\u0001\u0001\b+!\u0015y\u0001C\u0005\u000f)\u001b\u0005!\u0011BA\t\u0005\u0005A)f.\u0019:z)J\fgn\u001d4pe6,'\u000f\u0005\u0002\u001439\u0011AcF\u0007\u0002+)\ta#A\u0003tG\u0006d\u0017-\u0003\u0002\u0019+\u00051\u0001K]3eK\u001aL!AG\u000e\u0003\rM#(/\u001b8h\u0015\tAR\u0003E\u0002\u001eKIq!AH\u0012\u000f\u0005}\u0011S\"\u0001\u0011\u000b\u0005\u0005b\u0011A\u0002\u001fs_>$h(C\u0001\u0017\u0013\t!S#A\u0004qC\u000e\\\u0017mZ3\n\u0005\u0019:#aA*fc*\u0011A%\u0006\t\u0003S\u0001i\u0011A\u0001\t\u0003W9j\u0011\u0001\f\u0006\u0003[\u0011\tA!\u001e;jY&\u0011q\u0006\f\u0002\u0016\t\u00164\u0017-\u001e7u!\u0006\u0014\u0018-\\:Xe&$\u0018M\u00197f\u0011!\t\u0004A!b\u0001\n\u0003\u0012\u0014aA;jIV\t!\u0003\u0003\u00055\u0001\t\u0005\t\u0015!\u0003\u0013\u0003\u0011)\u0018\u000e\u001a\u0011\t\u000bY\u0002A\u0011A\u001c\u0002\rqJg.\u001b;?)\tA\u0003\bC\u00032k\u0001\u0007!\u0003C\u00037\u0001\u0011\u0005!\bF\u0001)\u0011\u001da\u0004A1A\u0005\u0002u\na\"\\5o)>\\WM\u001c'f]\u001e$\b.F\u0001?!\ty$)D\u0001A\u0015\t\tE!A\u0003qCJ\fW.\u0003\u0002D\u0001\nA\u0011J\u001c;QCJ\fW\u000e\u0003\u0004F\u0001\u0001\u0006IAP\u0001\u0010[&tGk\\6f]2+gn\u001a;iA!)q\t\u0001C\u0001\u0011\u0006\t2/\u001a;NS:$vn[3o\u0019\u0016tw\r\u001e5\u0015\u0005%SU\"\u0001\u0001\t\u000b-3\u0005\u0019\u0001'\u0002\u000bY\fG.^3\u0011\u0005Qi\u0015B\u0001(\u0016\u0005\rIe\u000e\u001e\u0005\u0006!\u0002!\t!U\u0001\u0012O\u0016$X*\u001b8U_.,g\u000eT3oORDW#\u0001'\t\u000fM\u0003!\u0019!C\u0001)\u0006!q-\u00199t+\u0005)\u0006CA 
W\u0013\t9\u0006I\u0001\u0007C_>dW-\u00198QCJ\fW\u000e\u0003\u0004Z\u0001\u0001\u0006I!V\u0001\u0006O\u0006\u00048\u000f\t\u0005\u00067\u0002!\t\u0001X\u0001\bg\u0016$x)\u00199t)\tIU\fC\u0003L5\u0002\u0007a\f\u0005\u0002\u0015?&\u0011\u0001-\u0006\u0002\b\u0005>|G.Z1o\u0011\u0015\u0011\u0007\u0001\"\u0001d\u0003\u001d9W\r^$baN,\u0012A\u0018\u0005\bK\u0002\u0011\r\u0011\"\u0001g\u0003\u001d\u0001\u0018\r\u001e;fe:,\u0012a\u001a\t\u0004\u007f!\u0014\u0012BA5A\u0005\u0015\u0001\u0016M]1n\u0011\u0019Y\u0007\u0001)A\u0005O\u0006A\u0001/\u0019;uKJt\u0007\u0005C\u0003n\u0001\u0011\u0005a.\u0001\u0006tKR\u0004\u0016\r\u001e;fe:$\"!S8\t\u000b-c\u0007\u0019\u0001\n\t\u000bE\u0004A\u0011\u0001\u001a\u0002\u0015\u001d,G\u000fU1ui\u0016\u0014h\u000eC\u0004t\u0001\t\u0007IQ\u0001+\u0002\u0017Q|Gj\\<fe\u000e\f7/\u001a\u0005\u0007k\u0002\u0001\u000bQB+\u0002\u0019Q|Gj\\<fe\u000e\f7/\u001a\u0011\t\u000b]\u0004A\u0011\u0001=\u0002\u001dM,G\u000fV8M_^,'oY1tKR\u0011\u0011*\u001f\u0005\u0006\u0017Z\u0004\rA\u0018\u0005\u0006w\u0002!\taY\u0001\u000fO\u0016$Hk\u001c'po\u0016\u00148-Y:f\u0011\u0015i\b\u0001\"\u0015\u007f\u0003M\u0019'/Z1uKR\u0013\u0018M\\:g_Jlg)\u001e8d+\u0005y\b#\u0002\u000b\u0002\u0002Ia\u0012bAA\u0002+\tIa)\u001e8di&|g.\r\u0005\b\u0003\u000f\u0001A\u0011KA\u0005\u0003E1\u0018\r\\5eCR,\u0017J\u001c9viRK\b/\u001a\u000b\u0005\u0003\u0017\t\t\u0002E\u0002\u0015\u0003\u001bI1!a\u0004\u0016\u0005\u0011)f.\u001b;\t\u0011\u0005M\u0011Q\u0001a\u0001\u0003+\t\u0011\"\u001b8qkR$\u0016\u0010]3\u0011\t\u0005]\u0011\u0011E\u0007\u0003\u00033QA!a\u0007\u0002\u001e\u0005)A/\u001f9fg*\u0019\u0011q\u0004\u0004\u0002\u0007M\fH.\u0003\u0003\u0002$\u0005e!\u0001\u0003#bi\u0006$\u0016\u0010]3\t\u000f\u0005\u001d\u0002\u0001\"\u0015\u0002*\u0005qq.\u001e;qkR$\u0015\r^1UsB,WCAA\u000b\u0011\u001d\ti\u0003\u0001C!\u0003_\tAaY8qsR\u0019\u0001&!\r\t\u0011\u0005M\u00121\u0006a\u0001\u0003k\tQ!\u001a=ue\u0006\u00042aPA\u001c\u0013\r\tI\u0004\u0011\u0002\t!\u0006\u0014\u0018-\\'ba\"\u001a\u0001!!\u00
10\u0011\t\u0005}\u0012QI\u0007\u0003\u0003\u0003R1!a\u0011\u0007\u0003)\tgN\\8uCRLwN\\\u0005\u0005\u0003\u000f\n\tE\u0001\u0007FqB,'/[7f]R\fGnB\u0004\u0002L\tA\t!!\u0014\u0002\u001dI+w-\u001a=U_.,g.\u001b>feB\u0019\u0011&a\u0014\u0007\r\u0005\u0011\u0001\u0012AA)'!\ty%a\u0015\u0002Z\u0005}\u0003c\u0001\u000b\u0002V%\u0019\u0011qK\u000b\u0003\r\u0005s\u0017PU3g!\u0011Y\u00131\f\u0015\n\u0007\u0005uCFA\u000bEK\u001a\fW\u000f\u001c;QCJ\fWn\u001d*fC\u0012\f'\r\\3\u0011\u0007Q\t\t'C\u0002\u0002dU\u0011AbU3sS\u0006d\u0017N_1cY\u0016DqANA(\t\u0003\t9\u0007\u0006\u0002\u0002N!A\u00111NA(\t\u0003\ni'\u0001\u0003m_\u0006$Gc\u0001\u0015\u0002p!9\u0011\u0011OA5\u0001\u0004\u0011\u0012\u0001\u00029bi\"Dc!!\u001b\u0002v\u0005m\u0004\u0003BA \u0003oJA!!\u001f\u0002B\t)1+\u001b8dK\u0006\u0012\u0011QP\u0001\u0006c92d\u0006\r\u0005\u000b\u0003\u0003\u000by%!A\u0005\n\u0005\r\u0015a\u0003:fC\u0012\u0014Vm]8mm\u0016$\"!!\"\u0011\t\u0005\u001d\u0015\u0011S\u0007\u0003\u0003\u0013SA!a#\u0002\u000e\u0006!A.\u00198h\u0015\t\ty)\u0001\u0003kCZ\f\u0017\u0002BAJ\u0003\u0013\u0013aa\u00142kK\u000e$\bFBA(\u0003k\nY\b\u000b\u0004\u0002J\u0005U\u00141\u0010")
/**
 * Regex-driven tokenizer: a {@link UnaryTransformer} from {@code String} to
 * {@code Seq<String>}.
 *
 * <p>Behaviour is controlled by four params, all created (and given defaults)
 * in {@link #RegexTokenizer(String)}:
 * <ul>
 *   <li>{@code pattern} - the regex text (default {@code "\\s+"});</li>
 *   <li>{@code gaps} - when true the input is split on the regex, when false
 *       the regex matches themselves are the tokens (default {@code true});</li>
 *   <li>{@code toLowercase} - lower-case the input before tokenizing
 *       (default {@code true});</li>
 *   <li>{@code minTokenLength} - tokens shorter than this are dropped
 *       (default {@code 1}).</li>
 * </ul>
 *
 * <p>NOTE(review): this source is decompiler output; the anonymous
 * {@code Serializable} classes below are the decompiler's rendering of
 * closures and are kept in that shape on purpose.
 */
public class RegexTokenizer
extends UnaryTransformer<String, Seq<String>, RegexTokenizer>
implements DefaultParamsWritable {
    /** Identifier handed to the constructor and returned by {@link #uid()}. */
    private final String uid;
    /** Param holding the minimum length a token must have to be kept. */
    private final IntParam minTokenLength;
    /** Param selecting split-on-regex (true) versus collect-regex-matches (false). */
    private final BooleanParam gaps;
    /** Param holding the regex text used for tokenizing. */
    private final Param<String> pattern;
    /** Param selecting whether the input is lower-cased before tokenizing. */
    private final BooleanParam toLowercase;

    /**
     * Forwards to {@code read()} on the {@code RegexTokenizer$} module instance.
     *
     * @return the reader returned by the module object
     */
    public static MLReader<RegexTokenizer> read() {
        return RegexTokenizer$.MODULE$.read();
    }

    /**
     * Forwards to {@code load(String)} on the {@code RegexTokenizer$} module instance.
     *
     * @param string argument passed straight through to the module's {@code load}
     * @return the instance returned by the module object
     */
    public static RegexTokenizer load(String string) {
        return RegexTokenizer$.MODULE$.load(string);
    }

    /**
     * Obtains a writer via the {@code DefaultParamsWritable} trait implementation.
     *
     * @return the writer produced by {@code DefaultParamsWritable$class.write}
     */
    @Override
    public MLWriter write() {
        return DefaultParamsWritable$class.write(this);
    }

    /**
     * Saves this instance via the {@code MLWritable} trait implementation.
     *
     * @param path destination passed through to {@code MLWritable$class.save}
     * @throws IOException declared by the trait method this delegates to
     */
    @Override
    public void save(String path) throws IOException {
        MLWritable$class.save(this, path);
    }

    /** @return the uid this instance was constructed with */
    @Override
    public String uid() {
        return uid;
    }

    /** @return the {@code minTokenLength} param object */
    public IntParam minTokenLength() {
        return minTokenLength;
    }

    /**
     * Sets the {@code minTokenLength} param.
     *
     * @param value new minimum token length
     * @return the result of {@code set(...)}, cast to {@code RegexTokenizer}
     */
    public RegexTokenizer setMinTokenLength(int value) {
        return (RegexTokenizer)set(minTokenLength(), BoxesRunTime.boxToInteger(value));
    }

    /** @return the current value of the {@code minTokenLength} param */
    public int getMinTokenLength() {
        return BoxesRunTime.unboxToInt($(minTokenLength()));
    }

    /** @return the {@code gaps} param object */
    public BooleanParam gaps() {
        return gaps;
    }

    /**
     * Sets the {@code gaps} param.
     *
     * @param value true to split on the regex, false to collect regex matches
     * @return the result of {@code set(...)}, cast to {@code RegexTokenizer}
     */
    public RegexTokenizer setGaps(boolean value) {
        return (RegexTokenizer)set(gaps(), BoxesRunTime.boxToBoolean(value));
    }

    /** @return the current value of the {@code gaps} param */
    public boolean getGaps() {
        return BoxesRunTime.unboxToBoolean($(gaps()));
    }

    /** @return the {@code pattern} param object */
    public Param<String> pattern() {
        return pattern;
    }

    /**
     * Sets the {@code pattern} param.
     *
     * @param value regex text to tokenize with
     * @return the result of {@code set(...)}, cast to {@code RegexTokenizer}
     */
    public RegexTokenizer setPattern(String value) {
        return (RegexTokenizer)set(pattern(), value);
    }

    /** @return the current value of the {@code pattern} param */
    public String getPattern() {
        return $(pattern());
    }

    /** @return the {@code toLowercase} param object */
    public final BooleanParam toLowercase() {
        return toLowercase;
    }

    /**
     * Sets the {@code toLowercase} param.
     *
     * @param value true to lower-case the input before tokenizing
     * @return the result of {@code set(...)}, cast to {@code RegexTokenizer}
     */
    public RegexTokenizer setToLowercase(boolean value) {
        return (RegexTokenizer)set(toLowercase(), BoxesRunTime.boxToBoolean(value));
    }

    /** @return the current value of the {@code toLowercase} param */
    public boolean getToLowercase() {
        return BoxesRunTime.unboxToBoolean($(toLowercase()));
    }

    /**
     * Builds the per-value tokenizing function.
     *
     * <p>The returned closure keeps a reference to this instance and reads
     * {@code pattern}, {@code toLowercase}, {@code gaps} and
     * {@code minTokenLength} (in that order) every time it is applied; the
     * regex is therefore rebuilt from the pattern text on each call.
     *
     * @return function mapping an input string to its sequence of tokens
     */
    @Override
    public Function1<String, Seq<String>> createTransformFunc() {
        return new Serializable(this){
            public static final long serialVersionUID = 0L;
            private final /* synthetic */ RegexTokenizer $outer;

            public final Seq<String> apply(String originStr) {
                // 1. Compile the regex from the current pattern param.
                String patternText = $outer.$($outer.pattern());
                Regex tokenRegex = new StringOps(Predef$.MODULE$.augmentString(patternText)).r();

                // 2. Optionally lower-case the input (no-arg toLowerCase, i.e. default-locale rules).
                String text = originStr;
                if (BoxesRunTime.unboxToBoolean($outer.$($outer.toLowercase()))) {
                    text = originStr.toLowerCase();
                }

                // 3. Either split on the regex (gaps) or collect the regex matches.
                Seq tokens;
                if (BoxesRunTime.unboxToBoolean($outer.$($outer.gaps()))) {
                    tokens = Predef$.MODULE$.refArrayOps((Object[])tokenRegex.split((CharSequence)text)).toSeq();
                } else {
                    tokens = tokenRegex.findAllIn((CharSequence)text).toSeq();
                }

                // 4. Keep only tokens at least minTokenLength characters long.
                int minLength = BoxesRunTime.unboxToInt($outer.$($outer.minTokenLength()));
                return (Seq)tokens.filter((Function1)new Serializable(this, minLength){
                    public static final long serialVersionUID = 0L;
                    private final int minLength$1;

                    public final boolean apply(String token) {
                        return token.length() >= minLength$1;
                    }
                    {
                        this.minLength$1 = minLength$1;
                    }
                });
            }
            {
                if ($outer == null) {
                    throw new NullPointerException();
                }
                this.$outer = $outer;
            }
        };
    }

    /**
     * Requires the input column type to equal the {@code StringType$} singleton.
     *
     * <p>The check is delegated to {@code Predef.require}; the failure message
     * is supplied as a closure that interpolates the offending type into
     * "Input type must be string type but got ...".
     *
     * @param inputType type of the input column
     */
    @Override
    public void validateInputType(DataType inputType) {
        StringType$ expected = StringType$.MODULE$;
        // Null-safe equality: two nulls are equal, otherwise defer to equals().
        boolean isStringType = inputType == null ? expected == null : inputType.equals(expected);
        Function0 failureMessage = (Function0)new Serializable(this, inputType){
            public static final long serialVersionUID = 0L;
            private final DataType inputType$2;

            public final String apply() {
                return new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Input type must be string type but got ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.inputType$2}));
            }
            {
                this.inputType$2 = inputType$2;
            }
        };
        Predef$.MODULE$.require(isStringType, failureMessage);
    }

    /**
     * Describes the output column type.
     *
     * @return a new {@code ArrayType} over {@code StringType$} whose second
     *         constructor argument is {@code true} (presumably the
     *         contains-null flag - confirm against {@code ArrayType})
     */
    @Override
    public DataType outputDataType() {
        return new ArrayType((DataType)StringType$.MODULE$, true);
    }

    /**
     * Copies this instance by delegating to {@code defaultCopy}.
     *
     * @param extra param map passed through to {@code defaultCopy}
     * @return the copy, cast to {@code RegexTokenizer}
     */
    @Override
    public RegexTokenizer copy(ParamMap extra) {
        return (RegexTokenizer)defaultCopy(extra);
    }

    /**
     * Creates a tokenizer with the given uid, runs the trait initializers,
     * creates the four params and registers their defaults
     * ({@code minTokenLength=1}, {@code gaps=true}, {@code pattern="\\s+"},
     * {@code toLowercase=true}).
     *
     * @param uid identifier for this instance
     */
    public RegexTokenizer(String uid) {
        this.uid = uid;
        // Trait initializers run before any param is created, as in the original ordering.
        MLWritable$class.$init$(this);
        DefaultParamsWritable$class.$init$(this);

        this.minTokenLength = new IntParam((Identifiable)this, "minTokenLength", "minimum token length (>= 0)", (Function1<Object, Object>)ParamValidators$.MODULE$.gtEq(0.0));
        this.gaps = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens");
        this.pattern = new Param(this, "pattern", "regex pattern used for tokenizing");
        this.toLowercase = new BooleanParam(this, "toLowercase", "whether to convert all characters to lowercase before tokenizing.");

        // Default pairs are built in the same order as the params were declared.
        ParamPair[] defaults = new ParamPair[]{
            minTokenLength().$minus$greater(BoxesRunTime.boxToInteger(1)),
            gaps().$minus$greater(BoxesRunTime.boxToBoolean(true)),
            pattern().$minus$greater("\\s+"),
            toLowercase().$minus$greater(BoxesRunTime.boxToBoolean(true))
        };
        setDefault((Seq<ParamPair<?>>)Predef$.MODULE$.wrapRefArray((Object[])defaults));
    }

    /** Creates a tokenizer whose uid is generated by {@code Identifiable$.randomUID("regexTok")}. */
    public RegexTokenizer() {
        this(Identifiable$.MODULE$.randomUID("regexTok"));
    }
}

