/*
 * Decompiled with CFR 0.152.
 */
package org.apache.flink.ml.feature.regextokenizer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.flink.ml.api.Stage;
import org.apache.flink.ml.api.Transformer;
import org.apache.flink.ml.feature.regextokenizer.RegexTokenizerParams;
import org.apache.flink.ml.param.Param;
import org.apache.flink.ml.param.WithParams;
import org.apache.flink.ml.util.ParamUtils;
import org.apache.flink.ml.util.ReadWriteUtils;
import org.apache.flink.table.api.Expressions;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.expressions.Expression;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.util.Preconditions;

public class RegexTokenizer
implements Transformer<RegexTokenizer>,
RegexTokenizerParams<RegexTokenizer> {
    private final Map<Param<?>, Object> paramMap = new HashMap();

    public RegexTokenizer() {
        ParamUtils.initializeMapWithDefaultValues(this.paramMap, (WithParams)this);
    }

    public Table[] transform(Table ... inputs) {
        Preconditions.checkArgument((inputs.length == 1 ? 1 : 0) != 0);
        Expression tokenizerUdf = (Expression)Expressions.call(RegexTokenizerUdf.class, (Object[])new Object[]{Expressions.$((String)this.getInputCol()), this.getPattern(), this.getGaps(), this.getToLowercase(), this.getMinTokenLength()}).as(this.getOutputCol(), new String[0]);
        Table output = inputs[0].addColumns(new Expression[]{tokenizerUdf});
        return new Table[]{output};
    }

    public void save(String path) throws IOException {
        ReadWriteUtils.saveMetadata((Stage)this, (String)path);
    }

    public Map<Param<?>, Object> getParamMap() {
        return this.paramMap;
    }

    public static RegexTokenizer load(StreamTableEnvironment tEnv, String path) throws IOException {
        return (RegexTokenizer)ReadWriteUtils.loadStageParam((String)path);
    }

    public static class RegexTokenizerUdf
    extends ScalarFunction {
        public String[] eval(String input, String pattern, Boolean gaps, boolean toLowercase, int minTokenLength) {
            Pattern regPattern = Pattern.compile(pattern);
            input = toLowercase ? input.toLowerCase() : input;
            ArrayList<String> tokens = new ArrayList<String>();
            if (gaps.booleanValue()) {
                String[] tokenArray;
                for (String token : tokenArray = regPattern.split(input)) {
                    if (token.length() < minTokenLength) continue;
                    tokens.add(token);
                }
            } else {
                Matcher matcher = regPattern.matcher(input);
                while (matcher.find()) {
                    String token = matcher.group();
                    if (token.length() < minTokenLength) continue;
                    tokens.add(token);
                }
            }
            return tokens.toArray(new String[0]);
        }
    }
}

