/*
 * Decompiled with CFR 0.152.
 */
package hex;

import java.util.Locale;
import java.util.regex.Pattern;
import water.MRTask;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.parser.BufferedString;

/**
 * Task that tokenizes string cells by splitting them on a regular expression.
 *
 * <p>For every row of the input chunks, each non-NA cell is read as a string, optionally
 * lower-cased, and split on the configured regex. Every resulting token whose length is at
 * least the configured minimum is appended to the output chunk, and a single NA is appended
 * after each row, so NAs in the output delimit the token sequences of consecutive rows.
 *
 * <p>Use {@link #RegexTokenizer(String)} for plain splitting, or {@link Builder} to also
 * configure lower-casing and the minimum token length.
 */
public class RegexTokenizer extends MRTask<RegexTokenizer> {
    /** Regular expression the cell values are split on. */
    private final String _regex;
    /** Whether cell values are lower-cased before being split. */
    private final boolean _toLowercase;
    /** Tokens shorter than this many characters are dropped. */
    private final int _minLength;

    /**
     * Creates a tokenizer that splits on {@code regex}, keeps the original case and keeps
     * tokens of any length.
     *
     * @param regex regular expression used as the token delimiter
     */
    public RegexTokenizer(String regex) {
        this(regex, false, 0);
    }

    private RegexTokenizer(String regex, boolean toLowercase, int minLength) {
        this._regex = regex;
        this._toLowercase = toLowercase;
        this._minLength = minLength;
    }

    /**
     * Tokenizes every row of the given chunks into {@code nc}.
     *
     * <p>The delimiter regex is compiled once per call, so a null or syntactically invalid
     * regex fails here before any row is processed.
     *
     * @param cs input chunks; the row count is taken from {@code cs[0]}
     * @param nc output chunk receiving the tokens, with one NA appended after each row
     */
    @Override
    public void map(Chunk[] cs, NewChunk nc) {
        // The regex is loop-invariant: compile it once instead of passing it to
        // String.split for every cell. Pattern.split(s) yields the same result as
        // s.split(regex). Kept as a local so the task's fields stay plain values.
        Pattern delimiter = Pattern.compile(this._regex);
        BufferedString tmpStr = new BufferedString();
        int rowCount = cs[0]._len;
        for (int row = 0; row < rowCount; ++row) {
            for (Chunk chk : cs) {
                if (chk.isNA(row)) {
                    continue; // NA cells contribute no tokens
                }
                String str = chk.atStr(tmpStr, row).toString();
                if (this._toLowercase) {
                    // Locale.ROOT keeps the result independent of the JVM's default locale.
                    str = str.toLowerCase(Locale.ROOT);
                }
                for (String token : delimiter.split(str)) {
                    if (token.length() >= this._minLength) {
                        nc.addStr(token);
                    }
                }
            }
            // Terminate this row's token sequence.
            nc.addNA();
        }
    }

    /**
     * Runs this task over {@code input} and returns the task's output frame.
     *
     * @param input frame whose cells are tokenized
     * @return the output frame produced by the task
     */
    public Frame transform(Frame input) {
        // NOTE(review): (byte)2 is presumably the string vector type code - confirm against
        // the Vec type constants.
        return ((RegexTokenizer)this.doAll((byte)2, input)).outputFrame();
    }

    /**
     * Fluent builder for {@link RegexTokenizer}.
     *
     * <p>Unset options keep their Java field defaults: no regex ({@code null}), no
     * lower-casing, and a minimum token length of 0.
     */
    public static class Builder {
        private String _regex;
        private boolean _toLowercase;
        private int _minLength;

        /**
         * Sets the regular expression used as the token delimiter.
         *
         * @param regex delimiter regular expression
         * @return this builder
         */
        public Builder setRegex(String regex) {
            this._regex = regex;
            return this;
        }

        /**
         * Sets whether cell values are lower-cased before being split.
         *
         * @param toLowercase {@code true} to lower-case values before tokenizing
         * @return this builder
         */
        public Builder setToLowercase(boolean toLowercase) {
            this._toLowercase = toLowercase;
            return this;
        }

        /**
         * Sets the minimum length a token must have to be emitted.
         *
         * @param minLength minimum token length in characters
         * @return this builder
         */
        public Builder setMinLength(int minLength) {
            this._minLength = minLength;
            return this;
        }

        /**
         * Creates a tokenizer from the options set so far.
         *
         * @return a new {@link RegexTokenizer}
         */
        public RegexTokenizer create() {
            return new RegexTokenizer(this._regex, this._toLowercase, this._minLength);
        }
    }
}

