/*
 * Decompiled with CFR 0.152.
 */
package org.jpmml.evaluator;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.jpmml.model.TermUtil;

/**
 * Splits text into tokens using a caller-supplied regular expression as the
 * delimiter, then post-processes every piece with
 * {@link TermUtil#trimPunctuation(String)} and discards pieces that are empty
 * either before or after that post-processing step.
 */
public class TextTokenizer {
    /** Delimiter pattern handed to {@link Pattern#split(CharSequence, int)}. */
    private Pattern pattern = null;

    /**
     * Creates a tokenizer that splits on the given delimiter pattern.
     *
     * @param pattern the delimiter pattern; stored as-is, without validation
     */
    public TextTokenizer(Pattern pattern) {
        this.setPattern(pattern);
    }

    /**
     * Tokenizes the given text.
     *
     * <p>The text is split around matches of the configured pattern. Each
     * resulting piece is skipped if it is empty; otherwise it is passed through
     * {@link TermUtil#trimPunctuation(String)} and skipped if the result is empty.
     * Surviving tokens keep their original relative order.</p>
     *
     * @param string the text to tokenize
     * @return an immutable empty list when {@code string} is the empty string;
     *         otherwise a fixed-size list (backed by an array) of the surviving tokens
     */
    public List<String> tokenize(String string) {
        Pattern delimiter = this.getPattern();

        // The empty string short-circuits before the pattern is ever applied.
        if ("".equals(string)) {
            return Collections.emptyList();
        }

        // Limit -1 keeps trailing empty pieces; they are filtered out below anyway.
        String[] pieces = delimiter.split(string, -1);

        // Compact the survivors towards the front of the same array. The write
        // index never runs ahead of the read position, so no unread piece is lost.
        int kept = 0;
        for (String piece : pieces) {
            if (piece.isEmpty()) {
                continue;
            }
            String trimmed = TermUtil.trimPunctuation(piece);
            if (trimmed.isEmpty()) {
                continue;
            }
            pieces[kept++] = trimmed;
        }

        // Reuse the array when nothing was dropped; otherwise cut it down to size.
        String[] result = (kept == pieces.length) ? pieces : Arrays.copyOf(pieces, kept);
        return Arrays.asList(result);
    }

    /**
     * Returns the delimiter pattern this tokenizer was constructed with.
     *
     * @return the delimiter pattern
     */
    public Pattern getPattern() {
        return this.pattern;
    }

    /** Stores the delimiter pattern; only invoked from the constructor. */
    private void setPattern(Pattern pattern) {
        this.pattern = pattern;
    }
}

