/*
 * Decompiled with CFR 0.152.
 */
package org.jpmml.evaluator;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.dmg.pmml.PMMLObject;
import org.jpmml.evaluator.RegExUtil;
import org.jpmml.evaluator.TextTokenizer;
import org.jpmml.evaluator.TokenizedString;
import org.jpmml.model.TermUtil;

/**
 * A {@link TextTokenizer} whose pattern describes the separators between tokens
 * rather than the tokens themselves: the text lying between consecutive pattern
 * matches is what gets collected.
 */
public class TextSplitter extends TextTokenizer {
    /**
     * Compiles the separator expression via {@link RegExUtil#compile} and delegates
     * to {@link #TextSplitter(Pattern)}.
     *
     * @param wordSeparatorCharacterRE regular expression source for the separator
     * @param context PMML object handed to {@code RegExUtil.compile} together with the expression
     */
    public TextSplitter(String wordSeparatorCharacterRE, PMMLObject context) {
        this(RegExUtil.compile(wordSeparatorCharacterRE, context));
    }

    /**
     * @param pattern pre-compiled separator pattern, passed on to the superclass
     */
    public TextSplitter(Pattern pattern) {
        super(pattern);
    }

    /**
     * Splits {@code string} around every match of this tokenizer's pattern.
     *
     * <p>Each candidate piece is run through {@code TermUtil.trimPunctuation} and is
     * kept only if the result is non-empty.
     *
     * @param string the text to split
     * @return {@link TokenizedString#EMPTY} when the input is the empty string, or when
     *         the pattern never matches and the trimmed input is empty; otherwise a
     *         {@code TokenizedString} holding the surviving pieces in input order
     */
    @Override
    public TokenizedString tokenize(String string) {
        Pattern separator = this.getPattern();
        if ("".equals(string)) {
            return TokenizedString.EMPTY;
        }

        Matcher separators = separator.matcher(string);
        boolean found = separators.find();

        // No separator anywhere: the whole input is the only candidate token.
        if (!found) {
            String sole = TermUtil.trimPunctuation(string);
            return sole.isEmpty() ? TokenizedString.EMPTY : new TokenizedString(sole);
        }

        ArrayList<String> pieces = new ArrayList<String>(Math.max(string.length() / 4, 16));
        int cursor = 0;
        while (found) {
            // Text between the end of the previous separator and the start of this one.
            String piece = TermUtil.trimPunctuation(string.substring(cursor, separators.start()));
            if (!piece.isEmpty()) {
                pieces.add(piece);
            }
            cursor = separators.end();
            found = separators.find();
        }

        // Whatever follows the last separator.
        String tail = TermUtil.trimPunctuation(string.substring(cursor));
        if (!tail.isEmpty()) {
            pieces.add(tail);
        }
        return new TokenizedString(pieces);
    }
}

