package org.omegat.tokenizer;

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.HMMChineseTokenizer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.omegat.util.Token;

/**
 * Tokenizer registered for the {@code "zh"} language code (and flagged as the
 * default for it through the {@link Tokenizer} annotation) that builds its
 * token streams from Lucene's smart-Chinese analysis classes.
 *
 * <p>Verbatim tokenization does not use Lucene at all: both verbatim entry
 * points hand the text to the inherited code-point based helpers.
 */
@Tokenizer(languages = {"zh"}, isDefault = true)
public class LuceneSmartChineseTokenizer extends BaseTokenizer {

    /**
     * Tokenizes the text verbatim by delegating to the inherited
     * {@code tokenizeByCodePoint} helper.
     *
     * @param str text to tokenize
     * @return whatever {@code tokenizeByCodePoint(str)} produces
     */
    @Override
    public Token[] tokenizeVerbatim(String str) {
        Token[] codePointTokens = tokenizeByCodePoint(str);
        return codePointTokens;
    }

    /**
     * String-returning counterpart of {@link #tokenizeVerbatim(String)};
     * delegates to the inherited {@code tokenizeByCodePointToStrings} helper.
     *
     * @param str text to tokenize
     * @return whatever {@code tokenizeByCodePointToStrings(str)} produces
     */
    @Override
    public String[] tokenizeVerbatimToStrings(String str) {
        String[] codePointStrings = tokenizeByCodePointToStrings(str);
        return codePointStrings;
    }

    /**
     * Builds the Lucene token stream for the given text.
     *
     * @param str text to feed into the stream
     * @param z   selects the stream source: {@code true} goes through a
     *            {@link SmartChineseAnalyzer}, {@code false} uses a bare
     *            {@link HMMChineseTokenizer}
     * @param z2  passed to the {@link SmartChineseAnalyzer} constructor
     *            (presumably its "use default stop words" switch; confirm
     *            against the Lucene API); ignored when {@code z} is
     *            {@code false}
     * @return a token stream reading from {@code str}
     * @throws IOException declared by the overridden method
     */
    @Override
    protected TokenStream getTokenStream(String str, boolean z, boolean z2) throws IOException {
        StringReader input = new StringReader(str);

        if (!z) {
            // Tokenizer-only path: no analyzer is built around the tokenizer.
            HMMChineseTokenizer rawTokenizer = new HMMChineseTokenizer();
            rawTokenizer.setReader(input);
            return rawTokenizer;
        }

        // Analyzer path: a fresh analyzer is created per call and is neither
        // retained nor closed here; only the stream it produces is returned.
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(z2);
        return analyzer.tokenStream("", input);
    }
}
