package org.omegat.tokenizer;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Collections;
import java.util.regex.Matcher;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
import org.apache.lucene.analysis.ja.JapaneseTokenizer;
import org.apache.lucene.analysis.ja.dict.UserDictionary;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.omegat.util.PatternConsts;

@Tokenizer(languages = {"ja"}, isDefault = true)
/* loaded from: input_file:org/omegat/tokenizer/LuceneJapaneseTokenizer.class */
public class LuceneJapaneseTokenizer extends BaseTokenizer {

    /* loaded from: input_file:org/omegat/tokenizer/LuceneJapaneseTokenizer$TagJoiningFilter.class */
    private static class TagJoiningFilter extends TokenFilter {
        private static final int BUFFER_INITIAL_SIZE = 5;
        private final CharTermAttribute termAtt;
        private final OffsetAttribute offsetAtt;
        private StringBuilder buffer;
        private int startOffset;
        private boolean buffering;
        private final ArrayDeque<CachedToken> inputStack;
        private final ArrayDeque<CachedToken> outputStack;
        private final ArrayDeque<CachedToken> recoveryStack;

        /* JADX INFO: Access modifiers changed from: private */
        /* loaded from: input_file:org/omegat/tokenizer/LuceneJapaneseTokenizer$TagJoiningFilter$CachedToken.class */
        public static class CachedToken {
            public final char[] chars;
            public final int startOffset;

            CachedToken(char[] cArr, int i) {
                this.chars = cArr;
                this.startOffset = i;
            }
        }

        protected TagJoiningFilter(TokenStream tokenStream) {
            super(tokenStream);
            this.termAtt = addAttribute(CharTermAttribute.class);
            this.offsetAtt = addAttribute(OffsetAttribute.class);
            this.buffer = new StringBuilder(5);
            this.startOffset = -1;
            this.buffering = false;
            this.inputStack = new ArrayDeque<>();
            this.outputStack = new ArrayDeque<>();
            this.recoveryStack = new ArrayDeque<>();
        }

        public boolean incrementToken() throws IOException {
            if (!this.outputStack.isEmpty()) {
                replayToken(this.outputStack.poll());
                return true;
            }
            while (getNextInput()) {
                char[] buffer = this.termAtt.buffer();
                int length = this.termAtt.length();
                if (this.buffering) {
                    if (finishBuffering(buffer, length) || cancelBuffering(buffer, length)) {
                        return true;
                    }
                    cacheRecoveryToken(buffer, length);
                    this.buffer.append(buffer, 0, length);
                } else if (!startBuffering(buffer, length)) {
                    return true;
                }
            }
            return finishToken();
        }

        private boolean getNextInput() throws IOException {
            if (this.inputStack.isEmpty()) {
                return this.input.incrementToken();
            }
            replayToken(this.inputStack.poll());
            return true;
        }

        private boolean startBuffering(char[] cArr, int i) {
            for (int i2 = 0; i2 < i; i2++) {
                if (isTagOpen(cArr[i2])) {
                    if (i2 > 0) {
                        cacheInputToken(Arrays.copyOfRange(cArr, i2, i), this.offsetAtt.startOffset() + i2);
                        truncateToken(i2);
                        return false;
                    }
                    this.buffer.append(cArr, i2, i);
                    this.startOffset = this.offsetAtt.startOffset();
                    cacheRecoveryToken(cArr, i);
                    this.buffering = true;
                    return true;
                }
            }
            return false;
        }

        private void truncateToken(int i) {
            this.termAtt.setLength(i);
            this.offsetAtt.setOffset(this.offsetAtt.startOffset(), this.offsetAtt.startOffset() + i);
        }

        private boolean isTagOpen(char c) {
            return c == '<' || c == '{';
        }

        private boolean cancelBuffering(char[] cArr, int i) {
            for (int i2 = 0; i2 < i; i2++) {
                if (!isTagContent(cArr[i2])) {
                    cacheRecoveryToken(cArr, i);
                    this.outputStack.addAll(this.recoveryStack);
                    this.recoveryStack.clear();
                    replayToken(this.outputStack.poll());
                    clearBuffer();
                    return true;
                }
            }
            return false;
        }

        private boolean isTagContent(char c) {
            return c == '/' || Character.isLetterOrDigit(c);
        }

        private void replayToken(CachedToken cachedToken) {
            this.termAtt.copyBuffer(cachedToken.chars, 0, cachedToken.chars.length);
            this.termAtt.setLength(cachedToken.chars.length);
            this.offsetAtt.setOffset(cachedToken.startOffset, cachedToken.startOffset + cachedToken.chars.length);
        }

        private boolean finishBuffering(char[] cArr, int i) {
            for (int i2 = 0; i2 < i; i2++) {
                if (isTagClose(cArr[i2])) {
                    if (i2 < i - 1) {
                        cacheInputToken(Arrays.copyOfRange(cArr, i2 + 1, i), this.offsetAtt.startOffset() + i2 + 1);
                    }
                    this.buffer.append(cArr, 0, i2 + 1);
                    return finishToken();
                }
            }
            return false;
        }

        private boolean isTagClose(char c) {
            char charAt = this.buffer.charAt(0);
            return (charAt == '<' && c == '>') || (charAt == '{' && c == '}');
        }

        private boolean finishToken() {
            if (this.buffer.length() == 0) {
                return false;
            }
            String sb = this.buffer.toString();
            this.termAtt.copyBuffer(sb.toCharArray(), 0, sb.length());
            this.termAtt.setLength(sb.length());
            this.offsetAtt.setOffset(this.startOffset, this.startOffset + sb.length());
            clearBuffer();
            this.recoveryStack.clear();
            return true;
        }

        private void clearBuffer() {
            this.buffer = new StringBuilder(5);
            this.buffering = false;
        }

        private void cacheInputToken(char[] cArr, int i) {
            this.inputStack.add(new CachedToken(cArr, i));
        }

        private void cacheRecoveryToken(char[] cArr, int i) {
            this.recoveryStack.add(new CachedToken(Arrays.copyOf(cArr, i), this.offsetAtt.startOffset()));
        }
    }

    /**
     * Creates the tokenizer and switches off the {@code shouldDelegateTokenizeExactly} flag.
     * NOTE(review): the flag is not declared in this file (presumably inherited from
     * {@code BaseTokenizer}); confirm its exact meaning there.
     */
    public LuceneJapaneseTokenizer() {
        shouldDelegateTokenizeExactly = false;
    }

    /**
     * Builds the Lucene token stream for the given text.
     * <p>
     * When {@code z} is true, tags are first replaced by spaces and the text is run through
     * a {@link JapaneseAnalyzer} in {@code SEARCH} mode; {@code z2} then selects between the
     * analyzer's default stop set / stop tags and empty ones. When {@code z} is false, a
     * {@link JapaneseTokenizer} in {@code NORMAL} mode is used and wrapped in a
     * {@code TagJoiningFilter}; {@code z2} is ignored on that path.
     * <p>
     * NOTE(review): {@code z} / {@code z2} are decompiler names; they presumably correspond to
     * "stems allowed" / "stop words allowed" in {@code BaseTokenizer} - confirm against it.
     *
     * @param str the text to tokenize
     * @param z   selects the analyzer path (true) or the tag-joining tokenizer path (false)
     * @param z2  on the analyzer path, whether the default stop set and stop tags are applied
     * @return a token stream over {@code str}
     * @throws IOException declared by the overridden method
     */
    @Override // org.omegat.tokenizer.BaseTokenizer
    protected TokenStream getTokenStream(String str, boolean z, boolean z2) throws IOException {
        if (z) {
            // Tags are blanked before the analyzer is built, as in the original evaluation order.
            String tagless = blankOutTags(str);
            JapaneseAnalyzer analyzer;
            if (z2) {
                analyzer = new JapaneseAnalyzer((UserDictionary) null, JapaneseTokenizer.Mode.SEARCH,
                        JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags());
            } else {
                analyzer = new JapaneseAnalyzer((UserDictionary) null, JapaneseTokenizer.Mode.SEARCH,
                        CharArraySet.EMPTY_SET, Collections.<String>emptySet());
            }
            // The analyzer is neither retained nor closed here; only its stream is handed out.
            return analyzer.tokenStream("", new StringReader(tagless));
        } else {
            JapaneseTokenizer rawTokenizer =
                    new JapaneseTokenizer((UserDictionary) null, false, JapaneseTokenizer.Mode.NORMAL);
            rawTokenizer.setReader(new StringReader(str));
            return new TagJoiningFilter(rawTokenizer);
        }
    }

    /**
     * Returns a copy of {@code str} in which every character covered by a match of
     * {@code PatternConsts.OMEGAT_TAG} is replaced by a space. The result has the same
     * length as the input, so character positions are preserved.
     *
     * @param str the text to process
     * @return the text with all matched regions overwritten by spaces
     */
    private String blankOutTags(String str) {
        char[] chars = str.toCharArray();
        Matcher tagMatcher = PatternConsts.OMEGAT_TAG.matcher(str);
        while (tagMatcher.find()) {
            // Overwrite the half-open range [start, end) of the match.
            Arrays.fill(chars, tagMatcher.start(), tagMatcher.end(), ' ');
        }
        return new String(chars);
    }
}
