package org.omegat.core.segmentation;

import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.omegat.util.Language;
import org.omegat.util.PatternConsts;

/* loaded from: input_file:org/omegat/core/segmentation/Segmenter.class */
/**
 * Splits paragraphs into segments and glues segments back together, driven by
 * the segmentation rules held in an {@link SRX} instance.
 */
public final class Segmenter {
    /** Rule set queried for the language-specific segmentation rules. */
    private final SRX srx;

    /**
     * Stand-in "before break" pattern for rules that only define an "after break"
     * expression: matches any single character, line terminators included.
     */
    private static final Pattern DEFAULT_BEFOREBREAK_PATTERN = Pattern.compile(".", Pattern.DOTALL);

    /**
     * Matches text that starts with an optional run of spaces (captured as group 1)
     * immediately followed by a carriage return, a line feed or a tab.
     */
    private static final Pattern LINE_BREAK_OR_TAB_PATTERN = Pattern.compile("^( *)[\\r\\n\\t]");

    /**
     * An offset inside a paragraph paired with the rule that matched there.
     *
     * <p>Equality, hashing and ordering look at {@link #position} only, so a sorted
     * set holds at most one entry per offset regardless of the rule involved.
     */
    public static class BreakPosition implements Comparable<BreakPosition> {
        /** Offset (in chars) inside the paragraph. */
        int position;
        /** Rule whose patterns matched at {@link #position}. */
        Rule reason;

        BreakPosition(int i, Rule rule) {
            this.position = i;
            this.reason = rule;
        }

        @Override
        public boolean equals(Object obj) {
            // instanceof is already false for null, no separate null check needed.
            return (obj instanceof BreakPosition) && this.position == ((BreakPosition) obj).position;
        }

        @Override
        public int hashCode() {
            return this.position;
        }

        @Override // java.lang.Comparable
        public int compareTo(BreakPosition breakPosition) {
            // Explicit three-way comparison: subtracting the two ints could overflow.
            int other = breakPosition.position;
            if (this.position < other) {
                return -1;
            }
            return (this.position == other) ? 0 : 1;
        }
    }

    /**
     * Creates a segmenter backed by the given rule set.
     *
     * @param srx rule set used to look up the rules for a language
     */
    public Segmenter(SRX srx) {
        this.srx = srx;
    }

    /**
     * Returns the rule set this segmenter was created with.
     *
     * @return the {@link SRX} instance passed to the constructor
     */
    public SRX getSRX() {
        return this.srx;
    }

    /**
     * Splits a paragraph into segments and strips the whitespace (as defined by
     * {@link Character#isWhitespace(int)}) surrounding each of them.
     *
     * @param language language whose rules are looked up in the rule set
     * @param str      paragraph to split; may be {@code null}
     * @param list     if not {@code null}, cleared and then filled with two builders per
     *                 segment, in order: the stripped leading whitespace and the stripped
     *                 trailing whitespace
     * @param list2    if not {@code null}, cleared and then filled with the rule recorded
     *                 for every break position that was applied
     * @return the stripped segments, or {@code null} when {@code str} is {@code null}
     */
    public List<String> segment(Language language, String str, List<StringBuilder> list, List<Rule> list2) {
        if (str == null) {
            return null;
        }
        List<String> rawSegments = breakParagraph(language, str, list2);
        List<String> stripped = new ArrayList<String>(rawSegments.size());
        if (list != null) {
            list.clear();
        }
        for (String raw : rawSegments) {
            int end = raw.length();

            // Advance past leading whitespace, one code point at a time.
            int start = 0;
            while (start < end) {
                int codePoint = raw.codePointAt(start);
                if (!Character.isWhitespace(codePoint)) {
                    break;
                }
                start += Character.charCount(codePoint);
            }

            // Retreat over trailing whitespace, never crossing 'start'.
            while (end > start) {
                int codePoint = raw.codePointBefore(end);
                if (!Character.isWhitespace(codePoint)) {
                    break;
                }
                end -= Character.charCount(codePoint);
            }

            stripped.add(raw.substring(start, end));
            if (list != null) {
                // The stripped whitespace is exactly the prefix and suffix of the raw segment.
                list.add(new StringBuilder(raw.substring(0, start)));
                list.add(new StringBuilder(raw.substring(end)));
            }
        }
        return stripped;
    }

    /**
     * Cuts a paragraph at every position selected by the rules for the language.
     *
     * <p>A trailing piece that is empty or whitespace-only (per {@link String#trim()})
     * is appended to the previous segment instead of becoming a segment of its own,
     * unless it would be the only segment.
     *
     * @param language  language whose rules are looked up in the rule set
     * @param paragraph text to cut
     * @param brules    if not {@code null}, cleared and then filled with the rule
     *                  recorded for each applied break position, in ascending order
     * @return the untrimmed segments; always contains at least one element
     */
    private List<String> breakParagraph(Language language, String paragraph, List<Rule> brules) {
        List<Rule> rules = this.srx.lookupRulesForLanguage(language);
        TreeSet<BreakPosition> exceptions = new TreeSet<BreakPosition>();
        TreeSet<BreakPosition> breaks = new TreeSet<BreakPosition>();

        // Walk the rules back to front so that a rule nearer the start of the list
        // overrides what later rules decided for the same position.
        for (int idx = rules.size() - 1; idx >= 0; idx--) {
            Rule rule = rules.get(idx);
            List<BreakPosition> positions = getBreaks(paragraph, rule);
            if (rule.isBreakRule()) {
                breaks.addAll(positions);
                exceptions.removeAll(positions);
            } else {
                exceptions.addAll(positions);
                breaks.removeAll(positions);
            }
        }
        breaks.removeAll(exceptions);

        List<String> segments = new ArrayList<String>();
        if (brules != null) {
            brules.clear();
        }
        int previous = 0;
        for (BreakPosition bp : breaks) {
            segments.add(paragraph.substring(previous, bp.position));
            if (brules != null) {
                brules.add(bp.reason);
            }
            previous = bp.position;
        }

        // 'previous' is 0 or a matcher end offset on this very string, so it is always
        // within [0, length] and substring cannot fail; no exception handling is needed.
        String tail = paragraph.substring(previous);
        if (tail.trim().isEmpty() && !segments.isEmpty()) {
            int last = segments.size() - 1;
            segments.set(last, segments.get(last) + tail);
        } else {
            segments.add(tail);
        }
        return segments;
    }

    /**
     * Finds every offset in the paragraph where the rule applies: the rule's
     * "before break" pattern ends there and its "after break" pattern starts there.
     *
     * <p>If the rule has no "before break" expression, any single character is
     * accepted before the break. If it has no "after break" expression, the end of
     * every "before break" match qualifies. A rule with neither yields nothing.
     *
     * @param paragraph text to scan
     * @param rule      rule supplying the compiled patterns
     * @return matching positions in ascending order; possibly empty, never {@code null}
     */
    private static List<BreakPosition> getBreaks(String paragraph, Rule rule) {
        List<BreakPosition> found = new ArrayList<BreakPosition>();

        Matcher before = null;
        if (rule.getBeforebreak() != null) {
            before = rule.getCompiledBeforebreak().matcher(paragraph);
        }
        Matcher after = null;
        if (rule.getAfterbreak() != null) {
            after = rule.getCompiledAfterbreak().matcher(paragraph);
        }

        if (before == null && after == null) {
            return found;
        }
        // Position the "after" matcher on its first match; without one nothing can qualify.
        if (after != null && !after.find()) {
            return found;
        }
        if (before == null) {
            before = DEFAULT_BEFOREBREAK_PATTERN.matcher(paragraph);
        }

        while (before.find()) {
            int beforeEnd = before.end();
            if (after == null) {
                found.add(new BreakPosition(beforeEnd, rule));
                continue;
            }
            // Advance the "after" matcher until it starts at or beyond the end of the
            // current "before" match. This loop must be bounded by that condition:
            // looping unconditionally would never terminate once afterStart >= beforeEnd.
            int afterStart = after.start();
            while (afterStart < beforeEnd) {
                if (!after.find()) {
                    // No "after" matches left, so no further position can qualify.
                    return found;
                }
                afterStart = after.start();
            }
            if (afterStart == beforeEnd) {
                found.add(new BreakPosition(beforeEnd, rule));
            }
        }
        return found;
    }

    /**
     * Joins segments back into one string, re-inserting the whitespace that was
     * recorded between them and adjusting it to the target language.
     *
     * <p>The whitespace placed before segment {@code i} (for {@code i >= 1}) is
     * {@code list2.get(2 * i - 1)} followed by {@code list2.get(2 * i)}. It is then
     * adjusted as follows:
     * <ul>
     * <li>Target language not space-delimited, and the text glued so far is not empty:
     * if the whitespace starts with optional spaces followed by a line break or tab,
     * only those leading spaces are removed; otherwise the whitespace is dropped
     * entirely when the glued text does not end with {@code '.'} and the rule's
     * "before break" or "after break" expression does not fully match
     * {@code PatternConsts.SPACY_REGEX}.</li>
     * <li>Target language space-delimited, source language not, and no whitespace
     * recorded: a single space is inserted.</li>
     * </ul>
     *
     * @param language  source language
     * @param language2 target language
     * @param list      segments to join
     * @param list2     whitespace builders, two per segment; read at indices
     *                  {@code 1 .. 2 * (list.size() - 1)}
     * @param list3     rule per break; entry {@code i - 1} is read for segment {@code i},
     *                  but only when the target language is not space-delimited
     * @return the glued text, or an empty string when {@code list} is empty
     */
    public String glue(Language language, Language language2, List<String> list, List<StringBuilder> list2, List<Rule> list3) {
        if (list.size() <= 0) {
            return "";
        }
        StringBuilder result = new StringBuilder();
        result.append(list.get(0));
        for (int i = 1; i < list.size(); i++) {
            // Trailing whitespace of the previous segment + leading whitespace of this one.
            StringBuilder gap = new StringBuilder();
            gap.append(list2.get((2 * i) - 1));
            gap.append(list2.get(2 * i));

            if (!language2.isSpaceDelimited()) {
                Rule rule = list3.get(i - 1);
                if (result.length() > 0) {
                    char lastChar = result.charAt(result.length() - 1);
                    Matcher lineBreak = LINE_BREAK_OR_TAB_PATTERN.matcher(gap.toString());
                    if (lineBreak.find()) {
                        // Strip the spaces in front of the line break/tab; everything
                        // from the line break/tab onwards is kept.
                        gap.delete(0, lineBreak.group(1).length());
                    } else if (lastChar != '.' && (!PatternConsts.SPACY_REGEX.matcher(rule.getBeforebreak()).matches() || !PatternConsts.SPACY_REGEX.matcher(rule.getAfterbreak()).matches())) {
                        gap.setLength(0);
                    }
                }
            } else if (!language.isSpaceDelimited() && gap.length() == 0) {
                gap.append(" ");
            }
            result.append(gap);
            result.append(list.get(i));
        }
        return result.toString();
    }

    /**
     * Adds a source/target entry pair to the output lists, segment by segment when
     * possible and as a whole otherwise.
     *
     * <p>The pair is added segment-wise only if {@code z} is {@code true},
     * {@code str2} is not {@code null}, and both texts split into the same number of
     * segments. In every other case the unsplit {@code str} and {@code str2} are added.
     *
     * <p>NOTE(review): when {@code z} is {@code true} and {@code str2} is not
     * {@code null}, {@code str} must not be {@code null} either, because
     * {@link #segment} returns {@code null} for a {@code null} text.
     *
     * @param z         whether segmentation should be attempted
     * @param language  language used to segment {@code str}
     * @param str       source text
     * @param language2 language used to segment {@code str2}
     * @param str2      target text; may be {@code null}
     * @param list      receives the source segments (or the whole source text)
     * @param list2     receives the target segments (or the whole target text)
     */
    public void segmentEntries(boolean z, Language language, String str, Language language2, String str2, List<String> list, List<String> list2) {
        if (z) {
            List<String> sourceSegments = segment(language, str, null, null);
            if (str2 != null) {
                List<String> targetSegments = segment(language2, str2, null, null);
                if (sourceSegments.size() == targetSegments.size()) {
                    list.addAll(sourceSegments);
                    list2.addAll(targetSegments);
                    return;
                }
            }
        }
        list.add(str);
        list2.add(str2);
    }
}
