package org.omegat.core.statistics;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.omegat.core.Core;
import org.omegat.core.data.EntryKey;
import org.omegat.core.data.ExternalTMX;
import org.omegat.core.data.IProject;
import org.omegat.core.data.PrepareTMXEntry;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.data.TMXEntry;
import org.omegat.core.events.IStopped;
import org.omegat.core.matching.FuzzyMatcher;
import org.omegat.core.matching.ISimilarityCalculator;
import org.omegat.core.matching.LevenshteinDistance;
import org.omegat.core.matching.NearString;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.util.Language;
import org.omegat.util.OStrings;
import org.omegat.util.PatternConsts;
import org.omegat.util.TMXProp;
import org.omegat.util.Token;

/* loaded from: input_file:org/omegat/core/statistics/FindMatches.class */
/**
 * Collects entries whose source text is similar to a given text, looking at the
 * project's own translations, its external translation memories and the
 * translations carried by the source files.
 *
 * <p>The state of the running search (result list, stripped search text, token
 * arrays and tokenizer caches) lives in instance fields, and this class performs
 * no synchronization of its own.
 */
public class FindMatches {
    /** Score penalty applied to candidates that are flagged as fuzzy. */
    static final int PENALTY_FOR_FUZZY = 40;
    /** Score penalty applied when the text stripped by the remove pattern differs from the search text's. */
    private static final int PENALTY_FOR_REMOVED = 5;
    /** Minimum stemmed score a sub-segment match needs to be used when gluing a paragraph-level match. */
    private static final int SUBSEGMENT_MATCH_THRESHOLD = 85;
    /** A candidate is rejected outright when both its stemmed and non-stemmed scores are below this value. */
    private static final int MIN_SCORE_THRESHOLD = 30;
    /** Extracts a numeric penalty from a translation-memory key such as {@code penalty-30}. */
    private static final Pattern SEARCH_FOR_PENALTY = Pattern.compile("penalty-(\\d+)");
    private static final String ORPHANED_FILE_NAME = OStrings.getString("CT_ORPHAN_STRINGS");
    private final IProject project;
    private final ITokenizer tok;
    private final Locale srcLocale;
    private final int maxCount;
    /** Matches found by the current search, best first; replaced by every call to {@link #search}. */
    private List<NearString> result;
    private final boolean searchExactlyTheSame;
    /** Search text after the remove pattern (if any) has been applied. */
    private String srcText;
    /** Concatenation of everything the remove pattern matched in the search text. */
    private String removedText;
    private Token[] strTokensStem;
    private Token[] strTokensNoStem;
    private Token[] strTokensAll;
    /** Matcher used for per-segment lookups; {@code null} when sub-segment matching is not active. */
    private FindMatches separateSegmentMatcher;
    private final ISimilarityCalculator distance = new LevenshteinDistance();
    private final Pattern removePattern = PatternConsts.getRemovePattern();
    Map<String, Token[]> tokenizeStemCache = new HashMap<>();
    Map<String, Token[]> tokenizeNoStemCache = new HashMap<>();
    Map<String, Token[]> tokenizeAllCache = new HashMap<>();

    /** Thrown by {@link #checkStopped(IStopped)} to abort a running search. */
    public static class StoppedException extends RuntimeException {
    }

    /**
     * Creates a matcher bound to a project.
     *
     * @param iProject project whose tokenizer, source locale and data are used
     * @param i maximum number of matches kept in the result list
     * @param z when {@code true} and sentence segmenting is not enabled for the project,
     *        an additional single-result matcher is created for sub-segment lookups
     * @param z2 when {@code false}, project translations whose source equals the search
     *        text are skipped during {@link #search}
     */
    public FindMatches(IProject iProject, int i, boolean z, boolean z2) {
        this.project = iProject;
        this.tok = iProject.getSourceTokenizer();
        this.srcLocale = iProject.getProjectProperties().getSourceLanguage().getLocale();
        this.maxCount = i;
        this.searchExactlyTheSame = z2;
        if (z && !iProject.getProjectProperties().isSentenceSegmentingEnabled()) {
            this.separateSegmentMatcher = new FindMatches(iProject, 1, false, true);
        }
    }

    /**
     * Finds the best matches for a text.
     *
     * @param str text to find matches for
     * @param z when {@code true}, project and external-memory entries without a
     *        translation are ignored
     * @param z2 when {@code true}, the {@code attr} field of every returned match is
     *        filled with similarity data against the search text
     * @param iStopped polled before each candidate is examined
     * @return the list of matches, best first, holding at most the configured maximum;
     *         this is the instance's own list and is replaced by the next call
     * @throws StoppedException if {@code iStopped} reports that the search was stopped
     */
    public List<NearString> search(final String str, final boolean z, boolean z2, final IStopped iStopped) throws StoppedException {
        this.result = new ArrayList<>(6);
        this.srcText = str;
        this.removedText = "";
        if (this.removePattern != null) {
            Matcher removeMatcher = this.removePattern.matcher(this.srcText);
            this.removedText = collectMatches(removeMatcher);
            // replaceAll resets the matcher, so it can follow the find() pass above.
            this.srcText = removeMatcher.replaceAll("");
        }
        this.strTokensStem = tokenizeStem(this.srcText);
        this.strTokensNoStem = tokenizeNoStem(this.srcText);
        this.strTokensAll = tokenizeAll(this.srcText);

        if (this.project.getProjectProperties().isSupportDefaultTranslations()) {
            searchDefaultTranslations(str, z, iStopped);
        }
        searchMultipleTranslations(str, z, iStopped);
        searchExternalMemories(z, iStopped);
        searchSourceTranslations(iStopped);
        if (this.separateSegmentMatcher != null) {
            searchBySubSegments(z, iStopped);
        }

        if (z2) {
            for (NearString match : this.result) {
                match.attr = FuzzyMatcher.buildSimilarityData(this.strTokensAll, tokenizeAll(match.source));
            }
        }
        return this.result;
    }

    /** Concatenates every occurrence the given matcher finds, in order of appearance. */
    private static String collectMatches(Matcher matcher) {
        StringBuilder found = new StringBuilder();
        while (matcher.find()) {
            found.append(matcher.group());
        }
        return found.toString();
    }

    /** Feeds the project's default translations to {@link #processEntry}. */
    private void searchDefaultTranslations(final String str, final boolean z, final IStopped iStopped) {
        this.project.iterateByDefaultTranslations(new IProject.DefaultTranslationsIterator() {
            @Override
            public void iterate(String source, TMXEntry tMXEntry) {
                FindMatches.this.checkStopped(iStopped);
                // Comparison is against the caller's text, not the pattern-stripped one.
                if (!FindMatches.this.searchExactlyTheSame && source.equals(str)) {
                    return;
                }
                if (z && tMXEntry.translation == null) {
                    return;
                }
                String fileName = FindMatches.this.project.isOrphaned(source) ? FindMatches.ORPHANED_FILE_NAME : null;
                FindMatches.this.processEntry(null, source, tMXEntry.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, tMXEntry.creator, tMXEntry.creationDate, tMXEntry.changer, tMXEntry.changeDate, null);
            }
        });
    }

    /** Feeds the project's key-specific (multiple) translations to {@link #processEntry}. */
    private void searchMultipleTranslations(final String str, final boolean z, final IStopped iStopped) {
        this.project.iterateByMultipleTranslations(new IProject.MultipleTranslationsIterator() {
            @Override
            public void iterate(EntryKey entryKey, TMXEntry tMXEntry) {
                FindMatches.this.checkStopped(iStopped);
                // Comparison is against the caller's text, not the pattern-stripped one.
                if (!FindMatches.this.searchExactlyTheSame && entryKey.sourceText.equals(str)) {
                    return;
                }
                if (z && tMXEntry.translation == null) {
                    return;
                }
                String fileName = FindMatches.this.project.isOrphaned(entryKey) ? FindMatches.ORPHANED_FILE_NAME : null;
                FindMatches.this.processEntry(entryKey, entryKey.sourceText, tMXEntry.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, tMXEntry.creator, tMXEntry.creationDate, tMXEntry.changer, tMXEntry.changeDate, null);
            }
        });
    }

    /**
     * Feeds the entries of every external translation memory to {@link #processEntry}.
     * A {@code penalty-N} fragment in the memory's key is applied as a score penalty.
     */
    private void searchExternalMemories(boolean z, IStopped iStopped) {
        for (Map.Entry<String, ExternalTMX> memory : this.project.getTransMemories().entrySet()) {
            Matcher penaltyMatcher = SEARCH_FOR_PENALTY.matcher(memory.getKey());
            int penalty = penaltyMatcher.find() ? Integer.parseInt(penaltyMatcher.group(1)) : 0;
            for (PrepareTMXEntry tmxEntry : memory.getValue().getEntries()) {
                checkStopped(iStopped);
                if (tmxEntry.source == null) {
                    continue;
                }
                if (z && tmxEntry.translation == null) {
                    continue;
                }
                processEntry(null, tmxEntry.source, tmxEntry.translation, NearString.MATCH_SOURCE.TM, false, penalty, memory.getKey(), tmxEntry.creator, tmxEntry.creationDate, tmxEntry.changer, tmxEntry.changeDate, tmxEntry.otherProperties);
            }
        }
    }

    /** Feeds project entries that carry a translation in the source file itself to {@link #processEntry}. */
    private void searchSourceTranslations(IStopped iStopped) {
        for (SourceTextEntry sourceEntry : this.project.getAllEntries()) {
            checkStopped(iStopped);
            if (sourceEntry.getSourceTranslation() == null) {
                continue;
            }
            processEntry(sourceEntry.getKey(), sourceEntry.getSrcText(), sourceEntry.getSourceTranslation(), NearString.MATCH_SOURCE.MEMORY, sourceEntry.isSourceTranslationFuzzy(), 0, sourceEntry.getKey().file, "", 0L, "", 0L, null);
        }
    }

    /**
     * Splits the search text into segments, looks up each segment separately and, when
     * there is more than one segment, glues the per-segment results back together and
     * offers the combined text as one more candidate. Segments without a sufficiently
     * good match contribute an empty string.
     */
    // The two auxiliary lists are typed by the segmenter API, which is not visible from
    // this class; they are only passed from segment() to glue(), so they are left raw.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void searchBySubSegments(boolean z, IStopped iStopped) {
        // NOTE(review): presumably inter-segment spacing and break rules - confirm against the segmenter API.
        ArrayList spaces = new ArrayList();
        ArrayList breakRules = new ArrayList();
        Language sourceLanguage = this.project.getProjectProperties().getSourceLanguage();
        Language targetLanguage = this.project.getProjectProperties().getTargetLanguage();
        List<String> segments = Core.getSegmenter().segment(sourceLanguage, this.srcText, spaces, breakRules);
        if (segments.size() <= 1) {
            return;
        }
        List<String> foundSources = new ArrayList<>(segments.size());
        List<String> foundTranslations = new ArrayList<>(segments.size());
        for (String segment : segments) {
            List<NearString> segmentMatches = this.separateSegmentMatcher.search(segment, z, false, iStopped);
            NearString best = segmentMatches.isEmpty() ? null : segmentMatches.get(0);
            if (best != null && best.scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) {
                foundSources.add(best.source);
                foundTranslations.add(best.translation);
            } else {
                foundSources.add("");
                foundTranslations.add("");
            }
        }
        String gluedSource = Core.getSegmenter().glue(sourceLanguage, sourceLanguage, foundSources, spaces, breakRules);
        String gluedTranslation = Core.getSegmenter().glue(sourceLanguage, targetLanguage, foundTranslations, spaces, breakRules);
        processEntry(null, gluedSource, gluedTranslation, NearString.MATCH_SOURCE.TM, false, 0, "", "", 0L, "", 0L, null);
    }

    /**
     * Scores one candidate against the current search text and adds it to the result
     * list if it ranks high enough.
     *
     * <p>Three scores are computed lazily (stemmed, non-stemmed, verbatim tokens); the
     * more expensive ones are skipped as soon as the candidate can no longer qualify.
     * Each score is reduced by {@code i}, by {@link #PENALTY_FOR_FUZZY} when {@code z}
     * is set, and by a small penalty when the text stripped by the remove pattern
     * differs from what was stripped from the search text.
     *
     * @param entryKey key of the candidate, may be {@code null}
     * @param str candidate source text
     * @param str2 candidate translation
     * @param match_source origin of the candidate
     * @param z whether the candidate is flagged as fuzzy
     * @param i penalty subtracted from every score
     * @param str3 name recorded with the match (callers in this class pass a file or memory name)
     * @param str4 creator recorded with the match
     * @param j creation date recorded with the match
     * @param str5 changer recorded with the match
     * @param j2 change date recorded with the match
     * @param list additional properties recorded with the match, may be {@code null}
     */
    protected void processEntry(EntryKey entryKey, String str, String str2, NearString.MATCH_SOURCE match_source, boolean z, int i, String str3, String str4, long j, String str5, long j2, List<TMXProp> list) {
        String candidateText = str;
        int removedPenalty = 0;
        if (this.removePattern != null) {
            Matcher removeMatcher = this.removePattern.matcher(candidateText);
            String candidateRemoved = collectMatches(removeMatcher);
            candidateText = removeMatcher.replaceAll("");
            if (!candidateRemoved.equals(this.removedText)) {
                removedPenalty = PENALTY_FOR_REMOVED;
            }
        }
        int totalPenalty = i + (z ? PENALTY_FOR_FUZZY : 0) + removedPenalty;

        int scoreStem = FuzzyMatcher.calcSimilarity(this.distance, this.strTokensStem, tokenizeStem(candidateText)) - totalPenalty;
        if (!haveChanceToAdd(scoreStem, Integer.MAX_VALUE, Integer.MAX_VALUE)) {
            return;
        }
        int scoreNoStem = FuzzyMatcher.calcSimilarity(this.distance, this.strTokensNoStem, tokenizeNoStem(candidateText)) - totalPenalty;
        if (!haveChanceToAdd(scoreStem, scoreNoStem, Integer.MAX_VALUE)) {
            return;
        }
        int scoreAdjusted = FuzzyMatcher.calcSimilarity(this.distance, this.strTokensAll, tokenizeAll(candidateText)) - totalPenalty;
        if (!haveChanceToAdd(scoreStem, scoreNoStem, scoreAdjusted)) {
            return;
        }
        // The match keeps the candidate's original source text, not the pattern-stripped one.
        addNearString(entryKey, str, str2, match_source, z, scoreStem, scoreNoStem, scoreAdjusted, null, str3, str4, j, str5, j2, list);
    }

    /**
     * Tells whether a candidate with the given scores could still enter the result list.
     * Scores that are not known yet are passed as {@link Integer#MAX_VALUE}.
     *
     * @param i stemmed score
     * @param i2 non-stemmed score
     * @param i3 adjusted (verbatim-token) score
     * @return {@code false} if both {@code i} and {@code i2} are below the minimum
     *         threshold, or if the list is full and its last entry outranks the candidate
     */
    protected boolean haveChanceToAdd(int i, int i2, int i3) {
        if (i < MIN_SCORE_THRESHOLD && i2 < MIN_SCORE_THRESHOLD) {
            return false;
        }
        if (this.result.size() < this.maxCount) {
            return true;
        }
        NearString lowestRanked = this.result.get(this.result.size() - 1);
        int order = Integer.compare(lowestRanked.scores[0].score, i);
        if (order == 0) {
            order = Integer.compare(lowestRanked.scores[0].scoreNoStem, i2);
        }
        if (order == 0) {
            order = Integer.compare(lowestRanked.scores[0].adjustedScore, i3);
        }
        // Integer.compare only promises the sign of its result, so test the sign
        // instead of comparing against the literal 1.
        return order <= 0;
    }

    /**
     * Inserts a match into the result list, keeping the list ordered best-first and
     * capped at the configured maximum. A candidate whose source and translation equal
     * those of an existing match is merged into that match instead of being inserted.
     *
     * @param entryKey key of the candidate, may be {@code null}
     * @param str candidate source text
     * @param str2 candidate translation
     * @param match_source origin of the candidate
     * @param z whether the candidate is flagged as fuzzy
     * @param i stemmed score
     * @param i2 non-stemmed score
     * @param i3 adjusted (verbatim-token) score
     * @param bArr similarity data passed through to the match
     * @param str3 name recorded with the match
     * @param str4 creator recorded with the match
     * @param j creation date recorded with the match
     * @param str5 changer recorded with the match
     * @param j2 change date recorded with the match
     * @param list additional properties recorded with the match, may be {@code null}
     */
    protected void addNearString(EntryKey entryKey, String str, String str2, NearString.MATCH_SOURCE match_source, boolean z, int i, int i2, int i3, byte[] bArr, String str3, String str4, long j, String str5, long j2, List<TMXProp> list) {
        int insertAt = 0;
        for (int index = 0; index < this.result.size(); index++) {
            NearString existing = this.result.get(index);
            if (str.equals(existing.source) && Objects.equals(str2, existing.translation)) {
                this.result.set(index, NearString.merge(existing, entryKey, str, str2, match_source, z, i, i2, i3, bArr, str3, str4, j, str5, j2, list));
                return;
            }
            if (outranks(existing, str, i, i2, i3)) {
                break;
            }
            insertAt = index + 1;
        }
        this.result.add(insertAt, new NearString(entryKey, str, str2, match_source, z, i, i2, i3, bArr, str3, str4, j, str5, j2, list));
        if (this.result.size() > this.maxCount) {
            this.result.remove(this.result.size() - 1);
        }
    }

    /**
     * Tells whether a candidate must be placed before an existing match: scores are
     * compared in the order stemmed, non-stemmed, adjusted. When the first two tie and
     * the adjusted score is not higher, a candidate scoring 100 still wins if its source
     * equals the search text exactly while the existing match's source does not.
     */
    private boolean outranks(NearString existing, String source, int score, int scoreNoStem, int adjustedScore) {
        if (existing.scores[0].score != score) {
            return existing.scores[0].score < score;
        }
        if (existing.scores[0].scoreNoStem != scoreNoStem) {
            return existing.scores[0].scoreNoStem < scoreNoStem;
        }
        if (existing.scores[0].adjustedScore < adjustedScore) {
            return true;
        }
        return score == 100 && !existing.source.equals(this.srcText) && source.equals(this.srcText);
    }

    /**
     * Tokenizes a text into words using the {@code MATCHING} stemming mode, caching the
     * result per input string.
     *
     * @param str text to tokenize
     * @return the (possibly cached) tokens
     */
    public Token[] tokenizeStem(String str) {
        Token[] tokens = this.tokenizeStemCache.get(str);
        if (tokens == null) {
            tokens = this.tok.tokenizeWords(str, ITokenizer.StemmingMode.MATCHING);
            this.tokenizeStemCache.put(str, tokens);
        }
        return tokens;
    }

    /**
     * Tokenizes the lower-cased text (source locale) into words without stemming,
     * caching the result per lower-cased string.
     *
     * @param str text to tokenize
     * @return the (possibly cached) tokens
     */
    public Token[] tokenizeNoStem(String str) {
        String lowered = str.toLowerCase(this.srcLocale);
        Token[] tokens = this.tokenizeNoStemCache.get(lowered);
        if (tokens == null) {
            tokens = this.tok.tokenizeWords(lowered, ITokenizer.StemmingMode.NONE);
            this.tokenizeNoStemCache.put(lowered, tokens);
        }
        return tokens;
    }

    /**
     * Tokenizes the lower-cased text (source locale) verbatim, caching the result per
     * lower-cased string.
     *
     * @param str text to tokenize
     * @return the (possibly cached) tokens
     */
    public Token[] tokenizeAll(String str) {
        String lowered = str.toLowerCase(this.srcLocale);
        Token[] tokens = this.tokenizeAllCache.get(lowered);
        if (tokens == null) {
            tokens = this.tok.tokenizeVerbatim(lowered);
            this.tokenizeAllCache.put(lowered, tokens);
        }
        return tokens;
    }

    /**
     * Aborts the running search when the caller has asked for it.
     *
     * @param iStopped stop indicator to poll
     * @throws StoppedException if {@code iStopped.isStopped()} returns {@code true}
     */
    protected void checkStopped(IStopped iStopped) throws StoppedException {
        if (iStopped.isStopped()) {
            throw new StoppedException();
        }
    }
}
