/*
 * Decompiled with CFR 0.152.
 */
package org.apdplat.word.analysis;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apdplat.word.analysis.Similarity;
import org.apdplat.word.analysis.SimilarityRanker;
import org.apdplat.word.recognition.StopWord;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.SegmentationFactory;
import org.apdplat.word.segmentation.Word;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for text similarity algorithms.
 *
 * <p>It segments raw text into {@link Word} lists, short-circuits the trivial
 * null/empty cases, delegates the actual scoring to {@link #scoreImpl(List, List)}
 * and rounds the result to six decimal places. It also offers helpers that
 * subclasses can use to weight words by their frequency and to build a
 * text-to-weight lookup map.
 *
 * <p>The segmenter is created lazily without any synchronization.
 */
public abstract class TextSimilarity implements Similarity, SimilarityRanker {
    protected static final Logger LOGGER = LoggerFactory.getLogger(TextSimilarity.class);

    /** Number of decimal places kept in the score is six, i.e. a scale of 10^6. */
    private static final double SCORE_SCALE = 1000000.0;

    /** Segmenter used by {@link #seg(String)}; created on first use when not set explicitly. */
    private Segmentation segmentation = null;

    /** When true, {@link #seg(String)} hands the segmented words to {@code StopWord.filterStopWords}. */
    protected boolean filterStopWord = false;

    /**
     * Selects the segmentation algorithm used to split text into words.
     *
     * @param segmentationAlgorithm the algorithm to obtain a segmenter for
     */
    public void setSegmentationAlgorithm(SegmentationAlgorithm segmentationAlgorithm) {
        this.segmentation = SegmentationFactory.getSegmentation(segmentationAlgorithm);
        LOGGER.info("\u8bbe\u7f6e\u5206\u8bcd\u7b97\u6cd5\u4e3a\uff1a" + segmentationAlgorithm.getDes());
    }

    /**
     * Computes the similarity score of two texts by segmenting both and
     * delegating to {@link #similarScore(List, List)}.
     *
     * @param string  the first text
     * @param string2 the second text
     * @return 0.0 if either text is null, otherwise the score of the two word lists
     */
    @Override
    public double similarScore(String string, String string2) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("\u6587\u672c1\uff1a");
            LOGGER.debug("\t" + string);
            LOGGER.debug("\u6587\u672c2\uff1a");
            LOGGER.debug("\t" + string2);
        }
        if (string == null || string2 == null) {
            return 0.0;
        }
        List<Word> words1 = this.seg(string);
        List<Word> words2 = this.seg(string2);
        return this.similarScore(words1, words2);
    }

    /**
     * Computes the similarity score of two word lists.
     *
     * @param list  the first word list
     * @param list2 the second word list
     * @return 0.0 if either list is null; 1.0 if both are empty; 0.0 if exactly
     *         one is empty; otherwise the value of {@link #scoreImpl(List, List)}
     *         rounded to six decimal places
     */
    @Override
    public double similarScore(List<Word> list, List<Word> list2) {
        if (list == null || list2 == null) {
            return 0.0;
        }
        if (list.isEmpty() && list2.isEmpty()) {
            return 1.0;
        }
        if (list.isEmpty() || list2.isEmpty()) {
            return 0.0;
        }
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("\u8bcd\u5217\u88681\uff1a");
            LOGGER.debug("\t" + list);
            LOGGER.debug("\u8bcd\u5217\u88682\uff1a");
            LOGGER.debug("\t" + list2);
        }
        double score = this.scoreImpl(list, list2);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("\u5206\u503c\uff1a" + score);
        }
        // Math.round works on long, so large scores no longer saturate at
        // Integer.MAX_VALUE, and negative scores are rounded instead of being
        // truncated toward zero after the +0.5 shift.
        score = (double) Math.round(score * SCORE_SCALE) / SCORE_SCALE;
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("\u53d6\u516d\u4f4d\u5c0f\u6570\uff0c\u56db\u820d\u4e94\u5165\uff0c\u5206\u503c\uff1a" + score);
        }
        return score;
    }

    /**
     * Algorithm-specific scoring. When invoked from
     * {@link #similarScore(List, List)} both lists are non-null and non-empty.
     *
     * @param var1 the first word list
     * @param var2 the second word list
     * @return the raw (unrounded) score
     */
    protected abstract double scoreImpl(List<Word> var1, List<Word> var2);

    /**
     * Segments a text into words, creating the default
     * {@code SegmentationAlgorithm.MaxNgramScore} segmenter if none was set.
     *
     * @param text the text to segment, may be null
     * @return an empty list for null input, otherwise the segmenter's result
     */
    private List<Word> seg(String text) {
        if (text == null) {
            return Collections.emptyList();
        }
        if (this.segmentation == null) {
            this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.MaxNgramScore);
        }
        List<Word> words = this.segmentation.seg(text);
        if (this.filterStopWord) {
            // The return value is not used, so this presumably filters the list in place.
            StopWord.filterStopWords(words);
        }
        return words;
    }

    /**
     * Sets each word's weight to the number of times its text occurs in its own list.
     *
     * <p>Nothing is changed when the first word of either list already carries a
     * weight. Null or empty lists are tolerated and simply contribute no words.
     *
     * @param list  the first word list
     * @param list2 the second word list
     */
    protected void taggingWeightWithWordFrequency(List<Word> list, List<Word> list2) {
        if (this.firstWordHasWeight(list) || this.firstWordHasWeight(list2)) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("\u8bcd\u5df2\u7ecf\u88ab\u6307\u5b9a\u6743\u91cd\uff0c\u4e0d\u518d\u4f7f\u7528\u8bcd\u9891\u8fdb\u884c\u6807\u6ce8");
            }
            return;
        }
        Map<String, AtomicInteger> frequencies1 = this.frequency(list);
        Map<String, AtomicInteger> frequencies2 = this.frequency(list2);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("\u8bcd\u9891\u7edf\u8ba11\uff1a\n{}", (Object) this.formatWordsFrequency(frequencies1));
            LOGGER.debug("\u8bcd\u9891\u7edf\u8ba12\uff1a\n{}", (Object) this.formatWordsFrequency(frequencies2));
        }
        this.assignFrequencyWeights(list, frequencies1);
        this.assignFrequencyWeights(list2, frequencies2);
    }

    /** Returns true when the list has a first element whose weight is non-null. */
    private boolean firstWordHasWeight(List<Word> words) {
        return words != null && !words.isEmpty() && words.get(0).getWeight() != null;
    }

    /** Sets every word's weight to the count recorded for its text in {@code frequencies}. */
    private void assignFrequencyWeights(List<Word> words, Map<String, AtomicInteger> frequencies) {
        if (words == null) {
            return;
        }
        words.parallelStream().forEach(word -> word.setWeight(Float.valueOf(frequencies.get(word.getText()).floatValue())));
    }

    /**
     * Builds a map from word text to word weight.
     *
     * <p>Words without a weight are skipped and reported through the error log.
     *
     * @param list the words to index, may be null
     * @return a map of text to weight; empty when {@code list} is null
     */
    protected Map<String, Float> toFastSearchMap(List<Word> list) {
        ConcurrentHashMap<String, Float> weightsByText = new ConcurrentHashMap<String, Float>();
        if (list == null) {
            return weightsByText;
        }
        list.parallelStream().forEach(word -> {
            if (word.getWeight() != null) {
                weightsByText.put(word.getText(), word.getWeight());
            } else {
                LOGGER.error("\u8bcd\u6ca1\u6709\u6743\u91cd\u4fe1\u606f\uff1a" + word.getText());
            }
        });
        return weightsByText;
    }

    /**
     * Counts how many times each word text occurs.
     *
     * @param words the words to count, may be null
     * @return a map of text to occurrence count; empty when {@code words} is null
     */
    private Map<String, AtomicInteger> frequency(List<Word> words) {
        HashMap<String, AtomicInteger> counts = new HashMap<String, AtomicInteger>();
        if (words == null) {
            return counts;
        }
        words.forEach(word -> counts.computeIfAbsent(word.getText(), text -> new AtomicInteger()).incrementAndGet());
        return counts;
    }

    /**
     * Renders the frequencies as numbered lines sorted by descending count.
     *
     * @param map the frequencies to render, may be null
     * @return the rendered lines without a trailing newline; an empty string
     *         when {@code map} is null or empty
     */
    private String formatWordsFrequency(Map<String, AtomicInteger> map) {
        StringBuilder out = new StringBuilder();
        if (map != null && !map.isEmpty()) {
            AtomicInteger rank = new AtomicInteger();
            map.entrySet().stream()
                    .sorted((left, right) -> Integer.compare(right.getValue().get(), left.getValue().get()))
                    .forEach(entry -> out.append("\t")
                            .append(rank.incrementAndGet())
                            .append("\u3001")
                            .append(entry.getKey())
                            .append("=")
                            .append(entry.getValue())
                            .append("\n"));
        }
        // Drop the trailing newline only when something was written; an
        // unconditional setLength(length - 1) throws on an empty builder.
        if (out.length() > 0) {
            out.setLength(out.length() - 1);
        }
        return out.toString();
    }
}

