/*
 * Decompiled with CFR 0.152.
 */
package org.apdplat.word;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apdplat.word.WordSegmenter;
import org.apdplat.word.recognition.StopWord;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.SegmentationFactory;
import org.apdplat.word.segmentation.Word;
import org.apdplat.word.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Counts how often each word occurs in segmented text and writes the counts to a result file.
 *
 * <p>Text is segmented with a configurable {@link SegmentationAlgorithm}
 * ({@link SegmentationAlgorithm#MaxNgramScore} unless another one is selected). Counts are
 * accumulated in a {@link ConcurrentHashMap} of {@link AtomicInteger} counters until
 * {@link #reset()} is called, so several {@code seg} calls add up to a single statistic.
 *
 * <p>Result files contain one {@code "<word> <count>"} entry per line, highest count first.
 * Files in that format can be combined with {@link #merge(String, String...)}.
 *
 * <p>Command-line options understood by {@link #main(String[])}:
 * <ul>
 *   <li>{@code -removeStopWord} - do not count stop words</li>
 *   <li>{@code -textFile=<path>} - text file to segment and count (may be repeated)</li>
 *   <li>{@code -statisticsResultFile=<path>} - where to write the statistics</li>
 *   <li>{@code -segmentationAlgorithm=<name>} - name of a {@link SegmentationAlgorithm} constant</li>
 * </ul>
 */
public class WordFrequencyStatistics {
    private static final Logger LOGGER = LoggerFactory.getLogger(WordFrequencyStatistics.class);

    /** Command-line switch that turns stop-word removal on. */
    private static final String REMOVE_STOP_WORD_FLAG = "-removeStopWord";
    /** Command-line prefix introducing a text file to process. */
    private static final String TEXT_FILE_PREFIX = "-textFile=";
    /** Command-line prefix introducing the statistics result file. */
    private static final String RESULT_FILE_PREFIX = "-statisticsResultFile=";
    /** Command-line prefix introducing the segmentation algorithm name. */
    private static final String ALGORITHM_PREFIX = "-segmentationAlgorithm=";

    /** Results with fewer entries than this are also written to the log, line by line. */
    private static final int LOG_DETAIL_LIMIT = 100;
    /** Separator between word and count in a result file; compiled once instead of per line. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    private String resultPath = "WordFrequencyStatistics-Result.txt";
    private Segmentation segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.MaxNgramScore);
    private final Map<String, AtomicInteger> statisticsMap = new ConcurrentHashMap<>();
    private boolean removeStopWord = false;

    /**
     * Creates an instance with the default result path, the {@code MaxNgramScore} algorithm
     * and stop-word removal switched off.
     */
    public WordFrequencyStatistics() {
    }

    /**
     * Creates an instance that writes its statistics to the given file.
     *
     * @param resultPath path of the statistics result file
     */
    public WordFrequencyStatistics(String resultPath) {
        this.resultPath = resultPath;
    }

    /**
     * Creates an instance with an explicit result file and segmentation algorithm.
     *
     * @param resultPath path of the statistics result file
     * @param segmentationAlgorithm algorithm used to segment text
     */
    public WordFrequencyStatistics(String resultPath, SegmentationAlgorithm segmentationAlgorithm) {
        this.resultPath = resultPath;
        this.segmentation = SegmentationFactory.getSegmentation(segmentationAlgorithm);
    }

    /**
     * Creates an instance with an explicit result file and a segmentation algorithm given by name.
     *
     * @param resultPath path of the statistics result file
     * @param segmentationAlgorithm name of a {@link SegmentationAlgorithm} constant
     * @throws IllegalArgumentException if no constant with that name exists
     */
    public WordFrequencyStatistics(String resultPath, String segmentationAlgorithm) {
        this.resultPath = resultPath;
        this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.valueOf(segmentationAlgorithm));
    }

    /**
     * Switches stop-word removal on or off.
     *
     * @param removeStopWord {@code true} to leave stop words out of the statistics
     */
    public void setRemoveStopWord(boolean removeStopWord) {
        this.removeStopWord = removeStopWord;
    }

    /**
     * @return {@code true} if stop words are left out of the statistics
     */
    public boolean isRemoveStopWord() {
        return this.removeStopWord;
    }

    /**
     * @return path of the file that {@link #dump()} writes to
     */
    public String getResultPath() {
        return this.resultPath;
    }

    /**
     * Sets the file that {@link #dump()} writes to.
     *
     * @param resultPath path of the statistics result file
     */
    public void setResultPath(String resultPath) {
        this.resultPath = resultPath;
    }

    /**
     * @return the algorithm reported by the current segmentation implementation
     */
    public SegmentationAlgorithm getSegmentationAlgorithm() {
        return this.segmentation.getSegmentationAlgorithm();
    }

    /**
     * Replaces the segmentation implementation with the one registered for the given algorithm.
     *
     * @param segmentationAlgorithm algorithm used to segment text from now on
     */
    public void setSegmentationAlgorithm(SegmentationAlgorithm segmentationAlgorithm) {
        this.segmentation = SegmentationFactory.getSegmentation(segmentationAlgorithm);
    }

    /**
     * Segments the given text and adds one occurrence per resulting word to the statistics.
     * Stop words are skipped when stop-word removal is enabled.
     *
     * @param text text to segment and count
     */
    public void seg(String text) {
        this.segmentation.seg(text).parallelStream().forEach(word -> {
            boolean skipped = isRemoveStopWord() && StopWord.is(word.getText());
            if (!skipped) {
                statistics(word, 1, this.statisticsMap);
            }
        });
    }

    /**
     * Segments a text file through {@link Utils#seg} and counts every word reported to the callback.
     * The stop-word setting and the current segmentation algorithm are passed on to {@code Utils.seg}.
     *
     * @param input text file to segment
     * @param output file handed to {@code Utils.seg} as its second argument
     *               (NOTE(review): presumably receives the segmented text - confirm against Utils)
     * @throws Exception if {@code Utils.seg} fails
     */
    public void seg(File input, File output) throws Exception {
        Utils.seg(input, output, this.isRemoveStopWord(), this.segmentation.getSegmentationAlgorithm(), word -> this.statistics(word, 1, this.statisticsMap));
    }

    /**
     * Adds {@code delta} to the counter of {@code text}, creating the counter on first use.
     */
    private void statistics(String text, int delta, Map<String, AtomicInteger> counts) {
        // computeIfAbsent only allocates a counter when the word is actually new.
        counts.computeIfAbsent(text, key -> new AtomicInteger()).addAndGet(delta);
    }

    /**
     * Adds {@code delta} to the counter of the word's text.
     */
    private void statistics(Word word, int delta, Map<String, AtomicInteger> counts) {
        statistics(word.getText(), delta, counts);
    }

    /**
     * Writes the accumulated statistics to the given file and remembers that file as the
     * result path for later {@link #dump()} calls.
     *
     * @param resultPath path of the statistics result file
     */
    public void dump(String resultPath) {
        this.resultPath = resultPath;
        this.dump();
    }

    /**
     * Writes the accumulated statistics to the current result path.
     * Failures are logged and not propagated.
     */
    public void dump() {
        this.dump(this.statisticsMap, this.resultPath);
    }

    /**
     * Writes {@code counts} to {@code path} as {@code "<word> <count>"} lines ordered by
     * descending count. Small results are additionally logged entry by entry.
     * Any failure is logged instead of thrown.
     */
    private void dump(Map<String, AtomicInteger> counts, String path) {
        try {
            List<String> lines = counts.entrySet().parallelStream()
                    // Highest count first; Integer.compare avoids boxing.
                    .sorted((left, right) -> Integer.compare(right.getValue().get(), left.getValue().get()))
                    .map(entry -> entry.getKey() + " " + entry.getValue().get())
                    .collect(Collectors.toList());
            Files.write(Paths.get(path), lines);
            if (lines.size() < LOG_DETAIL_LIMIT) {
                // Message: "Word frequency statistics result:"
                LOGGER.info("\u8bcd\u9891\u7edf\u8ba1\u7ed3\u679c\uff1a");
                int rank = 0;
                for (String line : lines) {
                    LOGGER.info("\t{}\u3001{}", ++rank, line);
                }
            }
            // Message: "Word frequency statistics result successfully saved to file:"
            LOGGER.info("\u8bcd\u9891\u7edf\u8ba1\u7ed3\u679c\u6210\u529f\u4fdd\u5b58\u5230\u6587\u4ef6\uff1a{}", path);
        } catch (Exception e) {
            LOGGER.error("dump error!", e);
        }
    }

    /**
     * Merges several statistics result files into one.
     *
     * <p>Every line consisting of exactly two whitespace-separated fields is read as
     * {@code "<word> <count>"}; other lines are ignored. Counts of the same word are summed and
     * the merged statistics are written to {@code mergeResultPath}. If reading or parsing fails
     * (for example because a count is not an integer) the error is logged and nothing is written.
     * The statistics accumulated by this instance are not touched.
     *
     * @param mergeResultPath path of the file receiving the merged statistics
     * @param resultPaths paths of the result files to merge
     */
    public void merge(String mergeResultPath, String... resultPaths) {
        try {
            Map<String, AtomicInteger> merged = new ConcurrentHashMap<>();
            for (String path : resultPaths) {
                // Files.lines keeps the file open until the stream is closed.
                try (Stream<String> lines = Files.lines(Paths.get(path))) {
                    lines.forEach(line -> {
                        String[] fields = WHITESPACE.split(line);
                        if (fields.length == 2) {
                            statistics(fields[0], Integer.parseInt(fields[1]), merged);
                        }
                    });
                }
            }
            dump(merged, mergeResultPath);
        } catch (Exception e) {
            LOGGER.error("merge error!", e);
        }
    }

    /**
     * Discards all statistics accumulated so far.
     */
    public void reset() {
        this.statisticsMap.clear();
    }

    /**
     * Command-line entry point. With arguments, processes the given text files (see the class
     * description for the options); without arguments, runs a small built-in demonstration.
     *
     * @param args command-line arguments
     * @throws Exception if segmenting a file or writing the demo input fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length > 0) {
            runWithArguments(args);
        } else {
            runDemo();
        }
    }

    /**
     * Applies the command-line options, segments every requested text file and dumps the result.
     */
    private static void runWithArguments(String[] args) throws Exception {
        WordFrequencyStatistics statistics = new WordFrequencyStatistics();
        Set<String> textFiles = new HashSet<>();
        for (String arg : args) {
            // Values are taken with substring so that only the leading prefix is stripped.
            if (arg.equals(REMOVE_STOP_WORD_FLAG)) {
                statistics.setRemoveStopWord(true);
            } else if (arg.startsWith(TEXT_FILE_PREFIX)) {
                textFiles.add(arg.substring(TEXT_FILE_PREFIX.length()));
            } else if (arg.startsWith(RESULT_FILE_PREFIX)) {
                statistics.setResultPath(arg.substring(RESULT_FILE_PREFIX.length()));
            } else if (arg.startsWith(ALGORITHM_PREFIX)) {
                statistics.setSegmentationAlgorithm(SegmentationAlgorithm.valueOf(arg.substring(ALGORITHM_PREFIX.length())));
            }
        }
        for (String textFile : textFiles) {
            statistics.seg(new File(textFile), new File(textFile + ".seg.txt"));
        }
        statistics.dump();
    }

    /**
     * Demonstrates counting words of an in-memory sentence and of a freshly written text file.
     */
    private static void runDemo() throws Exception {
        WordFrequencyStatistics statistics = new WordFrequencyStatistics();
        statistics.setRemoveStopWord(false);
        statistics.setResultPath("word-frequency-statistics.txt");
        statistics.setSegmentationAlgorithm(SegmentationAlgorithm.MaxNgramScore);
        statistics.seg("\u660e\u5929\u4e0b\u96e8\uff0c\u7ed3\u5408\u6210\u5206\u5b50\uff0c\u660e\u5929\u6709\u5173\u4e8e\u5206\u5b50\u548c\u539f\u5b50\u7684\u8bfe\u7a0b\uff0c\u4e0b\u96e8\u4e86\u4e5f\u8981\u53bb\u542c\u8bfe");
        statistics.dump();

        Files.write(Paths.get("text-to-seg.txt"), Arrays.asList("word\u5206\u8bcd\u662f\u4e00\u4e2aJava\u5b9e\u73b0\u7684\u5206\u5e03\u5f0f\u4e2d\u6587\u5206\u8bcd\u7ec4\u4ef6\uff0c\u63d0\u4f9b\u4e86\u591a\u79cd\u57fa\u4e8e\u8bcd\u5178\u7684\u5206\u8bcd\u7b97\u6cd5\uff0c\u5e76\u5229\u7528ngram\u6a21\u578b\u6765\u6d88\u9664\u6b67\u4e49\u3002"));
        // Start from empty counts so the file statistics are not mixed with the sentence above.
        statistics.reset();
        statistics.seg(new File("text-to-seg.txt"), new File("text-seg-result.txt"));
        statistics.dump("file-seg-statistics-result.txt");
    }
}

