package com.hankcs.hanlp.mining.phrase;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.occurrence.Occurrence;
import com.hankcs.hanlp.corpus.occurrence.PairFrequency;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.dictionary.stopword.Filter;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.NotionalTokenizer;
import java.util.LinkedList;
import java.util.List;

/* loaded from: classes.dex */
/**
 * {@link IPhraseExtractor} that segments text into sentences, feeds every sentence into an
 * {@link Occurrence} model and returns the top-ranked word pairs as phrases.
 *
 * <p>Ranking is delegated entirely to {@link Occurrence#getPhraseByScore()}. Judging by the
 * class name and the {@code mi}/{@code le}/{@code re} fields printed in debug mode, the score is
 * presumably derived from mutual information and left/right entropy — confirm against
 * {@code Occurrence}.
 */
public class MutualInformationEntropyPhraseExtractor implements IPhraseExtractor {

    /** Character (U+0000) that is swapped out of pair keys before they are printed in debug mode. */
    private static final char NUL_CHAR = '\u0000';

    /** Printable replacement (U+2192, rightwards arrow) used for {@link #NUL_CHAR} in debug output. */
    private static final char ARROW_CHAR = '\u2192';

    /**
     * Term filter that rejects terms whose nature is {@link Nature#t} or {@link Nature#nx} and
     * accepts everything else.
     *
     * <p>The class name is a decompiler-assigned identifier (the original name was not a valid
     * Java class name); it is kept as-is so existing references continue to resolve.
     */
    public class O8oO888 implements Filter {
        public O8oO888() {
        }

        /**
         * @param term the term to test
         * @return {@code false} when the term's nature is {@code Nature.t} or {@code Nature.nx},
         *         {@code true} otherwise
         */
        @Override // com.hankcs.hanlp.dictionary.stopword.Filter
        public boolean shouldInclude(Term term) {
            Nature nature = term.nature;
            return nature != Nature.t && nature != Nature.nx;
        }
    }

    /**
     * Convenience entry point: creates a fresh extractor and delegates to
     * {@link #extractPhrase(String, int)}.
     *
     * @param str the text to extract phrases from
     * @param i   the maximum number of phrases to return
     * @return the extracted phrases, in the order produced by {@link #extractPhrase(String, int)}
     */
    public static List<String> extract(String str, int i) {
        return new MutualInformationEntropyPhraseExtractor().extractPhrase(str, i);
    }

    /**
     * Extracts up to {@code i} phrases from {@code str}.
     *
     * <p>The text is split into sentences by {@link NotionalTokenizer#seg2sentence} using the core
     * stop-word filter plus {@link O8oO888}; every sentence is added to an {@link Occurrence},
     * which is then computed and queried for its score-ordered pairs. Each phrase is the
     * concatenation of a pair's {@code first} and {@code second} fields.
     *
     * <p>When {@code HanLP.Config.DEBUG} is set, the segmented sentences and the intermediate
     * rankings are printed to standard output.
     *
     * @param str the text to extract phrases from
     * @param i   the number of phrases at which collection stops; because the check is an exact
     *            equality against the current result size, a negative value never matches and
     *            every scored pair is returned
     * @return a new list holding the extracted phrases in score order
     */
    @Override // com.hankcs.hanlp.mining.phrase.IPhraseExtractor
    public List<String> extractPhrase(String str, int i) {
        Occurrence occurrence = new Occurrence();
        for (List<Term> sentence : NotionalTokenizer.seg2sentence(str, CoreStopWordDictionary.FILTER, new O8oO888())) {
            if (HanLP.Config.DEBUG) {
                System.out.println(sentence);
            }
            occurrence.addAll(sentence);
        }
        occurrence.compute();
        if (HanLP.Config.DEBUG) {
            printDebugReport(occurrence);
        }

        // Explicit type argument (no diamond) keeps the code compilable on pre-Java-7 source levels.
        List<String> phrases = new LinkedList<String>();
        for (PairFrequency pair : occurrence.getPhraseByScore()) {
            if (phrases.size() == i) {
                break;
            }
            phrases.add(pair.first + pair.second);
        }
        return phrases;
    }

    /**
     * Prints the occurrence model followed by one line per ranking (mi, le, re, score) to standard
     * output. In each printed key, U+0000 is replaced by a rightwards arrow so it is visible.
     */
    private static void printDebugReport(Occurrence occurrence) {
        System.out.println(occurrence);
        for (PairFrequency pair : occurrence.getPhraseByMi()) {
            System.out.print(pair.getKey().replace(NUL_CHAR, ARROW_CHAR) + "\tmi=" + pair.mi + " , ");
        }
        System.out.println();
        for (PairFrequency pair : occurrence.getPhraseByLe()) {
            System.out.print(pair.getKey().replace(NUL_CHAR, ARROW_CHAR) + "\tle=" + pair.le + " , ");
        }
        System.out.println();
        for (PairFrequency pair : occurrence.getPhraseByRe()) {
            System.out.print(pair.getKey().replace(NUL_CHAR, ARROW_CHAR) + "\tre=" + pair.re + " , ");
        }
        System.out.println();
        for (PairFrequency pair : occurrence.getPhraseByScore()) {
            System.out.print(pair.getKey().replace(NUL_CHAR, ARROW_CHAR) + "\tscore=" + pair.score + " , ");
        }
        System.out.println();
    }
}
