/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.LabeledWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.ArabicUnknownWordSignatures;
import edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Numberer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Unknown-word model that maps unseen Arabic words to coarse "signature"
 * pseudo-words (all starting with {@code "UNK"}) and keeps tag counts for
 * those signatures in the inherited {@code unSeenCounter}.
 *
 * <p>The amount of detail in a signature is selected by {@code unknownLevel},
 * which the constructor clamps to the range
 * [{@value #MIN_UNKNOWN}, {@value #MAX_UNKNOWN}].
 *
 * <p>This class is not safe for concurrent use: {@link #score} temporarily
 * mutates its argument and {@link #getSignatureIndex} maintains a
 * single-entry cache in instance fields.
 */
public class ArabicUnknownWordModel
extends BaseUnknownWordModel {
    private static final long serialVersionUID = 4825624957364628771L;
    /** Smallest signature level understood by {@link #getSignature}. */
    private static final int MIN_UNKNOWN = 6;
    /** Largest signature level understood by {@link #getSignature}. */
    private static final int MAX_UNKNOWN = 10;
    String UNKNOWN_WORD = "UNK";
    String BOUNDARY_TAG = ".$$.";
    protected boolean smartMutation = false;
    // Single-entry cache used by getSignatureIndex: the last (word, position)
    // pair that was signaturized and the signature index it produced.
    protected transient int lastSignatureIndex = -1;
    protected transient int lastSentencePosition = -1;
    protected transient int lastWordToSignaturize = -1;
    // True only once the cache fields above hold a computed entry. A boolean is
    // used instead of the -1 sentinels alone because transient int fields are
    // restored as 0 (not -1) if an instance is deserialized, which would make
    // the pair (word 0, position 0) look like a cache hit.
    private transient boolean signatureCacheValid;
    private static final boolean DOCUMENT_UNKNOWNS = false;
    /** Number of trailing characters appended to level 9/10 signatures (0 disables). */
    protected int unknownSuffixSize = 0;
    /** Number of leading characters appended to level 9/10 signatures (0 disables). */
    protected int unknownPrefixSize = 0;
    // Lazily resolved global numberers; see tagNumberer() / wordNumberer().
    private transient Numberer tagNumberer;
    private transient Numberer wordNumberer;

    /**
     * Creates a model configured from the given lexicon options.
     *
     * <p>If {@code op.useUnknownWordSignatures} lies outside
     * [{@value #MIN_UNKNOWN}, {@value #MAX_UNKNOWN}] it is clamped to the
     * nearest bound and a warning is written to {@code System.err}.
     *
     * @param op  lexicon options supplying the signature level, smart-mutation
     *            flag and prefix/suffix sizes
     * @param lex lexicon passed through to the superclass
     */
    public ArabicUnknownWordModel(Options.LexOptions op, Lexicon lex) {
        super(op, lex);
        int requestedLevel = op.useUnknownWordSignatures;
        if (requestedLevel < MIN_UNKNOWN) {
            this.unknownLevel = MIN_UNKNOWN;
            System.err.println("Invalid value for useUnknownWordSignatures");
        } else if (requestedLevel > MAX_UNKNOWN) {
            this.unknownLevel = MAX_UNKNOWN;
            System.err.println("Invalid value for useUnknownWordSignatures");
        } else {
            this.unknownLevel = requestedLevel;
        }
        this.smartMutation = op.smartMutation;
        this.unknownSuffixSize = op.unknownSuffixSize;
        this.unknownPrefixSize = op.unknownPrefixSize;
    }

    /**
     * Trains on the trees with weight 1.0, reading tags via
     * {@code Tree.taggedYield()}.
     */
    @Override
    public void train(Collection<Tree> trees) {
        this.train(trees, 1.0, false);
    }

    /**
     * Trains on the trees with weight 1.0.
     *
     * @param keepTagsAsLabels if true, events are read from
     *                         {@code Tree.labeledYield()} instead of
     *                         {@code Tree.taggedYield()}
     */
    public void train(Collection<Tree> trees, boolean keepTagsAsLabels) {
        this.train(trees, 1.0, keepTagsAsLabels);
    }

    /**
     * Trains on the trees with the given per-event weight, reading tags via
     * {@code Tree.taggedYield()}.
     */
    public void train(Collection<Tree> trees, double weight) {
        this.train(trees, weight, false);
    }

    /**
     * Accumulates unknown-word statistics from the given trees into
     * {@code unSeenCounter}.
     *
     * <p>Every word occurrence is counted in a local "seen" counter. Once more
     * than {@code Train.fractionBeforeUnseenCounting} of the trees have been
     * visited, any word whose running seen-count is still below 2.0 is treated
     * as unknown: its signature is computed and the (signature, tag),
     * (any, tag), (signature, any) and (any, any) counts are each incremented
     * by {@code weight}.
     *
     * <p>If no unknown-word events were collected at all, every tag known to
     * the tag numberer (except the boundary tag) receives a count of
     * {@code weight} so that the counter is not left empty.
     *
     * @param trees            training trees
     * @param weight           amount added to each affected count
     * @param keepTagsAsLabels see {@link #treeToEvents(Tree, boolean)}
     */
    public void train(Collection<Tree> trees, double weight, boolean keepTagsAsLabels) {
        ClassicCounter<IntTaggedWord> seenCounter = new ClassicCounter<IntTaggedWord>();
        int treesVisited = 0;
        int indexToStartUnkCounting = (int)((double)trees.size() * Train.fractionBeforeUnseenCounting);
        for (Tree tree : trees) {
            ++treesVisited;
            List<IntTaggedWord> events = this.treeToEvents(tree, keepTagsAsLabels);
            int numEvents = events.size();
            for (int pos = 0; pos < numEvents; ++pos) {
                IntTaggedWord event = events.get(pos);
                IntTaggedWord wordOnly = new IntTaggedWord(event.word, -1);
                seenCounter.incrementCount(wordOnly, weight);
                boolean countAsUnknown = treesVisited > indexToStartUnkCounting
                        && seenCounter.getCount(wordOnly) < 2.0;
                if (!countAsUnknown) {
                    continue;
                }
                int sig = this.getSignatureIndex(event.word, pos);
                this.unSeenCounter.incrementCount(new IntTaggedWord(sig, event.tag), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(-1, event.tag), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(sig, -1), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(-1, -1), weight);
            }
        }
        if (this.unSeenCounter.isEmpty()) {
            // Nothing qualified as unknown: fall back to a flat count per tag.
            Numberer tags = this.tagNumberer();
            int numTags = tags.total();
            for (int tt = 0; tt < numTags; ++tt) {
                if (this.BOUNDARY_TAG.equals(tags.object(tt))) {
                    continue;
                }
                this.unSeenCounter.incrementCount(new IntTaggedWord(-1, tt), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(-1, -1), weight);
            }
        }
    }

    /**
     * Converts a tree's yield to numbered (word, tag) events.
     *
     * @param keepTagsAsLabels if true use {@code Tree.labeledYield()},
     *                         otherwise {@code Tree.taggedYield()}
     */
    protected List<IntTaggedWord> treeToEvents(Tree tree, boolean keepTagsAsLabels) {
        if (keepTagsAsLabels) {
            List<LabeledWord> labeledWords = tree.labeledYield();
            return this.listOfLabeledWordsToEvents(labeledWords);
        }
        return this.treeToEvents(tree);
    }

    /** Converts a tree's tagged yield to numbered (word, tag) events. */
    protected List<IntTaggedWord> treeToEvents(Tree tree) {
        Sentence<TaggedWord> taggedWords = tree.taggedYield();
        return this.listToEvents(taggedWords);
    }

    /**
     * Numbers each tagged word's word and tag strings through the global
     * numberers and returns the resulting events in input order.
     */
    protected List<IntTaggedWord> listToEvents(List<TaggedWord> taggedWords) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>(taggedWords.size());
        for (TaggedWord tw : taggedWords) {
            int wordIndex = this.wordNumberer().number(tw.word());
            int tagIndex = this.tagNumberer().number(tw.tag());
            events.add(new IntTaggedWord(wordIndex, tagIndex));
        }
        return events;
    }

    /**
     * Same as {@link #listToEvents(List)} but for {@code LabeledWord} input.
     */
    protected List<IntTaggedWord> listOfLabeledWordsToEvents(List<LabeledWord> taggedWords) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>(taggedWords.size());
        for (LabeledWord lw : taggedWords) {
            int wordIndex = this.wordNumberer().number(lw.word());
            int tagIndex = this.tagNumberer().number(lw.tag());
            events.add(new IntTaggedWord(wordIndex, tagIndex));
        }
        return events;
    }

    /**
     * Returns {@code log(pb_T_S * p_W / p_T)} for an unknown word, where
     * {@code pb_T_S = (c_TS + smooth * c_T / c_U) / (c_S + smooth)},
     * {@code p_T = c_Tseen / total} and {@code p_W = 1 / total}. The counts
     * {@code c_TS}, {@code c_S}, {@code c_T} and {@code c_U} are read from
     * {@code unSeenCounter} for (signature, tag), (signature, any),
     * (any, tag) and (any, any) respectively.
     *
     * <p>{@code iTW} is modified in place while the counts are looked up (to
     * avoid allocating lookup keys) and restored to its original word and tag
     * before the method returns.
     *
     * @param iTW     the word/tag pair to score
     * @param loc     position of the word in the sentence
     * @param c_Tseen count of the tag among seen words
     * @param total   total count used to normalise {@code c_Tseen}
     * @param smooth  smoothing weight given to the tag-only estimate
     * @return the log score as a float
     */
    @Override
    public float score(IntTaggedWord iTW, int loc, double c_Tseen, double total, double smooth) {
        int savedWord = iTW.word;
        short savedTag = iTW.tag;

        iTW.word = this.getSignatureIndex(savedWord, loc);
        double c_TS = this.unSeenCounter.getCount(iTW);      // (signature, tag)
        iTW.tag = (short)-1;
        double c_S = this.unSeenCounter.getCount(iTW);       // (signature, any)
        iTW.word = -1;
        double c_U = this.unSeenCounter.getCount(iTW);       // (any, any)
        iTW.tag = savedTag;
        double c_T = this.unSeenCounter.getCount(iTW);       // (any, tag)
        iTW.word = savedWord;

        double p_T_U = c_T / c_U;
        if (this.unknownLevel == 0) {
            // Level 0 (only reachable via setUnknownLevel) ignores signatures.
            c_TS = 0.0;
            c_S = 0.0;
        }
        double pb_T_S = (c_TS + smooth * p_T_U) / (c_S + smooth);
        double p_T = c_Tseen / total;
        double p_W = 1.0 / total;
        return (float)Math.log(pb_T_S * p_W / p_T);
    }

    /** Returns the global "tags" numberer, resolving it on first use. */
    private Numberer tagNumberer() {
        if (this.tagNumberer == null) {
            this.tagNumberer = Numberer.getGlobalNumberer("tags");
        }
        return this.tagNumberer;
    }

    /** Returns the global "words" numberer, resolving it on first use. */
    private Numberer wordNumberer() {
        if (this.wordNumberer == null) {
            this.wordNumberer = Numberer.getGlobalNumberer("words");
        }
        return this.wordNumberer;
    }

    /**
     * Returns the word-numberer index of the signature of the given word.
     *
     * <p>The most recent result is cached, so repeated calls with the same
     * {@code (wordIndex, sentencePosition)} pair do not recompute the
     * signature. The cache is discarded by {@link #setUnknownLevel(int)}.
     *
     * @param wordIndex        index of the word in the global word numberer
     * @param sentencePosition position of the word in its sentence
     * @return index of the signature string in the global word numberer
     */
    @Override
    public int getSignatureIndex(int wordIndex, int sentencePosition) {
        if (this.signatureCacheValid
                && wordIndex == this.lastWordToSignaturize
                && sentencePosition == this.lastSentencePosition) {
            return this.lastSignatureIndex;
        }
        String word = (String)this.wordNumberer().object(wordIndex);
        String uwSig = this.getSignature(word, sentencePosition);
        int sig = this.wordNumberer().number(uwSig);
        this.lastSignatureIndex = sig;
        this.lastSentencePosition = sentencePosition;
        this.lastWordToSignaturize = wordIndex;
        this.signatureCacheValid = true;
        return sig;
    }

    /**
     * Builds the signature string for a word at the current
     * {@code unknownLevel}. Every signature starts with {@code "UNK"}; levels
     * other than 6-10 return exactly {@code "UNK"}.
     *
     * <ul>
     *   <li>10: as level 9, plus the {@code Character.getType} code of the
     *       character when the word has length 1.</li>
     *   <li>9: {@code -NUM}, {@code -Al} or a configurable-length prefix;
     *       then suffix features from {@code ArabicUnknownWordSignatures};
     *       then a configurable-length suffix for non-numeric words.</li>
     *   <li>8: optional {@code -Al}; {@code -NUM} or {@code -} plus the first
     *       character; then suffix features.</li>
     *   <li>7: {@code -NUM} or the last character.</li>
     *   <li>6: optional {@code -Al}; then {@code -NUM} or the last
     *       character.</li>
     * </ul>
     *
     * <p>An empty word is accepted at every level; character-based parts of
     * the signature are simply omitted for it.
     *
     * @param word the surface form to signaturize
     * @param loc  sentence position (not used by this implementation)
     * @return the signature string
     */
    @Override
    public String getSignature(String word, int loc) {
        StringBuilder sb = new StringBuilder("UNK");
        switch (this.unknownLevel) {
            case 10: {
                this.appendAffixSignature(sb, word, true);
                break;
            }
            case 9: {
                this.appendAffixSignature(sb, word, false);
                break;
            }
            case 8: {
                if (word.startsWith("Al")) {
                    sb.append("-Al");
                }
                if (ArabicUnknownWordSignatures.allDigitPlus(word)) {
                    sb.append("-NUM");
                } else {
                    sb.append('-');
                    // Guard against empty words; charAt(0) would throw.
                    if (word.length() > 0) {
                        sb.append(word.charAt(0));
                    }
                }
                sb.append(ArabicUnknownWordSignatures.likelyAdjectivalSuffix(word));
                sb.append(ArabicUnknownWordSignatures.pastTenseVerbNumberSuffix(word));
                sb.append(ArabicUnknownWordSignatures.presentTenseVerbNumberSuffix(word));
                sb.append(ArabicUnknownWordSignatures.taaMarbuuTaSuffix(word));
                sb.append(ArabicUnknownWordSignatures.abstractionNounSuffix(word));
                break;
            }
            case 7: {
                ArabicUnknownWordModel.appendNumOrLastChar(sb, word);
                break;
            }
            case 6: {
                if (word.startsWith("Al")) {
                    sb.append("-Al");
                }
                ArabicUnknownWordModel.appendNumOrLastChar(sb, word);
                break;
            }
            default: {
                break;
            }
        }
        return sb.toString();
    }

    /**
     * Appends the level 9/10 signature body: a leading marker ({@code -NUM},
     * {@code -Al}, or up to {@code unknownPrefixSize} leading characters),
     * the suffix features, and up to {@code unknownSuffixSize} trailing
     * characters for non-numeric words.
     *
     * @param markSingleCharType if true (level 10), a one-character word also
     *                           gets the {@code Character.getType} code of
     *                           that character appended after the leading
     *                           marker
     */
    private void appendAffixSignature(StringBuilder sb, String word, boolean markSingleCharType) {
        boolean allDigitPlus = ArabicUnknownWordSignatures.allDigitPlus(word);
        int len = word.length();
        if (allDigitPlus) {
            sb.append("-NUM");
        } else if (word.startsWith("Al") || word.startsWith("\u0627\u0644")) {
            sb.append("-Al");
        } else if (this.unknownPrefixSize > 0) {
            int prefixLen = Math.min(len, this.unknownPrefixSize);
            sb.append('-').append(word.substring(0, prefixLen));
        }
        if (markSingleCharType && len == 1) {
            sb.append(Character.getType(word.charAt(0)));
        }
        sb.append(ArabicUnknownWordSignatures.likelyAdjectivalSuffix(word));
        sb.append(ArabicUnknownWordSignatures.pastTenseVerbNumberSuffix(word));
        sb.append(ArabicUnknownWordSignatures.presentTenseVerbNumberSuffix(word));
        // The taa-marbuuTa feature is only consulted when no abstraction-noun
        // suffix was reported.
        String abstraction = ArabicUnknownWordSignatures.abstractionNounSuffix(word);
        if (!"".equals(abstraction)) {
            sb.append(abstraction);
        } else {
            sb.append(ArabicUnknownWordSignatures.taaMarbuuTaSuffix(word));
        }
        if (this.unknownSuffixSize > 0 && !allDigitPlus) {
            int suffixLen = Math.min(len, this.unknownSuffixSize);
            sb.append('-').append(word.substring(len - suffixLen));
        }
    }

    /**
     * Appends {@code -NUM} when the word is numeric according to
     * {@code ArabicUnknownWordSignatures.allDigitPlus}, otherwise the word's
     * last character (nothing for an empty word).
     */
    private static void appendNumOrLastChar(StringBuilder sb, String word) {
        if (ArabicUnknownWordSignatures.allDigitPlus(word)) {
            sb.append("-NUM");
            return;
        }
        int len = word.length();
        // Guard against empty words; charAt(-1) would throw.
        if (len > 0) {
            sb.append(word.charAt(len - 1));
        }
    }

    /**
     * Sets the signature level without range checking and discards the cached
     * signature, since it was computed for the previous level.
     */
    @Override
    public void setUnknownLevel(int unknownLevel) {
        this.unknownLevel = unknownLevel;
        this.signatureCacheValid = false;
        this.lastSignatureIndex = -1;
        this.lastSentencePosition = -1;
        this.lastWordToSignaturize = -1;
    }

    /** Returns the current signature level. */
    @Override
    public int getUnknownLevel() {
        return this.unknownLevel;
    }
}

