def evaluate_sentence(self, n, tokens, display=True):
    """
    Get the back-off probability for this sentence.

    The sentence is cut into consecutive ``n``-grams and the result is the
    product of one factor per n-gram:

    * an n-gram found in ``self.ngrams[n]`` contributes the value stored
      for it (presumably a probability; the table is built elsewhere);
    * an unseen n-gram backs off: it contributes ``alpha`` times the
      order ``n - 1`` evaluation of the n-gram without its first token.
      ``alpha`` is ``1`` minus the stored values of every n-gram sharing
      the same first token, divided by the summed order ``n - 1`` scores
      of the second tokens that were never stored after that first token;
    * an unseen unigram contributes ``0`` because there is no lower order
      to back off to.

    Args:
        n (int): max ngram order to use
        tokens (list(str)): Sentence
        display (bool): when true, print every factor as it is applied.
            The flag is forwarded to the back-off evaluation, so
            ``display=False`` silences the whole computation.

    Returns:
        int or float: the accumulated product; ``1`` when the sentence is
        too short to yield a single n-gram.
    """
    p = 1
    for gram in zip(*[tokens[i:] for i in range(n)]):
        # Empty tokens are ignored at the unigram level.
        if n == 1 and gram[0] == "":
            continue

        table = self.ngrams[n]
        if gram in table:
            p *= table[gram]
            if display:
                self._pretty_print(gram, p)
            continue

        if n == 1:
            # Unigram keys are 1-tuples, so the back-off bookkeeping below
            # (which reads key[1]) cannot apply; an unseen word scores 0.
            p *= 0
            if display:
                self._pretty_print(gram, p)
            continue

        head = gram[0]
        leftover = 1                # mass not used by n-grams starting with head
        seen_after_head = set()     # second tokens stored after head
        seen_elsewhere = set()      # second tokens stored after any other token
        for key, value in table.items():
            if key[0] == head:
                leftover -= value
                seen_after_head.add(key[1])
            else:
                seen_elsewhere.add(key[1])

        # The leftover mass is spread over the continuations never seen after
        # head, proportionally to their lower-order score.
        unseen_after_head = seen_elsewhere - seen_after_head
        denominator = sum(
            self.evaluate_sentence(n - 1, [token], display=False)
            for token in unseen_after_head
        )
        # With nothing to spread the mass over, the back-off weight is 0
        # rather than a ZeroDivisionError.
        alpha = leftover / denominator if denominator else 0

        p *= alpha * self.evaluate_sentence(
            n - 1, list(gram[1:]), display=display
        )
    return p


def _pretty_print(self, gram, p):
    """
    Print a probability in ``P_bo(word | history) = p`` notation.

    Args:
        gram (sequence(str)): the n-gram; its last item is shown as the
            word and the preceding items, space-joined, as the history.
            A single-item gram is printed without a history.
        p: value printed on the right-hand side.
    """
    if len(gram) == 1:
        print('P_bo({}) = {}'.format(gram[0], p))
    else:
        print('P_bo({} | {}) = {}'.format(gram[-1], ' '.join(gram[:-1]), p))