def evaluate_sentence(self, n, tokens, display=True):
    """
    Get the probability for this sentence.

    Every n-gram of order ``n`` in ``tokens`` contributes one factor. An
    n-gram present in ``self.ngrams[n]`` contributes its stored value. An
    n-gram that is absent is backed off: the probability mass not used by
    the n-grams sharing its history is spread over the continuations that
    were not seen after that history, in proportion to their order
    ``n - 1`` probabilities, and the result is multiplied by the order
    ``n - 1`` probability of the n-gram with its first token dropped.

    NOTE(review): this assumes the values in ``self.ngrams[n]`` are
    probabilities keyed by token tuples -- confirm against where the
    tables are built.

    Args:
        n (int): max ngram order to use
        tokens (list(str)): Sentence
        display (bool): when True, each scored n-gram is reported through
            ``self._pretty_print``; when False nothing is reported, also
            not by the backed-off (recursive) evaluations.

    Returns:
        The product of the per-n-gram factors. This is ``1`` when
        ``tokens`` holds fewer than ``n`` items (no n-gram to score) and
        ``0.0`` when an unseen n-gram has no back-off mass available.
    """
    p = 1
    for gram in zip(*[tokens[i:] for i in range(n)]):
        # The sentence-start marker is not scored as a unigram.
        if n == 1 and gram[0] == "<s>":
            continue

        # Looked up inside the loop on purpose: a recursive call with an
        # order that has no table (e.g. n == 0) yields no grams and must
        # not touch self.ngrams at all.
        table = self.ngrams[n]
        if gram in table:
            p *= table[gram]
            if display:
                self._pretty_print(gram, p)
            continue

        # Unseen n-gram: split the table into entries sharing this history
        # (their mass is already spent) and all other entries (their last
        # tokens are candidate continuations).
        history = gram[:-1]
        leftover = 1
        seen = set()
        candidates = set()
        for other, prob in table.items():
            if other[:-1] == history:
                leftover -= prob
                seen.add(other[-1])
            else:
                candidates.add(other[-1])
        unseen = candidates - seen

        # Lower-order mass of the unseen continuations, conditioned on the
        # history with its first token dropped. Never displayed.
        shorter_history = list(gram[1:-1])
        denominator = sum(
            self.evaluate_sentence(n - 1, shorter_history + [word], display=False)
            for word in unseen
        )

        # With nothing to redistribute onto, the weight is taken as zero
        # instead of dividing by zero.
        weight = leftover / denominator if denominator else 0.0

        # Propagate the caller's display choice instead of forcing output.
        p *= weight * self.evaluate_sentence(n - 1, list(gram[1:]), display=display)
    return p
def _pretty_print(self, gram, p):
    """
    Print one probability line for an n-gram.

    A single-token gram is shown as ``P_bo(token) = p``. A longer gram is
    shown as ``P_bo(last | preceding tokens) = p``, with the preceding
    tokens joined by single spaces.

    Args:
        gram: sequence of string tokens; the last one is the predicted token
        p: value printed after the equals sign
    """
    history = gram[:-1]
    if history:
        message = 'P_bo({} | {}) = {}'.format(gram[-1], ' '.join(history), p)
    else:
        message = 'P_bo({}) = {}'.format(gram[0], p)
    print(message)