Untitled


SUBMITTED BY: sitven7097

DATE: Jan. 22, 2018, 5:53 p.m.

FORMAT: Text only

SIZE: 1.5 kB

HITS: 522

  def evaluate_sentence(self, n, tokens, display=True):
      """
      Get the back-off probability for this sentence.

      The sentence is scored as the product of the probabilities of its
      n-grams of order ``n``. An n-gram present in ``self.ngrams[n]`` uses
      the stored value directly. Otherwise the probability mass left over
      by the stored n-grams sharing its history is spread over the
      continuations never seen after that history, and the method backs
      off to the (n-1)-gram made of the last ``n - 1`` words.

      Args:
          n (int): max ngram order to use
          tokens (list(str)): Sentence
          display (bool): when true, print every scored n-gram through
              ``self._pretty_print``; the flag is forwarded to the
              back-off recursion.

      Returns:
          The product of the per-n-gram probabilities (``1`` when
          ``tokens`` yields no n-gram of order ``n``). A unigram missing
          from ``self.ngrams[1]``, or a history with no unseen
          continuation to back off to, contributes a factor of ``0.0``.
      """
      # NOTE(review): self.ngrams[n] is assumed to map n-gram tuples to
      # (presumably discounted) probabilities -- confirm against the trainer.
      table = self.ngrams[n]
      p = 1
      for gram in zip(*[tokens[i:] for i in range(n)]):
          # The sentence-start marker is never scored as a unigram.
          if n == 1 and gram[0] == "<s>":
              continue

          if gram in table:
              p *= table[gram]
              if display:
                  self._pretty_print(gram, p)
              continue

          if n == 1:
              # Unknown word: there is no lower order left to back off to.
              p *= 0.0
              if display:
                  self._pretty_print(gram, p)
              continue

          # Compare the whole history (all words but the last), not only
          # the first word, so orders above 2 are handled as well.
          history = gram[:-1]
          leftover = 1
          seen = set()
          continuations = set()
          for key, prob in table.items():
              continuations.add(key[-1])
              if key[:-1] == history:
                  leftover -= prob
                  seen.add(key[-1])

          # Lower-order mass of the words never seen after this history.
          shorter_history = list(history[1:])
          unseen_mass = 0
          for word in continuations - seen:
              unseen_mass += self.evaluate_sentence(
                  n - 1, shorter_history + [word], display=False)

          lower = self.evaluate_sentence(n - 1, list(gram[1:]), display=display)
          # No unseen continuation means no mass can be handed down.
          weight = leftover / unseen_mass if unseen_mass else 0.0
          p *= weight * lower
      return p
  def _pretty_print(self, gram, p):
      """Print the value ``p`` labelled with the n-gram it belongs to.

      A one-word gram is shown as ``P_bo(word)``; any other gram is shown
      as ``P_bo(last | preceding words)`` with the preceding words joined
      by single spaces.
      """
      if len(gram) != 1:
          target = gram[-1]
          history = ' '.join(gram[:-1])
          print('P_bo({} | {}) = {}'.format(target, history, p))
          return
      print('P_bo({}) = {}'.format(gram[0], p))

comments powered by Disqus