# If you want more of my pastes visit: https://randompaste.000webhostapp.com/index.html
# view my last post at: https://bitbin.it/SGh8LiEJ/
#
# Location-NER evaluation script: loads pre-computed CoreNLP-style annotations
# from a pickle, extracts LOCATION entities (either as merged multi-token spans
# or as single tokens), then scores the extractions against a hand-annotated TSV.
import random
import requests, json
import _pickle as pickle
import googlemaps
from datetime import datetime

# SECURITY NOTE(review): API key hard-coded in source — this key is now public
# and should be rotated; load it from an environment variable instead.
gmaps = googlemaps.Client(key='AIzaSyCZs17oOJSf7i6GyL19gV5sjV8TNL9RguQ')

vec = []                 # raw lines of the test set (loaded in main; not read afterwards)
detected_locations = {}  # location string -> occurrence count across all lines
NERs = {}                # input line -> list of location strings found on that line
recognized = {}          # module-level default; main() builds its own local copy from the pickle
outputFormat = 'json'


def apply_developed_rules(x, ln):
    """Extract multi-token LOCATION spans from annotation *x* for input line *ln*.

    A span starts at a token tagged LOCATION whose predecessor is NOT tagged
    LOCATION, then absorbs every following token tagged LOCATION or POS-tagged
    NNP. Spans are appended to NERs[ln] and counted in detected_locations.
    Best-effort: a malformed annotation (missing keys) is silently skipped,
    matching the original behaviour.

    x  -- dict shaped like CoreNLP JSON output: x["sentences"][0]["tokens"],
          each token a dict with "word", "ner", "pos" keys.
    ln -- the raw input line (including trailing newline), used as the NERs key.
    """
    try:
        tokens = x["sentences"][0]["tokens"]
        n = len(tokens)
        i = 0
        while i < n:
            # BUG FIX: the original tested tokens[i-1] even when i == 0, which
            # in Python wraps around to the LAST token; guard i == 0 explicitly.
            if tokens[i]["ner"] == "LOCATION" and (i == 0 or tokens[i - 1]["ner"] != "LOCATION"):
                loc = tokens[i]["word"]
                j = i + 1
                # BUG FIX: bounds-check j — the original indexed tokens[j]
                # unguarded and raised IndexError whenever a LOCATION/NNP run
                # extended to the end of the sentence.
                while j < n and (tokens[j]["ner"] == "LOCATION" or tokens[j]["pos"] == "NNP"):
                    loc += " " + tokens[j]["word"]
                    j += 1
                i = j
                NERs.setdefault(ln, []).append(loc)
                detected_locations[loc] = detected_locations.get(loc, 0) + 1
            else:
                i += 1
    except KeyError:
        # Annotation without the expected sentences/tokens structure: skip.
        pass


def apply_collection(x, ln):
    """Record every individual token tagged LOCATION (no span merging).

    Same bookkeeping as apply_developed_rules, but each LOCATION token is
    collected on its own. Malformed annotations are silently skipped.
    """
    try:
        for tok in x["sentences"][0]["tokens"]:
            if tok["ner"] == "LOCATION":
                loc = tok["word"]
                NERs.setdefault(ln, []).append(loc)
                detected_locations[loc] = detected_locations.get(loc, 0) + 1
    except KeyError:
        pass


def main():
    """Run extraction over the annotated dataset, then score against the TSV.

    Reads:  final_files/entity_database.dat (pickled line->JSON-string dict),
            final_files/testset.txt, final_files/hand_annotated_dataset.txt,
            final_files/hand_annotated_dataset.tsv.
    Prints: per-line comparison plus a final (total, correct, percent) summary.
    """
    to_annotate = "final_files/hand_annotated_dataset.txt"
    hand_annotated = "final_files/hand_annotated_dataset.tsv"
    use_rules = True  # True -> merged spans; False -> single-token collection

    # SECURITY NOTE(review): unpickling is only safe for trusted, locally
    # produced files — never point this at untrusted input.
    with open("final_files/entity_database.dat", "rb") as myFile:
        recognized = pickle.load(myFile)

    with open("final_files/testset.txt", encoding="utf8") as in_file:
        for l in in_file:
            vec.append(l)

    with open(to_annotate, 'r', encoding="utf8") as x_file:
        for line in x_file:
            ln = line
            # Lines absent from the database get "" and fail json.loads below.
            total = recognized.get(ln, "")
            try:
                x = json.loads(total)
            except (json.JSONDecodeError, TypeError):
                continue  # no (valid) stored annotation for this line
            if use_rules:
                apply_developed_rules(x, ln)
            else:
                apply_collection(x, ln)

    # Score: each TSV row is "<line>\t<loc1>\t<loc2>..."; a row is correct only
    # when the extracted list matches the gold list exactly (order included).
    num_correct = 0
    with open(hand_annotated, 'r', encoding="utf8") as x_file:
        for l in x_file:
            v = l.replace('\n', '').split('\t')
            a = [field.replace('\'', '') for field in v[1:]]
            key = v[0] + '\n'  # NERs keys keep the trailing newline
            if key in NERs:
                print(NERs[key], a, NERs[key] == a)
                if NERs[key] == a:
                    num_correct += 1

    # BUG FIX: the original divided by len(NERs) unconditionally and crashed
    # with ZeroDivisionError when no line yielded any location.
    if NERs:
        print(len(NERs), num_correct, num_correct / len(NERs) * 100)
    else:
        print(0, num_correct, 0.0)


if __name__ == '__main__':
    main()