import json
import _pickle as pickle

vec = []                  # raw corpus lines from the test set
detected_locations = {}   # location string -> number of occurrences
NERs = {}                 # input line -> list of extracted location entities
def apply_developed_rules(x, ln):
    """Extract multi-word locations: merge a LOCATION token with the
    run of LOCATION/NNP tokens that follows it into one entity string."""
    try:
        tokens = x["sentences"][0]["tokens"]
    except (KeyError, IndexError):
        return  # malformed annotation record
    i = 0
    while i < len(tokens):
        # Start of a new location span (previous token is not LOCATION).
        if tokens[i]["ner"] == "LOCATION" and (i == 0 or tokens[i - 1]["ner"] != "LOCATION"):
            loc = tokens[i]["word"]
            j = i + 1
            # Absorb the following LOCATION or proper-noun (NNP) tokens.
            while j < len(tokens) and (tokens[j]["ner"] == "LOCATION" or tokens[j]["pos"] == "NNP"):
                loc += " " + tokens[j]["word"]
                j += 1
            i = j
            NERs.setdefault(ln, []).append(loc)
            detected_locations[loc] = detected_locations.get(loc, 0) + 1
        else:
            i += 1
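
# A minimal sketch of the CoreNLP-style annotation these functions expect.
# The field names ("sentences", "tokens", "word", "pos", "ner") match the
# lookups above; the sentence itself is a made-up example. On this input,
# apply_developed_rules merges the two adjacent LOCATION tokens into the
# single entity "New York".
_example_annotation = {"sentences": [{"tokens": [
    {"word": "New", "pos": "NNP", "ner": "LOCATION"},
    {"word": "York", "pos": "NNP", "ner": "LOCATION"},
    {"word": "is", "pos": "VBZ", "ner": "O"},
    {"word": "big", "pos": "JJ", "ner": "O"},
]}]}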
def apply_collection(x, ln):
    """Baseline: record every individual LOCATION token, without merging
    adjacent tokens into multi-word entities."""
    try:
        tokens = x["sentences"][0]["tokens"]
    except (KeyError, IndexError):
        return  # malformed annotation record
    for token in tokens:
        if token["ner"] == "LOCATION":
            loc = token["word"]
            NERs.setdefault(ln, []).append(loc)
            detected_locations[loc] = detected_locations.get(loc, 0) + 1
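
# On _example_annotation above, apply_collection would record the two separate
# entities "New" and "York" rather than the merged "New York" that
# apply_developed_rules produces; the use_rules flag in main() switches
# between the two behaviours.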
def main():
    to_annotate = "final_files/hand_annotated_dataset.txt"
    hand_annotated = "final_files/hand_annotated_dataset.tsv"
    use_rules = True

    # Cached NER annotations: {input line: raw JSON string}.
    with open("final_files/entity_database.dat", "rb") as my_file:
        recognized = pickle.load(my_file)

    with open("final_files/testset.txt", encoding="utf8") as in_file:
        for l in in_file:
            vec.append(l)

    # Extract locations from the cached annotation of each line.
    with open(to_annotate, "r", encoding="utf8") as x_file:
        for ln in x_file:
            try:
                x = json.loads(recognized[ln])
            except (KeyError, ValueError):
                continue  # line not in the cache, or annotation is not valid JSON
            if use_rules:
                apply_developed_rules(x, ln)
            else:
                apply_collection(x, ln)

    # Score against the hand-annotated gold TSV: column 0 is the sentence,
    # the remaining columns are its location entities.
    num_correct = 0
    with open(hand_annotated, "r", encoding="utf8") as x_file:
        for l in x_file:
            v = l.replace("\n", "").split("\t")
            gold = [field.replace("'", "") for field in v[1:]]
            key = v[0] + "\n"
            if key in NERs:
                print(NERs[key], gold, NERs[key] == gold)
                if NERs[key] == gold:
                    num_correct += 1

    if NERs:
        print(len(NERs), num_correct, num_correct / len(NERs) * 100)

if __name__ == "__main__":
    main()
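
# A hedged sketch (not part of the original script) of how
# final_files/entity_database.dat could be built: POST each corpus line to a
# locally running Stanford CoreNLP server and pickle the {line: raw JSON}
# mapping that main() loads. The server URL, port, and annotator list are
# assumptions.
def build_entity_database(lines, url="http://localhost:9000"):
    import requests  # only needed for this optional preprocessing step
    props = '{"annotators": "tokenize,ssplit,pos,ner", "outputFormat": "json"}'
    db = {}
    for ln in lines:
        r = requests.post(url, params={"properties": props}, data=ln.encode("utf8"))
        db[ln] = r.text
    with open("final_files/entity_database.dat", "wb") as f:
        pickle.dump(db, f)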