# If you want more of my pastes visit: https://randompaste.000webhostapp.com/index.html
# view my last post at: https://bitbin.it/SGh8LiEJ/
#
# Location-NER evaluation script: loads pre-computed CoreNLP-style annotations
# from a pickle, extracts LOCATION entities (either as merged multi-token spans
# or as single tokens), then scores the extractions against a hand-annotated TSV.
import random
import requests, json
import _pickle as pickle
import googlemaps
from datetime import datetime

# SECURITY NOTE(review): API key hard-coded in source — this key is now public
# and should be rotated; load it from an environment variable instead.
gmaps = googlemaps.Client(key='AIzaSyCZs17oOJSf7i6GyL19gV5sjV8TNL9RguQ')

vec = []                 # raw lines of the test set (loaded in main; not read afterwards)
detected_locations = {}  # location string -> occurrence count across all lines
NERs = {}                # input line -> list of location strings found on that line
recognized = {}          # module-level default; main() builds its own local copy from the pickle
outputFormat = 'json'


def apply_developed_rules(x, ln):
    """Extract multi-token LOCATION spans from annotation *x* for input line *ln*.

    A span starts at a token tagged LOCATION whose predecessor is NOT tagged
    LOCATION, then absorbs every following token tagged LOCATION or POS-tagged
    NNP. Spans are appended to NERs[ln] and counted in detected_locations.
    Best-effort: a malformed annotation (missing keys) is silently skipped,
    matching the original behaviour.

    x  -- dict shaped like CoreNLP JSON output: x["sentences"][0]["tokens"],
          each token a dict with "word", "ner", "pos" keys.
    ln -- the raw input line (including trailing newline), used as the NERs key.
    """
    try:
        tokens = x["sentences"][0]["tokens"]
        n = len(tokens)
        i = 0
        while i < n:
            # BUG FIX: the original tested tokens[i-1] even when i == 0, which
            # in Python wraps around to the LAST token; guard i == 0 explicitly.
            if tokens[i]["ner"] == "LOCATION" and (i == 0 or tokens[i - 1]["ner"] != "LOCATION"):
                loc = tokens[i]["word"]
                j = i + 1
                # BUG FIX: bounds-check j — the original indexed tokens[j]
                # unguarded and raised IndexError whenever a LOCATION/NNP run
                # extended to the end of the sentence.
                while j < n and (tokens[j]["ner"] == "LOCATION" or tokens[j]["pos"] == "NNP"):
                    loc += " " + tokens[j]["word"]
                    j += 1
                i = j
                NERs.setdefault(ln, []).append(loc)
                detected_locations[loc] = detected_locations.get(loc, 0) + 1
            else:
                i += 1
    except KeyError:
        # Annotation without the expected sentences/tokens structure: skip.
        pass


def apply_collection(x, ln):
    """Record every individual token tagged LOCATION (no span merging).

    Same bookkeeping as apply_developed_rules, but each LOCATION token is
    collected on its own. Malformed annotations are silently skipped.
    """
    try:
        for tok in x["sentences"][0]["tokens"]:
            if tok["ner"] == "LOCATION":
                loc = tok["word"]
                NERs.setdefault(ln, []).append(loc)
                detected_locations[loc] = detected_locations.get(loc, 0) + 1
    except KeyError:
        pass


def main():
    """Run extraction over the annotated dataset, then score against the TSV.

    Reads:  final_files/entity_database.dat (pickled line->JSON-string dict),
            final_files/testset.txt, final_files/hand_annotated_dataset.txt,
            final_files/hand_annotated_dataset.tsv.
    Prints: per-line comparison plus a final (total, correct, percent) summary.
    """
    to_annotate = "final_files/hand_annotated_dataset.txt"
    hand_annotated = "final_files/hand_annotated_dataset.tsv"
    use_rules = True  # True -> merged spans; False -> single-token collection

    # SECURITY NOTE(review): unpickling is only safe for trusted, locally
    # produced files — never point this at untrusted input.
    with open("final_files/entity_database.dat", "rb") as myFile:
        recognized = pickle.load(myFile)

    with open("final_files/testset.txt", encoding="utf8") as in_file:
        for l in in_file:
            vec.append(l)

    with open(to_annotate, 'r', encoding="utf8") as x_file:
        for line in x_file:
            ln = line
            # Lines absent from the database get "" and fail json.loads below.
            total = recognized.get(ln, "")
            try:
                x = json.loads(total)
            except (json.JSONDecodeError, TypeError):
                continue  # no (valid) stored annotation for this line
            if use_rules:
                apply_developed_rules(x, ln)
            else:
                apply_collection(x, ln)

    # Score: each TSV row is "<line>\t<loc1>\t<loc2>..."; a row is correct only
    # when the extracted list matches the gold list exactly (order included).
    num_correct = 0
    with open(hand_annotated, 'r', encoding="utf8") as x_file:
        for l in x_file:
            v = l.replace('\n', '').split('\t')
            a = [field.replace('\'', '') for field in v[1:]]
            key = v[0] + '\n'  # NERs keys keep the trailing newline
            if key in NERs:
                print(NERs[key], a, NERs[key] == a)
                if NERs[key] == a:
                    num_correct += 1

    # BUG FIX: the original divided by len(NERs) unconditionally and crashed
    # with ZeroDivisionError when no line yielded any location.
    if NERs:
        print(len(NERs), num_correct, num_correct / len(NERs) * 100)
    else:
        print(0, num_correct, 0.0)


if __name__ == '__main__':
    main()