"""Scrape the hidemyass.com proxy-list pages and print each proxy as ip:port.

The listing pages are fetched through an eventlet green pool, parsed line by
line, and the collected proxies are printed followed by a total count.
"""
import re

import eventlet
from eventlet.green import urllib2

# Patterns used to clean an IP line, compiled once instead of per row.
_HIDDEN_ELEMENT_RE = re.compile(r"<(div|span) style=\"display:none\">[\.0-9]+")
_NUMERIC_CLASS_RE = re.compile(r"class=\"[0-9]+\"")
_NON_IP_CHAR_RE = re.compile(r"[^0123456789\.]")
_TAG_RE = re.compile(r"<[^>]*>")

# Line offsets, counted from the IP line, of the other fields of a row.
_PORT_OFFSET = 2
_COUNTRY_OFFSET = 4
_RESPONSE_TIME_OFFSET = 7
_CONNECTION_TIME_OFFSET = 11
_TYPE_OFFSET = 16
_ANONYMITY_OFFSET = 17


def _between(text, start, end):
    """Return the part of *text* after the first *start* and before the next *end*.

    Raises IndexError when *start* does not occur in *text*.
    """
    return text.split(start)[1].split(end)[0]


def _extract_ip(line, classes):
    """Return the digits and dots of *line* that are not marked as hidden.

    *classes* maps a CSS class name to its display value ("none"/"inline").
    """
    ip = line
    # Rewrite class references as inline styles so that a single pattern can
    # recognise every hidden element.
    for class_, display in classes.items():
        ip = ip.replace("class=\"%s\"" % class_, "style=\"display:%s\"" % display)
    # Drop hidden elements together with the digits they contain.
    ip = _HIDDEN_ELEMENT_RE.sub("", ip)
    # Numeric class names would otherwise leak digits into the address.
    ip = _NUMERIC_CLASS_RE.sub("", ip)
    # Whatever is left that is not a digit or a dot is markup.
    return _NON_IP_CHAR_RE.sub("", ip)


def getProxies(url):
    """Fetch one listing page and parse the proxies found on it.

    Args:
        url: Address of a proxy-list page.

    Returns:
        A ``(url, proxies)`` tuple. ``proxies`` is a list of dicts with the
        string-valued keys ``ip``, ``port``, ``country``, ``responseTime``,
        ``connectionTime``, ``type`` and ``anonymity``.

    Raises:
        IndexError: If a row lacks one of the delimiters the field parsing
            splits on.
    """
    response = urllib2.urlopen(url)
    try:
        src = response.read().split("\n")
    finally:
        # Release the connection even when reading fails.
        response.close()

    in_style = False
    classes = {}
    proxies = []
    for i, line in enumerate(src):
        # NOTE(review): both tag literals were empty strings in the file as
        # found ('"" in line' is always true, which pinned the flag to False).
        # They are restored on the assumption that the markup had been
        # stripped from the source -- confirm against the live page.
        if "<style>" in line:
            in_style = True
        if "</style>" in line:
            in_style = False

        # Characters 1-4 of the line are used as the class name; presumably
        # the rules look like ".abcd{display:none}" -- verify against the page.
        if "display:none" in line:
            classes[line[1:5]] = "none"
        if "display:inline" in line:
            classes[line[1:5]] = "inline"

        # A row is parsed on the first line outside a style block once at
        # least one class has been collected.
        if in_style or not classes:
            continue

        if i + _ANONYMITY_OFFSET >= len(src):
            # Truncated page: the remaining lines of this row are missing.
            break

        proxies.append({
            "ip": _extract_ip(line, classes),
            # Strip any surrounding tags and whitespace from the port line.
            "port": _TAG_RE.sub("", src[i + _PORT_OFFSET]).strip(),
            "country": _between(src[i + _COUNTRY_OFFSET], "/> ", "<"),
            # Response and connection times are percentages.
            "responseTime": _between(src[i + _RESPONSE_TIME_OFFSET], ":", "%"),
            "connectionTime": _between(src[i + _CONNECTION_TIME_OFFSET], ":", "%"),
            "type": _between(src[i + _TYPE_OFFSET], ">", "<"),
            "anonymity": _between(src[i + _ANONYMITY_OFFSET], ">", "<"),
        })
        # The collected classes belong to this row only.
        classes = {}

    return url, proxies


# Fetch listing pages 1-12 through a green pool of 12 workers.
proxies = []
pool = eventlet.GreenPool(12)
urls = ["https://hidemyass.com/proxy-list/" + str(page) for page in range(1, 13)]
for url, proxyList in pool.imap(getProxies, urls):
    proxies.extend(proxyList)

for proxy in proxies:
    print("%s:%s" % (proxy["ip"], proxy["port"]))

print("\nGot %s proxies!" % len(proxies))