Created
August 17, 2014 03:49
-
-
Save robinfang/98039fa0f68b99412dcb to your computer and use it in GitHub Desktop.
检查代理可用否
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from multiprocessing.dummy import Pool as ThreadPool
import time

from grab import Grab, GrabError
def load_proxy_list(filepath):
    """Read a proxy list file and return its lines.

    Args:
        filepath: Path of a text file holding one proxy per line.

    Returns:
        A list with one string per line of the file, line endings removed.
        Blank lines in the file are kept as empty strings.
    """
    # The context manager closes the file even if read() raises.
    with open(filepath) as proxy_file:
        return proxy_file.read().splitlines()
def get_valid_proxy(proxy_list):
    """Lazily yield the proxies through which a test request succeeds.

    Proxies are checked one after another with a single shared ``Grab``
    instance; each is announced on stdout before it is tried.

    Args:
        proxy_list: Iterable of proxy addresses as ``'host:port'`` strings,
            e.g. ``'128.2.198.188:3124'``.

    Yields:
        Each proxy for which ``g.go('baidu.com')`` did not raise
        ``GrabError``.
    """
    g = Grab()
    for proxy in proxy_list:
        g.setup(proxy=proxy, proxy_type='http', connect_timeout=5, timeout=5)
        print("checking %s" % proxy)
        try:
            g.go('baidu.com')
        except GrabError:
            # A failed request just means this proxy is unusable; skip it.
            continue
        yield proxy
def check_proxy(proxy):
    """Test one proxy by issuing a request through it.

    A fresh ``Grab`` instance is created on every call, so calls do not
    share request state with each other.

    Args:
        proxy: Proxy address as a ``'host:port'`` string.

    Returns:
        ``proxy`` unchanged when ``g.go('baidu.com')`` succeeds, or ``None``
        when it raises ``GrabError``.
    """
    g = Grab()
    g.setup(proxy=proxy, proxy_type='http', connect_timeout=4, timeout=4)
    try:
        g.go('baidu.com')
    except GrabError:
        # Unusable proxy: report it as None so callers can filter it out.
        return None
    return proxy
def save_list(valid_list):
    """Write the given proxies to ``<unix-timestamp>.txt`` in the current directory.

    The file name is the current time in whole seconds since the epoch,
    followed by ``.txt``. Entries are joined with newlines, with no trailing
    newline.

    Args:
        valid_list: Iterable of proxy strings to save.
    """
    # Whole seconds only: drop the fractional part of time.time().
    filename = str(time.time()).split(".")[0]
    # The context manager closes the file even if the write fails.
    with open(filename + ".txt", "w") as out_file:
        out_file.write("\n".join(valid_list))
if __name__ == "__main__":
    # Script entry point: load candidates from proxylist.txt, check them
    # concurrently on 8 worker threads, and save the ones that worked.
    proxy_list = load_proxy_list("proxylist.txt")
    # Sequential alternative: valid_list = get_valid_proxy(proxy_list)
    pool = ThreadPool(8)
    try:
        valid_list = pool.map(check_proxy, proxy_list)
    finally:
        # Shut the workers down even if a check raised unexpectedly.
        pool.close()
        pool.join()
    # check_proxy returns None for unusable proxies; keep only the hits.
    valid_list = [x for x in valid_list if x is not None]
    save_list(valid_list)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment