get_names.py
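A Python 2 scraper: it walks pages 44955 through 95000 of http://www.name-list.net/facebook/, pulls every name out of each page's listing table, and inserts both the full names and their individual words into a local MySQL database (google_plus), running up to 400 crawler threads at a time.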
import threading

import urllib2
import MySQLdb
from bs4 import BeautifulSoup as bs


def getSource(num):
    # Fetch one listing page; append the page number to "check" so an
    # interrupted run can be resumed from where it stopped.
    url = "http://www.name-list.net/facebook/" + num
    page = urllib2.urlopen(url)
    f = open("check", "a")
    f.write(str(num) + "\n")
    f.close()
    return page.read()


def storeContent(name):
    db = MySQLdb.connect('127.0.0.1', 'root', 'root', 'google_plus')
    cursor = db.cursor()
    # Keep ASCII characters only, then trim surrounding whitespace.
    name = filter(lambda x: ord(x) < 128, name)
    name = name.strip()
    try:
        # Parameterised query so names containing quotes cannot break the SQL.
        cursor.execute("INSERT INTO names(`name`) VALUES (%s)", (name,))
        db.commit()
    except Exception:
        pass  # ignore duplicates and other insert errors
    # Store each word of the name separately as well.
    for nam in name.split():
        try:
            cursor.execute("INSERT INTO names_single(`name`) VALUES (%s)", (nam,))
            db.commit()
        except Exception:
            pass
    db.close()


def parsePage(soup):
    # Every table cell on a listing page holds one name.
    table = soup.find('table', {'class': 'list'})
    for tr in table.findAll('tr'):
        for td in tr.findAll('td'):
            storeContent(td.text)


def startCrawling(num):
    page = bs(getSource(str(num)), 'html.parser')
    parsePage(page)


def main():
    for i in range(44955, 95001):
        print i
        # Crude throttle: busy-wait until fewer than 400 crawler threads
        # are alive before starting the next one.
        while threading.active_count() > 400:
            continue
        threading.Thread(target=startCrawling, args=(i,)).start()


main()
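
The script assumes a local MySQL database named google_plus with two single-column tables, names and names_single. The schema itself is not in the repository; the sketch below is one plausible one-time setup, inferred from the INSERT statements above, with an assumed UNIQUE key so that duplicate inserts fail and are silently skipped by the except clauses:

# One-time setup sketch (assumed, not part of the repo): create the
# database and tables that get_names.py writes to. Credentials mirror
# the connect() call in storeContent(); the VARCHAR length and the
# UNIQUE keys are assumptions.
import MySQLdb

db = MySQLdb.connect('127.0.0.1', 'root', 'root')
cursor = db.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS google_plus")
cursor.execute("USE google_plus")
cursor.execute("CREATE TABLE IF NOT EXISTS names ("
               "name VARCHAR(255) NOT NULL, UNIQUE KEY (name))")
cursor.execute("CREATE TABLE IF NOT EXISTS names_single ("
               "name VARCHAR(255) NOT NULL, UNIQUE KEY (name))")
db.commit()
db.close()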