Skip to content

Commit

Permalink
start to reconstruction; fix bugs in weibo friends collections
Browse files Browse the repository at this point in the history
  • Loading branch information
qinxuye committed Aug 12, 2013
1 parent 146884a commit 781a445
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 58 deletions.
44 changes: 30 additions & 14 deletions cola/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,30 @@
except ImportError:
raise DependencyNotInstalledError('pyyaml')

class PropertyObject(dict):
    """Dictionary whose keys can also be read as attributes.

    Attribute access looks the name up among the dict's keys. Nested dicts
    are returned wrapped in ``PropertyObject``; dicts found inside a list
    are wrapped element by element. A missing key yields ``None``.
    """

    def __getattr__(self, name):
        """Return the value stored under *name*, or ``None`` if absent.

        ``__getattr__`` is only invoked when normal attribute lookup fails,
        so real ``dict`` methods are never shadowed by this hook.

        Raises:
            AttributeError: for double-underscore ("dunder") names. Protocol
                probes such as ``__getnewargs_ex__`` or ``__deepcopy__``
                must fail normally; answering them with ``None`` makes
                pickle/copy try to call ``None``.
        """
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        if name not in self:
            return None
        attr = self[name]
        if isinstance(attr, dict):
            return PropertyObject(attr)
        if isinstance(attr, list):
            # Only mapping elements can be wrapped; scalars pass through
            # unchanged instead of raising inside dict().
            return [PropertyObject(itm) if isinstance(itm, dict) else itm
                    for itm in attr]
        return attr
class PropertyObject(dict):
    """Dictionary whose public keys are mirrored as instance attributes.

    Every key that does not start with an underscore is stored as a dict
    item (raw value) and also set as an attribute. For the attribute,
    nested dicts are wrapped in ``PropertyObject`` and dicts inside lists
    are wrapped element by element. Because the values live in real
    attributes (no ``__getattr__`` hook), instances pickle normally.
    """

    def __init__(self, d):
        """Populate the object from the mapping *d*."""
        super(PropertyObject, self).__init__()
        self._update(d)

    def _update(self, d):
        """Copy the public entries of mapping *d* into items and attributes."""
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for k, v in d.items():
            if k.startswith('_'):
                # Underscore-prefixed keys are treated as private and skipped.
                continue
            self[k] = v

            if isinstance(v, dict):
                attr = PropertyObject(v)
            elif isinstance(v, list):
                # Wrap only mapping elements; a list of scalars (strings,
                # numbers) is kept as-is instead of crashing in __init__.
                attr = [PropertyObject(itm) if isinstance(itm, dict) else itm
                        for itm in v]
            else:
                attr = v
            setattr(self, k, attr)

    def update(self, config=None, **kwargs):
        """Merge keyword arguments and then *config* into this object.

        *config* may be a dict, or any object exposing its mapping as a
        ``conf`` attribute. It is applied after the keyword arguments, so
        on a key collision the value from *config* wins.
        """
        self._update(kwargs)
        if config is not None:
            if isinstance(config, dict):
                self._update(config)
            else:
                self._update(config.conf)

class Config(object):
def __init__(self, yaml_file):
Expand All @@ -52,11 +65,14 @@ def __init__(self, yaml_file):
finally:
f.close()

def __getattr__(self, name):
return getattr(self.conf, name)
for k, v in self.conf.iteritems():
if not k.startswith('_'):
if isinstance(v, dict):
v = PropertyObject(v)
setattr(self, k, v)

def __getitem__(self, name):
return getattr(self.conf, name)
return getattr(self, name)

conf_dir = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'conf')
Expand Down
57 changes: 38 additions & 19 deletions cola/core/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,35 +27,54 @@
import cPickle as pickle
except ImportError:
import pickle

class Log(object):
    """Small builder around a named ``logging.Logger``.

    Each ``add_*`` method attaches one more handler to the wrapped logger;
    ``get_logger`` hands the configured logger back.
    """

    def __init__(self, name, default_level=logging.DEBUG):
        """Fetch the logger called *name* and set its level to *default_level*."""
        self.logger = logging.getLogger(name)
        self.logger.setLevel(default_level)
        self.formatter = logging.Formatter(
            '%(asctime)s - %(module)s.%(funcName)s.%(lineno)d - %(levelname)s - %(message)s')

    def add_stream_log(self, level=logging.DEBUG):
        """Attach a ``StreamHandler`` filtered at *level*."""
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(level)
        self.logger.addHandler(stream_handler)

    def add_file_log(self, filename, level=logging.INFO):
        """Attach a ``FileHandler`` writing formatted records to *filename*."""
        handler = logging.FileHandler(filename)
        handler.setFormatter(self.formatter)
        handler.setLevel(level)
        self.logger.addHandler(handler)

    def add_remote_log(self, server, level=logging.INFO):
        """Attach a ``SocketHandler`` sending records to *server*.

        *server* is either ``'host'`` or ``'host:port'``; without a port,
        ``logging.handlers.DEFAULT_TCP_LOGGING_PORT`` is used.
        """
        if ':' in server:
            server, port = server.split(':', 1)
            port = int(port)
        else:
            port = logging.handlers.DEFAULT_TCP_LOGGING_PORT

        socket_handler = logging.handlers.SocketHandler(server, port)
        socket_handler.setLevel(level)
        self.logger.addHandler(socket_handler)

    def get_logger(self):
        """Return the wrapped ``logging.Logger``."""
        return self.logger


def get_logger(name='cola', filename=None, server=None, is_master=False,
               basic_level=logging.INFO):
    """Build and return a logger configured through ``Log``.

    A stream handler at *basic_level* is always attached. When *filename*
    is given a file handler is added, restricted to ``logging.ERROR`` if
    *is_master* is true and to *basic_level* otherwise. When *server* is
    given a socket handler at ``logging.INFO`` is added.
    """
    log = Log(name, basic_level)
    log.add_stream_log(basic_level)

    if filename is not None:
        level = logging.ERROR if is_master else basic_level
        log.add_file_log(filename, level)

    if server is not None:
        log.add_remote_log(server, logging.INFO)

    return log.get_logger()

def add_log_client(logger, client):
if ':' in client:
Expand Down
4 changes: 2 additions & 2 deletions contrib/weibo/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def urls(self):
start = int(time.time() * (10**6))
return [
'http://weibo.com/%s/follow' % self.uid,
'http://weibo.com/aj/mblog/mbloglist?uid=%s&_k=%s' % (self.uid, start),
'http://weibo.com/%s/info' % self.uid,
# 'http://weibo.com/aj/mblog/mbloglist?uid=%s&_k=%s' % (self.uid, start),
# 'http://weibo.com/%s/info' % self.uid,
# remove because some user's link has been http://weibo.com/uid/follow?relate=fans
# 'http://weibo.com/%s/fans' % self.uid
]
33 changes: 12 additions & 21 deletions contrib/weibo/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,21 +344,11 @@ def set_instance(instance, dl):
return [], []

params = urldecode(url)
next_page = soup.find('a', attrs={'class': 'btn_page_next'})
if next_page is not None:
self.bundle.current_mblog = mblog

try:
next_page_str = next_page['action-data']
except KeyError:
next_page_str = next_page.find('span')['action-data']
new_params = urldecode('?%s'%next_page_str)
params.update(new_params)
params['__rnd'] = int(time.time()*1000)
next_page = '%s?%s' % (url.split('?')[0] , urllib.urlencode(params))
return [next_page, ], []

return [], []
new_params = urldecode('?%s'%(current_page 1))
params.update(new_params)
params['__rnd'] = int(time.time()*1000)
next_page = '%s?%s' % (url.split('?')[0] , urllib.urlencode(params))
return [next_page, ], []

class UserInfoParser(WeiboParser):
def parse(self, url=None):
Expand Down Expand Up @@ -516,6 +506,7 @@ def parse(self, url=None):
weibo_user = self.get_weibo_user()

html = None
decodes = urldecode(url)
is_follow = True
is_new_mode = False
for script in soup.find_all('script'):
Expand All @@ -530,7 +521,6 @@ def parse(self, url=None):
domid = data['domid']
if domid == 'Pl_Official_LeftHisRelation__15':
html = beautiful_soup(data['html'])
decodes = urldecode(url)
if 'relate' in decodes and decodes['relate'] == 'fans':
is_follow = False
is_new_mode = True
Expand Down Expand Up @@ -558,10 +548,12 @@ def parse(self, url=None):
urls.append('http://weibo.com/%s/fans' % self.uid)
return urls, bundles

if is_follow:
weibo_user.follows = []
else:
weibo_user.fans = []
current_page = decodes.get('page', 1)
if current_page == 1:
if is_follow:
weibo_user.follows = []
else:
weibo_user.fans = []
for li in ul.find_all(attrs={'class': 'S_line1', 'action-type': 'itemClick'}):
data = dict([l.split('=') for l in li['action-data'].split('&')])

Expand All @@ -586,7 +578,6 @@ def parse(self, url=None):
if len(a) > 0:
next_ = a[-1]
if next_['class'] == ['W_btn_c']:
decodes = urldecode(url)
decodes['page'] = int(decodes.get('page', 1)) 1
query_str = urllib.urlencode(decodes)
url = '%s?%s' % (url.split('?')[0], query_str)
Expand Down
9 changes: 7 additions & 2 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
@author: Chine
'''
import unittest
import pickle

from cola.core.config import PropertyObject
from cola.core.config import PropertyObject, main_conf

class Test(unittest.TestCase):

Expand All @@ -40,7 +41,11 @@ def testPropertyObject(self):
assert self.obj.name == 'cola'
assert isinstance(self.obj.list, list)
assert self.obj.list[0].count == 1


def testPickle(self):
    # Round-trip the imported main_conf through pickle and check that a
    # nested value is still reachable by attribute on the restored copy.
    restored = pickle.loads(pickle.dumps(main_conf))
    self.assertEqual(restored.master.port, 11103)

if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
Expand Down

0 comments on commit 781a445

Please sign in to comment.