Coverage for bookie.lib.readable : 79%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""Handle processing and setting web content into Readability/cleaned
"""
try: return self.__getitem__(name) except KeyError: return super(DictObj, self).__getattr__(name)
url="https://github.com/mitechie/bookie", )
'1': 1, # used for manual parsed '200': 200, '404': 404, '403': 403, '429': 429, # wtf, 429 doesn't exist...
# errors like 9's '900': 900, # used for unparseable '901': 901, # url is not parseable/usable '902': 902, # socket.error during download '903': 903, # httplib.IncompleteRead error '904': 904, # lxml error about document is empty '905': 905, # httplib.BadStatusLine })
'png': 'image/png', 'jpeg': 'image/jpeg', 'jpg': 'image/jpg', 'gif': 'image/gif', })
"""Understand the base concept of making readable"""
"""This readable request was an error, assign it so"""
"""Check if this is indeed an error or not""" else:
"""Check if the current object is an image""" # we can only get this if we have headers self.content_type.lower() in IMAGE_TYPES.values()): else:
"""assign the content and potentially content type header"""
"""Handle some given content and parse the readable out of it"""
"""Handle the parsing out of the html content given"""
read.error(STATUS_CODES['900'], "Could not parse content.") else: content_type=content_type)
"""Fetch a url and read some content out of it"""
def parse(url): """Fetch the given url and parse out a Readable Obj for the content"""
# first check if we have a special url with the #! content in it # rewrite it with _escaped_fragment_=xxx # we should be doing with this some regex, but cheating for now idx = url.index(u'#') fragment = url[idx:] clean_url = u"{0}?_escaped_fragment_={1}".format(url[0:idx], fragment) else: # we need to clean up the url first, we can't have any anchor tag # on the url or urllib2 gets cranky
query = u'?' else:
parsed[0], parsed[1], parsed[2], parsed[4], query=query)
# if it works, then we default to a 200 request # it's ok, promise :)
# for some reason getting a code 429 from a server else: read.error(exc.code, unicode(exc.code) + ': ' + clean_url)
read.error(STATUS_CODES['901'], str(exc))
except httplib.BadStatusLine, exc: read.error(STATUS_CODES['905'], str(exc))
except socket.error, exc: read.error(STATUS_CODES['902'], str(exc))
# let's check to make sure we should be parsing this # for example: don't parse images read.error(STATUS_CODES['900'], "Could not parse document.") else:
except socket.error, exc: read.error(STATUS_CODES['902'], str(exc)) except httplib.IncompleteRead, exc: read.error(STATUS_CODES['903'], str(exc)) except lxml.etree.ParserError, exc: read.error(STATUS_CODES['904'], str(exc))
|