#!/usr/bin/env python2.5 import hashlib import socket socket.setdefaulttimeout(5) import os import urllib2 import xml.etree.cElementTree as ET def makedirs(): try: for path in ['xml', 'html']: os.mkdir(os.path.join(os.path.dirname(__file__), path)) except OSError: print "store dirs already in place" pass def parse_opml(filename): tree = ET.parse(filename) outlines = tree.findall("//outline") for outline in outlines: print "Processing %s" % (outline.get('text') or outline.get('htmlUrl')) for path in ['xml', 'html']: url = outline.get('%sUrl' % path) if url is None: continue if not url.startswith('http://'): url = "http://%s" % url url_hash = hashlib.md5(url).hexdigest() file_path = os.path.join(path, url_hash) if os.path.exists(file_path): continue try: print "\tdownloading: %s ..." % url, page = urllib2.urlopen(url).read() except (urllib2.HTTPError, urllib2.URLError): continue f = open(os.path.join(path, url_hash), 'w+') f.write(page) f.close() print "\tdone." def main(opml_file): makedirs() parse_opml(opml_file) if __name__ == '__main__': import sys main(sys.argv[1])