#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
'''
bashfr_download.py 1.0.0

This program downloads the whole bashfr.org archive into a single HTML file.
    http://www.bashfr.org/?sort=browse&p=1

Why: I wanted to read bashfr.org offline.

How: browse pages are fetched one after another, starting at page 1, until a
page no longer contains the "next page" marker. Every quote found is stored
under its number, and the quotes are finally written, sorted by number, to
bashfr_quotes.html (UTF-8 encoded) in the current directory.

License: This program is public domain.
Author: Sébastien SAUVAGE (webmaster of http://sebsauvage.net)
'''
# Use this if you use a proxy (or define the HTTP_PROXY variable in your environment)
#import os; os.environ['HTTP_PROXY'] = 'http://proxy.myisp.com:3128'

import re
import sys
from contextlib import closing

try:
    import urllib2
except ImportError:
    # Python 3: urllib.request provides the same urlopen() entry point.
    import urllib.request as urllib2

# URL of one browse page; %d is the 1-based page number.
PAGE_URL = 'http://www.bashfr.org/?sort=browse&p=%d'

# Upper bound on the number of bytes read from a single page.
MAX_PAGE_BYTES = 200000

# Text whose presence in a page means that another page follows.
# NOTE(review): if the site emits this as HTML entities the literal may have
# been altered in this copy of the file -- confirm against the live markup.
NEXT_PAGE_MARKER = '>>>'

# Encoding used to decode the downloaded pages.
PAGE_ENCODING = 'ISO-8859-1'

# Name of the generated file.
OUTPUT_FILE = 'bashfr_quotes.html'

# NOTE(review): the pattern below is reproduced exactly as it appears in this
# copy of the file. It has a single capture group, whereas the extraction loop
# unpacks (number, quote) pairs, so the HTML markup and the quote-number group
# have presumably been lost from the literal. Restore them before running.
re_quote = re.compile('\n#.+?\n(.+?)\n', re.IGNORECASE | re.DOTALL)

# NOTE(review): same caveat as above -- these per-quote fragments presumably
# carried HTML markup around the number and the text; verify and restore.
QUOTE_FRAGMENT = u'\n%d\n%s'
QUOTE_SEPARATOR = u'\n'

# Skeleton of the generated document; %s receives the rendered quotes.
# The closing tag used to be misspelled "</tile>", and the opening
# <html>/<head>/<title> tags were absent although their closing tags were
# present, which produced a malformed document.
PAGE_TEMPLATE = u'''<html>
<head>
<title>http://www.bashfr.org/</title>
<style type="text/css">
<!--
body { font-family: monospace; font-size:10pt;}
-->
</style>
</head>
<body><b>http://www.bashfr.org/</b>%s</body>
</html>'''


def _download_quotes():
    '''Fetch every browse page and return {quote number: quote HTML}.

    Pages are requested in increasing order. A dot is written to stdout for
    each page fetched. Errors raised by urlopen() are not caught.
    '''
    quotes = {}
    page = 1
    while True:
        # closing() releases the connection even if read() raises.
        with closing(urllib2.urlopen(PAGE_URL % page)) as response:
            raw = response.read(MAX_PAGE_BYTES)
        sys.stdout.write('.')
        sys.stdout.flush()  # Show progress immediately, there is no newline.

        # Decode once here so that everything below works on text.
        html = raw.decode(PAGE_ENCODING)

        # Extract the quotes *before* testing for the marker: the final page
        # has quotes but no marker, and used to be dropped entirely.
        for number, quote in re_quote.findall(html):
            quotes[int(number)] = quote

        if NEXT_PAGE_MARKER not in html:
            break
        page += 1
    return quotes


def _render_html(quotes):
    '''Return the complete HTML document (as text) for the given quotes.

    quotes maps quote numbers to their HTML code; quotes are emitted in
    increasing number order.
    '''
    parts = []
    for number in sorted(quotes):
        parts.append(QUOTE_FRAGMENT % (number, quotes[number]))
        parts.append(QUOTE_SEPARATOR)
    # Joining once avoids re-copying the growing string for every quote.
    return PAGE_TEMPLATE % u''.join(parts)


def main():
    '''Download the whole archive and write it to OUTPUT_FILE.'''
    print('bashfr_download 1.0.0')
    sys.stdout.write('Downloading page...')
    sys.stdout.flush()

    document = _render_html(_download_quotes())

    # The with-block guarantees the file is closed even if write() fails.
    with open(OUTPUT_FILE, 'w+b') as output:
        output.write(document.encode('utf-8'))


if __name__ == '__main__':
    main()