#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
#
# myRadioPlayer 1.0.6
#
# Purpose:
#    A no-brainer "Play the music I want".
#
# Usage:
#    1) Enter an artist name or song title
#    2) Click "Play!"
#
#    That's all !
#
#
# Description:
#    This program uses RadioBlogClub.com and Google to search for the music
#    you want and plays it with your favorite player.
#    Quality is sometimes low (MP3 64 kbits sometimes), but you get almost any
#    music you want.
#    No fiddling with P2P, BitTorrent, search engines, downloaders,
#    Usenet, QuickPar... Just plain HTTP wrapped in a very easy interface
#    (There is only one button !).
#    Type a name, click, listen.
#    You do not even have to subscribe to RadioBlogClub.com: myRadioPlayer
#    automatically fetches a valid login from BugMeNot.
#
#    THIS PROGRAM DOES NOT DOWNLOAD A SINGLE BYTE OF MUSIC FROM THE INTERNET.
#    So don't bother suing me, you'd be wasting your time.
#
# How it works:
#    It searches for the words you enter ("u2", "wild horses"...)
#    on RadioBlogClub.com and Google.com, gets all the music URLs,
#    writes them in a .m3u playlist file which is sent to your player
#    (default operating system handler for .m3u files).
#    That's all.
#    It's so easy to program that I could bang my head against the walls.
#    If you want a lightweight MP3 player which starts quickly under Windows,
#    I recommend Foobar2000 or XMPlay.
#
# Note on proxy support:
#    If you use a proxy, simply put the proxy address in an environment
#    variable named HTTP_PROXY.
#    eg. SET HTTP_PROXY=http://proxy.myisp.com:3128
#    or  SET HTTP_PROXY="http://John doe:mysecret@proxy.myisp.com:3128"
#
# Tested:
#    - Under Windows XP + XMPlay + Python 2.5 : Ok
#
# License:
#    Public domain.
#    Do whatever you want with this thing.
#
# Requirements:
#    Python 2.5 or later.
#    An internet connection.
#    An MP3 player which supports the .m3u playlist format and HTTP streaming.
#
#
# History:
#    1.0.0 beta 1 (2006-08-22):
#       - First version.
#    1.0.0 beta 2 (2006-08-22):
#       - Correction for BugMeNot User-Agent (Python seems banned)
#       - Correction for playlist launching under Windows.
#    1.0.0 beta 3 (2006-08-22):
#       - Corrections in comments.
#       - oops... forgot to import a module (os)
#       - forced to lowercase because uppercase search seems useless.
#         (Searching for "U2" brings no results, "u2" works)
#       - After search, focus is re-set on the text entry in order
#         to be ready for next search.
#    1.0.0 beta 4 (2006-08-23):
#       - RadioBlogClub logic moved to a class so that I can plug
#         new websites in myRadioPlayer in the future.
#       - Added "No-cache" to requests to BugMeNot (Will this prevent
#         the erratic 404 behaviour of BugMeNot ?)
#       - Quotes in filenames are now properly unescaped.
#    1.0.0 beta 5 (2006-08-23):
#       - webbrowser module is now used to start playing the playlist.
#         This should work on all operating systems.
#    1.0.0 beta 6 (2006-08-23):
#       - webbrowser module is now used to start playing the playlist on
#         non-Windows operating systems only.
#    1.0.0 beta 7 (2006-08-28):
#       - Default login bugmenot/bugmenot will be used on RadioBlogClub.com
#         because BugMeNot sometimes does not return logins.
#         (Erratic behaviour)
#    1.0.0 (2006-08-29):
#       - Updated status display.
#       - Minor corrections.
#       - Time to go live !
#    1.0.1 (2006-08-29):
#       - Correction for tkinter/DCOP encoding problem under Linux.
#    1.0.2 (2006-09-04):
#       - Add support for custom .m3u player as a command-line option.
#         (--player).
#       - Changed the m3u player start mode in Windows.
#         (os.startfile() instead of os.system())
#    1.0.3 (2006-09-15):
#       - Added HypeMachine as a source of MP3 URLs. Works great !
#       - Corrected a text encoding problem in RadioBlogClub scraper
#         (Results were assumed to be ascii, but they were in fact latin-1).
#    1.0.4 (2006-09-19):
#       - Error 404 that sometimes rears its head on HypeMachine is now
#         properly handled.
#    1.0.5 (2006-09-24):
#       - HypeMachine scraper removed.
#    1.0.6 (2006-11-28):
#       - Added Google search.
# - Reduced network timeout from 30 seconds to 15 seconds # to cope with crappy servers. # - Also implemented a pooled parallel URL fetcher # (catchy name, isn't it ?) in order to speed up page fetching # for links returned by Google. # - Found a bug: Although I perform a urlfile.close() # it looks like urllib2 leaves some connections open. :-( # It does not prevent myRadioPlayer from working, but it's not clean. # # To do: # - Check playlist launching under Linux. # How do you find the default handler for each MIME type in Linux ? # Or use some shebang ? # Test the presence of different player ? (/usr/bin/xmms etc. ?) # Let the browser or windowing toolkit handle the file ? # - Integrate other sites like RadioBlogClub for a wider choice # of music (MagnaTunes ? GarageBand ? Others ?) # Last.fm ? See http://applications.linux.com/article.pl?sid=06/07/07/1930238&tid=47&tid=13 # See also: http://dir.yahoo.com/News_and_Media/Internet_Broadcasts/Podcasting_and_Audioblogging/Software/ # and: http://dir.yahoo.com/Entertainment/Music/Internet_Broadcasts/ # - Transform this program into a mini local webserver ? (with search form etc. ?) # - Use http://webjay.org/ ? # # If you use a proxy, uncomment the following line. #import os; os.environ['HTTP_PROXY']='http://proxy.myisp.com:3128' # (Sorry the proxy is hard-coded and I don't have time to code command-line parsing.) # You can also set the HTTP_PROXY in your operating system environment: # This program will automatically use it. You will therefore not have to uncomment this line. try: import re,socket,urllib,urllib2,urlparse,cookielib,thread,threading,Queue,os,sys,os.path,Tkinter,webbrowser,optparse,xml.dom.minidom except ImportError: raise ImportError, "This program requires Python 2.4 or later." socket.setdefaulttimeout(15) # 15 seconds timeout on network operations. class scrapError(Exception): pass class myRadioPlayer: def __init__(self,root,player=None): ''' Input: root -- parent tkinter object (can be None). 
player (string) -- path of .m3u file handler (MP3 player). Can be None. ''' self.root = root self.player = player self.status = None # Status bar text (Tkinter.StringVar object) self.searchTerms = "" # Words to search for (Tkinter.StringVar object) self.entry = None # Reference to the text entry widget self.scrapers = [] # List of song scrapers. self.scrapers.append( GoogleScraper() ) self.scrapers.append( RadioBlogClubScraper() ) self.initialize() # Create the GUI # === GUI stuff - boring ================================================= def initialize(self): Tkinter.Label(self.root,text=u"Search for:").grid(column=0,row=0,sticky='W') self.searchTerms = Tkinter.StringVar() self.entry = Tkinter.Entry(self.root,textvariable=self.searchTerms) self.entry.bind("", self.EnterPressedInText) self.entry.grid(column=1,row=0,sticky='EW') self.searchTerms.set(u"Enter artist name or song title") Tkinter.Button(self.root,text=u"Play !",command=self.playButtonClicked).grid(column=2,row=0,sticky='E') self.status = Tkinter.StringVar() Tkinter.Label(self.root,textvariable=self.status).grid(column=0,row=1,columnspan=3,sticky='W') self.status.set(u"Status: Idle.") self.root.grid_columnconfigure(1,weight=1) self.root.propagate(False) self.root.geometry('400x50') self.entry.focus_set() self.entry.selection_range(0, Tkinter.END) self.root.update() def playButtonClicked(self): self.searchAndPlay(self.searchTerms.get().lower()) self.entry.focus_set() self.entry.selection_range(0, Tkinter.END) def EnterPressedInText(self,event): self.searchAndPlay(self.searchTerms.get().lower()) self.entry.focus_set() self.entry.selection_range(0, Tkinter.END) # ========================================================================= def searchAndPlay(self,searchTerms): ''' Search the artist name or song title. 
''' # Get music files URLs musicsUrls = [] for scraper in self.scrapers: self.status.set(u"Searching %s" % scraper.name) ; self.root.update() try: musicsUrls += scraper.scrap(searchTerms) # FIXME: Make asynchronous calls to .scrap() ? except scrapError: self.status.set(u"%s : %s" % (scraper.name,scraper.status)) ; self.root.update() return if len(musicsUrls) == 0: self.status.set(u"Nothing found.") return self.status.set(u"Total: %d songs" % len(musicsUrls)) ; self.root.update() # Save the playlist: playlistFilename = 'myRadioPlayer.m3u' file = open(playlistFilename,'w+b') data = u'\r\n'.join(musicsUrls) file.write(data.encode('latin-1','replace')) file.close() # Launch the .m3u file # FIXME: See how the webbrowser module works, and create a similar # module for .m3u. if self.player: # If the player was specified. commandLine = '%s "%s"' % (self.player, os.path.abspath(playlistFilename)) if "%1" in self.player: # If %1 was specified # replace %1 with filename. commandLine = self.player.replace('%1', os.path.abspath(playlistFilename) ) #else: otherwise, simply append filename to self.player print "Starting %s" % commandLine thread.start_new_thread(os.system, (self.player,)) else: # Player was not specified. Let's use system's default. if sys.platform=="win32": print "Starting %s" % playlistFilename thread.start_new_thread(os.startfile, (playlistFilename,)) else: url = "file://" + os.path.abspath(playlistFilename) # Get absolute URL of file print "Opening %s" % url thread.start_new_thread(webbrowser.open, (url,)) # (We start in a thread because otherwise it seems to block Tkinter.) class parallelFetcher: '''This class is capable of downloading a list of URLs in parallel. This is merely an urllib2.open().read() executed by a pool of threads. It's handy to get multiple pages in parallel, while limiting the number of simultaneous connections. WARNING:This class is a quick hack and has been specialized for myRadioPlayer. 
- If an URL is wrong (wrong URL, HTTP error 404, etc.) the URL will simply be dropped. - Data returned is limited to 200 kb. Example: urls = ['http://google.com','http://sebsauvage.net','http://spamgourmet.com'] p = parallelFetcher(urls) for (url,data) in p.fetch(): print url,len(data) # Data if the data returned from the URL. ''' def __init__(self,urls,numberOfThreads=6): '''Go and get these URLs. Input: urls (list of string): The list of URLs to get. numberOfThreads (integer): The number of simultaneous threads It is not recommended to go over 8. ''' self.inQueue = Queue.Queue() # The list of URLs to process self.outQueue = Queue.Queue() # Processed URLs (list of tuples in the form (url,data)) self.numberOfThreads = numberOfThreads for url in urls: # Put all URLs in the queue. self.inQueue.put(url) def fetch(self): ''' Start to fetch all URLs in parallel ''' # Create and start all threads. threads = [] for i in range(self.numberOfThreads): thread = threading.Thread(target=self._fetchURL) thread.setDaemon(True) thread.start() threads.append(thread) # Wait for all URLs to be processed. self.inQueue.join() # Wait until the queue of URLs to process is empty for thread in threads: # And wait for threads to die (in order to avoid nasty error message when program exits) thread.join() # Get all processed URLs results = [] while True: try: results.append(self.outQueue.get_nowait()) except Queue.Empty: # Empty exception means the Queue is empty. break # Exit the while loop. return results def _fetchURL(self): ''' Fetch data from an URL. This method will be called in parallel by several threads. 
''' headers = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)', 'Pragma': 'no-cache', 'Cache-Control': 'no-cache' } while True: try: url = self.inQueue.get(block=True,timeout=1) # Get an URL to process except Queue.Empty: # No more URLs to process in the Queue break # Exit this thread sys.stdout.write(".") data = None urlfile = None try: # Get the page: request = urllib2.Request(url, None, headers) urlfile = urllib2.urlopen(request) data = urlfile.read(200000) except urllib2.HTTPError,ex: pass except urllib2.URLError,ex: pass if urlfile: urlfile.close() if data: self.outQueue.put((url,data)) self.inQueue.task_done() # Tell the Queue I'm done with this URL. class GoogleHarvester: ''' From http://www.sebsauvage.net/python/snyppets/#google_search ''' re_links = re.compile(r'Next
" in page: # Is there a "Next" link for next page of results ? currentPage += 100 # Yes, go to next page of results. else: break # No, break out of the while True loop. print "Google: Found %d links." % len(links) return sorted(links.keys()) class GoogleScraper: '''Get music URL from Google.com Example: scraper = GoogleScraper() print scraper.scrap("u2") ''' def __init__(self): self.status = "Idle." # Status of this scraper. self.name = "Google.com" # Name of this scraper def setStatus(self,text): self.status = text print self.name+": "+text def scrap(self,searchTerms): ''' Returns a list of music URLs for the terms you provided. ''' self.setStatus(u"Querying Google...") musicsUrls = [] # URL of music files found. # Get URL of pages containing music files: #searchFor = '-inurl:(htm|html|php) intitle:"index of" +"last modified" +"parent directory" +description +size +(ogg|wma|mp3) %s' % ' '.join(['+'+term for term in searchTerms.split(' ')]) searchFor = 'intitle:index.of (mp3|ogg|wma) -inurl:(htm|html|php|asp) "Last Modified" %s' % ' '.join(['+'+term for term in searchTerms.split(' ')]) urls = GoogleHarvester().harvest(searchFor) # Then open these pages and search for links to music files. if len(urls) == 0: self.setStatus(u"Nothing found.") re_links = re.compile(r'href="(.+?)"',re.IGNORECASE|re.DOTALL) self.setStatus(u"Scanning %d pages returned by Google..." % len(urls)) p = parallelFetcher(urls) for (url,data) in p.fetch(): # Fetch all these pages in parallel. for link in re_links.findall(data): # Extract all links in the page. link = urllib.unquote(link) # Make sure link is a music file: if not link.split('.')[-1].lower() in ('mp3','wma','ogg'): continue # Skip to next link # Then make sure each and every entered search word is in the file name. allWordsFound = True for term in searchTerms.split(' '): if term.lower() not in link.lower(): allWordsFound = False if allWordsFound: # If all words were found in file name, keep this mp3 file. 
sys.stdout.write("+") musicsUrls.append(urlparse.urljoin(url,link)) print if len(musicsUrls) == 0: self.setStatus(u"Nothing found.") self.setStatus("Idle.") return musicsUrls class RadioBlogClubScraper: '''Get music URL from RadioBlogClub.com. It automatically logs in RadioBlogClub.com with a BugMeNot account if not logged in. Example: scraper = RadioBlogClubScraper() print scraper.scrap("u2") ''' def __init__(self): self.cookiejar = None # Contains the cookies (session, etc.) self.urlOpener = None # Our URL opener (which uses self.cookiejar) self.loginStatus = 0 # Status of login in RadioBlogClub.com (0=not logged in,1=logged in,2=login failed) self.status = "Idle." # Status of this scraper. self.name = "RadioBlogClub.com" # Name of this scraper def setStatus(self,text): self.status = text print self.name+": "+text def scrap(self,searchTerms): ''' Returns a list of music URLs for the terms you provided. ''' if self.loginStatus == 0: # 0 = not logged in. self._login() # Let's login. if self.loginStatus == 2: # 2 = Login failed. raise scrapError() self.setStatus(u"Searching for %s..." % searchTerms) currentPage = 0 # 0,50,100,150,200,250... to search multiple pages results musicsUrls = [] # URL of .rbl (.mp3) files found. while True: address = 'http://radioblogclub.com/search/%d/%s' % (currentPage ,urllib.quote(searchTerms.replace(' ','_').encode('latin-1','ignore'))) request = urllib2.Request(address) url = self.urlOpener.open(request) page = url.read(200000) if not '
Next',re.IGNORECASE|re.DOTALL) if re_nextPage.search(page): currentPage += 50 # Search next page. else: break # break out of the while True loop. if len(musicsUrls) == 0: self.setStatus(u"Nothing found.") return [] musicsUrls = [url+'?dummy=.mp3' for url in musicsUrls] # Add '.mp3' so that players are happy with the format # (.rbs files are truely .mp3 files) self.setStatus("Idle.") return musicsUrls def _login(self): ''' Log into RadioBlogClub using a login/password from BugMeNot. ''' # Install cookie support (necessary for RadioBlogClub.com) cookiejar = cookielib.CookieJar() self.urlOpener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)) # Get a login from BugMeNot: self.setStatus(u"Getting login/password from BugMeNot...") headers = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)', 'Pragma': 'no-cache', 'Cache-Control': 'no-cache' } request = urllib2.Request('http://www.bugmenot.com/view/www.radioblogclub.com', None, headers) page = self.urlOpener.open(request).read(50000) # Log in BugMeNot # Note: BugMeNot seems to have an erratic behaviour. # For this same URL, it sometimes responds with a 404. (login,password) = ('bugmenot','bugmenot') # Default login if BugMeNot does not return a login/password. re_loginpwd = re.compile(u'Username (.+?).*?Password (.+?)',re.IGNORECASE|re.DOTALL) match = re_loginpwd.search(page) if match: (login,password) = match.groups() # Log into radioblogclub.com: self.setStatus(u"Login in with %s/%s..." % (login,password)) values = {'LOGIN':login, 'PSW':password, 'action':'login' } data = urllib.urlencode(values) request = urllib2.Request("http://radioblogclub.com/login.php", data) url = self.urlOpener.open(request) page = url.read(500000) # Make sure we are logged in. if not 'lg' in [cookie.name for cookie in cookiejar]: self.loginStatus = 2 # Login failed. self.setStatus(u"Login '%s'/'%s' rejected. Please remove this login from BugMeNot." 
% (login,password)) raise scrapError() self.loginStatus = 1 # Login ok. def __repr__(self): return '' % self.loginStatus def main(): # Parse the command-line: parser = optparse.OptionParser() parser.add_option("-p","--player",action="store",type="string",dest="player",help="Path of you .m3u file handle (MP3 player). If not specified, will use the default .3mu handle (Windows) or you browser (Linux). Example: --player /usr/bin/xmms") (options, args) = parser.parse_args() root = Tkinter.Tk() root.title('myRadioPlayer 1.0.6') print u"myRadioPlayer 1.0.6" app = myRadioPlayer(root,player=options.player) root.mainloop() if __name__ == "__main__": main()