Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions PyLyrics/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class PyLyrics:
@staticmethod
def getAlbums(singer):
singer = singer.replace(' ', '_')
s = BeautifulSoup(requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text)
s = BeautifulSoup(requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text,"lxml")
spans = s.findAll('span',{'class':'mw-headline'})

als = []
Expand All @@ -60,13 +60,17 @@ def getAlbums(singer):
@staticmethod
def getTracks(album):
    """Return the tracks of *album* as a list of ``Track`` objects.

    The artist's discography is requested once from the lyrics.wikia.com
    ``api.php`` endpoint (``action=lyrics``, ``fmt=xml``) and searched for an
    ``<album>`` element whose text equals ``album.name``, ignoring case and
    surrounding whitespace.

    Raises:
        ValueError: if no album in the response matches ``album.name``.
    """
    # Passing the query as ``params`` lets requests escape the artist name,
    # so characters such as '&' cannot corrupt the query string.
    response = requests.get(
        "http://lyrics.wikia.com/api.php",
        params={'action': 'lyrics', 'artist': album.artist(), 'fmt': 'xml'},
    )
    soup = BeautifulSoup(response.text, "lxml")

    wanted = album.name.strip().lower()
    currentAlbum = None
    for al in soup.find_all('album'):
        if al.text.lower().strip() == wanted:
            currentAlbum = al
            break

    # Without this guard a missing album surfaced as an unbound-variable
    # error (or an attribute lookup on a missing album) further down.
    if currentAlbum is None:
        raise ValueError("Unknown Album Name given")

    song_list = currentAlbum.find_next('songs')
    if song_list is None:
        # The album element is present but no <songs> element follows it.
        return []
    return [Track(song.text, album, album.artist())
            for song in song_list.find_all('item')]

@staticmethod
Expand All @@ -75,7 +79,7 @@ def getLyrics(singer, song):
singer = singer.replace(' ', '_')
song = song.replace(' ', '_')
r = requests.get('http://lyrics.wikia.com/{0}:{1}'.format(singer,song))
s = BeautifulSoup(r.text)
s = BeautifulSoup(r.text,"lxml")
#Get main lyrics holder
lyrics = s.find("div",{'class':'lyricbox'})
if lyrics is None:
Expand Down
14 changes: 14 additions & 0 deletions build/lib.linux-x86_64-2.7/PyLyrics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
__author__ = "Pradipta"
__version__ = '1.0.0'

try:
    # Normal case: this module is imported as part of the PyLyrics package.
    from .classes import *
    from .functions import *
except (ImportError, ValueError, SystemError):
    # Fallback for running the files directly from the package directory,
    # where explicit relative imports fail. The failure is a ValueError on
    # Python 2, a SystemError on Python 3.3-3.5 and an ImportError on 3.6+.
    # Anything else (e.g. a SyntaxError in a submodule) now propagates.
    from classes import *
    from functions import *



5 changes: 5 additions & 0 deletions build/lib.linux-x86_64-2.7/PyLyrics/classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Classes for Scrapers: re-exports everything defined in the functions module.
try:
    # Imported as part of the package.
    from .functions import *
except (ImportError, ValueError, SystemError):
    # Run directly from the package directory: relative imports fail with
    # ValueError (Python 2), SystemError (3.3-3.5) or ImportError (3.6+).
    from functions import *
110 changes: 110 additions & 0 deletions build/lib.linux-x86_64-2.7/PyLyrics/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import requests
from bs4 import BeautifulSoup, Comment, NavigableString
import sys, codecs, json

class Track(object):
    """A single song together with the album and artist it was listed under.

    The constructor stores its arguments unchanged as ``name`` (the track
    title), ``album`` and ``artist``.
    """

    def __init__(self, trackName, album, artist):
        self.name = trackName
        self.album = album
        self.artist = artist

    def __repr__(self):
        return self.name

    def link(self):
        """Return the URL of this track's lyrics page.

        Spaces become underscores, which is the same URL that
        ``PyLyrics.getLyrics`` requests for this artist/title pair.
        """
        return 'http://lyrics.wikia.com/{0}:{1}'.format(
            self.artist.replace(' ', '_'),
            self.name.replace(' ', '_'),
        )

    def getLyrics(self):
        """Fetch this track's lyrics via ``PyLyrics.getLyrics``."""
        return PyLyrics.getLyrics(self.artist, self.name)
class Artist(object):
    """An artist identified by name."""

    def __init__(self, name):
        self.name = name

    def getAlbums(self):
        """Return this artist's albums via ``PyLyrics.getAlbums``."""
        return PyLyrics.getAlbums(self.name)

    def __repr__(self):
        # __repr__ must return the native ``str`` type: bytes on Python 2,
        # text on Python 3. Returning encoded bytes on Python 3 raises
        # "TypeError: __repr__ returned non-string".
        if sys.version_info[0] == 2:
            return self.name.encode('utf-8')
        return self.name
class Album(object):
    """An album parsed from a headline of the form ``"<title> <year>"``.

    The last space-separated token of the headline is stored as ``year``
    exactly as it appears; everything before it becomes ``name``.
    """

    def __init__(self, name, link, singer):
        # Drop trailing whitespace first: otherwise the last token is the
        # empty string and the headline cannot be split sensibly.
        headline = name.rstrip()
        # Split on the LAST space only, so a title that happens to contain
        # the year token elsewhere is left intact.
        title, _, year = headline.rpartition(' ')
        self.year = year
        self.name = title.rstrip()
        self.url = link
        self.singer = singer

    def link(self):
        """Return the album URL given at construction."""
        return self.url

    def __repr__(self):
        # Python 2 needs a byte string from __repr__; Python 3 needs text.
        if sys.version_info[0] == 2:
            return self.name.encode('utf-8', 'replace')
        return self.name

    def artist(self):
        """Return the singer name given at construction."""
        return self.singer

    def tracks(self):
        """Return this album's tracks via ``PyLyrics.getTracks``."""
        return PyLyrics.getTracks(self)

class PyLyrics:
    """Static helpers that scrape albums, tracks and lyrics from lyrics.wikia.com."""

    @staticmethod
    def getAlbums(singer):
        """Return the albums listed on the artist page of *singer*.

        Spaces in *singer* are replaced by underscores to form the page URL.
        Every ``<span class="mw-headline">`` that contains a link with an
        ``href`` yields one ``Album``; headlines without such a link are
        skipped.

        Raises:
            ValueError: if the page yields no albums at all.
        """
        singer = singer.replace(' ', '_')
        page = requests.get('http://lyrics.wikia.com/{0}'.format(singer))
        s = BeautifulSoup(page.text, "lxml")

        als = []
        for tag in s.find_all('span', {'class': 'mw-headline'}):
            a = tag.find('a')
            href = a.get('href') if a is not None else None
            if href is None:
                # Headline without a usable link: not an album entry.
                continue
            als.append(Album(a.text, 'http://lyrics.wikia.com' + href, singer))

        if not als:
            raise ValueError("Unknown Artist Name given")
        return als

    @staticmethod
    def getTracks(album):
        """Return the tracks of *album* as a list of ``Track`` objects.

        The artist's discography is requested from the ``api.php`` endpoint
        (``action=lyrics``, ``fmt=xml``) and searched for an ``<album>``
        element whose text equals ``album.name``, ignoring case and
        surrounding whitespace.

        Raises:
            ValueError: if no album in the response matches ``album.name``.
        """
        # Passing the query as ``params`` lets requests escape the artist
        # name, so characters such as '&' cannot corrupt the query string.
        response = requests.get(
            "http://lyrics.wikia.com/api.php",
            params={'action': 'lyrics', 'artist': album.artist(), 'fmt': 'xml'},
        )
        soup = BeautifulSoup(response.text, "lxml")

        wanted = album.name.strip().lower()
        currentAlbum = None
        for al in soup.find_all('album'):
            if al.text.lower().strip() == wanted:
                currentAlbum = al
                break

        # Previously a missing album surfaced as an unbound-variable error.
        if currentAlbum is None:
            raise ValueError("Unknown Album Name given")

        song_list = currentAlbum.find_next('songs')
        if song_list is None:
            # The album element is present but no <songs> element follows it.
            return []
        return [Track(song.text, album, album.artist())
                for song in song_list.find_all('item')]

    @staticmethod
    def getLyrics(singer, song):
        """Return the lyrics of *song* by *singer* as text.

        Spaces in both arguments are replaced by underscores to form the page
        URL. Inside the ``<div class="lyricbox">`` element, scripts and
        comments are removed, ``div``/``i``/``b``/``a`` tags are unwrapped,
        literal newlines are dropped and ``<br/>`` becomes a newline.

        Raises:
            ValueError: if the page has no ``lyricbox`` element.
        """
        # Replace spaces with _
        singer = singer.replace(' ', '_')
        song = song.replace(' ', '_')
        r = requests.get('http://lyrics.wikia.com/{0}:{1}'.format(singer, song))
        s = BeautifulSoup(r.text, "lxml")

        # Get main lyrics holder
        lyrics = s.find("div", {'class': 'lyricbox'})
        if lyrics is None:
            raise ValueError("Song or Singer does not exist or the API does not have Lyrics")

        # Remove scripts
        for script in lyrics('script'):
            script.extract()

        # Remove comments
        for comment in lyrics.find_all(text=lambda text: isinstance(text, Comment)):
            comment.extract()

        # Remove unnecessary tags but keep their children
        for tag in ['div', 'i', 'b', 'a']:
            for match in lyrics.find_all(tag):
                match.replaceWithChildren()

        # Render only the inner markup of the holder. This replaces slicing
        # a fixed number of characters off the rendered element, which only
        # worked when the opening tag was exactly '<div class="lyricbox">'.
        inner = lyrics.decode_contents()
        return inner.replace('\n', '').replace('<br/>', '\n')

def main():
    """Demo: print OneRepublic's albums, then the lyrics of one track.

    The track printed is the one at index 7 of the last album returned.
    """
    discography = PyLyrics.getAlbums('OneRepublic')
    print(discography)

    last_album = discography[-1]
    last_album_tracks = PyLyrics.getTracks(last_album)
    chosen_track = last_album_tracks[7]
    print(chosen_track.getLyrics())


if __name__ == '__main__':
    main()
22 changes: 22 additions & 0 deletions build/lib.linux-x86_64-2.7/PyLyrics/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import unittest
try:
    # Imported as part of the package.
    from .__init__ import *
except (ImportError, ValueError, SystemError):
    # Run directly from the package directory: relative imports fail with
    # ValueError (Python 2), SystemError (3.3-3.5) or ImportError (3.6+).
    from __init__ import *

try:
    # Python 2 has a builtin basestring; keep it.
    basestring = basestring
except NameError:
    # Python 3: accept either text or bytes wherever basestring is checked.
    basestring = (str, bytes)

# Fetched once when the module is imported and shared by the tests below.
albums = PyLyrics.getAlbums('Taylor Swift')


class PyLyricsTest(unittest.TestCase):
    """Type-level smoke tests for the three PyLyrics entry points."""

    def testAlbums(self):
        """getAlbums returns a list."""
        self.assertIsInstance(albums, list)

    def testTracks(self):
        """tracks() of the first album returns a list."""
        first_album = albums[0]
        track_list = first_album.tracks()
        self.assertIsInstance(track_list, list)

    def testLyrics(self):
        """getLyrics returns a string type."""
        lyrics_text = PyLyrics.getLyrics('Eminem', 'The Monster')
        self.assertIsInstance(lyrics_text, basestring)


if __name__ == '__main__':
    unittest.main()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
packages=['PyLyrics'],
url="http://github.com/geekpradd/PyLyrics",
install_requires=[
'beautifulsoup4','requests',],
'beautifulsoup4','requests','lxml'],
classifiers=[
"Development Status :: 5 - Production/Stable",
"Topic :: Internet",
Expand Down