...
 
Commits (4)
......@@ -55,26 +55,8 @@ class ReleaseScraper(GetReleaseResultMixin, Scraper, RequestMixin, ExceptionMixi
def get_url(self):
    """Build the URL string for this release.

    ``self._base_url`` is formatted with ``self.store`` first; then
    ``self._release_name``, a slash and ``self.id`` are appended in that order.
    """
    # '%' binds tighter than '+', so only the base URL template is formatted.
    formatted_base = self._base_url % self.store
    release_path = formatted_base + self._release_name
    return release_path + '/' + self.id
def get_params(self):
    """Return a new dict holding the single fixed entry ``'ign-mpt': 'uo%3D4'``."""
    params = dict()
    params['ign-mpt'] = 'uo%3D4'
    return params
def _build_store_index(self):
if self._index is None:
self._index = {}
for obj in self.data['included']:
self._index[(obj['type'], obj['id'])] = obj
def _get_store_obj_by_id(self, type, store_id):
if self._index is None:
self._build_store_index()
return self._index.get((type, store_id))
def _get_store_data(self):
return self.data.get('data', {})
def _check_if_release_artist_equals_track_artist(self):
data = self._get_store_data()
release_artist = data.get('attributes', {}).get('artistName')
release_artist = self.data.get('attributes', {}).get('artistName')
discs = self.get_disc_containers()
self._release_artist_equal_track_artists = True
for song in itertools.chain(*discs.values()):
......@@ -88,8 +70,7 @@ class ReleaseScraper(GetReleaseResultMixin, Scraper, RequestMixin, ExceptionMixi
return lxml.html.document_fromstring(initial_data)
def add_release_event(self):
data = self._get_store_data()
release_date = data.get('attributes', {}).get('releaseDate')
release_date = self.data.get('attributes', {}).get('releaseDate')
if release_date:
release_event = self.result.create_release_event()
release_event.set_date(release_date)
......@@ -98,16 +79,14 @@ class ReleaseScraper(GetReleaseResultMixin, Scraper, RequestMixin, ExceptionMixi
self.log_warning('no release date in store data')
def add_release_title(self):
    """Set the result's title from the ``name`` attribute of the store data.

    Reads ``self.data['attributes']['name']``, tolerating missing keys. A
    non-empty value is passed to ``self.result.set_title``; otherwise a
    warning is logged and no title is set.
    """
    release_title = self.data.get('attributes', {}).get('name')
    if release_title:
        self.result.set_title(release_title)
    else:
        self.log_warning('no release title in store data')
def add_release_artists(self):
data = self._get_store_data()
artist_name = data.get('attributes', {}).get('artistName')
artist_name = self.data.get('attributes', {}).get('artistName')
if artist_name is None:
self.log_warning('no release artist in store data')
elif artist_name == 'Various Artists':
......@@ -122,37 +101,30 @@ class ReleaseScraper(GetReleaseResultMixin, Scraper, RequestMixin, ExceptionMixi
self.result.append_release_artist(artist)
def add_genres(self):
# Append genre names to self.result, skipping every name whose lower-cased
# form is contained in self.exclude_genres.
# NOTE(review): indentation is missing and two lookup strategies appear back
# to back below (presumably the removed and the added side of a rendered
# diff) -- confirm which one is current before relying on this block.
#
# Variant 1: genre references under relationships -> genres -> data; each
# reference of type 'genre' is resolved via self._get_store_obj_by_id().
data = self._get_store_data()
genres = data.get('relationships', {}).get('genres', {}).get('data', [])
for genre in genres:
if genre['type'] == 'genre':
genre_data = self._get_store_obj_by_id(genre['type'], genre['id'])
if genre_data:
genre_name = genre_data.get('attributes', {}).get('name')
# NOTE(review): genre_name is None when the resolved object has no 'name'
# attribute, in which case .lower() raises AttributeError.
if not genre_name.lower() in self.exclude_genres:
self.result.append_genre(genre_name)
# Variant 2: genre names are read directly from attributes -> genreNames.
genres = self.data.get('attributes', {}).get('genreNames', [])
for genre_name in genres:
if not genre_name.lower() in self.exclude_genres:
self.result.append_genre(genre_name)
def get_disc_containers(self):
# Group the release's songs into a dict mapping disc number -> list of song
# containers and return it. Disc numbering starts at 1, and disc 1 is always
# present (possibly empty).
# NOTE(review): indentation is missing and two variants of several statements
# are interleaved below (presumably removed/added sides of a rendered diff);
# the exact nesting cannot be recovered from this view -- confirm against the
# real file.
discs = {1: []}
old_track_num = 0
disc_num = 1
data = self._get_store_data()
# Variant 1 reads the song list from relationships -> songs, variant 2 from
# relationships -> tracks.
songs = data.get('relationships', {}).get('songs', {}).get('data', [])
songs = self.data.get('relationships', {}).get('tracks', {}).get('data', [])
if not songs:
self.log_warning('no songs in store data')
for song in songs:
# Variant 1: entries of type 'product/album/song' are references that are
# resolved via self._get_store_obj_by_id() and must have kind == 'song'.
if song['type'] == 'product/album/song':
song_data = self._get_store_obj_by_id(song['type'], song['id'])
attributes = song_data.get('attributes', {})
if attributes.get('kind') == 'song':
# Variant 2: entries of type 'songs' carry their attributes inline.
if song['type'] == 'songs':
attributes = song.get('attributes', {})
if attributes:
track_num = attributes.get('trackNumber')
# Songs without a (truthy) track number are logged and skipped.
if not track_num:
self.log_warning('no track number for song: {!r}'.format(song_data))
self.log_warning('no track number for song: {!r}'.format(song))
continue
# A track number lower than the previous one plus one starts a new disc.
if track_num < old_track_num + 1:
disc_num += 1
discs[disc_num] = []
discs[disc_num].append(song_data)
discs[disc_num].append(song)
old_track_num = track_num
return discs
......@@ -179,29 +151,26 @@ class ReleaseScraper(GetReleaseResultMixin, Scraper, RequestMixin, ExceptionMixi
def get_track_length(self, track_container):
# Return the length of the track described by track_container, or None when
# no length can be found.
# NOTE(review): indentation is missing and two lookup strategies appear back
# to back below (presumably removed/added sides of a rendered diff) --
# confirm which one is current.
track_length = None
# Variant 1: 'duration' of the first asset of the track's first offer, with
# the offer resolved via self._get_store_obj_by_id(). The unit of 'duration'
# is not visible here.
offers = track_container.get('relationships', {}).get('offers', {}).get('data', [])
if len(offers) > 0:
offer = self._get_store_obj_by_id(offers[0]['type'], offers[0]['id'])
if offer:
assets = offer.get('attributes', {}).get('assets', [])
if len(assets) > 0:
track_length = assets[0].get('duration')
# Variant 2: attributes -> durationInMillis, floor-divided by 1000
# (presumably milliseconds converted to whole seconds, judging by the key
# name -- confirm).
duration_milliseconds = track_container.get('attributes', {}).get('durationInMillis')
if duration_milliseconds:
track_length = duration_milliseconds // 1000
return track_length
def initialize_data(self):
# Run the parent class's initialize_data(), then (when self.data is set)
# locate a <script> element in the parsed page, decode its text content as
# JSON and store the decoded structure in self.data.
# NOTE(review): indentation is missing and two variants are interleaved below
# (presumably removed/added sides of a rendered diff); the definition may also
# continue beyond the lines visible here -- confirm against the real file.
super(ReleaseScraper, self).initialize_data()
if self.data is not None:
#if the release does not exist, the website wants to connect to iTunes
# Variant 1: a single 'div.loadingbox' element marks a missing release.
warning_div = self.data.cssselect('div.loadingbox')
if len(warning_div) == 1:
# Variant 2: the absence of exactly one
# 'script#shoebox-media-api-cache-amp-music' element marks a missing release.
store_data = self.data.cssselect('script#shoebox-media-api-cache-amp-music')
if len(store_data) != 1:
# if the release does not exist there won't be this script tag on the page
self.result = self.instantiate_result(NotFoundResult)
return
# Variant 1 only: the store data lives in 'script#shoebox-ember-data-store'.
store_data = self.data.cssselect('script#shoebox-ember-data-store')
if len(store_data) != 1:
self.raise_exception(u'could not retrieve store data')
store_data = store_data[0].text_content()
try:
# Variant 1: the script content is the store data itself.
self.data = json.loads(store_data)
# Variant 2: the script content is a JSON object whose values are themselves
# JSON strings; the value whose key contains self.id is decoded and the first
# element of its 'd' list becomes self.data.
# NOTE(review): self.data is only assigned inside the `if self.id in key`
# test, so it keeps its previous value when no key matches.
toplevel_data = json.loads(store_data)
for key, value in toplevel_data.items():
if self.id in key:
data = json.loads(value)
self.data = data['d'][0]
# NOTE(review): a bare `except:` also catches KeyboardInterrupt and
# SystemExit and discards the original error; consider narrowing it to
# (ValueError, KeyError, IndexError, TypeError).
except:
self.raise_exception(u'could not parse store data')
self._check_if_release_artist_equals_track_artist()
......
This diff is collapsed.