diff --git a/Contents/Code/__init__.py b/Contents/Code/__init__.py index 7577087..993324b 100644 --- a/Contents/Code/__init__.py +++ b/Contents/Code/__init__.py @@ -1,21 +1,16 @@ # Audiobooks (Audible) # coding: utf-8 import json +import Queue import re import types +# Import internal tools +from logging import Logging +from search_tools import SearchTool +from update_tools import UpdateTool +from urls import SiteUrl -import Queue - - -def json_decode(output): - try: - return json.loads(output, encoding="utf-8") - except: - return None - - -# URLs -VERSION_NO = '1.2021.08.24.1' +VERSION_NO = '2021.08.27.1' # Delay used when requesting HTML, # may be good to have to prevent being banned from the site @@ -30,164 +25,12 @@ def json_decode(output): THREAD_MAX = 20 -intl_sites = { - 'en': { - 'url': 'www.audible.com', - 'urltitle': u'title=', - 'rel_date': u'Release date', - 'nar_by': u'Narrated By', - 'nar_by2': u'Narrated by' - }, - 'fr': { - 'url': 'www.audible.fr', - 'urltitle': u'title=', - 'rel_date': u'Date de publication', - 'nar_by': u'Narrateur(s)', - 'nar_by2': u'Lu par' - }, - 'de': { - 'url': 'www.audible.de', - 'urltitle': u'title=', - 'rel_date': u'Erscheinungsdatum', - 'nar_by': u'Gesprochen von', - 'rel_date2': u'Veröffentlicht' - }, - 'it': { - 'url': 'www.audible.it', - 'urltitle': u'title=', - 'rel_date': u'Data di Pubblicazione', - 'nar_by': u'Narratore' - }, -} - -sites_langs = { - 'www.audible.com': {'lang': 'en'}, - 'www.audible.co.uk': {'lang': 'en'}, - 'www.audible.com.au': {'lang': 'en'}, - 'www.audible.fr': {'lang': 'fr'}, - 'www.audible.de': {'lang': 'de'}, - 'www.audible.it': {'lang': 'it'}, -} - - -def SetupUrls(sitetype, base, lang='en'): - Log('Library/Search language is : %s', lang) - ctx = dict() - if sitetype: - Log('Manual Site Selection Enabled : %s', base) - Log('Language being ignored due to manual site selection') - if base in sites_langs: - Log('Pulling language from sites array') - lang = sites_langs[base]['lang'] - if lang in intl_sites: - base = intl_sites[lang]['url'] - urlsearchtitle = intl_sites[lang]['urltitle'] - ctx['REL_DATE'] = intl_sites[lang]['rel_date'] - ctx['NAR_BY'] = intl_sites[lang]['nar_by'] - if 'rel_date2' in intl_sites[lang]: - ctx['REL_DATE_INFO'] = intl_sites[lang]['rel_date2'] - else: - ctx['REL_DATE_INFO'] = ctx['REL_DATE'] - if 'nar_by2' in intl_sites[lang]: - ctx['NAR_BY_INFO'] = intl_sites[lang]['nar_by2'] - else: - ctx['NAR_BY_INFO'] = ctx['NAR_BY'] - else: - ctx['REL_DATE'] = 'Release date' - ctx['REL_DATE_INFO'] = ctx['REL_DATE'] - ctx['NAR_BY'] = 'Narrated By' - ctx['NAR_BY_INFO'] = 'Narrated by' - Log( - 'Sites language is : %s', lang - ) - Log( - '/************************************' - 'LANG DEBUGGING' - '************************************/' - ) - Log( - '/* REL_DATE = %s', ctx['REL_DATE'] - ) - Log( - '/* REL_DATE_INFO = %s', ctx['REL_DATE_INFO'] - ) - Log( - '/* NAR_BY = %s', ctx['NAR_BY'] - ) - Log( - '/* NAR_BY_INFO = %s', ctx['NAR_BY_INFO'] - ) - Log( - '/****************************************' - '****************************************/' - ) - else: - Log( - 'Audible site will be chosen by library language' - ) - Log( - 'Library Language is %s', lang - ) - if base is None: - base = 'www.audible.com' - if lang in intl_sites: - base = intl_sites[lang]['url'] - urlsearchtitle = intl_sites[lang]['urltitle'] - ctx['REL_DATE'] = intl_sites[lang]['rel_date'] - ctx['NAR_BY'] = intl_sites[lang]['nar_by'] - if 'rel_date2' in intl_sites[lang]: - ctx['REL_DATE_INFO'] = intl_sites[lang]['rel_date2'] - else: - ctx['REL_DATE_INFO'] = ctx['REL_DATE'] - if 'nar_by2' in intl_sites[lang]: - ctx['NAR_BY_INFO'] = intl_sites[lang]['nar_by2'] - else: - ctx['NAR_BY_INFO'] = ctx['NAR_BY'] - else: - ctx['REL_DATE'] = 'Release date' - ctx['REL_DATE_INFO'] = ctx['REL_DATE'] - ctx['NAR_BY'] = 'Narrated By' - ctx['NAR_BY_INFO'] = 'Narrated by' - - AUD_BASE_URL = 'https://' + str(base) + '/' - AUD_TITLE_URL = urlsearchtitle +# Setup logger +log = Logging() - AUD_BOOK_INFO_ARR = [ - AUD_BASE_URL, - 'pd/%s?ipRedirectOverride=true', - ] - ctx['AUD_BOOK_INFO'] = ''.join(AUD_BOOK_INFO_ARR) - AUD_ARTIST_SEARCH_URL_ARR = [ - AUD_BASE_URL, - 'search?searchAuthor=%s&ipRedirectOverride=true', - ] - ctx['AUD_ARTIST_SEARCH_URL'] = ''.join(AUD_ARTIST_SEARCH_URL_ARR) - - AUD_ALBUM_SEARCH_URL_ARR = [ - AUD_BASE_URL, - 'search?', - AUD_TITLE_URL, - '%s&x=41&ipRedirectOverride=true', - ] - ctx['AUD_ALBUM_SEARCH_URL'] = ''.join(AUD_ALBUM_SEARCH_URL_ARR) - - AUD_KEYWORD_SEARCH_URL_ARR = [ - AUD_BASE_URL, - ('search?filterby=field-keywords&advsearchKeywords=%s' - '&x=41&ipRedirectOverride=true'), - ] - ctx['AUD_KEYWORD_SEARCH_URL'] = ''.join(AUD_KEYWORD_SEARCH_URL_ARR) - - AUD_SEARCH_URL_ARR = [ - AUD_BASE_URL, - 'search?', - AUD_TITLE_URL, - '{0}&searchAuthor={1}&x=41&ipRedirectOverride=true', - ] - ctx['AUD_SEARCH_URL'] = ''.join(AUD_SEARCH_URL_ARR) - - return ctx +def ValidatePrefs(): + log.debug('ValidatePrefs function call') def Start(): @@ -199,6 +42,12 @@ def Start(): 'Media Center PC 6.0' ) HTTP.Headers['Accept-Encoding'] = 'gzip' + log.separator( + msg=( + "Audible Audiobooks Agent v" + VERSION_NO + ), + log_level="info" + ) class AudiobookArtist(Agent.Artist): @@ -209,14 +58,10 @@ class AudiobookArtist(Agent.Artist): prev_search_provider = 0 - def Log(self, message, *args): - if Prefs['debug']: - Log(message, *args) - def getDateFromString(self, string): try: return Datetime.ParseDate(string).date() - except: + except AttributeError: return None def getStringContentFromXPath(self, source, query): @@ -244,10 +89,8 @@ def findDateInTitle(self, title): return Datetime.ParseDate(result.group(0)).date() return None - def doSearch(self, url, ctx): - + def doSearch(self, ctx, url): html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY) - found = [] for r in html.xpath('//div[a/img[@class="yborder"]]'): @@ -265,36 +108,24 @@ def doSearch(self, url, ctx): return found def search(self, results, media, lang, manual=False): - # Author data is pulling from last.fm automatically. # This will probably never be built out unless a good # author source is identified. # Log some stuff - self.Log( - '------------------------------------------------' - 'ARTIST SEARCH' - '------------------------------------------------' - ) - self.Log( + log.separator(msg='ARTIST SEARCH', log_level='debug') + log.debug( '* Album: %s', media.album ) - self.Log( + log.debug( '* Artist: %s', media.artist ) - self.Log( + log.debug( '****************************************' 'Not Ready For Artist Search Yet' '****************************************' ) - self.Log( - '------------------------------------------------' - '------------------------------------------------' - ) - return - - def update(self, metadata, media, lang, force=False): - return + log.separator(log_level='debug') def hasProxy(self): return Prefs['imageproxyurl'] is not None @@ -309,7 +140,7 @@ def worker(self, queue, stoprequest): try: func(*args, **kargs) except Exception as e: - self.Log(e) + log.info(e) queue.task_done() except Queue.Empty: continue @@ -331,50 +162,208 @@ class AudiobookAlbum(Agent.Album): prev_search_provider = 0 - def Log(self, message, *args): - if Prefs['debug']: - Log(message, *args) + def search(self, results, media, lang, manual): + url_info = SiteUrl(Prefs['sitetype'], Prefs['site'], lang) + ctx = url_info.SetupUrls() + + # Instantiate search helper + search_helper = SearchTool(lang, manual, media, results) + + search_helper.pre_search_logging() + + # Run helper before passing to SearchTool + normalizedName = self.normalize_name(media.album) + # Strip title of things like unabridged and spaces + search_helper.strip_title(normalizedName) + # Generate search url + searchUrl = self.create_search_url(ctx, media, search_helper.normalizedName) + # Run actual search, and set the variable to it's return + result = self.doSearch(ctx, searchUrl) + + # Write search result status to log + if not result: + log.info( + 'No results found for query "%s"', + normalizedName + ) + return + log.debug( + 'Found %s result(s) for query "%s"', + len(result), + normalizedName + ) + + info = self.run_search(search_helper, media, result) + + # Output the final results. + log.separator(log_level="debug") + log.debug('Final result:') + for i, r in enumerate(info): + description = '\"%s\" by %s [%s]' % ( + r['title'], r['artist'], r['year'] + ) + log.debug( + ' [%s] %s. %s (%s) %s {%s} [%s]', + r['score'], (i + 1), r['title'], r['year'], + r['artist'], r['id'], r['thumb'] + ) + results.Append( + MetadataSearchResult( + id=r['id'], + name=description, + score=r['score'], + thumb=r['thumb'], + lang=lang + ) + ) + + """ + If there are more than one result, + and this one has a score that is >= GOOD SCORE, + then ignore the rest of the results + """ + if not manual and len(info) > 1 and r['score'] >= GOOD_SCORE: + log.info( + ' *** The score for these results are great, ' + 'so we will use them, and ignore the rest. ***' + ) + break + + def update(self, metadata, media, lang, force=False): + url_info = SiteUrl(Prefs['sitetype'], Prefs['site'], lang) + ctx = url_info.SetupUrls() + + log.separator( + msg=( + "UPDATING" + media.title + ( + "ID: " + metadata.id + ) + ), + log_level="info" + ) + + # Make url + url = ctx['AUD_BOOK_INFO'] % metadata.id - def getDateFromString(self, string): try: - return Datetime.ParseDate(string).date() - except: - return None + html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY) + except Exception as e: + log.info(e) + # Instantiate update helper + update_helper = UpdateTool(force, lang, media, metadata, url) - def getStringContentFromXPath(self, source, query): - return source.xpath('string(' + query + ')') + self.scrape_book_metadata(ctx, update_helper, html) - def getAnchorUrlFromXPath(self, source, query): - anchor = source.xpath(query) + if not update_helper.date: + self.date_missing(update_helper, html) - if len(anchor) == 0: - return None + # prefer copyright year over datePublished + if Prefs['copyyear']: + self.use_copyright_date(update_helper, html) - return anchor[0].get('href') + update_helper.date = self.getDateFromString(update_helper.date) - def getImageUrlFromXPath(self, source, query): - img = source.xpath(query) + self.handle_series(update_helper, html) - if len(img) == 0: - return None + # cleanup synopsis + update_helper.synopsis = ( + update_helper.synopsis.replace("", "") + .replace("", "") + .replace("", "") + .replace("", "") + .replace("", "") + .replace("", "") + .replace("", "") + .replace("", "") + .replace("", "") + .replace("", "") + .replace("
", "") + .replace("
", "\n") + ) - return img[0].get('src') + # Setup logging of all data in the array + data_to_log = [ + {'date': update_helper.date}, + {'title': update_helper.title}, + {'author': update_helper.author}, + {'narrator': update_helper.narrator}, + {'series': update_helper.series}, + {'genres': update_helper.genre_parent + ', ' + update_helper.genre_child}, + {'studio': update_helper.studio}, + {'thumb': update_helper.thumb}, + {'rating': update_helper.rating}, + {'synopsis': update_helper.synopsis}, + {'volume': update_helper.volume}, + {'series2': update_helper.series2}, + {'volume2': update_helper.volume2}, + {'series def': update_helper.series_def}, + {'volume def': update_helper.volume_def}, + ] + log.metadata(data_to_log, log_level="debug") - def findDateInTitle(self, title): - result = re.search(r'(\d+-\d+-\d+)', title) - if result is not None: - return Datetime.ParseDate(result.group(0)).date() - return None + self.compile_metadata(update_helper) + + """ + Search functions that require PMS imports, + thus we cannot 'outsource' them to SearchTool + Sorted by position in the search process + """ + + def normalize_name(self, input_name): + # Normalize the name + normalizedName = String.StripDiacritics( + input_name + ) + return normalizedName + + def create_search_url(self, ctx, media, normalizedName): + # Make the URL + if media.artist: + searchUrl = ctx['AUD_SEARCH_URL'].format( + ( + String.Quote((normalizedName).encode('utf-8'), usePlus=True) + ), + ( + String.Quote((media.artist).encode('utf-8'), usePlus=True) + ) + ) + else: + searchUrl = ctx['AUD_KEYWORD_SEARCH_URL'] % ( + String.Quote((normalizedName).encode('utf-8'), usePlus=True) + ) + return searchUrl - def doSearch(self, url, ctx): + def doSearch(self, ctx, url): html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY) found = [] - self.Log( - '-----------------------------------------' - 'just before new xpath line' - '-----------------------------------------' + + log.separator(msg='just before new xpath line', log_level="debug") + # Set append to the returned array from this function + found = self.before_xpath(ctx, found, html) + + log.separator(msg='just after new xpath line', log_level="debug") + # Set append to the returned array from this function + found = self.after_xpath(ctx, found, html) + + return found + + def before_xpath(self, ctx, found, html): + for r in html.xpath( + '//ul//li[contains(@class,"productListItem")]' + ): + author = self.getStringContentFromXPath( + r, ( + 'div/div/div/div/div/div/span/ul' + '/li[contains (@class,"authorLabel")]/span/a[1]' + ) ) - for r in html.xpath('//ul//li[contains(@class,"productListItem")]'): datetext = self.getStringContentFromXPath( r, ( u'div/div/div/div/div/div/span/ul/li' @@ -383,55 +372,49 @@ def doSearch(self, url, ctx): ) datetext = re.sub(r'[^0-9\-]', '', datetext) date = self.getDateFromString(datetext) + narrator = self.getStringContentFromXPath( + r, ( + u'div/div/div/div/div/div/span/ul/li' + '[contains (@class,"narratorLabel")]/span//a[1]' + ).format(ctx['NAR_BY']) + ) + murl = self.getAnchorUrlFromXPath( + r, 'div/div/div/div/div/div/span/ul/li/h3//a[1]' + ) title = self.getStringContentFromXPath( r, ( 'div/div/div/div/div/div/span/ul//a' '[contains (@class,"bc-link")][1]' ) ) - murl = self.getAnchorUrlFromXPath( - r, 'div/div/div/div/div/div/span/ul/li/h3//a[1]' - ) thumb = self.getImageUrlFromXPath( r, 'div/div/div/div/div/div/div' '[contains(@class,"responsive-product-square")]/div/a/img' ) - author = self.getStringContentFromXPath( - r, ( - 'div/div/div/div/div/div/span/ul' - '/li[contains (@class,"authorLabel")]/span/a[1]' - ) - ) - narrator = self.getStringContentFromXPath( - r, ( - u'div/div/div/div/div/div/span/ul/li' - '[contains (@class,"narratorLabel")]/span//a[1]' - ).format(ctx['NAR_BY']) - ) - self.Log( - '-----------------------------------------------' - 'XPATH SEARCH HIT' - '-----------------------------------------------' - ) + log.separator(msg='XPATH SEARCH HIT', log_level="debug") found.append( { - 'url': murl, - 'title': title, + 'author': author, 'date': date, + 'narrator': narrator, 'thumb': thumb, - 'author': author, - 'narrator': narrator + 'title': title, + 'url': murl, } ) + return found - self.Log( - '-----------------------------------------' - 'just after new xpath line' - '-----------------------------------------' + def after_xpath(self, ctx, found, html): + for r in html.xpath( + '//div[contains (@class, "adbl-search-result")]' + ): + author = self.getStringContentFromXPath( + r, ( + 'div/div/ul/li/' + '/a[contains (@class,"author-profile-link")][1]' + ) ) - - for r in html.xpath('//div[contains (@class, "adbl-search-result")]'): date = self.getDateFromString( self.getStringContentFromXPath( r, ( @@ -442,201 +425,43 @@ def doSearch(self, url, ctx): ) ) ) - title = self.getStringContentFromXPath( - r, 'div/div/div/div/a[1]' - ) murl = self.getAnchorUrlFromXPath( r, 'div/div/div/div/a[1]' ) - thumb = self.getImageUrlFromXPath( - r, 'div[contains (@class,"adbl-prod-image-sample-cont")]/a/img' - ) - author = self.getStringContentFromXPath( - r, ( - 'div/div/ul/li/' - '/a[contains (@class,"author-profile-link")][1]' - ) - ) narrator = self.getStringContentFromXPath( r, u'div/div/ul/li[contains (., "{0}")]//a[1]'.format( ctx['NAR_BY'] ) ) - self.Log( - '-----------------------------------------------' - 'XPATH SEARCH HIT' - '-----------------------------------------------' + thumb = self.getImageUrlFromXPath( + r, 'div[contains (@class,"adbl-prod-image-sample-cont")]/a/img' ) + title = self.getStringContentFromXPath( + r, 'div/div/div/div/a[1]' + ) + log.separator(msg='XPATH SEARCH HIT', log_level="debug") found.append( { - 'url': murl, - 'title': title, + 'author': author, 'date': date, + 'narrator': narrator, 'thumb': thumb, - 'author': author, - 'narrator': narrator + 'title': title, + 'url': murl, } ) - return found - def search(self, results, media, lang, manual): - ctx = SetupUrls(Prefs['sitetype'], Prefs['site'], lang) - LCL_IGNORE_SCORE = IGNORE_SCORE - - self.Log( - '-----------------------------------------------' - 'ALBUM SEARCH' - '-----------------------------------------------' - ) - self.Log('* ID: %s', media.parent_metadata.id) - self.Log('* Title: %s', media.title) - self.Log('* Name: %s', media.name) - self.Log('* Album: %s', media.album) - self.Log('* Artist: %s', media.artist) - self.Log( - '-------------------------------------------------' - '-------------------------------------------------' - ) - - # Handle a couple of edge cases where - # album search will give bad results. - if media.album is None and not manual: - self.Log('Album Title is NULL on an automatic search. Returning') - return - if media.album == '[Unknown Album]' and not manual: - self.Log( - 'Album Title is [Unknown Album]' - ' on an automatic search. Returning' - ) - return - - if manual: - Log( - 'You clicked \'fix match\'. ' - 'This may have returned no useful results because ' - 'it\'s searching using the title of the first track.' - ) - Log( - 'There\'s not currently a way around this initial failure. ' - 'But clicking \'Search Options\' and ' - 'entering the title works just fine.' - ) - Log( - 'This message will appear during the initial ' - 'search and the actual manual search.' - ) - # If this is a custom search, - # use the user-entered name instead of the scanner hint. - if media.name: - Log( - 'Custom album search for: ' + media.name - ) - media.album = media.name - else: - Log('Album search: ' + media.title) - - # Log some stuff for troubleshooting detail - self.Log( - '-----------------------------------' - '------------------------------------' - ) - self.Log('* ID: %s', media.parent_metadata.id) - self.Log('* Title: %s', media.title) - self.Log('* Name: %s', media.name) - self.Log('* Album: %s', media.album) - self.Log( - '-----------------------------------' - '------------------------------------' - ) - - # Normalize the name - normalizedName = String.StripDiacritics(media.album) - if len(normalizedName) == 0: - normalizedName = media.album - Log( - 'normalizedName = %s', normalizedName - ) - - # Chop off "unabridged" - normalizedName = re.sub(r"[\(\[].*?[\)\]]", "", normalizedName) - Log( - 'chopping bracketed text = %s', normalizedName - ) - normalizedName = normalizedName.strip() - Log( - 'normalizedName stripped = %s', normalizedName - ) - - self.Log( - '***** SEARCHING FOR "%s" - AUDIBLE v.%s *****', - normalizedName, VERSION_NO - ) - - # Make the URL - if media.artist is not None: - searchUrl = ctx['AUD_SEARCH_URL'].format( - ( - String.Quote((normalizedName).encode('utf-8'), usePlus=True) - ), - ( - String.Quote((media.artist).encode('utf-8'), usePlus=True) - ) - ) - else: - searchUrl = ctx['AUD_KEYWORD_SEARCH_URL'] % ( - String.Quote((normalizedName).encode('utf-8'), usePlus=True) - ) - found = self.doSearch(searchUrl, ctx) - - # Write search result status to log - if len(found) == 0: - self.Log('No results found for query "%s"', normalizedName) - return - else: - self.Log( - 'Found %s result(s) for query "%s"', len(found), normalizedName - ) - i = 1 - for f in found: - self.Log( - ' %s. (title) %s (author) %s (url)[%s]' - ' (date)(%s) (thumb){%s}', - i, f['title'], f['author'], - f['url'], str(f['date']), f['thumb'] - ) - i += 1 - - self.Log( - '-----------------------------------' - '------------------------------------' - ) + def run_search(self, helper, media, result): # Walk the found items and gather extended information info = [] - i = 1 - for f in found: - url = f['url'] - self.Log('URL For Breakdown: %s', url) - - # Get the id - for itemId in url.split('/'): - # IDs No longer start with just 'B0' - if re.match(r'^[0-9A-Z]{10,10}', itemId): - break - itemId = None - - # New Search results contain question marks after the ID - for itemId in itemId.split('?'): - # IDs No longer start with just 'B0' - if re.match(r'^[0-9A-Z]{10,10}', itemId): - break - - if len(itemId) == 0: - Log('No Match: %s', url) - continue - self.Log('* ID is %s', itemId) + log.separator(msg="Search results", log_level="info") + for i, f in enumerate(result): + valid_itemId = helper.get_id_from_url(item=f) + if not valid_itemId: + continue title = f['title'] thumb = f['thumb'] @@ -651,8 +476,6 @@ def search(self, results, media, lang, manual): # Score the album name scorebase1 = media.album scorebase2 = title.encode('utf-8') - # self.Log('scorebase1: %s', scorebase1) - # self.Log('scorebase2: %s', scorebase2) score = INITIAL_SCORE - Util.LevenshteinDistance( scorebase1, scorebase2 @@ -661,23 +484,27 @@ def search(self, results, media, lang, manual): if media.artist: scorebase3 = media.artist scorebase4 = author - # self.Log('scorebase3: %s', scorebase3) - # self.Log('scorebase4: %s', scorebase4) score = INITIAL_SCORE - Util.LevenshteinDistance( scorebase3, scorebase4 ) - self.Log('* Title is %s', title) - self.Log('* Author is %s', author) - self.Log('* Narrator is %s', narrator) - self.Log('* Date is %s', str(date)) - self.Log('* Score is %s', str(score)) - self.Log('* Thumb is %s', thumb) - - if score >= LCL_IGNORE_SCORE: + log.info("Result #" + str(i + 1)) + # Log basic metadata + data_to_log = [ + {'ID is': valid_itemId}, + {'Title is': title}, + {'Author is': author}, + {'Narrator is': narrator}, + {'Date is ': str(date)}, + {'Score is': str(score)}, + {'Thumb is': thumb}, + ] + log.metadata(data_to_log, log_level="info") + + if score >= IGNORE_SCORE: info.append( { - 'id': itemId, + 'id': valid_itemId, 'title': title, 'year': year, 'date': date, @@ -687,85 +514,29 @@ def search(self, results, media, lang, manual): } ) else: - self.Log( + log.info( '# Score is below ignore boundary (%s)... Skipping!', - LCL_IGNORE_SCORE - ) - - if i != len(found): - self.Log( - '-----------------------------------' - '------------------------------------' + IGNORE_SCORE ) - i += 1 + # Print separators for easy reading + if i <= len(result): + log.separator(log_level="info") info = sorted(info, key=lambda inf: inf['score'], reverse=True) + return info - # Output the final results. - self.Log( - '***********************************' - '************************************' - ) - self.Log('Final result:') - i = 1 - for r in info: - description = '\"%s\" by %s [%s]' % ( - r['title'], r['artist'], r['year'] - ) - self.Log( - ' [%s] %s. %s (%s) %s {%s} [%s]', - r['score'], i, r['title'], r['year'], - r['artist'], r['id'], r['thumb'] - ) - results.Append( - MetadataSearchResult( - id=r['id'], - name=description, - score=r['score'], - thumb=r['thumb'], - lang=lang - ) - ) - - # If there are more than one result, - # and this one has a score that is >= GOOD SCORE, - # then ignore the rest of the results - if not manual and len(info) > 1 and r['score'] >= GOOD_SCORE: - self.Log( - ' *** The score for these results are great, ' - 'so we will use them, and ignore the rest. ***' - ) - break - i += 1 - - def update(self, metadata, media, lang, force=False): - self.Log( - '***** UPDATING "%s" ID: %s - AUDIBLE v.%s *****', - media.title, metadata.id, VERSION_NO - ) - ctx = SetupUrls(Prefs['sitetype'], Prefs['site'], lang) - - # Make url - url = ctx['AUD_BOOK_INFO'] % metadata.id - - try: - html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY) - except NetworkError: - pass - - date = None - rating = None - series = '' - series2 = '' - series_def = '' - genre1 = None - genre2 = None - volume = '' - volume2 = '' - volume_def = '' + """ + Update functions that require PMS imports, + thus we cannot 'outsource' them to UpdateTool + Sorted by position in the update process + """ + def scrape_book_metadata(self, ctx, helper, html): for r in html.xpath('//div[contains (@id, "adbl_page_content")]'): + author = self.getStringContentFromXPath( + r, '//li//a[contains (@class,"author-profile-link")][1]' + ) date = self.getDateFromString( self.getStringContentFromXPath( r, u'//li[contains (., "{0}")]/span[2]//text()'.format( @@ -773,299 +544,298 @@ def update(self, metadata, media, lang, force=False): ) ) ) - title = self.getStringContentFromXPath( - r, '//h1[contains (@class, "adbl-prod-h1-title")]/text()' + genre_child = self.getStringContentFromXPath( + r, ( + '//div[contains(@class,"adbl-pd-breadcrumb")]' + '/div[3]/a/span/text()' + ) + ) + genre_parent = self.getStringContentFromXPath( + r, ( + '//div[contains(@class,"adbl-pd-breadcrumb")]' + '/div[2]/a/span/text()' + ) ) murl = self.getAnchorUrlFromXPath( r, 'div/div/div/div/a[1]' ) - thumb = self.getImageUrlFromXPath( - r, 'div/div/div/div/div/img' - ) - author = self.getStringContentFromXPath( - r, '//li//a[contains (@class,"author-profile-link")][1]' - ) narrator = self.getStringContentFromXPath( r, '//li[contains (., "{0}")]//span[2]'.format( ctx['NAR_BY_INFO'] ) ).strip().decode('utf-8') + series = self.getStringContentFromXPath( + r, '//div[contains (@class, "adbl-series-link")]//a[1]' + ) studio = self.getStringContentFromXPath( r, '//li//a[contains (@id,"PublisherSearchLink")][1]' ) synopsis = self.getStringContentFromXPath( r, '//div[contains (@class, "disc-summary")]/div[*]' ).strip() - series = self.getStringContentFromXPath( - r, '//div[contains (@class, "adbl-series-link")]//a[1]' + thumb = self.getImageUrlFromXPath( + r, 'div/div/div/div/div/img' ) - genre1 = self.getStringContentFromXPath( - r, ( - '//div[contains(@class,"adbl-pd-breadcrumb")]' - '/div[2]/a/span/text()' - ) + title = self.getStringContentFromXPath( + r, '//h1[contains (@class, "adbl-prod-h1-title")]/text()' ) - genre2 = self.getStringContentFromXPath( - r, ( - '//div[contains(@class,"adbl-pd-breadcrumb")]' - '/div[3]/a/span/text()' - ) + log.separator(msg='XPATH SEARCH HIT', log_level="debug") + + # Set values in helper object + helper.author = author + helper.date = date + helper.genre_child = genre_child + helper.genre_parent = genre_parent + # helper.url = murl + helper.narrator = narrator + helper.series = series + helper.studio = studio + helper.synopsis = synopsis + helper.thumb = thumb + helper.title = title + + def date_missing(self, helper, html): + for r in html.xpath( + '//script[contains (@type, "application/ld+json")]' + ): + page_content = r.text_content() + page_content = page_content.replace('\n', '') + # Remove any backslashes that aren't + # escaping a character JSON needs escaped + remove_inv_json_esc = re.compile( + r'([^\\])(\\(?![bfnrt\'\"\\/]|u[A-Fa-f0-9]{4}))' + ) + page_content = remove_inv_json_esc.sub(r'\1\\\2', page_content) + log.debug(page_content) + json_data = self.json_decode(page_content) + + helper.re_parse_with_date_published(json_data) + + def use_copyright_date(self, helper, html): + cstring = None + + for r in html.xpath(u'//span[contains(text(), "\xA9")]'): + cstring = self.getStringContentFromXPath( + r, u'normalize-space(//span[contains(text(), "\xA9")])' + ) + # only contains Audible copyright + if cstring.startswith(u"\xA9 "): + cstring = "" + helper.date = helper.date[:4] + + if cstring: + if "Public Domain" in cstring: + helper.date = re.match(".*\(P\)(\d{4})", cstring).group(1) + else: + if cstring.startswith(u'\xA9'): + cstring = cstring[1:] + if "(P)" in cstring: + cstring = re.match("(.*)\(P\).*", cstring).group(1) + if ";" in cstring: + helper.date = str( + min( + [int(i) for i in cstring.split() if i.isdigit()] + ) + ) + else: + helper.date = re.match(".?(\d{4}).*", cstring).group(1) + + def handle_series(self, helper, html): + for r in html.xpath('//span[contains(@class, "seriesLabel")]'): + helper.series = self.getStringContentFromXPath( + r, '//li[contains(@class, "seriesLabel")]//a[1]' ) - self.Log( - '-----------------------------------------------' - 'XPATH SEARCH HIT' - '-----------------------------------------------' + helper.series2 = self.getStringContentFromXPath( + r, '//li[contains(@class, "seriesLabel")]//a[2]' ) - if date is None: - for r in html.xpath( - '//script[contains (@type, "application/ld+json")]' - ): - page_content = r.text_content() - page_content = page_content.replace('\n', '') - # Remove any backslashes that aren't - # escaping a character JSON needs escaped - remove_inv_json_esc = re.compile( - r'([^\\])(\\(?![bfnrt\'\"\\/]|u[A-Fa-f0-9]{4}))' - ) - page_content = remove_inv_json_esc.sub(r'\1\\\2', page_content) - self.Log(page_content) - json_data = json_decode(page_content) - for json_data in json_data: - if 'datePublished' in json_data: - date = json_data['datePublished'] - title = json_data['name'] - thumb = json_data['image'] - # Set rating when available - if 'aggregateRating' in json_data: - rating = ( - json_data['aggregateRating']['ratingValue'] - ) - author = '' - counter = 0 - for c in json_data['author']: - counter += 1 - if counter > 1: - author += ', ' - author += c['name'] - narrator = '' - counter = 0 - for c in json_data['readBy']: - counter += 1 - if counter > 1: - narrator += ',' - narrator += c['name'] - studio = json_data['publisher'] - synopsis = json_data['description'] - if 'itemListElement' in json_data: - genre1 = ( - json_data['itemListElement'][1]['item']['name'] - ) - try: - genre2 = ( - json_data['itemListElement'][2]['item']['name'] - ) - except: - continue + helper.series_def = helper.series2 if helper.series2 else helper.series - # prefer copyright year over datePublished - if Prefs['copyyear']: - cstring = None + helper.volume = self.getStringContentFromXPath( + r, '//li[contains(@class, "seriesLabel")]/text()[2]' + ).strip() + if helper.volume == ",": + helper.helper.volume = "" + helper.volume2 = self.getStringContentFromXPath( + r, '//li[contains(@class, "seriesLabel")]/text()[3]' + ).strip() + if helper.volume2 == ",": + helper.volume2 = "" + + helper.volume_def = helper.helper.volume2 if helper.volume2 else helper.volume + + # fix series when audible 'forgets' the series link… + if not helper.series_def: + for r in html.xpath('//div[contains(@class, "adbl-main")]'): + subtitle = self.getStringContentFromXPath( + r, 'normalize-space(//li[contains' + '(@class, "authorLabel")]' + '//preceding::li[1]//span//text())' + ).strip() - for r in html.xpath(u'//span[contains(text(), "\xA9")]'): - cstring = self.getStringContentFromXPath( - r, u'normalize-space(//span[contains(text(), "\xA9")])' - ) - # only contains Audible copyright - if cstring.startswith(u"\xA9 "): - cstring = "" - date = date[:4] - - if cstring: - if "Public Domain" in cstring: - date = re.match(".*\(P\)(\d{4})", cstring).group(1) - else: - if cstring.startswith(u'\xA9'): - cstring = cstring[1:] - if "(P)" in cstring: - cstring = re.match("(.*)\(P\).*", cstring).group(1) - if ";" in cstring: - date = str( - min( - [int(i) for i in cstring.split() if i.isdigit()] - ) - ) - else: - date = re.match(".?(\d{4}).*", cstring).group(1) - - date = self.getDateFromString(date) - - for r in html.xpath('//span[contains(@class, "seriesLabel")]'): - series = self.getStringContentFromXPath( - r, '//li[contains(@class, "seriesLabel")]//a[1]' - ) - series2 = self.getStringContentFromXPath( - r, '//li[contains(@class, "seriesLabel")]//a[2]' - ) + w = re.match("(.*)(, Book \d+)", subtitle) + if not helper.series_def and w: + helper.series_def = w.group(1) + helper.volume_def = w.group(2) - series_def = series2 if series2 else series + def compile_metadata(self, helper): + # Set the date and year if found. + if helper.date is not None: + helper.metadata.originally_available_at = helper.date - volume = self.getStringContentFromXPath( - r, '//li[contains(@class, "seriesLabel")]/text()[2]' - ).strip() - if volume == ",": - volume = "" - volume2 = self.getStringContentFromXPath( - r, '//li[contains(@class, "seriesLabel")]/text()[3]' - ).strip() - if volume2 == ",": - volume2 = "" + # Add the genres + if not Prefs['no_overwrite_genre']: + helper.metadata.genres.clear() + helper.metadata.genres.add(helper.genre_parent) + helper.metadata.genres.add(helper.genre_child) - volume_def = volume2 if volume2 else volume + self.parse_author_narrator(helper) - # fix series when audible 'forgets' the series link… - if not series_def: - for r in html.xpath('//div[contains(@class, "adbl-main")]'): - subtitle = self.getStringContentFromXPath( - r, 'normalize-space(//li[contains' - '(@class, "authorLabel")]' - '//preceding::li[1]//span//text())' - ).strip() + self.parse_series(helper) - w = re.match("(.*)(, Book \d+)", subtitle) - if not series_def and w: - series_def = w.group(1) - volume_def = w.group(2) + # Other metadata + helper.metadata.title = helper.title + helper.metadata.title_sort = ' - '.join( + filter(None, [(helper.series_def + helper.volume_def), helper.title]) + ) + helper.metadata.studio = helper.studio + helper.metadata.summary = helper.synopsis - # cleanup synopsis - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("", "") - synopsis = synopsis.replace("
", "\n") - - self.Log('date: %s', date) - self.Log('title: %s', title) - self.Log('author: %s', author) - self.Log('series: %s', series) - self.Log('narrator: %s', narrator) - self.Log('studio: %s', studio) - self.Log('thumb: %s', thumb) - self.Log('rating: %s', rating) - self.Log('genres: %s, %s', genre1, genre2) - self.Log('synopsis: %s', synopsis) - self.Log('Series: %s', series) - self.Log('Volume: %s', volume) - self.Log('Series2: %s', series2) - self.Log('Volume2: %s', volume2) - self.Log('Series_def: %s', series_def) - self.Log('Volume_def: %s', volume_def) + if Prefs['cover_options'] == "Use Audible cover": + helper.metadata.posters[1] = Proxy.Media(HTTP.Request(helper.thumb)) + helper.metadata.posters.validate_keys(helper.thumb) + elif Prefs['cover_options'] == "Download cover but don't overwrite existing": + helper.metadata.posters[helper.thumb] = Proxy.Media( + HTTP.Request(helper.thumb), sort_order=1 + ) - # Set the date and year if found. - if date is not None: - metadata.originally_available_at = date + # Use rating only when available + if helper.rating: + helper.metadata.rating = float(helper.rating) * 2 - # Add the genres - metadata.genres.clear() - metadata.genres.add(genre1) - metadata.genres.add(genre2) + # Collections if/when Plex supports them + # https://github.com/seanap/Audiobooks.bundle/issues/1#issuecomment-713191070 + helper.metadata.collections.clear() + helper.metadata.collections.add(helper.series) + if helper.series2: + helper.metadata.collections.add(helper.series2) + helper.writeInfo() + def parse_author_narrator(self, helper): # Add Narrators to Styles - narrators_list = narrator.split(",") - contributors_list = ['full cast'] - metadata.styles.clear() - for narrators in narrators_list: + narrators_list = helper.narrator.split(",") + narr_contributors_list = [ + 'full cast' + ] + helper.metadata.styles.clear() + # Loop through narrators to check if it has contributor wording + for narrator in narrators_list: if not [ - item for item in contributors_list if item in narrators.lower() + contrib for contrib in narr_contributors_list if ( + contrib in narrator.lower() + ) ]: - metadata.styles.add(narrators.strip()) + helper.metadata.styles.add(narrator.strip()) # Add Authors to Moods - author_list = author.split(",") - contributers_list = [ + author_list = helper.author.split(",") + author_contributers_list = [ 'contributor', 'translator', 'foreword', 'translated', 'full cast', ] - metadata.moods.clear() - for authors in author_list: - metadata.moods.add(authors.strip()) - for contributors in contributers_list: - if not [ - item for item in contributers_list if item in authors.lower() - ]: - metadata.moods.add(authors) + helper.metadata.moods.clear() + # Loop through authors to check if it has contributor wording + for author in author_list: + if not [ + contrib for contrib in author_contributers_list if ( + contrib in author.lower() + ) + ]: + helper.metadata.moods.add(author.strip()) + def parse_series(self, helper): # Clean series - x = re.match("(.*)(: A .* Series)", series_def) + x = re.match("(.*)(: A .* Series)", helper.series_def) if x: - series_def = x.group(1) + helper.series_def = x.group(1) # Clean title - seriesshort = series_def + seriesshort = helper.series_def checkseries = " Series" # Handle edge cases in titles - if series_def.endswith(checkseries): - seriesshort = series_def[:-len(checkseries)] + if helper.series_def.endswith(checkseries): + seriesshort = helper.series_def[:-len(checkseries)] y = re.match( - "(.*)((: .* " + volume_def[2:] + ": A .* Series)|" - "(((:|,|-) )((" + seriesshort + volume_def + ")|" - "((? 0: - self.Log('|\\') - for i in range(len(metadata.collections)): - self.Log('| * Collection: %s', metadata.collections[i]) - - if len(metadata.genres) > 0: - self.Log('|\\') - for i in range(len(metadata.genres)): - self.Log('| * Genre: %s', metadata.genres[i]) - - if len(metadata.moods) > 0: - self.Log('|\\') - for i in range(len(metadata.moods)): - self.Log('| * Moods: %s', metadata.moods[i]) - - if len(metadata.styles) > 0: - self.Log('|\\') - for i in range(len(metadata.styles)): - self.Log('| * Styles: %s', metadata.styles[i]) - - if len(metadata.posters) > 0: - self.Log('|\\') - for poster in metadata.posters.keys(): - self.Log('| * Poster URL: %s', poster) - - if len(metadata.art) > 0: - self.Log('|\\') - for art in metadata.art.keys(): - self.Log('| * Fan art URL: %s', art) - - self.Log( - '***********************************' - '************************************' - ) - - -def safe_unicode(s, encoding='utf-8'): - if s is None: - return None - if isinstance(s, basestring): - if isinstance(s, types.UnicodeType): - return s - else: - return s.decode(encoding) - else: - return str(s).decode(encoding) diff --git a/Contents/Code/logging.py b/Contents/Code/logging.py new file mode 100644 index 0000000..d876a50 --- /dev/null +++ b/Contents/Code/logging.py @@ -0,0 +1,59 @@ +class Logging: + # Only prints message with debug mode + def debug(self, message, *args): + if Prefs['debug']: + return Log(message, *args) + + # Prints any message you give + def info(self, message, *args): + return Log(message, *args) + + # For the below logging: + # Default level is info + # Set debug by calling ('sometext', 'debug') + + # Prints a bunch of divider chars like --- + def separator(self, msg=None, log_level="info"): + divider = "-" * 35 + output = divider + divider + # Override output with message if passed + if msg: + output = divider + msg + divider + + if log_level.lower() == "debug": + return self.debug(output) + return self.info(output) + + # Loops through array of dictionaries and logs them + def metadata(self, dict_arr, log_level="info"): + # Loop through dicts in array + for log_type in dict_arr: + # Loop through each key/value + for key, val in log_type.items(): + if val: + output = "{key:<20}{val}".format( + key=key, + val=val + ) + if log_level.lower() == "debug": + self.debug(output) + else: + self.info(output) + + def metadata_arrs(self, dict_arr, log_level="info"): + # Loop through dicts in array + for log_type in dict_arr: + # Loop through each key/value + for key, val in log_type.items(): + if val: + # Loop through dict's array + for item in val: + output = ("{key:<20}{val}".format( + key=key, + val=item + ) + ) + if log_level.lower() == "debug": + self.debug(output) + else: + self.info(output) diff --git a/Contents/Code/search_tools.py b/Contents/Code/search_tools.py new file mode 100644 index 0000000..168fe28 --- /dev/null +++ b/Contents/Code/search_tools.py @@ -0,0 +1,103 @@ +import re +# Import internal tools +from logging import Logging + +# Setup logger +log = Logging() + + +class SearchTool: + def __init__(self, lang, manual, media, results): + self.lang = lang + self.manual = manual + self.media = media + self.results = results + + def get_id_from_url(self, item): + url = item['url'] + log.debug('URL For Breakdown: %s', url) + + # Find ASIN before ? in URL + asin = re.search(r'[0-9A-Z]{9}.+?(?=\?)', url).group(0) + if asin: + return asin + + log.info('No Match: %s', url) + return None + + def pre_search_logging(self): + log.separator(msg='ALBUM SEARCH', log_level="info") + # Log basic metadata + data_to_log = [ + {'ID': self.media.parent_metadata.id}, + {'Title': self.media.title}, + {'Name': self.media.name}, + {'Album': self.media.album}, + {'Artist': self.media.artist}, + ] + log.metadata(data_to_log) + log.separator(log_level="info") + + # Handle a couple of edge cases where + # album search will give bad results. + if self.media.album is None and not self.manual: + log.info('Album Title is NULL on an automatic search. Returning') + return + if self.media.album == '[Unknown Album]' and not self.manual: + log.info( + 'Album Title is [Unknown Album]' + ' on an automatic search. Returning' + ) + return + + if self.manual: + log.separator(msg="NOTE", log_level="info") + log.info( + 'You clicked \'fix match\'. ' + 'This may have returned no useful results because ' + 'it\'s searching using the title of the first track.' + ) + log.info( + 'There\'s not currently a way around this initial failure. ' + 'But clicking \'Search Options\' and ' + 'entering the title works just fine.' + ) + log.info( + 'This message will appear during the initial ' + 'search and the actual manual search.' + ) + # If this is a custom search, + # use the user-entered name instead of the scanner hint. + if self.media.name: + log.info( + 'Custom album search for: ' + self.media.name + ) + self.media.album = self.media.name + + def strip_title(self, normalizedName): + if len(normalizedName) == 0: + normalizedName = self.media.album + log.debug( + 'normalizedName = %s', normalizedName + ) + + # Chop off "unabridged" + normalizedName = re.sub( + r"[\(\[].*?[\)\]]", "", normalizedName + ) + log.debug( + 'chopping bracketed text = %s', normalizedName + ) + normalizedName = normalizedName.strip() + log.debug( + 'normalizedName stripped = %s', normalizedName + ) + + log.separator( + msg=( + "SEARCHING FOR " + '"' + normalizedName + '"' + ), + log_level="info" + ) + # Give access of this variable to the class + self.normalizedName = normalizedName diff --git a/Contents/Code/update_tools.py b/Contents/Code/update_tools.py new file mode 100644 index 0000000..569849f --- /dev/null +++ b/Contents/Code/update_tools.py @@ -0,0 +1,85 @@ +# Import internal tools +from logging import Logging + +# Setup logger +log = Logging() + + +class UpdateTool: + def __init__(self, force, lang, media, metadata, url): + self.date = None + self.force = force + self.genre_child = None + self.genre_parent = None + self.lang = lang + self.media = media + self.metadata = metadata + self.rating = None + self.series = '' + self.series2 = '' + self.series_def = '' + self.url = url + self.volume = '' + self.volume2 = '' + self.volume_def = '' + + def re_parse_with_date_published(self, json_data): + for data in json_data: + if 'datePublished' in data: + self.date = data['datePublished'] + self.title = data['name'] + self.thumb = data['image'] + # Set rating when available + if 'aggregateRating' in data: + self.rating = ( + data['aggregateRating']['ratingValue'] + ) + author_array = [] + for c in data['author']: + author_array.append(c['name']) + self.author = ",".join(author_array) + + narrator_array = [] + for c in data['readBy']: + narrator_array.append(c['name']) + self.narrator = ",".join(narrator_array) + self.studio = data['publisher'] + self.synopsis = data['description'] + if 'itemListElement' in data: + self.genre_parent = ( + data['itemListElement'][1]['item']['name'] + ) + try: + self.genre_child = ( + data['itemListElement'][2]['item']['name'] + ) + except AttributeError: + continue + + # Writes metadata information to log. + def writeInfo(self): + log.separator(msg='New data', log_level="info") + + # Log basic metadata + data_to_log = [ + {'ID': self.metadata.id}, + {'URL': self.url}, + {'Title': self.metadata.title}, + {'Release date': str(self.metadata.originally_available_at)}, + {'Studio': self.metadata.studio}, + {'Summary': self.metadata.summary}, + {'Poster URL': self.thumb}, + ] + log.metadata(data_to_log, log_level="info") + + # Log basic metadata stored in arrays + multi_arr = [ + # {'Collection': self.metadata.collections}, + {'Genre': self.metadata.genres}, + {'Moods(Authors)': self.metadata.moods}, + {'Styles(Narrators)': self.metadata.styles}, + # {'Fan art URL': self.metadata.art}, + ] + log.metadata_arrs(multi_arr, log_level="info") + + log.separator(log_level="info") \ No newline at end of file diff --git a/Contents/Code/urls.py b/Contents/Code/urls.py new file mode 100644 index 0000000..bba9006 --- /dev/null +++ b/Contents/Code/urls.py @@ -0,0 +1,196 @@ +from logging import Logging + + +class SiteUrl: + intl_sites = { + 'en': { + 'url': 'www.audible.com', + 'urltitle': u'title=', + 'rel_date': u'Release date', + 'nar_by': u'Narrated By', + 'nar_by2': u'Narrated by' + }, + 'fr': { + 'url': 'www.audible.fr', + 'urltitle': u'title=', + 'rel_date': u'Date de publication', + 'nar_by': u'Narrateur(s)', + 'nar_by2': u'Lu par' + }, + 'de': { + 'url': 'www.audible.de', + 'urltitle': u'title=', + 'rel_date': u'Erscheinungsdatum', + 'nar_by': u'Gesprochen von', + 'rel_date2': u'Veröffentlicht' + }, + 'it': { + 'url': 'www.audible.it', + 'urltitle': u'title=', + 'rel_date': u'Data di Pubblicazione', + 'nar_by': u'Narratore' + }, + } + + sites_langs = { + 'www.audible.com': {'lang': 'en'}, + 'www.audible.ca': {'lang': 'en'}, + 'www.audible.co.uk': {'lang': 'en'}, + 'www.audible.com.au': {'lang': 'en'}, + 'www.audible.fr': {'lang': 'fr'}, + 'www.audible.de': {'lang': 'de'}, + 'www.audible.it': {'lang': 'it'}, + } + + def __init__(self, sitetype, base, lang='en'): + self.sitetype = sitetype + self.base = base + self.lang = lang + + def set_context_urls(self): + AUD_BASE_URL = 'https://' + str(self.base) + '/' + AUD_TITLE_URL = self.urlsearchtitle + + AUD_BOOK_INFO_ARR = [ + AUD_BASE_URL, + 'pd/%s?ipRedirectOverride=true', + ] + self.context['AUD_BOOK_INFO'] = ''.join( + AUD_BOOK_INFO_ARR + ) + + AUD_ARTIST_SEARCH_URL_ARR = [ + AUD_BASE_URL, + 'search?searchAuthor=%s&ipRedirectOverride=true', + ] + self.context['AUD_ARTIST_SEARCH_URL'] = ''.join( + AUD_ARTIST_SEARCH_URL_ARR + ) + + AUD_ALBUM_SEARCH_URL_ARR = [ + AUD_BASE_URL, + 'search?', + AUD_TITLE_URL, + '%s&x=41&ipRedirectOverride=true', + ] + self.context['AUD_ALBUM_SEARCH_URL'] = ''.join( + AUD_ALBUM_SEARCH_URL_ARR + ) + + AUD_KEYWORD_SEARCH_URL_ARR = [ + AUD_BASE_URL, + ('search?filterby=field-keywords&advsearchKeywords=%s' + '&x=41&ipRedirectOverride=true'), + ] + self.context['AUD_KEYWORD_SEARCH_URL'] = ''.join( + AUD_KEYWORD_SEARCH_URL_ARR + ) + + AUD_SEARCH_URL_ARR = [ + AUD_BASE_URL, + 'search?', + AUD_TITLE_URL, + '{0}&searchAuthor={1}&x=41&ipRedirectOverride=true', + ] + self.context['AUD_SEARCH_URL'] = ''.join(AUD_SEARCH_URL_ARR) + + def base_is_manual(self): + if self.base in self.sites_langs: + log.debug('Pulling language from sites array') + self.lang = self.sites_langs[self.base]['lang'] + if self.lang in self.intl_sites: + self.base = self.intl_sites[self.lang]['url'] + self.urlsearchtitle = ( + self.intl_sites[self.lang]['urltitle'] + ) + self.context['REL_DATE'] = ( + self.intl_sites[self.lang]['rel_date'] + ) + self.context['NAR_BY'] = ( + self.intl_sites[self.lang]['nar_by'] + ) + if 'rel_date2' in self.intl_sites[self.lang]: + self.context['REL_DATE_INFO'] = ( + self.intl_sites[self.lang]['rel_date2'] + ) + else: + self.context['REL_DATE_INFO'] = ( + self.context['REL_DATE'] + ) + if 'nar_by2' in self.intl_sites[self.lang]: + self.context['NAR_BY_INFO'] = ( + self.intl_sites[self.lang]['nar_by2'] + ) + else: + self.context['NAR_BY_INFO'] = self.context['NAR_BY'] + else: + self.context['REL_DATE'] = 'Release date' + self.context['REL_DATE_INFO'] = self.context['REL_DATE'] + self.context['NAR_BY'] = 'Narrated By' + self.context['NAR_BY_INFO'] = 'Narrated by' + + # Log translations of certain terms + log.separator(msg='LANG DEBUGGING', log_level="debug") + data_to_log = [ + {'Sites language is': self.lang}, + {'REL_DATE': self.context['REL_DATE']}, + {'REL_DATE_INFO': self.context['REL_DATE_INFO']}, + {'NAR_BY date': self.context['NAR_BY']}, + {'NAR_BY_INFO': self.context['NAR_BY_INFO']}, + ] + log.metadata(data_to_log, log_level="debug") + log.separator(log_level="debug") + + def base_is_auto(self): + log.debug( + 'Audible site will be chosen by library language' + ) + log.debug( + 'Library Language is %s', self.lang + ) + if self.base is None: + self.base = 'www.audible.com' + if self.lang in self.intl_sites: + self.base = self.intl_sites[self.lang]['url'] + self.urlsearchtitle = self.intl_sites[self.lang]['urltitle'] + self.context['REL_DATE'] = ( + self.intl_sites[self.lang]['rel_date'] + ) + self.context['NAR_BY'] = self.intl_sites[self.lang]['nar_by'] + if 'rel_date2' in self.intl_sites[self.lang]: + self.context['REL_DATE_INFO'] = ( + self.intl_sites[self.lang]['rel_date2'] + ) + else: + self.context['REL_DATE_INFO'] = ( + self.context['REL_DATE'] + ) + if 'nar_by2' in self.intl_sites[self.lang]: + self.context['NAR_BY_INFO'] = ( + self.intl_sites[self.lang]['nar_by2'] + ) + else: + self.context['NAR_BY_INFO'] = self.context['NAR_BY'] + else: + self.context['REL_DATE'] = 'Release date' + self.context['REL_DATE_INFO'] = self.context['REL_DATE'] + self.context['NAR_BY'] = 'Narrated By' + self.context['NAR_BY_INFO'] = 'Narrated by' + + def SetupUrls(self): + log.debug('Library/Search language is : %s', self.lang) + self.context = {} + if self.sitetype: + log.debug('Manual Site Selection Enabled : %s', self.base) + log.debug('Language being ignored due to manual site selection') + self.base_is_manual() + else: + self.base_is_auto() + + self.set_context_urls() + + return self.context + + +# Setup logger +log = Logging() diff --git a/Contents/DefaultPrefs.json b/Contents/DefaultPrefs.json index c7ee786..4a15b75 100644 --- a/Contents/DefaultPrefs.json +++ b/Contents/DefaultPrefs.json @@ -6,11 +6,34 @@ "id" : "site", "label" : "Select Audible site to use: ", "type" : "enum", - "values" : ["www.audible.com","www.audible.co.uk","www.audible.com.au","www.audible.de","www.audible.fr","www.audible.it"], + "values" : [ + "www.audible.com", + "www.audible.ca", + "www.audible.co.uk", + "www.audible.com.au", + "www.audible.de", + "www.audible.fr", + "www.audible.it" + ], "default" : "www.audible.com" +},{ + "id": "cover_options", + "label": "How to handle artwork from Audible: ", + "type": "enum", + "values": [ + "Use Audible cover", + "Download cover but don't overwrite existing", + "Don't download cover" + ], + "default": "Use Audible cover" +},{ + "id": "no_overwrite_genre", + "label": "Leave existing genres in place", + "type": "bool", + "default": "false" },{ "id": "copyyear", - "label": "Uses copyright year instead of datePublished", + "label": "Use copyright year instead of datePublished", "type": "bool", "default": "false" },{