From 38a771eec307fd1c110477a3503acbfa16510db2 Mon Sep 17 00:00:00 2001 From: Levi Rizki Saputra Date: Tue, 2 Apr 2019 19:17:01 +0700 Subject: [PATCH 1/3] support download using ids and fix some mistake --- .gitignore | 9 +++++++ README.md | 1 + config.py | 6 +++++ main.py | 72 ++++++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 72 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 894a44c..3ccf38f 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,12 @@ venv.bak/ # mypy .mypy_cache/ + +# VSCode +.vscode + +# SonarLint +.sonarlint + +# Downloaded Media +media/ \ No newline at end of file diff --git a/README.md b/README.md index acd0b82..4e8da23 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Since PacktPub restructured their website [packtpub-library-downloader](https:// - *-s*, *--separate* = Create a separate directory for each book - *-v*, *--verbose* = Show more detailed information - *-q*, *--quiet* = Don't show information or progress bars +- *-i*, *--ids* = Products to download by id (If it is not specified, it will download all products that you have purchased) **Book File Types** diff --git a/config.py b/config.py index e0861fd..093358e 100644 --- a/config.py +++ b/config.py @@ -9,12 +9,18 @@ # this is base url where i do the requests BASE_URL = "https://services.packtpub.com/" +# this is base url for static content +BASE_STATIC_URL = "https://static.packt-cdn.com/" + # URL to request jwt token, params by post are user and pass, return jwt token AUTH_ENDPOINT = "auth-v1/users/tokens" # URL to get all your books, two params that i change are offset and limit, method GET PRODUCTS_ENDPOINT = "entitlements-v1/users/me/products?sort=createdAt:DESC&offset={offset}&limit={limit}" +# URL to get book information from id +PRODUCT_FROM_ID_ENDPOINT = "products/{id}/summary" + # URL to get types , param is book id, method GET URL_BOOK_TYPES_ENDPOINT = "products-v1/products/{book_id}/types" diff --git a/main.py b/main.py index 817afe2..a0dd798 100755 --- a/main.py +++ b/main.py @@ -9,7 +9,7 @@ import getopt import requests from tqdm import tqdm, trange -from config import BASE_URL, PRODUCTS_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT +from config import BASE_URL, BASE_STATIC_URL, PRODUCTS_ENDPOINT, PRODUCT_FROM_ID_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT from user import User @@ -18,12 +18,24 @@ def book_request(user, offset=0, limit=10, verbose=False): data = [] url = BASE_URL + PRODUCTS_ENDPOINT.format(offset=offset, limit=limit) if verbose: - print(url) + tqdm.write(url) r = requests.get(url, headers=user.get_header()) data += r.json().get('data', []) return url, r, data +def book_from_id_request(id, verbose=False): + url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id = id) + if verbose: + tqdm.write(url) + + r = requests.get(url) + rjson = r.json() + data = {'productId': id, 'productName': rjson.get('title')} + + return url, r, data + + def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): ''' Request all your books, return json with info of all your books @@ -50,6 +62,27 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): data += book_request(user, offset, limit, is_verbose)[2] return data +def get_books_from_ids(ids, is_verbose=False, is_quiet=False): + ''' + Get all boooks from id + Params + ... + ids : list + ''' + + data = [] + + print("Getting list of books...") + + if not is_quiet: + id_iter = tqdm(ids, unit="Pages") + else: + id_iter = ids + + for id in id_iter: + data.append(book_from_id_request(id, is_verbose)[2]) + + return data def get_url_book(user, book_id, format='pdf'): ''' @@ -66,9 +99,9 @@ def get_url_book(user, book_id, format='pdf'): user.refresh_header() # refresh token get_url_book(user, book_id, format) # call recursive - print('ERROR (please copy and paste in the issue)') - print(r.json()) - print(r.status_code) + tqdm.write('ERROR (please copy and paste in the issue)') + tqdm.write(r.json()) + tqdm.write(r.status_code) return '' @@ -87,9 +120,9 @@ def get_book_file_types(user, book_id): user.refresh_header() # refresh token get_book_file_types(user, book_id, format) # call recursive - print('ERROR (please copy and paste in the issue)') - print(r.json()) - print(r.status_code) + tqdm.write('ERROR (please copy and paste in the issue)') + tqdm.write(r.json()) + tqdm.write(r.status_code) return [] @@ -98,7 +131,7 @@ def download_book(filename, url): ''' Download your book ''' - print('Starting to download ' + filename) + tqdm.write('Starting to download ' + filename) with open(filename, 'wb') as f: r = requests.get(url, stream=True) @@ -112,7 +145,7 @@ def download_book(filename, url): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() - print('Finished ' + filename) + tqdm.write('Finished ' + filename) def make_zip(filename): @@ -129,8 +162,8 @@ def move_current_files(root, book): except OSError: os.rename(f, f'{sub_dir}/{book}' + '_1' + f[f.index('.'):]) except ValueError as e: - print(e) - print('Skipping') + tqdm.write(e) + tqdm.write('Skipping') def does_dir_exist(directory): @@ -151,12 +184,13 @@ def main(argv): separate = None verbose = None quiet = None - errorMessage = 'Usage: main.py -e -p [-d -b -s -v -q]' + download_ids = None + errorMessage = 'Usage: main.py -e -p [-d -b -i -s -v -q]' # get the command line arguments/options try: opts, args = getopt.getopt( - argv, 'e:p:d:b:svq', ['email=', 'pass=', 'directory=', 'books=', 'separate', 'verbose', 'quiet']) + argv, 'e:p:d:b:i:svq', ['email=', 'pass=', 'directory=', 'books=', 'ids=', 'separate', 'verbose', 'quiet']) except getopt.GetoptError: print(errorMessage) sys.exit(2) @@ -178,6 +212,9 @@ def main(argv): verbose = True elif opt in ('-q', '--quiet'): quiet = True + elif opt in ('-i', '--ids'): + download_ids = arg.split(',') + if verbose and quiet: print("Verbose and quiet cannot be used together.") @@ -195,8 +232,11 @@ def main(argv): user = User(email, password) # get all your books - books = get_books(user, is_verbose=verbose, is_quiet=quiet) - print('Downloading books...') + if (download_ids): + books = get_books_from_ids(download_ids, is_verbose=verbose, is_quiet=quiet) + else: + books = get_books(user, is_verbose=verbose, is_quiet=quiet) + tqdm.write('Downloading books...') if not quiet: books_iter = tqdm(books, unit='Book') else: From 002f9f9ff8ee320b93dff8713809a46512775428 Mon Sep 17 00:00:00 2001 From: Levi Rizki Saputra Date: Wed, 3 Apr 2019 10:15:28 +0700 Subject: [PATCH 2/3] improving code quolity --- main.py | 205 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 137 insertions(+), 68 deletions(-) diff --git a/main.py b/main.py index a0dd798..47e92ea 100755 --- a/main.py +++ b/main.py @@ -12,8 +12,10 @@ from config import BASE_URL, BASE_STATIC_URL, PRODUCTS_ENDPOINT, PRODUCT_FROM_ID_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT from user import User +error_message = 'Usage: main.py -e -p [-d -b -i -s -v -q]' -#TODO: I should do a function that his only purpose is to request and return data + +# TODO: I should do a function that his only purpose is to request and return data def book_request(user, offset=0, limit=10, verbose=False): data = [] url = BASE_URL + PRODUCTS_ENDPOINT.format(offset=offset, limit=limit) @@ -24,15 +26,16 @@ def book_request(user, offset=0, limit=10, verbose=False): return url, r, data + def book_from_id_request(id, verbose=False): - url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id = id) + url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id=id) if verbose: tqdm.write(url) r = requests.get(url) rjson = r.json() data = {'productId': id, 'productName': rjson.get('title')} - + return url, r, data @@ -47,12 +50,12 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): how many book wanna get by request ''' # TODO: given x time jwt expired and should refresh the header, user.refresh_header() - + url, r, data = book_request(user, offset, limit) - + print(f'You have {str(r.json()["count"])} books') print("Getting list of books...") - + if not is_quiet: pages_list = trange(r.json()['count'] // limit, unit='Pages') else: @@ -62,6 +65,7 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): data += book_request(user, offset, limit, is_verbose)[2] return data + def get_books_from_ids(ids, is_verbose=False, is_quiet=False): ''' Get all boooks from id @@ -73,32 +77,33 @@ def get_books_from_ids(ids, is_verbose=False, is_quiet=False): data = [] print("Getting list of books...") - + if not is_quiet: - id_iter = tqdm(ids, unit="Pages") + id_iter = tqdm(ids, unit="Pages") else: - id_iter = ids + id_iter = ids for id in id_iter: data.append(book_from_id_request(id, is_verbose)[2]) - + return data + def get_url_book(user, book_id, format='pdf'): ''' Return url of the book to download ''' - + url = BASE_URL + URL_BOOK_ENDPOINT.format(book_id=book_id, format=format) r = requests.get(url, headers=user.get_header()) - if r.status_code == 200: # success + if r.status_code == 200: # success return r.json().get('data', '') - elif r.status_code == 401: # jwt expired - user.refresh_header() # refresh token - get_url_book(user, book_id, format) # call recursive - + elif r.status_code == 401: # jwt expired + user.refresh_header() # refresh token + get_url_book(user, book_id, format) # call recursive + tqdm.write('ERROR (please copy and paste in the issue)') tqdm.write(r.json()) tqdm.write(r.status_code) @@ -113,13 +118,13 @@ def get_book_file_types(user, book_id): url = BASE_URL + URL_BOOK_TYPES_ENDPOINT.format(book_id=book_id) r = requests.get(url, headers=user.get_header()) - if (r.status_code == 200): # success + if (r.status_code == 200): # success return r.json()['data'][0].get('fileTypes', []) - - elif (r.status_code == 401): # jwt expired - user.refresh_header() # refresh token - get_book_file_types(user, book_id, format) # call recursive - + + elif (r.status_code == 401): # jwt expired + user.refresh_header() # refresh token + get_book_file_types(user, book_id, format) # call recursive + tqdm.write('ERROR (please copy and paste in the issue)') tqdm.write(r.json()) tqdm.write(r.status_code) @@ -127,9 +132,9 @@ def get_book_file_types(user, book_id): # TODO: i'd like that this functions be async and download faster -def download_book(filename, url): +def download_file(filename, url): ''' - Download your book + Download file ''' tqdm.write('Starting to download ' + filename) @@ -141,13 +146,27 @@ def download_book(filename, url): else: total = int(total) # TODO: read more about tqdm - for chunk in tqdm(r.iter_content(chunk_size=1024), total=math.ceil(total//1024), unit='KB', unit_scale=True): + for chunk in tqdm( + r.iter_content(chunk_size=1024), + total=math.ceil(total//1024), + unit='KB', + unit_scale=True + ): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() tqdm.write('Finished ' + filename) +def get_book_name(book, file_type): + book_name = book['productName'].replace( + ' ', '_').replace('.', '_').replace(':', '_') + if file_type == 'video' or file_type == 'code': + return book_name, book_name + '.' + file_type + '.zip' + else: + return book_name, book_name + '.' + file_type + + def make_zip(filename): if filename[-4:] == 'code': os.replace(filename, filename[:-4] + 'zip') @@ -166,34 +185,91 @@ def move_current_files(root, book): tqdm.write('Skipping') +def download_book_by_type(user, book, file_type, separate, root_directory, verbose=False): + book_name, book_filename = get_book_name(book, file_type) + if separate: + filename = f'{root_directory}/{book_name}/{book_filename}' + move_current_files(root_directory, book_name) + else: + filename = f'{root_directory}/{book_filename}' + # get url of the book to download + url = get_url_book(user, book['productId'], file_type) + if not os.path.exists(filename): + download_file(filename, url) + else: + if verbose: + tqdm.write(f'{filename} already exists, skipping.') + + +def downlaod_all_books(user, books, book_file_types, separate, root_directory, verbose=False, quiet=False): + tqdm.write('Downloading books...') + if not quiet: + books_iter = tqdm(books, unit='Book') + else: + books_iter = books + for book in books_iter: + # get the different file type of current book + file_types = get_book_file_types(user, book['productId']) + for file_type in file_types: + if file_type in book_file_types: # check if the file type entered is available by the current book + download_book_by_type( + user, book, file_type, separate, root_directory, verbose) + + def does_dir_exist(directory): + # Check if directory not exists if not os.path.exists(directory): try: + # try making dir if not exists os.makedirs(directory) except Exception as e: print(e) sys.exit(2) -def main(argv): - # thanks to https://github.com/ozzieperez/packtpub-library-downloader/blob/master/downloader.py +def get_opts_args(argv): + try: + return getopt.getopt( + argv, + 'e:p:d:b:i:svq', + [ + 'email=', + 'pass=', + 'directory=', + 'books=', + 'ids=', + 'separate', + 'verbose', + 'quiet' + ] + ) + except getopt.GetoptError: + print(error_message) + sys.exit(2) + +def check_arg(email, password, verbose, quiet): + # Is this true? + if verbose and quiet: + print("Verbose and quiet cannot be used together.") + sys.exit(2) + + # do we have the minimum required info? + if not email or not password: + print(error_message) + sys.exit(2) + +def parse_args(argv): email = None password = None - root_directory = 'media' + root_directory = 'media' book_file_types = ['pdf', 'mobi', 'epub', 'code'] separate = None verbose = None quiet = None download_ids = None - errorMessage = 'Usage: main.py -e -p [-d -b -i -s -v -q]' - # get the command line arguments/options - try: - opts, args = getopt.getopt( - argv, 'e:p:d:b:i:svq', ['email=', 'pass=', 'directory=', 'books=', 'ids=', 'separate', 'verbose', 'quiet']) - except getopt.GetoptError: - print(errorMessage) - sys.exit(2) + # get all options from argument + opts, args = get_opts_args(argv) # hold the values of the command line options for opt, arg in opts: @@ -215,15 +291,27 @@ def main(argv): elif opt in ('-i', '--ids'): download_ids = arg.split(',') + check_arg(email, password, verbose, quiet) - if verbose and quiet: - print("Verbose and quiet cannot be used together.") - sys.exit(2) + return email, \ + password, \ + root_directory, \ + book_file_types, \ + separate, verbose, \ + quiet, \ + download_ids - # do we have the minimum required info? - if not email or not password: - print(errorMessage) - sys.exit(2) + +def main(argv): + # thanks to https://github.com/ozzieperez/packtpub-library-downloader/blob/master/downloader.py + email, \ + password, \ + root_directory, \ + book_file_types, \ + separate, \ + verbose, \ + quiet, \ + download_ids = parse_args(argv) # check if not exists dir and create does_dir_exist(root_directory) @@ -233,33 +321,14 @@ def main(argv): # get all your books if (download_ids): - books = get_books_from_ids(download_ids, is_verbose=verbose, is_quiet=quiet) + books = get_books_from_ids( + download_ids, is_verbose=verbose, is_quiet=quiet) else: books = get_books(user, is_verbose=verbose, is_quiet=quiet) - tqdm.write('Downloading books...') - if not quiet: - books_iter = tqdm(books, unit='Book') - else: - books_iter = books - for book in books_iter: - # get the different file type of current book - file_types = get_book_file_types(user, book['productId']) - for file_type in file_types: - if file_type in book_file_types: # check if the file type entered is available by the current book - book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_') - if separate: - filename = f'{root_directory}/{book_name}/{book_name}.{file_type}' - move_current_files(root_directory, book_name) - else: - filename = f'{root_directory}/{book_name}.{file_type}' - # get url of the book to download - url = get_url_book(user, book['productId'], file_type) - if not os.path.exists(filename) and not os.path.exists(filename.replace('.code', '.zip')): - download_book(filename, url) - make_zip(filename) - else: - if verbose: - tqdm.write(f'{filename} already exists, skipping.') + + # downloading all books + downlaod_all_books(user, books, book_file_types, + separate, root_directory, verbose, quiet) if __name__ == '__main__': From dffe04a382bcca896cd77112ec6ca6afe9df121b Mon Sep 17 00:00:00 2001 From: Levi Rizki Saputra Date: Wed, 3 Apr 2019 11:18:25 +0700 Subject: [PATCH 3/3] fix downloading file size progress --- main.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 47e92ea..cdf2075 100755 --- a/main.py +++ b/main.py @@ -145,16 +145,18 @@ def download_file(filename, url): f.write(response.content) else: total = int(total) - # TODO: read more about tqdm - for chunk in tqdm( - r.iter_content(chunk_size=1024), - total=math.ceil(total//1024), + progress = tqdm( + total=math.ceil(total), unit='KB', - unit_scale=True - ): + unit_scale=True, + mininterval=1 + ) + for chunk in r.iter_content(chunk_size=1024): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() + progress.update(1024) + progress.close() tqdm.write('Finished ' + filename)