diff --git a/Downloader.py b/Downloader.py
index adfe7f3..51ecbcf 100644
--- a/Downloader.py
+++ b/Downloader.py
@@ -7,8 +7,12 @@
 from concurrent.futures import ThreadPoolExecutor
 from tqdm import tqdm
 import aiohttp
 import asyncio
+import configparser
 
-MAX_RETRIES = 2
+config = configparser.ConfigParser()
+config.read('settings.ini')
+
+MAX_RETRIES = int(config['Retries'].get('max_retries', fallback=2))
 
 url = input("Enter the URL: ")
@@ -37,7 +41,6 @@ failed_urls = []
 
 # Lock to prevent concurrent printing of error messages
 print_lock = asyncio.Lock()
-
 # Function to fetch HTML content asynchronously
 async def async_get_html_content(url):
     headers = {
@@ -52,9 +55,8 @@
     except aiohttp.ClientError as e:
         return None
 
-
 # Function to find and save FLAC, MP3, or M4A URLs and album name
-def find_audio_urls_and_album_name(html_content):
+def find_audio_urls_and_album_name(html_content, priority_order):
     audio_urls = []
     album_name = None
 
@@ -63,14 +65,15 @@
     # Find all links in the page
     links = soup.find_all("a", href=True)
 
-    for link in links:
-        href = link.get("href")
-        if href.endswith(".flac"):
-            audio_url = href
-            audio_urls = [audio_url]
-            break
-        elif href.endswith(".mp3") or href.endswith(".m4a"):
-            audio_urls.append(href)
+
+    for file_type in priority_order:
+        for link in links:
+            href = link.get("href")
+            if href.endswith(f".{file_type}"):
+                audio_urls.append(href)
+
+        if audio_urls:
+            break  # If any URLs are found for the current file type, break the loop
 
     # Extract album name
     album_name_element = soup.select_one("#pageContent > p:nth-child(6) > b:nth-child(1)")
@@ -79,7 +82,6 @@
 
     return audio_urls, album_name
 
-
 # Function to download a file asynchronously with retry
 async def async_download_audio_file(session, url, directory, total_progress):
     retries = 0
@@ -106,16 +108,22 @@
                 failed_urls.append(url)
                 break  # Break the loop if max retries reached
 
-
 # Function to process a single URL asynchronously
-async def async_process_url(session, url, total_progress):
+async def async_process_url(session, url, total_progress, priority_order, download_path):
     html_content = await async_get_html_content(url)
-    audio_urls, album_name = find_audio_urls_and_album_name(html_content)
+    audio_urls, album_name = find_audio_urls_and_album_name(html_content, priority_order)
 
     if audio_urls and album_name:
         illegal_characters = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
         sanitized_album_name = "".join(c if c.isalnum() or c not in illegal_characters else ' ' for c in album_name)
-        album_directory = os.path.join('Audio files', sanitized_album_name)
+
+        # Use custom download path if provided, otherwise default to 'Audio files'
+        if download_path:
+            album_directory = os.path.join(download_path, sanitized_album_name)
+        else:
+            script_directory = os.path.dirname(os.path.realpath(__file__))
+            album_directory = os.path.join(script_directory, 'Audio files', sanitized_album_name)
+
         os.makedirs(album_directory, exist_ok=True)
 
         for audio_url in audio_urls:
@@ -131,12 +139,19 @@
 
     return num_threads
 
-
 async def main():
     cpu_threads = get_cpu_threads()
+    max_workers = int(config['Threads'].get('max_workers', fallback=cpu_threads))
+
+    if max_workers == 0:
+        print(f"Using all available CPU threads: {cpu_threads}")
+        max_workers = cpu_threads  # 0 in settings.ini means use every CPU thread
+    else:
+        print(f"Number of CPU threads: {cpu_threads}")
+        print(f"Max workers for ThreadPoolExecutor: {max_workers}")
 
     async with aiohttp.ClientSession() as session:
-        with ThreadPoolExecutor(max_workers=cpu_threads) as executor:
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
             total_items = len(urls)
             total_progress = tqdm(total=total_items, desc="Total Progress", position=0)
 
@@ -144,21 +159,19 @@
             loop = asyncio.get_event_loop()
 
             for url in urls:
-                # Corrected: use loop.create_task() to ensure the coroutine is awaited properly
-                future = loop.create_task(async_process_url(session, url, total_progress))
+                future = loop.create_task(async_process_url(session, url, total_progress, priority_order, download_path))
                 futures.append(future)
 
-            # Await all the futures
             await asyncio.gather(*futures)
 
             total_progress.close()
 
-            # Display error messages for failed URLs after the download is complete
             if failed_urls:
                 print("\nThe following files encountered errors during download:")
                 for failed_url in failed_urls:
                     print(f"- {failed_url}")
 
-
 if __name__ == "__main__":
+    priority_order = [ft.strip() for ft in config.get('Download', 'priority_order', fallback='flac, mp3, m4a').split(',')]
+    download_path = config.get('Download', 'download_path', fallback='')
     asyncio.run(main())
diff --git a/settings.ini b/settings.ini
new file mode 100644
index 0000000..5d9f24f
--- /dev/null
+++ b/settings.ini
@@ -0,0 +1,13 @@
+[Retries] ; Number of retries per file before the script reports an error
+max_retries = 2
+
+[Threads] ; Set this to 0 to use every CPU thread
+max_workers = 1
+
+[Download]
+priority_order = flac, mp3, m4a
+; Examples: C:\example\for\windows
+;           /example/for/linux
+; If left empty, downloads go into a folder called
+; "Audio files" in the same location as the script
+download_path = 
\ No newline at end of file
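
Review note, not part of the patch: the config plumbing above relies on two different fallback mechanisms in configparser. `SectionProxy.get(option, fallback=...)` only covers a missing *option*, so `config['Retries']` itself still raises `KeyError` when the section is absent (and `ConfigParser.read()` silently skips a missing settings.ini), whereas `ConfigParser.get(section, option, fallback=...)` degrades gracefully for both cases. A minimal standalone sketch of those semantics; the inline string stands in for settings.ini:

```python
import configparser

config = configparser.ConfigParser()
# read_string() stands in for config.read('settings.ini') so the sketch is self-contained.
config.read_string("""
[Retries]
max_retries = 2

[Download]
priority_order = flac, mp3, m4a
download_path =
""")

# SectionProxy.get: fallback applies only when the *option* is missing.
max_retries = int(config['Retries'].get('max_retries', fallback=2))

# ConfigParser.get: fallback also applies when the *section* is missing.
max_workers = config.get('Threads', 'max_workers', fallback='1')

# Stripping around ',' tolerates both 'flac, mp3' and 'flac,mp3' in the ini.
priority_order = [ft.strip() for ft in
                  config.get('Download', 'priority_order', fallback='flac, mp3, m4a').split(',')]

print(max_retries)     # 2
print(max_workers)     # '1' (fallback; there is no [Threads] section in this sketch)
print(priority_order)  # ['flac', 'mp3', 'm4a']
```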
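One behavioral consequence of the new selection loop worth spelling out: it collects *every* link of the highest-priority extension that matches at least once and ignores lower-priority formats entirely, whereas the old code kept at most one FLAC link. A self-contained illustration mirroring the patched loop, with hypothetical hrefs standing in for BeautifulSoup's `find_all("a", href=True)` results:

```python
def select_audio_urls(hrefs, priority_order):
    """Mirror of the patched loop in find_audio_urls_and_album_name."""
    audio_urls = []
    for file_type in priority_order:
        for href in hrefs:
            if href.endswith(f".{file_type}"):
                audio_urls.append(href)
        if audio_urls:
            break  # first file type with any match wins
    return audio_urls

# Hypothetical page links: one FLAC and two MP3s.
hrefs = ["/album/track1.flac", "/album/track1.mp3", "/album/track2.mp3"]

print(select_audio_urls(hrefs, ["flac", "mp3", "m4a"]))  # ['/album/track1.flac']
print(select_audio_urls(hrefs, ["mp3", "flac", "m4a"]))  # ['/album/track1.mp3', '/album/track2.mp3']
```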