Added a settings file

Added a settings file to configure the download path, the number of download threads, the priority order of the downloads, and the number of retries before giving an error
This commit is contained in:
Olai Vike Bøe 2024-01-17 13:00:02 +01:00
parent 8c3aadf845
commit f13212d408
2 changed files with 50 additions and 24 deletions


@@ -7,8 +7,12 @@ from concurrent.futures import ThreadPoolExecutor
 from tqdm import tqdm
 import aiohttp
 import asyncio
+import configparser
 
-MAX_RETRIES = 2
+config = configparser.ConfigParser()
+config.read('settings.ini')
+MAX_RETRIES = int(config['Retries'].get('max_retries', fallback=2))
 
 url = input("Enter the URL: ")
@@ -37,7 +41,6 @@ failed_urls = []
 # Lock to prevent concurrent printing of error messages
 print_lock = asyncio.Lock()
 
 # Function to fetch HTML content asynchronously
 async def async_get_html_content(url):
     headers = {
@@ -52,9 +55,8 @@ async def async_get_html_content(url):
     except aiohttp.ClientError as e:
         return None
 
 # Function to find and save FLAC, MP3, or M4A URLs and album name
-def find_audio_urls_and_album_name(html_content):
+def find_audio_urls_and_album_name(html_content, priority_order):
     audio_urls = []
     album_name = None
@@ -63,14 +65,15 @@ def find_audio_urls_and_album_name(html_content):
     # Find all links in the page
     links = soup.find_all("a", href=True)
 
-    for link in links:
-        href = link.get("href")
-        if href.endswith(".flac"):
-            audio_url = href
-            audio_urls = [audio_url]
-            break
-        elif href.endswith(".mp3") or href.endswith(".m4a"):
-            audio_urls.append(href)
-            break
+    for file_type in priority_order:
+        for link in links:
+            href = link.get("href")
+            if href.endswith(f".{file_type}"):
+                audio_urls.append(href)
+
+        if audio_urls:
+            break  # If any URLs are found for the current file type, break the loop
 
     # Extract album name
     album_name_element = soup.select_one("#pageContent > p:nth-child(6) > b:nth-child(1)")
@@ -79,7 +82,6 @@ def find_audio_urls_and_album_name(html_content):
     return audio_urls, album_name
 
 # Function to download a file asynchronously with retry
 async def async_download_audio_file(session, url, directory, total_progress):
     retries = 0
@@ -106,16 +108,22 @@ async def async_download_audio_file(session, url, directory, total_progress):
                 failed_urls.append(url)
                 break  # Break the loop if max retries reached
 
 # Function to process a single URL asynchronously
-async def async_process_url(session, url, total_progress):
+async def async_process_url(session, url, total_progress, priority_order, download_path):
     html_content = await async_get_html_content(url)
-    audio_urls, album_name = find_audio_urls_and_album_name(html_content)
+    audio_urls, album_name = find_audio_urls_and_album_name(html_content, priority_order)
 
     if audio_urls and album_name:
         illegal_characters = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
         sanitized_album_name = "".join(c if c.isalnum() or c not in illegal_characters else ' ' for c in album_name)
-        album_directory = os.path.join('Audio files', sanitized_album_name)
+
+        # Use custom download path if provided, otherwise default to 'Audio files'
+        if download_path:
+            album_directory = os.path.join(download_path, sanitized_album_name)
+        else:
+            script_directory = os.path.dirname(os.path.realpath(__file__))
+            album_directory = os.path.join(script_directory, 'Audio files', sanitized_album_name)
         os.makedirs(album_directory, exist_ok=True)
 
         for audio_url in audio_urls:
@@ -131,12 +139,19 @@ def get_cpu_threads():
     return num_threads
 
 async def main():
     cpu_threads = get_cpu_threads()
+    max_workers = int(config['Threads'].get('max_workers', fallback=cpu_threads))
+
+    if max_workers == 0:
+        print(f"Using all available CPU threads: {cpu_threads}")
+        max_workers = None  # Set to None for ThreadPoolExecutor to use all available threads
+    else:
+        print(f"Number of CPU threads: {cpu_threads}")
+        print(f"Max workers for ThreadPoolExecutor: {max_workers}")
 
     async with aiohttp.ClientSession() as session:
-        with ThreadPoolExecutor(max_workers=cpu_threads) as executor:
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
             total_items = len(urls)
             total_progress = tqdm(total=total_items, desc="Total Progress", position=0)
@@ -144,21 +159,19 @@ async def main():
             loop = asyncio.get_event_loop()
             for url in urls:
-                # Corrected: use loop.create_task() to ensure the coroutine is awaited properly
-                future = loop.create_task(async_process_url(session, url, total_progress))
+                future = loop.create_task(async_process_url(session, url, total_progress, priority_order, download_path))
                 futures.append(future)
 
-            # Await all the futures
             await asyncio.gather(*futures)
 
             total_progress.close()
 
-    # Display error messages for failed URLs after the download is complete
     if failed_urls:
         print("\nThe following files encountered errors during download:")
         for failed_url in failed_urls:
             print(f"- {failed_url}")
 
 if __name__ == "__main__":
+    priority_order = config.get('Download', 'priority_order', fallback='mp3, flac, m4a').split(', ')
+    download_path = config.get('Download', 'download_path', fallback='')
     asyncio.run(main())
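
Taken together, the new configuration plumbing boils down to the pattern below. This is a minimal, self-contained sketch for illustration only, assuming settings.ini sits in the working directory; it is not the committed script verbatim.

import configparser
import os

# Read the settings file; every lookup carries a fallback, so a missing
# key falls back to a sane default instead of crashing.
config = configparser.ConfigParser()
config.read('settings.ini')

max_retries = int(config['Retries'].get('max_retries', fallback=2))
max_workers = int(config['Threads'].get('max_workers', fallback=os.cpu_count() or 1))
priority_order = config.get('Download', 'priority_order', fallback='mp3, flac, m4a').split(', ')
download_path = config.get('Download', 'download_path', fallback='')

# 0 means "use every CPU thread": when max_workers is None,
# ThreadPoolExecutor chooses its own default worker count.
if max_workers == 0:
    max_workers = None

print(max_retries, max_workers, priority_order, download_path)

One caveat worth noting: priority_order is split on the exact two-character separator ', ', so a value written as flac,mp3 (no space after the comma) would come through as a single entry.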

settings.ini Normal file

@@ -0,0 +1,13 @@
+[Retries]
+; Amount of retries per file before the script gives an error
+max_retries = 2
+[Threads]
+; Setting this value to 0, every thread on the cpu will be used
+max_workers = 1
+[Download]
+priority_order = flac, mp3, m4a
+; C:\example\for\windows
+; /example/for/linux
+; If empty, the downloads will be inside a folder called-
+; "Audio files" in the same location as the script
+download_path =
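
For reference, a filled-in variant of the file might look like this; the path and values are hypothetical and shown only to illustrate the expected format.

[Retries]
max_retries = 5
[Threads]
; 0 = use every CPU thread
max_workers = 0
[Download]
priority_order = flac, mp3, m4a
; Hypothetical Windows path; on Linux this could be e.g. /home/user/Music
download_path = C:\Users\example\Music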