Added a settings file
Added a settings file to change the download path, the number of threads, the priority of the downloads, and the number of retries before giving an error.
This commit is contained in:
parent
8c3aadf845
commit
f13212d408
2 changed files with 50 additions and 24 deletions
|
@ -7,8 +7,12 @@ from concurrent.futures import ThreadPoolExecutor
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import configparser
|
||||||
|
|
||||||
MAX_RETRIES = 2
|
config = configparser.ConfigParser()
|
||||||
|
config.read('settings.ini')
|
||||||
|
|
||||||
|
MAX_RETRIES = int(config['Retries'].get('max_retries', fallback=2))
|
||||||
|
|
||||||
url = input("Enter the URL: ")
|
url = input("Enter the URL: ")
|
||||||
|
|
||||||
|
@ -37,7 +41,6 @@ failed_urls = []
|
||||||
# Lock to prevent concurrent printing of error messages
|
# Lock to prevent concurrent printing of error messages
|
||||||
print_lock = asyncio.Lock()
|
print_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
|
||||||
# Function to fetch HTML content asynchronously
|
# Function to fetch HTML content asynchronously
|
||||||
async def async_get_html_content(url):
|
async def async_get_html_content(url):
|
||||||
headers = {
|
headers = {
|
||||||
|
@ -52,9 +55,8 @@ async def async_get_html_content(url):
|
||||||
except aiohttp.ClientError as e:
|
except aiohttp.ClientError as e:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
# Function to find and save FLAC, MP3, or M4A URLs and album name
|
# Function to find and save FLAC, MP3, or M4A URLs and album name
|
||||||
def find_audio_urls_and_album_name(html_content):
|
def find_audio_urls_and_album_name(html_content, priority_order):
|
||||||
audio_urls = []
|
audio_urls = []
|
||||||
album_name = None
|
album_name = None
|
||||||
|
|
||||||
|
@ -63,14 +65,15 @@ def find_audio_urls_and_album_name(html_content):
|
||||||
|
|
||||||
# Find all links in the page
|
# Find all links in the page
|
||||||
links = soup.find_all("a", href=True)
|
links = soup.find_all("a", href=True)
|
||||||
for link in links:
|
|
||||||
href = link.get("href")
|
for file_type in priority_order:
|
||||||
if href.endswith(".flac"):
|
for link in links:
|
||||||
audio_url = href
|
href = link.get("href")
|
||||||
audio_urls = [audio_url]
|
if href.endswith(f".{file_type}"):
|
||||||
break
|
audio_urls.append(href)
|
||||||
elif href.endswith(".mp3") or href.endswith(".m4a"):
|
|
||||||
audio_urls.append(href)
|
if audio_urls:
|
||||||
|
break # If any URLs are found for the current file type, break the loop
|
||||||
|
|
||||||
# Extract album name
|
# Extract album name
|
||||||
album_name_element = soup.select_one("#pageContent > p:nth-child(6) > b:nth-child(1)")
|
album_name_element = soup.select_one("#pageContent > p:nth-child(6) > b:nth-child(1)")
|
||||||
|
@ -79,7 +82,6 @@ def find_audio_urls_and_album_name(html_content):
|
||||||
|
|
||||||
return audio_urls, album_name
|
return audio_urls, album_name
|
||||||
|
|
||||||
|
|
||||||
# Function to download a file asynchronously with retry
|
# Function to download a file asynchronously with retry
|
||||||
async def async_download_audio_file(session, url, directory, total_progress):
|
async def async_download_audio_file(session, url, directory, total_progress):
|
||||||
retries = 0
|
retries = 0
|
||||||
|
@ -106,16 +108,22 @@ async def async_download_audio_file(session, url, directory, total_progress):
|
||||||
failed_urls.append(url)
|
failed_urls.append(url)
|
||||||
break # Break the loop if max retries reached
|
break # Break the loop if max retries reached
|
||||||
|
|
||||||
|
|
||||||
# Function to process a single URL asynchronously
|
# Function to process a single URL asynchronously
|
||||||
async def async_process_url(session, url, total_progress):
|
async def async_process_url(session, url, total_progress, priority_order, download_path):
|
||||||
html_content = await async_get_html_content(url)
|
html_content = await async_get_html_content(url)
|
||||||
audio_urls, album_name = find_audio_urls_and_album_name(html_content)
|
audio_urls, album_name = find_audio_urls_and_album_name(html_content, priority_order)
|
||||||
|
|
||||||
if audio_urls and album_name:
|
if audio_urls and album_name:
|
||||||
illegal_characters = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
|
illegal_characters = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
|
||||||
sanitized_album_name = "".join(c if c.isalnum() or c not in illegal_characters else ' ' for c in album_name)
|
sanitized_album_name = "".join(c if c.isalnum() or c not in illegal_characters else ' ' for c in album_name)
|
||||||
album_directory = os.path.join('Audio files', sanitized_album_name)
|
|
||||||
|
# Use custom download path if provided, otherwise default to 'Audio files'
|
||||||
|
if download_path:
|
||||||
|
album_directory = os.path.join(download_path, sanitized_album_name)
|
||||||
|
else:
|
||||||
|
script_directory = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
album_directory = os.path.join(script_directory, 'Audio files', sanitized_album_name)
|
||||||
|
|
||||||
os.makedirs(album_directory, exist_ok=True)
|
os.makedirs(album_directory, exist_ok=True)
|
||||||
|
|
||||||
for audio_url in audio_urls:
|
for audio_url in audio_urls:
|
||||||
|
@ -131,12 +139,19 @@ def get_cpu_threads():
|
||||||
|
|
||||||
return num_threads
|
return num_threads
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
cpu_threads = get_cpu_threads()
|
cpu_threads = get_cpu_threads()
|
||||||
|
max_workers = int(config['Threads'].get('max_workers', fallback=cpu_threads))
|
||||||
|
|
||||||
|
if max_workers == 0:
|
||||||
|
print(f"Using all available CPU threads: {cpu_threads}")
|
||||||
|
max_workers = None # Set to None for ThreadPoolExecutor to use all available threads
|
||||||
|
else:
|
||||||
|
print(f"Number of CPU threads: {cpu_threads}")
|
||||||
|
print(f"Max workers for ThreadPoolExecutor: {max_workers}")
|
||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
with ThreadPoolExecutor(max_workers=cpu_threads) as executor:
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||||
total_items = len(urls)
|
total_items = len(urls)
|
||||||
total_progress = tqdm(total=total_items, desc="Total Progress", position=0)
|
total_progress = tqdm(total=total_items, desc="Total Progress", position=0)
|
||||||
|
|
||||||
|
@ -144,21 +159,19 @@ async def main():
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
|
|
||||||
for url in urls:
|
for url in urls:
|
||||||
# Corrected: use loop.create_task() to ensure the coroutine is awaited properly
|
future = loop.create_task(async_process_url(session, url, total_progress, priority_order, download_path))
|
||||||
future = loop.create_task(async_process_url(session, url, total_progress))
|
|
||||||
futures.append(future)
|
futures.append(future)
|
||||||
|
|
||||||
# Await all the futures
|
|
||||||
await asyncio.gather(*futures)
|
await asyncio.gather(*futures)
|
||||||
|
|
||||||
total_progress.close()
|
total_progress.close()
|
||||||
|
|
||||||
# Display error messages for failed URLs after the download is complete
|
|
||||||
if failed_urls:
|
if failed_urls:
|
||||||
print("\nThe following files encountered errors during download:")
|
print("\nThe following files encountered errors during download:")
|
||||||
for failed_url in failed_urls:
|
for failed_url in failed_urls:
|
||||||
print(f"- {failed_url}")
|
print(f"- {failed_url}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
priority_order = config.get('Download', 'priority_order', fallback='mp3, flac, m4a').split(', ')
|
||||||
|
download_path = config.get('Download', 'download_path', fallback='')
|
||||||
asyncio.run(main())
|
asyncio.run(main())
|
||||||
|
|
13
settings.ini
Normal file
13
settings.ini
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
[Retries] ; Number of retries per file before the script gives an error
|
||||||
|
max_retries = 2
|
||||||
|
|
||||||
|
[Threads] ; If this value is set to 0, every thread on the CPU will be used
|
||||||
|
max_workers = 1
|
||||||
|
|
||||||
|
[Download]
|
||||||
|
priority_order = flac, mp3, m4a
|
||||||
|
; C:\example\for\windows
|
||||||
|
; /example/for/linux
|
||||||
|
; If empty, the downloads will be placed inside a folder called
|
||||||
|
; "Audio files" in the same location as the script
|
||||||
|
download_path =
|
Loading…
Reference in a new issue