import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import unquote
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

BASE_URL = 'https://downloads.khinsider.com'

# Fetch a page and return its HTML, or None on any request error
def get_html_content(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None

# Collect the per-song page URLs from the album page
def get_song_page_urls(album_url):
    html_content = get_html_content(album_url)
    if html_content is None:
        return []
    soup = BeautifulSoup(html_content, 'html.parser')
    # Each song row on the album page carries the class 'playlistDownloadSong'
    elements = soup.find_all(class_='playlistDownloadSong')
    urls = []
    for element in elements:
        link = element.find('a')
        if link:
            urls.append(f"{BASE_URL}{link.get('href')}")
    return urls

# Extract the direct .mp3 links and the album name from a song page
def find_mp3_urls_and_album_name(html_content):
    mp3_urls = []
    album_name = None
    if html_content:
        soup = BeautifulSoup(html_content, 'html.parser')
        for link in soup.find_all('a', href=True):
            href = link['href']
            if href.endswith('.mp3'):
                mp3_urls.append(href)
        # Extract the album name; this selector is tied to the current page
        # layout and will break silently if the site's markup changes
        album_name_element = soup.select_one('#pageContent > p:nth-child(6) > b:nth-child(1)')
        if album_name_element:
            album_name = album_name_element.text.strip()
    return mp3_urls, album_name

# Download a single file into the given directory
def download_file(url, directory, total_progress):
    try:
        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()
        # Unquote only the basename so %20 becomes a space in the filename
        filename = os.path.join(directory, unquote(os.path.basename(url)))
        with open(filename, 'wb') as file:
            for data in response.iter_content(chunk_size=1024):
                file.write(data)
        total_progress.update(1)  # one tick per file downloaded
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {url}: {e}")

# Scrape one song page and download every MP3 it links to
def process_url(url, total_progress):
    html_content = get_html_content(url)
    mp3_urls, album_name = find_mp3_urls_and_album_name(html_content)
    if mp3_urls and album_name:
        # Keep only characters that are safe in directory names
        sanitized_album_name = ''.join(c for c in album_name if c.isalnum() or c in ' -_')
        album_directory = os.path.join('MP3 files', sanitized_album_name)
        os.makedirs(album_directory, exist_ok=True)
        for mp3_url in mp3_urls:
            download_file(mp3_url, album_directory, total_progress)

def get_cpu_threads():
    # os.cpu_count() works on all major platforms but may return None
    return os.cpu_count() or 1

if __name__ == "__main__":
    album_url = input("Enter the URL: ")
    urls = get_song_page_urls(album_url)

    # Run process_url concurrently, one worker per CPU thread
    with ThreadPoolExecutor(max_workers=get_cpu_threads()) as executor:
        total_items = len(urls)
        total_progress = tqdm(total=total_items, desc="Total Progress", position=0)
        futures = [executor.submit(process_url, url, total_progress) for url in urls]
        # Wait for all workers to finish and surface any exceptions
        for future in futures:
            future.result()
        total_progress.close()

    # Report anything that did not download
    downloaded_files = total_progress.n
    if downloaded_files == 0:
        print("No files downloaded; the album name or MP3 links could not be found.")
    elif downloaded_files < total_items:
        print(f"{total_items - downloaded_files} files not downloaded. Missing MP3 files.")
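
# Example run (a minimal sketch; the script name and album slug below are
# placeholders, not values taken from the code):
#
#   python khinsider_downloader.py
#   Enter the URL: https://downloads.khinsider.com/game-soundtracks/album/<album-slug>
#
# Downloaded tracks are written to 'MP3 files/<sanitized album name>/'.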