昨天,我们学习了这篇 2025 年 1 月 22 日发表在 Nature 杂志上的文献,标题为《Tissue-resident memory CD8 T cell diversity is spatiotemporally imprinted》。文中提供了代码和处理好的数据,但是数据量相当大,而且文件很多(如下),下载可能是个难题!
import gzip
import os
import shutil

import requests
from tqdm import tqdm


def download_file(file: str, base_url: str, force: bool = False):
    """Download ``file`` from ``base_url`` to the same relative path locally.

    If the file already exists locally and ``force`` is False, the remote
    ``content-length`` is compared with the local file size; the download is
    skipped when they match and repeated when they differ.

    Network errors and write errors during the download are reported with
    ``print`` and swallowed (best-effort behaviour); they are not re-raised.

    Args:
        file (str): Name (relative path) of the file to download. It is
            appended to ``base_url`` and also used as the local destination.
        base_url (str): Base URL where the file is located. A trailing slash
            is added when missing.
        force (bool, optional): If True, re-downloads the file even if it
            exists locally. Defaults to False.

    Returns:
        None

    Raises:
        OSError: If inspecting or removing the existing local file, or
            creating the destination directory, fails. These calls are made
            outside the error-reporting ``try`` blocks.
    """
    print(f"Checking file {file}")

    # Ensure base_url ends with a slash
    if not base_url.endswith("/"):
        base_url += "/"

    full_url = f"{base_url}{file}"
    download_path = f"{file}"

    local_file_exists = os.path.exists(download_path)

    if local_file_exists and not force:
        try:
            # requests.head() does not follow redirects unless asked to;
            # without this the content-length of a redirect response would
            # be compared against the local file.
            response = requests.head(full_url, allow_redirects=True)
            response.raise_for_status()
            online_size = int(response.headers.get("content-length", 0))
            local_size = os.path.getsize(download_path)
        except requests.exceptions.RequestException as e:
            # Cannot verify the local copy; keep it untouched.
            print(f" Error checking online file: {e}")
            return

        if online_size == local_size:
            print(f" File {file} is already downloaded and has the correct size.")
            return

        print(
            f" Local file size ({local_size} bytes) differs from online file size ({online_size} bytes)."
        )

    # From here on a download is always required: either the file is missing,
    # ``force`` was requested, or the size check above found a mismatch.
    if local_file_exists:
        print(f" Removing existing file: {download_path}")
        os.remove(download_path)
    print(f" Downloading file: {file}")

    # Create the destination folder if it doesn't exist. dirname() is "" for
    # a bare file name, and os.makedirs("") raises FileNotFoundError.
    destination_dir = os.path.dirname(download_path)
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)

    try:
        # The context manager releases the streamed connection even when
        # the transfer is interrupted.
        with requests.get(full_url, stream=True) as response:
            response.raise_for_status()  # Raise an exception for error status codes
            total_size = int(response.headers.get("content-length", 0))

            with open(download_path, "wb") as f, tqdm(
                desc=file,
                total=total_size,
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
            ) as progress_bar:
                for chunk in response.iter_content(chunk_size=8192):
                    size = f.write(chunk)
                    progress_bar.update(size)
    except requests.exceptions.RequestException as e:
        # Must precede the OSError handler: RequestException subclasses IOError.
        print(f" Error downloading file: {e}")
    except (OSError, ValueError) as e:
        print(f" Error writing file: {e}")