diff --git a/download-model.py b/download-model.py index e2a951cb..993792e9 100644 --- a/download-model.py +++ b/download-model.py @@ -77,7 +77,6 @@ class ModelDownloader: if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None: self.s.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS')) - def sanitize_model_and_branch_names(self, model, branch): if model[-1] == '/': model = model[:-1] @@ -92,7 +91,6 @@ class ModelDownloader: return model, branch - def get_download_links_from_huggingface(self, model, branch, text_only=False): base = "https://huggingface.co" page = f"/api/models/{model}/tree/{branch}" @@ -163,7 +161,6 @@ class ModelDownloader: return links, sha256, is_lora - def get_output_folder(self, model, branch, is_lora, base_folder=None): if base_folder is None: base_folder = 'models' if not is_lora else 'loras' @@ -174,10 +171,11 @@ class ModelDownloader: output_folder = Path(base_folder) / output_folder return output_folder - def get_single_file(self, url, output_folder, start_from_scratch=False): filename = Path(url.rsplit('/', 1)[1]) output_path = output_folder / filename + headers = {} + mode = 'wb' if output_path.exists() and not start_from_scratch: # Check if the file has already been downloaded completely r = self.s.get(url, stream=True, timeout=20) @@ -187,50 +185,45 @@ class ModelDownloader: # Otherwise, resume the download from where it left off headers = {'Range': f'bytes={output_path.stat().st_size}-'} mode = 'ab' - else: - headers = {} - mode = 'wb' - r = self.s.get(url, stream=True, headers=headers, timeout=20) - with open(output_path, mode) as f: + with self.s.get(url, stream=True, headers=headers, timeout=20) as r: + r.raise_for_status() # Do not continue the download if the request was unsuccessful total_size = int(r.headers.get('content-length', 0)) - # Every 4MB we report an update - block_size = 4*1024*1024 - - with tqdm.tqdm(total=total_size, unit='iB', unit_scale=True, bar_format='{l_bar}{bar}| {n_fmt:6}/{total_fmt:6} {rate_fmt:6}') as t: - count = 0 - for data in r.iter_content(block_size): - t.update(len(data)) - f.write(data) - if self.progress_bar is not None: - count += len(data) - self.progress_bar(float(count)/float(total_size), f"Downloading {filename}") - + block_size = 1024 * 1024 # 1MB + with open(output_path, mode) as f: + with tqdm.tqdm(total=total_size, + unit='iB', + unit_scale=True, + bar_format='{l_bar}{bar}| {n_fmt:6}/{total_fmt:6} {rate_fmt:6}' + ) as t: + count = 0 + for data in r.iter_content(block_size): + t.update(len(data)) + f.write(data) + if self.progress_bar is not None: + count += len(data) + self.progress_bar(float(count) / float(total_size), f"Downloading {filename}") def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=1): thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True) - - def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar = None, start_from_scratch=False, threads=1): + def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=1): self.progress_bar = progress_bar # Creating the folder and writing the metadata - if not output_folder.exists(): - output_folder.mkdir(parents=True, exist_ok=True) - with open(output_folder / 'huggingface-metadata.txt', 'w') as f: - f.write(f'url: https://huggingface.co/{model}\n') - f.write(f'branch: {branch}\n') - f.write(f'download date: {str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))}\n') - sha256_str = '' - for i in range(len(sha256)): - sha256_str += f' {sha256[i][1]} {sha256[i][0]}\n' - if sha256_str != '': - f.write(f'sha256sum:\n{sha256_str}') + output_folder.mkdir(parents=True, exist_ok=True) + metadata = f'url: https://huggingface.co/{model}\n' \ + f'branch: {branch}\n' \ + f'download date: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n' + sha256_str = '\n'.join([f' {item[1]} {item[0]}' for item in sha256]) + if sha256_str: + metadata += f'sha256sum:\n{sha256_str}' + metadata += '\n' + (output_folder / 'huggingface-metadata.txt').write_text(metadata) # Downloading the files print(f"Downloading the model to {output_folder}") self.start_download_threads(links, output_folder, start_from_scratch=start_from_scratch, threads=threads) - def check_model_files(self, model, branch, links, sha256, output_folder): # Validate the checksums validated = True