From d66bf2f768df1eb25aa94d81d0bc8b2b57f61bb4 Mon Sep 17 00:00:00 2001 From: Jeffrey Serio <23226432+hyperreal64@users.noreply.github.com> Date: Sat, 16 Sep 2023 15:41:09 -0500 Subject: [PATCH] refactor: use rich.status and console.log --- archive-fandom-wiki | 34 ++++++++++++++++++++-------------- pyproject.toml | 2 +- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/archive-fandom-wiki b/archive-fandom-wiki index e219560..26bafa1 100755 --- a/archive-fandom-wiki +++ b/archive-fandom-wiki @@ -66,7 +66,7 @@ class FandomWiki: starting_url = f"{self.canonical_url}{mw_allpages_nav.find_all('a')[1].get('href')}" hop0_urls.append(starting_url) - console.print(f"[[bold]HOP 0[/bold]] {starting_url}") + console.log(starting_url) return hop0_urls @@ -85,7 +85,7 @@ class FandomWiki: ) and "Special:" not in item.get("href"): new_url = f"{self.breezewiki_url}{item.get('href')}" hop1_urls.append(new_url) - console.print(f"[[bold]HOP 1[/bold]] {new_url}") + console.log(new_url) return hop1_urls @@ -108,7 +108,7 @@ class FandomWiki: with open(css_filename, "wb") as outfile: outfile.write(response.content) - console.print(f"[[bold green]CSS[/bold green]] {css_filename}") + console.log(css_filename) def save_img(self, img_url: str): filename = self.images_dir.joinpath(Path(img_url.split("/revision")[0]).name) @@ -120,9 +120,7 @@ class FandomWiki: for chunk in response.iter_content(chunk_size=8192): outfile.write(chunk) - console.print(f"[[bold green]IMG[/bold green]] {filename}") - else: - console.print(f"[[bold yellow]IMG (EXISTS)[/bold yellow]] {filename}") + console.log(filename) def fetch_all_images(self, page_url: str): response = requests.get(page_url) @@ -175,13 +173,9 @@ class FandomWiki: with open(filename, "w") as outfile: outfile.write(soup.prettify()) - console.print(f"[[bold green]HTML[/bold green]] {filename}") - else: - console.print(f"[[bold yellow]HTML (EXISTS)[/bold yellow]] {filename}") + console.log(filename) def fetch_all_pages(self, hop1_urls: list): - self.save_css() - with concurrent.futures.ThreadPoolExecutor() as executor: executor.map(self.save_page, hop1_urls) @@ -236,13 +230,25 @@ class FandomWiki: shutil.rmtree(self.site_dir, ignore_errors=True) - console.print(f"\nTotal web files scraped: {len(web_files)}") - console.print(f"Total images scraped: {len(img_files)}") + console.log(f"\nTotal web files scraped: {len(web_files)}") + console.log(f"Total images scraped: {len(img_files)}") def archive_site(name: str): site = FandomWiki(name) - site.fetch_all_pages(site.get_hop1_urls(site.get_hop0_urls())) + + with console.status("Fetching hop 0 URLs...", spinner="aesthetic"): + hop0_urls = site.get_hop0_urls() + + with console.status("Fetching hop 1 URLs...", spinner="aesthetic"): + hop1_urls = site.get_hop1_urls(hop0_urls) + + with console.status("Saving CSS files...", spinner="aesthetic"): + site.save_css() + + with console.status("Downloading images and web pages...", spinner="aesthetic"): + site.fetch_all_pages(hop1_urls) + site.archive() diff --git a/pyproject.toml b/pyproject.toml index a932e19..97f7d23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "archive-fandom-wiki" -version = "0.1.1" +version = "0.1.2" description = "Archive fandom wikis" authors = ["Jeffrey Serio "] license = "GPL-3.0"