mirror of
https://codeberg.org/hyperreal/archive-fandom-wiki
synced 2024-11-25 09:33:41 +01:00
refactor: use rich.status and console.log
This commit is contained in:
parent
eec1bc7211
commit
d66bf2f768
@ -66,7 +66,7 @@ class FandomWiki:
|
|||||||
starting_url = f"{self.canonical_url}{mw_allpages_nav.find_all('a')[1].get('href')}"
|
starting_url = f"{self.canonical_url}{mw_allpages_nav.find_all('a')[1].get('href')}"
|
||||||
|
|
||||||
hop0_urls.append(starting_url)
|
hop0_urls.append(starting_url)
|
||||||
console.print(f"[[bold]HOP 0[/bold]] {starting_url}")
|
console.log(starting_url)
|
||||||
|
|
||||||
return hop0_urls
|
return hop0_urls
|
||||||
|
|
||||||
@ -85,7 +85,7 @@ class FandomWiki:
|
|||||||
) and "Special:" not in item.get("href"):
|
) and "Special:" not in item.get("href"):
|
||||||
new_url = f"{self.breezewiki_url}{item.get('href')}"
|
new_url = f"{self.breezewiki_url}{item.get('href')}"
|
||||||
hop1_urls.append(new_url)
|
hop1_urls.append(new_url)
|
||||||
console.print(f"[[bold]HOP 1[/bold]] {new_url}")
|
console.log(new_url)
|
||||||
|
|
||||||
return hop1_urls
|
return hop1_urls
|
||||||
|
|
||||||
@ -108,7 +108,7 @@ class FandomWiki:
|
|||||||
with open(css_filename, "wb") as outfile:
|
with open(css_filename, "wb") as outfile:
|
||||||
outfile.write(response.content)
|
outfile.write(response.content)
|
||||||
|
|
||||||
console.print(f"[[bold green]CSS[/bold green]] {css_filename}")
|
console.log(css_filename)
|
||||||
|
|
||||||
def save_img(self, img_url: str):
|
def save_img(self, img_url: str):
|
||||||
filename = self.images_dir.joinpath(Path(img_url.split("/revision")[0]).name)
|
filename = self.images_dir.joinpath(Path(img_url.split("/revision")[0]).name)
|
||||||
@ -120,9 +120,7 @@ class FandomWiki:
|
|||||||
for chunk in response.iter_content(chunk_size=8192):
|
for chunk in response.iter_content(chunk_size=8192):
|
||||||
outfile.write(chunk)
|
outfile.write(chunk)
|
||||||
|
|
||||||
console.print(f"[[bold green]IMG[/bold green]] {filename}")
|
console.log(filename)
|
||||||
else:
|
|
||||||
console.print(f"[[bold yellow]IMG (EXISTS)[/bold yellow]] {filename}")
|
|
||||||
|
|
||||||
def fetch_all_images(self, page_url: str):
|
def fetch_all_images(self, page_url: str):
|
||||||
response = requests.get(page_url)
|
response = requests.get(page_url)
|
||||||
@ -175,13 +173,9 @@ class FandomWiki:
|
|||||||
with open(filename, "w") as outfile:
|
with open(filename, "w") as outfile:
|
||||||
outfile.write(soup.prettify())
|
outfile.write(soup.prettify())
|
||||||
|
|
||||||
console.print(f"[[bold green]HTML[/bold green]] {filename}")
|
console.log(filename)
|
||||||
else:
|
|
||||||
console.print(f"[[bold yellow]HTML (EXISTS)[/bold yellow]] {filename}")
|
|
||||||
|
|
||||||
def fetch_all_pages(self, hop1_urls: list):
|
def fetch_all_pages(self, hop1_urls: list):
|
||||||
self.save_css()
|
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
executor.map(self.save_page, hop1_urls)
|
executor.map(self.save_page, hop1_urls)
|
||||||
|
|
||||||
@ -236,13 +230,25 @@ class FandomWiki:
|
|||||||
|
|
||||||
shutil.rmtree(self.site_dir, ignore_errors=True)
|
shutil.rmtree(self.site_dir, ignore_errors=True)
|
||||||
|
|
||||||
console.print(f"\nTotal web files scraped: {len(web_files)}")
|
console.log(f"\nTotal web files scraped: {len(web_files)}")
|
||||||
console.print(f"Total images scraped: {len(img_files)}")
|
console.log(f"Total images scraped: {len(img_files)}")
|
||||||
|
|
||||||
|
|
||||||
def archive_site(name: str):
|
def archive_site(name: str):
|
||||||
site = FandomWiki(name)
|
site = FandomWiki(name)
|
||||||
site.fetch_all_pages(site.get_hop1_urls(site.get_hop0_urls()))
|
|
||||||
|
with console.status("Fetching hop 0 URLs...", spinner="aesthetic"):
|
||||||
|
hop0_urls = site.get_hop0_urls()
|
||||||
|
|
||||||
|
with console.status("Fetching hop 1 URLs...", spinner="aesthetic"):
|
||||||
|
hop1_urls = site.get_hop1_urls(hop0_urls)
|
||||||
|
|
||||||
|
with console.status("Saving CSS files...", spinner="aesthetic"):
|
||||||
|
site.save_css()
|
||||||
|
|
||||||
|
with console.status("Downloading images and web pages...", spinner="aesthetic"):
|
||||||
|
site.fetch_all_pages(hop1_urls)
|
||||||
|
|
||||||
site.archive()
|
site.archive()
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "archive-fandom-wiki"
|
name = "archive-fandom-wiki"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
description = "Archive fandom wikis"
|
description = "Archive fandom wikis"
|
||||||
authors = ["Jeffrey Serio <hyperreal@fedoraproject.org>"]
|
authors = ["Jeffrey Serio <hyperreal@fedoraproject.org>"]
|
||||||
license = "GPL-3.0"
|
license = "GPL-3.0"
|
||||||
|
Loading…
Reference in New Issue
Block a user