Fix various bugs

This commit is contained in:
Jeffrey Serio 2024-07-16 01:59:21 -05:00
parent 1a06ff4632
commit 310219bf35

26
afw
View File

@@ -42,7 +42,7 @@ class FandomWiki:
self.name = name
self.canonical_url = f"https://{name}.fandom.com"
self.breezewiki_url = breezewiki_url
self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com")
self.site_dir = Path(f"{name}.fandom.com")
self.images_dir = self.site_dir.joinpath("images")
try:
@@ -100,7 +100,7 @@ class FandomWiki:
if "Local_Sitemap" not in item.get(
"href"
) and "Special:" not in item.get("href"):
new_url = f"{self.breezewiki_url}{item.get('href')}"
new_url = f"{self.breezewiki_url}/{self.name}{item.get('href')}"
hop1_urls.append(new_url)
console.log(new_url)
@@ -199,24 +199,15 @@ class FandomWiki:
def archive(self):
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
img_files = [
f
for f in self.images_dir.iterdir()
if self.images_dir.joinpath(f).is_file()
]
img_archive_filename = f"{self.images_dir}-{timestamp}.tar.xz"
num_of_imgs = sum(1 for img in self.images_dir.iterdir() if img.is_file())
with Progress() as progress:
task = progress.add_task("[cyan]Archiving images...", total=len(img_files))
task = progress.add_task("[cyan]Archiving images...", total=num_of_imgs)
with tarfile.open(img_archive_filename, "w:xz") as tar:
for img_file in img_files:
if progress.finished:
break
full_file_path = self.images_dir.joinpath(img_file)
tar.add(full_file_path, arcname=img_file)
progress.update(task, advance=1)
tar.add(self.images_dir)
progress.update(task, advance=1)
progress.stop()
@@ -239,8 +230,7 @@ class FandomWiki:
for web_file in web_files:
if progress.finished:
break
full_file_path = self.site_dir.joinpath(web_file)
tar.add(full_file_path, arcname=web_file)
tar.add(web_file, arcname=web_file)
progress.update(task, advance=1)
progress.stop()
@@ -248,7 +238,7 @@ class FandomWiki:
shutil.rmtree(self.site_dir, ignore_errors=True)
console.log(f"\nTotal web files scraped: {len(web_files)}")
console.log(f"Total images scraped: {len(img_files)}")
console.log(f"Total images scraped: {num_of_imgs}")
def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"):