Fix various bugs

This commit is contained in:
Jeffrey Serio 2024-07-16 01:59:21 -05:00
parent 1a06ff4632
commit 310219bf35

26
afw
View File

@@ -42,7 +42,7 @@ class FandomWiki:
self.name = name self.name = name
self.canonical_url = f"https://{name}.fandom.com" self.canonical_url = f"https://{name}.fandom.com"
self.breezewiki_url = breezewiki_url self.breezewiki_url = breezewiki_url
self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com") self.site_dir = Path(f"{name}.fandom.com")
self.images_dir = self.site_dir.joinpath("images") self.images_dir = self.site_dir.joinpath("images")
try: try:
@@ -100,7 +100,7 @@ class FandomWiki:
if "Local_Sitemap" not in item.get( if "Local_Sitemap" not in item.get(
"href" "href"
) and "Special:" not in item.get("href"): ) and "Special:" not in item.get("href"):
new_url = f"{self.breezewiki_url}{item.get('href')}" new_url = f"{self.breezewiki_url}/{self.name}{item.get('href')}"
hop1_urls.append(new_url) hop1_urls.append(new_url)
console.log(new_url) console.log(new_url)
@@ -199,24 +199,15 @@ class FandomWiki:
def archive(self): def archive(self):
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
img_files = [
f
for f in self.images_dir.iterdir()
if self.images_dir.joinpath(f).is_file()
]
img_archive_filename = f"{self.images_dir}-{timestamp}.tar.xz" img_archive_filename = f"{self.images_dir}-{timestamp}.tar.xz"
num_of_imgs = sum(1 for img in self.images_dir.iterdir() if img.is_file())
with Progress() as progress: with Progress() as progress:
task = progress.add_task("[cyan]Archiving images...", total=len(img_files)) task = progress.add_task("[cyan]Archiving images...", total=num_of_imgs)
with tarfile.open(img_archive_filename, "w:xz") as tar: with tarfile.open(img_archive_filename, "w:xz") as tar:
for img_file in img_files: tar.add(self.images_dir)
if progress.finished: progress.update(task, advance=1)
break
full_file_path = self.images_dir.joinpath(img_file)
tar.add(full_file_path, arcname=img_file)
progress.update(task, advance=1)
progress.stop() progress.stop()
@@ -239,8 +230,7 @@ class FandomWiki:
for web_file in web_files: for web_file in web_files:
if progress.finished: if progress.finished:
break break
full_file_path = self.site_dir.joinpath(web_file) tar.add(web_file, arcname=web_file)
tar.add(full_file_path, arcname=web_file)
progress.update(task, advance=1) progress.update(task, advance=1)
progress.stop() progress.stop()
@@ -248,7 +238,7 @@ class FandomWiki:
shutil.rmtree(self.site_dir, ignore_errors=True) shutil.rmtree(self.site_dir, ignore_errors=True)
console.log(f"\nTotal web files scraped: {len(web_files)}") console.log(f"\nTotal web files scraped: {len(web_files)}")
console.log(f"Total images scraped: {len(img_files)}") console.log(f"Total images scraped: {num_of_imgs}")
def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"): def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"):