mirror of
https://codeberg.org/hyperreal/archive-fandom-wiki
synced 2024-11-01 16:43:07 +01:00
Fix various bugs
This commit is contained in:
parent
1a06ff4632
commit
310219bf35
24
afw
24
afw
@@ -42,7 +42,7 @@ class FandomWiki:
|
||||
self.name = name
|
||||
self.canonical_url = f"https://{name}.fandom.com"
|
||||
self.breezewiki_url = breezewiki_url
|
||||
self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com")
|
||||
self.site_dir = Path(f"{name}.fandom.com")
|
||||
self.images_dir = self.site_dir.joinpath("images")
|
||||
|
||||
try:
|
||||
@@ -100,7 +100,7 @@ class FandomWiki:
|
||||
if "Local_Sitemap" not in item.get(
|
||||
"href"
|
||||
) and "Special:" not in item.get("href"):
|
||||
new_url = f"{self.breezewiki_url}{item.get('href')}"
|
||||
new_url = f"{self.breezewiki_url}/{self.name}{item.get('href')}"
|
||||
hop1_urls.append(new_url)
|
||||
console.log(new_url)
|
||||
|
||||
@@ -199,23 +199,14 @@ class FandomWiki:
|
||||
def archive(self):
|
||||
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
|
||||
img_files = [
|
||||
f
|
||||
for f in self.images_dir.iterdir()
|
||||
if self.images_dir.joinpath(f).is_file()
|
||||
]
|
||||
|
||||
img_archive_filename = f"{self.images_dir}-{timestamp}.tar.xz"
|
||||
num_of_imgs = sum(1 for img in self.images_dir.iterdir() if img.is_file())
|
||||
|
||||
with Progress() as progress:
|
||||
task = progress.add_task("[cyan]Archiving images...", total=len(img_files))
|
||||
task = progress.add_task("[cyan]Archiving images...", total=num_of_imgs)
|
||||
|
||||
with tarfile.open(img_archive_filename, "w:xz") as tar:
|
||||
for img_file in img_files:
|
||||
if progress.finished:
|
||||
break
|
||||
full_file_path = self.images_dir.joinpath(img_file)
|
||||
tar.add(full_file_path, arcname=img_file)
|
||||
tar.add(self.images_dir)
|
||||
progress.update(task, advance=1)
|
||||
|
||||
progress.stop()
|
||||
@@ -239,8 +230,7 @@ class FandomWiki:
|
||||
for web_file in web_files:
|
||||
if progress.finished:
|
||||
break
|
||||
full_file_path = self.site_dir.joinpath(web_file)
|
||||
tar.add(full_file_path, arcname=web_file)
|
||||
tar.add(web_file, arcname=web_file)
|
||||
progress.update(task, advance=1)
|
||||
|
||||
progress.stop()
|
||||
@@ -248,7 +238,7 @@ class FandomWiki:
|
||||
shutil.rmtree(self.site_dir, ignore_errors=True)
|
||||
|
||||
console.log(f"\nTotal web files scraped: {len(web_files)}")
|
||||
console.log(f"Total images scraped: {len(img_files)}")
|
||||
console.log(f"Total images scraped: {num_of_imgs}")
|
||||
|
||||
|
||||
def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"):
|
||||
|
Loading…
Reference in New Issue
Block a user