mirror of
https://codeberg.org/hyperreal/archive-fandom-wiki
synced 2024-11-01 16:43:07 +01:00
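Fix various bugs: use a relative site_dir, include the wiki name in BreezeWiki URLs, and simplify image/web-file archiving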
This commit is contained in:
parent
1a06ff4632
commit
310219bf35
24
afw
24
afw
@ -42,7 +42,7 @@ class FandomWiki:
|
|||||||
self.name = name
|
self.name = name
|
||||||
self.canonical_url = f"https://{name}.fandom.com"
|
self.canonical_url = f"https://{name}.fandom.com"
|
||||||
self.breezewiki_url = breezewiki_url
|
self.breezewiki_url = breezewiki_url
|
||||||
self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com")
|
self.site_dir = Path(f"{name}.fandom.com")
|
||||||
self.images_dir = self.site_dir.joinpath("images")
|
self.images_dir = self.site_dir.joinpath("images")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -100,7 +100,7 @@ class FandomWiki:
|
|||||||
if "Local_Sitemap" not in item.get(
|
if "Local_Sitemap" not in item.get(
|
||||||
"href"
|
"href"
|
||||||
) and "Special:" not in item.get("href"):
|
) and "Special:" not in item.get("href"):
|
||||||
new_url = f"{self.breezewiki_url}{item.get('href')}"
|
new_url = f"{self.breezewiki_url}/{self.name}{item.get('href')}"
|
||||||
hop1_urls.append(new_url)
|
hop1_urls.append(new_url)
|
||||||
console.log(new_url)
|
console.log(new_url)
|
||||||
|
|
||||||
@ -199,23 +199,14 @@ class FandomWiki:
|
|||||||
def archive(self):
|
def archive(self):
|
||||||
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||||
|
|
||||||
img_files = [
|
|
||||||
f
|
|
||||||
for f in self.images_dir.iterdir()
|
|
||||||
if self.images_dir.joinpath(f).is_file()
|
|
||||||
]
|
|
||||||
|
|
||||||
img_archive_filename = f"{self.images_dir}-{timestamp}.tar.xz"
|
img_archive_filename = f"{self.images_dir}-{timestamp}.tar.xz"
|
||||||
|
num_of_imgs = sum(1 for img in self.images_dir.iterdir() if img.is_file())
|
||||||
|
|
||||||
with Progress() as progress:
|
with Progress() as progress:
|
||||||
task = progress.add_task("[cyan]Archiving images...", total=len(img_files))
|
task = progress.add_task("[cyan]Archiving images...", total=num_of_imgs)
|
||||||
|
|
||||||
with tarfile.open(img_archive_filename, "w:xz") as tar:
|
with tarfile.open(img_archive_filename, "w:xz") as tar:
|
||||||
for img_file in img_files:
|
tar.add(self.images_dir)
|
||||||
if progress.finished:
|
|
||||||
break
|
|
||||||
full_file_path = self.images_dir.joinpath(img_file)
|
|
||||||
tar.add(full_file_path, arcname=img_file)
|
|
||||||
progress.update(task, advance=1)
|
progress.update(task, advance=1)
|
||||||
|
|
||||||
progress.stop()
|
progress.stop()
|
||||||
@ -239,8 +230,7 @@ class FandomWiki:
|
|||||||
for web_file in web_files:
|
for web_file in web_files:
|
||||||
if progress.finished:
|
if progress.finished:
|
||||||
break
|
break
|
||||||
full_file_path = self.site_dir.joinpath(web_file)
|
tar.add(web_file, arcname=web_file)
|
||||||
tar.add(full_file_path, arcname=web_file)
|
|
||||||
progress.update(task, advance=1)
|
progress.update(task, advance=1)
|
||||||
|
|
||||||
progress.stop()
|
progress.stop()
|
||||||
@ -248,7 +238,7 @@ class FandomWiki:
|
|||||||
shutil.rmtree(self.site_dir, ignore_errors=True)
|
shutil.rmtree(self.site_dir, ignore_errors=True)
|
||||||
|
|
||||||
console.log(f"\nTotal web files scraped: {len(web_files)}")
|
console.log(f"\nTotal web files scraped: {len(web_files)}")
|
||||||
console.log(f"Total images scraped: {len(img_files)}")
|
console.log(f"Total images scraped: {num_of_imgs}")
|
||||||
|
|
||||||
|
|
||||||
def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"):
|
def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"):
|
||||||
|
Loading…
Reference in New Issue
Block a user