fix make_archive

This commit is contained in:
Jeffrey Serio 2024-08-11 02:46:28 -05:00
parent 56beec24c2
commit 3d82063356

48
afw.py
View File

@ -3,15 +3,18 @@
"""archive-fandom-wiki """archive-fandom-wiki
Usage: Usage:
afw <fandom> [<breezewiki_instance>] afw -f <fandom> [-w <max_workers>] [-b <breezewiki_url>]
afw -h afw -h
Options: Options:
-f <fandom> The fandom to archive. (Required)
-w <max_workers> The maximum number of workers to use for concurrent threads. (Optional; Default is 4)
-b <breezewiki_url> The URL of the BreezeWiki instance to use. (Optional; Default is https://breezewiki.hyperreal.coffee)
-h --help Show this help message. -h --help Show this help message.
Examples: Examples:
afw dishonored https://breezewiki.hyperreal.coffee afw -f dishonored -w 16 -b https://breezewiki.hyperreal.coffee
afw residentevil afw -f residentevil
""" """
# This file is formatted with `black -l 79' to comply with PEP8 standards. # This file is formatted with `black -l 79' to comply with PEP8 standards.
@ -42,9 +45,11 @@ console = Console()
class FandomWiki: class FandomWiki:
def __init__(self, name: str): def __init__(self, name: str):
self.name = name self.name = name
self.canonical_url = f"https://{name}.fandom.com" self.canonical_name = f"{name}.fandom.com"
self.canonical_url = f"https://{self.canonical_name}"
self.breezewiki_url = "https://breezewiki.hyperreal.coffee" self.breezewiki_url = "https://breezewiki.hyperreal.coffee"
self.site_dir = Path(f"{name}.fandom.com") self.archive_rootdir = Path.cwd()
self.site_dir = self.archive_rootdir.joinpath(f"{self.canonical_name}")
self.images_dir = self.site_dir.joinpath("images") self.images_dir = self.site_dir.joinpath("images")
def set_breezewiki_url(self, breezewiki_url: str): def set_breezewiki_url(self, breezewiki_url: str):
@ -182,7 +187,8 @@ class FandomWiki:
console.log(filename) console.log(filename)
def fetch_all_pages(self, hop1_urls: list): def fetch_all_pages(self, hop1_urls: list):
with concurrent.futures.ThreadPoolExecutor() as executor: max_workers = int(args["-w"]) if args["-w"] else 4
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
executor.map(self.save_page, hop1_urls) executor.map(self.save_page, hop1_urls)
def archive_site(self): def archive_site(self):
@ -204,8 +210,8 @@ class FandomWiki:
with console.status("Fetching hop 1 URLs...", spinner="aesthetic"): with console.status("Fetching hop 1 URLs...", spinner="aesthetic"):
hop1_urls = self.get_hop1_urls(hop0_urls) hop1_urls = self.get_hop1_urls(hop0_urls)
self.site_dir.mkdir() # Creates the parent dirs: self.archive_rootdir > self.site_dir > self.images_dir
self.images_dir.mkdir() self.images_dir.mkdir(parents=True)
with console.status("Saving CSS files...", spinner="aesthetic"): with console.status("Saving CSS files...", spinner="aesthetic"):
self.save_css() self.save_css()
@ -221,24 +227,32 @@ class FandomWiki:
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
with console.status("Archiving images...", spinner="aesthetic"): with console.status("Archiving images...", spinner="aesthetic"):
shutil.make_archive(f"images-{timestamp}", "xztar", self.images_dir) shutil.make_archive(
"images",
"xztar",
root_dir=self.site_dir,
base_dir="images",
)
shutil.move("images.tar.xz", self.site_dir)
shutil.rmtree(self.images_dir) shutil.rmtree(self.images_dir)
shutil.move(f"images-{timestamp}.tar.xz", self.site_dir)
with console.status("Archiving web files...", spinner="aesthetic"): with console.status("Archiving web files...", spinner="aesthetic"):
shutil.make_archive(f"{self.name}-{timestamp}", "gztar", self.site_dir) shutil.make_archive(
f"{self.name}-{timestamp}",
shutil.rmtree(self.site_dir) "gztar",
root_dir=self.archive_rootdir,
base_dir=self.canonical_name,
)
console.log(f"\nTotal web files archived: {total_web_files}") console.log(f"\nTotal web files archived: {total_web_files}")
console.log(f"Total images archived: {total_image_files}") console.log(f"Total images archived: {total_image_files}")
if __name__ == "__main__": if __name__ == "__main__":
args = docopt(__doc__, options_first=True, help=True, version="1.0.1") args = docopt(__doc__, options_first=True, help=True, version="1.0.1") # type: ignore
site = FandomWiki(args["<fandom>"]) site = FandomWiki(args["-f"])
if args["<breezewiki_instance>"]: if args["-b"]:
site.set_breezewiki_url(args["<breezewiki_instance>"]) site.set_breezewiki_url(args["-b"])
site.archive_site() site.archive_site()