diff --git a/README.md b/README.md deleted file mode 100644 index c564a86..0000000 --- a/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# archive-fandom-wiki - -This program archives the content of fandom wikis. It doesn't scrape from the fandom.com wiki sites directly; rather, it uses my [BreezeWiki](https://breezewiki.hyperreal.coffee) instance to avoid downloading unnecessary ads, images, and other junk. - -Each resulting archive is self-contained, meaning one can extract the contents and browse the wiki snapshot locally (offline). The URLs for CSS, images, and links in each page are replaced by the relative `file:///` URLs for their corresponding pages on the local filesystem. - -## Installation - -Make sure Python and Pip are installed. Then run: - -``` bash -git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki -cd archive-fandom-wiki -python -m venv venv -source venv/bin/activate -pip install -r requirements.txt -``` - -## Usage - -``` bash -archive-fandom-wiki dishonored -``` - -## Podman/Docker - -There is also a Containerfile, also known as a Dockerfile. - -``` bash -git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki -cd archive-fandom-wiki -podman build -t localhost/archive-fandom-wiki:latest . - -``` - -To run the container image: - -``` bash -podman run --name archive-fandom-wiki --rm -v "${HOME}/archives:/output:Z" localhost/archive-fandom-wiki dishonored -``` diff --git a/README.org b/README.org index d8b206a..b20a621 100644 --- a/README.org +++ b/README.org @@ -8,7 +8,7 @@ Each resulting archive is self-contained, meaning one can extract the contents a Make sure Python and Pip are installed. 
Then run: #+begin_src bash -git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki.git +git clone https://codeberg.org/hyperreal/archive-fandom-wiki.git cd archive-fandom-wiki python -m venv venv source venv/bin/activate @@ -16,14 +16,16 @@ pip install -r requirements.txt #+end_src ** Usage +One may specify the BreezeWiki instance URL, or the default value (my BreezeWiki instance URL) will be used. #+begin_src bash -archive-fandom-wiki dishonored +afw dishonored https://breezewiki.instance.url +afw dishonored #+end_src ** Podman/Docker There is also a Containerfile, also known as a Dockerfile. #+begin_src bash -git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki +git clone https://codeberg.org/hyperreal/archive-fandom-wiki cd archive-fandom-wiki podman build -t localhost/archive-fandom-wiki:latest . #+end_src diff --git a/archive-fandom-wiki b/afw similarity index 92% rename from archive-fandom-wiki rename to afw index 5dccd51..3de4c84 100755 --- a/archive-fandom-wiki +++ b/afw @@ -1,11 +1,28 @@ #!/usr/bin/env python +"""archive-fandom-wiki + +Usage: + afw <name> <breezewiki_url> + afw <name> + +Options: + -h --help Show this help message. + -v --version Show version. + +Examples: + afw dishonored https://breezewiki.nirn.quest + afw residentevil +""" + # This file is formatted with `black -l 79' to comply with PEP8 standards. 
import concurrent.futures import shutil import sys +from docopt import docopt + sys.tracebacklimit = 0 import tarfile from datetime import datetime @@ -21,10 +38,10 @@ console = Console() class FandomWiki: - def __init__(self, name: str): + def __init__(self, name: str, breezewiki_url: str): self.name = name self.canonical_url = f"https://{name}.fandom.com" - self.breezewiki_url = f"https://breezewiki.hyperreal.coffee/{name}" + self.breezewiki_url = breezewiki_url self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com") self.images_dir = self.site_dir.joinpath("images") @@ -234,8 +251,8 @@ class FandomWiki: console.log(f"Total images scraped: {len(img_files)}") -def archive_site(name: str): - site = FandomWiki(name) +def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"): + site = FandomWiki(name, breezewiki_url) with console.status("Fetching hop 0 URLs...", spinner="aesthetic"): hop0_urls = site.get_hop0_urls() @@ -252,14 +269,9 @@ def archive_site(name: str): site.archive() -def usage_message(): - console.print("Usage:\n\tarchive-fandom-wiki [[italic]name[/italic]]\n") - console.print("Example:\n\tarchive-fandom-wiki dishonored\n") - console.print("All wikis on fandom.com are supported.") - - if __name__ == "__main__": - if len(sys.argv) > 1: - archive_site(sys.argv[1]) + args = docopt(__doc__, options_first=True, help=True, version="1.0.1") + if args["<breezewiki_url>"]: + archive_site(args["<name>"], args["<breezewiki_url>"]) else: - usage_message() + archive_site(args["<name>"]) diff --git a/entrypoint.sh b/entrypoint.sh index eab1c68..4beb108 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -4,4 +4,4 @@ set -e . /venv/bin/activate -exec /archive-fandom-wiki "$@" +exec /afw "$@"