mirror of
https://codeberg.org/hyperreal/archive-fandom-wiki
synced 2024-11-25 09:33:41 +01:00
Fix: allow specifying the BreezeWiki instance URL
This commit is contained in:
parent
14d0f4c725
commit
1a06ff4632
40
README.md
40
README.md
@ -1,40 +0,0 @@
|
||||
# archive-fandom-wiki
|
||||
|
||||
This program archives the content of fandom wikis. It doesn't scrape from the fandom.com wiki sites directly; rather, it uses my [BreezeWiki](https://breezewiki.hyperreal.coffee) instance to avoid downloading unnecessary ads, images, and other junk.
|
||||
|
||||
Each resulting archive is self-contained, meaning one can extract the contents and browse the wiki snapshot locally (offline). The URLs for CSS, images, and links in each page are replaced by the relative `file:///` URLs for their corresponding pages on the local filesystem.
|
||||
|
||||
## Installation
|
||||
|
||||
Make sure Python and Pip are installed. Then run:
|
||||
|
||||
``` bash
|
||||
git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki
|
||||
cd archive-fandom-wiki
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
``` bash
|
||||
archive-fandom-wiki dishonored
|
||||
```
|
||||
|
||||
## Podman/Docker
|
||||
|
||||
A Containerfile (also known as a Dockerfile) is provided as well.
|
||||
|
||||
``` bash
|
||||
git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki
|
||||
cd archive-fandom-wiki
|
||||
podman build -t localhost/archive-fandom-wiki:latest .
|
||||
|
||||
```
|
||||
|
||||
To run the container image:
|
||||
|
||||
``` bash
|
||||
podman run --name archive-fandom-wiki --rm -v "${HOME}/archives:/output:Z" localhost/archive-fandom-wiki dishonored
|
||||
```
|
@ -8,7 +8,7 @@ Each resulting archive is self-contained, meaning one can extract the contents a
|
||||
|
||||
Make sure Python and Pip are installed. Then run:
|
||||
#+begin_src bash
|
||||
git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki.git
|
||||
git clone https://codeberg.org/hyperreal/archive-fandom-wiki.git
|
||||
cd archive-fandom-wiki
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
@ -16,14 +16,16 @@ pip install -r requirements.txt
|
||||
#+end_src
|
||||
|
||||
** Usage
|
||||
You may optionally pass a BreezeWiki instance URL as the second argument; if it is omitted, the default (my BreezeWiki instance) is used.
|
||||
#+begin_src bash
|
||||
archive-fandom-wiki dishonored
|
||||
afw dishonored https://breezewiki.instance.url
|
||||
afw dishonored
|
||||
#+end_src
|
||||
|
||||
** Podman/Docker
|
||||
A Containerfile (also known as a Dockerfile) is provided as well.
|
||||
#+begin_src bash
|
||||
git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki
|
||||
git clone https://codeberg.org/hyperreal/archive-fandom-wiki
|
||||
cd archive-fandom-wiki
|
||||
podman build -t localhost/archive-fandom-wiki:latest .
|
||||
#+end_src
|
||||
|
@ -1,11 +1,28 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""archive-fandom-wiki
|
||||
|
||||
Usage:
|
||||
afw <fandom> <breezewiki_instance>
|
||||
afw <fandom>
|
||||
|
||||
Options:
|
||||
-h --help Show this help message.
|
||||
-v --version Show version.
|
||||
|
||||
Examples:
|
||||
afw dishonored https://breezewiki.nirn.quest
|
||||
afw residentevil
|
||||
"""
|
||||
|
||||
# This file is formatted with `black -l 79` to comply with PEP 8.
|
||||
|
||||
import concurrent.futures
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
from docopt import docopt
|
||||
|
||||
sys.tracebacklimit = 0
|
||||
import tarfile
|
||||
from datetime import datetime
|
||||
@ -21,10 +38,10 @@ console = Console()
|
||||
|
||||
|
||||
class FandomWiki:
|
||||
def __init__(self, name: str):
|
||||
def __init__(self, name: str, breezewiki_url: str):
|
||||
self.name = name
|
||||
self.canonical_url = f"https://{name}.fandom.com"
|
||||
self.breezewiki_url = f"https://breezewiki.hyperreal.coffee/{name}"
|
||||
self.breezewiki_url = breezewiki_url
|
||||
self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com")
|
||||
self.images_dir = self.site_dir.joinpath("images")
|
||||
|
||||
@ -234,8 +251,8 @@ class FandomWiki:
|
||||
console.log(f"Total images scraped: {len(img_files)}")
|
||||
|
||||
|
||||
def archive_site(name: str):
|
||||
site = FandomWiki(name)
|
||||
def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"):
|
||||
site = FandomWiki(name, breezewiki_url)
|
||||
|
||||
with console.status("Fetching hop 0 URLs...", spinner="aesthetic"):
|
||||
hop0_urls = site.get_hop0_urls()
|
||||
@ -252,14 +269,9 @@ def archive_site(name: str):
|
||||
site.archive()
|
||||
|
||||
|
||||
def usage_message():
|
||||
console.print("Usage:\n\tarchive-fandom-wiki [[italic]name[/italic]]\n")
|
||||
console.print("Example:\n\tarchive-fandom-wiki dishonored\n")
|
||||
console.print("All wikis on fandom.com are supported.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) > 1:
|
||||
archive_site(sys.argv[1])
|
||||
args = docopt(__doc__, options_first=True, help=True, version="1.0.1")
|
||||
if args["<breezewiki_instance>"]:
|
||||
archive_site(args["<fandom>"], args["<breezewiki_instance>"])
|
||||
else:
|
||||
usage_message()
|
||||
archive_site(args["<fandom>"])
|
@ -4,4 +4,4 @@ set -e
|
||||
|
||||
. /venv/bin/activate
|
||||
|
||||
exec /archive-fandom-wiki "$@"
|
||||
exec /afw "$@"
|
||||
|
Loading…
Reference in New Issue
Block a user