mirror of
https://codeberg.org/hyperreal/archive-fandom-wiki
synced 2024-11-25 09:33:41 +01:00
Fix: allow specifying the BreezeWiki instance URL
This commit is contained in:
parent
14d0f4c725
commit
1a06ff4632
40
README.md
40
README.md
@ -1,40 +0,0 @@
|
|||||||
# archive-fandom-wiki
|
|
||||||
|
|
||||||
This program archives the content of fandom wikis. It doesn't scrape from the fandom.com wiki sites directly; rather, it uses my [BreezeWiki](https://breezewiki.hyperreal.coffee) instance to avoid downloading unnecessary ads, images, and other junk.
|
|
||||||
|
|
||||||
Each resulting archive is self-contained, meaning one can extract the contents and browse the wiki snapshot locally (offline). The URLs for CSS, images, and links in each page are replaced by the relative `file:///` URLs for their corresponding pages on the local filesystem.
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
Make sure Python and Pip are installed. Then run:
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki
|
|
||||||
cd archive-fandom-wiki
|
|
||||||
python -m venv venv
|
|
||||||
source venv/bin/activate
|
|
||||||
pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
archive-fandom-wiki dishonored
|
|
||||||
```
|
|
||||||
|
|
||||||
## Podman/Docker
|
|
||||||
|
|
||||||
There is also a Containerfile, also known as a Dockerfile.
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki
|
|
||||||
cd archive-fandom-wiki
|
|
||||||
podman build -t localhost/archive-fandom-wiki:latest .
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
To run the container image:
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
podman run --name archive-fandom-wiki --rm -v "${HOME}/archives:/output:Z" localhost/archive-fandom-wiki dishonored
|
|
||||||
```
|
|
@ -8,7 +8,7 @@ Each resulting archive is self-contained, meaning one can extract the contents a
|
|||||||
|
|
||||||
Make sure Python and Pip are installed. Then run:
|
Make sure Python and Pip are installed. Then run:
|
||||||
#+begin_src bash
|
#+begin_src bash
|
||||||
git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki.git
|
git clone https://codeberg.org/hyperreal/archive-fandom-wiki.git
|
||||||
cd archive-fandom-wiki
|
cd archive-fandom-wiki
|
||||||
python -m venv venv
|
python -m venv venv
|
||||||
source venv/bin/activate
|
source venv/bin/activate
|
||||||
@ -16,14 +16,16 @@ pip install -r requirements.txt
|
|||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** Usage
|
** Usage
|
||||||
|
Optionally pass a BreezeWiki instance URL as the second argument; if it is omitted, the default value (my BreezeWiki instance URL) is used.
|
||||||
#+begin_src bash
|
#+begin_src bash
|
||||||
archive-fandom-wiki dishonored
|
afw dishonored https://breezewiki.instance.url
|
||||||
|
afw dishonored
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** Podman/Docker
|
** Podman/Docker
|
||||||
A Containerfile (also known as a Dockerfile) is provided as well.
|
A Containerfile (also known as a Dockerfile) is provided as well.
|
||||||
#+begin_src bash
|
#+begin_src bash
|
||||||
git clone https://git.sr.ht/~hyperreal/archive-fandom-wiki
|
git clone https://codeberg.org/hyperreal/archive-fandom-wiki
|
||||||
cd archive-fandom-wiki
|
cd archive-fandom-wiki
|
||||||
podman build -t localhost/archive-fandom-wiki:latest .
|
podman build -t localhost/archive-fandom-wiki:latest .
|
||||||
#+end_src
|
#+end_src
|
||||||
|
@ -1,11 +1,28 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""archive-fandom-wiki
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
afw <fandom> <breezewiki_instance>
|
||||||
|
afw <fandom>
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-h --help Show this help message.
|
||||||
|
-v --version Show version.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
afw dishonored https://breezewiki.nirn.quest
|
||||||
|
afw residentevil
|
||||||
|
"""
|
||||||
|
|
||||||
# This file is formatted with `black -l 79' to comply with PEP8 standards.
|
# This file is formatted with `black -l 79' to comply with PEP8 standards.
|
||||||
|
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from docopt import docopt
|
||||||
|
|
||||||
sys.tracebacklimit = 0
|
sys.tracebacklimit = 0
|
||||||
import tarfile
|
import tarfile
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@ -21,10 +38,10 @@ console = Console()
|
|||||||
|
|
||||||
|
|
||||||
class FandomWiki:
|
class FandomWiki:
|
||||||
def __init__(self, name: str):
|
def __init__(self, name: str, breezewiki_url: str):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.canonical_url = f"https://{name}.fandom.com"
|
self.canonical_url = f"https://{name}.fandom.com"
|
||||||
self.breezewiki_url = f"https://breezewiki.hyperreal.coffee/{name}"
|
self.breezewiki_url = breezewiki_url
|
||||||
self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com")
|
self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com")
|
||||||
self.images_dir = self.site_dir.joinpath("images")
|
self.images_dir = self.site_dir.joinpath("images")
|
||||||
|
|
||||||
@ -234,8 +251,8 @@ class FandomWiki:
|
|||||||
console.log(f"Total images scraped: {len(img_files)}")
|
console.log(f"Total images scraped: {len(img_files)}")
|
||||||
|
|
||||||
|
|
||||||
def archive_site(name: str):
|
def archive_site(name: str, breezewiki_url: str = "https://breezewiki.nirn.quest"):
|
||||||
site = FandomWiki(name)
|
site = FandomWiki(name, breezewiki_url)
|
||||||
|
|
||||||
with console.status("Fetching hop 0 URLs...", spinner="aesthetic"):
|
with console.status("Fetching hop 0 URLs...", spinner="aesthetic"):
|
||||||
hop0_urls = site.get_hop0_urls()
|
hop0_urls = site.get_hop0_urls()
|
||||||
@ -252,14 +269,9 @@ def archive_site(name: str):
|
|||||||
site.archive()
|
site.archive()
|
||||||
|
|
||||||
|
|
||||||
def usage_message():
|
|
||||||
console.print("Usage:\n\tarchive-fandom-wiki [[italic]name[/italic]]\n")
|
|
||||||
console.print("Example:\n\tarchive-fandom-wiki dishonored\n")
|
|
||||||
console.print("All wikis on fandom.com are supported.")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) > 1:
|
args = docopt(__doc__, options_first=True, help=True, version="1.0.1")
|
||||||
archive_site(sys.argv[1])
|
if args["<breezewiki_instance>"]:
|
||||||
|
archive_site(args["<fandom>"], args["<breezewiki_instance>"])
|
||||||
else:
|
else:
|
||||||
usage_message()
|
archive_site(args["<fandom>"])
|
@ -4,4 +4,4 @@ set -e
|
|||||||
|
|
||||||
. /venv/bin/activate
|
. /venv/bin/activate
|
||||||
|
|
||||||
exec /archive-fandom-wiki "$@"
|
exec /afw "$@"
|
||||||
|
Loading…
Reference in New Issue
Block a user