# Patch summary (text before the first "diff --git" header is commentary).
#
# archive-to-megasync (new file, mode 100755, bash):
#   For each directory listed in archive_maybe, counts the regular files
#   modified less than one day ago (find -type f -mtime -1 | wc -l). When the
#   count is above zero it runs `create-archive <dir>` and then moves
#   "<dir>-<YYYYMMDD>.tar.gz" into "${SYNC_DIR}/archived/".
#
# get-fandom-wiki-urls (python):
#   Renames get_urls() to get_hop0_urls() (and its local list to hop0_urls).
#   Adds get_hop1_urls(), which requests every hop-0 URL, parses the response
#   with BeautifulSoup, and appends one entry per <a> whose href starts with
#   "/wiki": the part of the page URL before "/wiki" plus that href.
#   Every `case` arm now calls get_hop1_urls(get_hop0_urls(<name>)).
#
# NOTE(review): leading whitespace inside the hunks was reconstructed from
#   syntax and the hunk line counts; the context-line indentation in the
#   second get-fandom-wiki-urls hunk is a guess. Confirm against the target
#   file, or apply with whitespace-insensitive matching.
# NOTE(review): `-> list()` annotates with the result of calling list(), not
#   with a type; `list[str]` looks like the intent -- confirm.
# NOTE(review): SYNC_DIR is defined, but the archive_maybe entries spell out
#   "${HOME}/sync/..." instead of using it.
# NOTE(review): `mv` runs whatever the exit status of create-archive is, and
#   it presumes create-archive leaves "<dir>-<YYYYMMDD>.tar.gz" beside <dir>;
#   create-archive is not part of this patch -- verify.
# NOTE(review): get_hop1_urls() appends without de-duplication and looks up
#   item.get("href") up to three times per anchor.
# NOTE(review): the six `case` arms differ only in the literal they pass on.
#
diff --git a/archive-to-megasync b/archive-to-megasync
new file mode 100755
index 0000000..f4618d0
--- /dev/null
+++ b/archive-to-megasync
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+SYNC_DIR="${HOME}/sync"
+archive_maybe=(
+    "${HOME}/sync/org"
+    "${HOME}/sync/org-roam"
+    "${HOME}/sync/sites"
+)
+
+for dir in "${archive_maybe[@]}"; do
+    if [ "$(find "$dir" -type f -mtime -1 | wc -l)" -gt 0 ]; then
+        create-archive "$dir"
+        mv -v "$dir-$(date '+%Y%m%d').tar.gz" "${SYNC_DIR}/archived/"
+    fi
+done
diff --git a/get-fandom-wiki-urls b/get-fandom-wiki-urls
index 50c4325..e497ec9 100755
--- a/get-fandom-wiki-urls
+++ b/get-fandom-wiki-urls
@@ -50,9 +50,9 @@ import requests
 from bs4 import BeautifulSoup
 
 
-def get_urls(fandom: str) -> list():
+def get_hop0_urls(fandom: str) -> list():
     starting_url = "https://" + fandom + ".fandom.com/wiki/Local_Sitemap"
-    urls = [starting_url]
+    hop0_urls = [starting_url]
 
     while True:
         reqs = requests.get(starting_url)
@@ -80,9 +80,24 @@ def get_urls(fandom: str) -> list():
                 + mw_allpages_nav.find_all("a")[1].get("href")
             )
 
-        urls.append(starting_url)
+        hop0_urls.append(starting_url)
 
-    return urls
+    return hop0_urls
+
+
+def get_hop1_urls(hop0_urls: list) -> list():
+    hop1_urls = list()
+
+    for url in hop0_urls:
+        reqs = requests.get(url)
+        soup = BeautifulSoup(reqs.text, "html.parser")
+        fandom = url.split(sep="/wiki")[0]
+
+        for item in soup.find_all("a"):
+            if item.get("href") and item.get("href").startswith("/wiki"):
+                hop1_urls.append(fandom + item.get("href"))
+
+    return hop1_urls
 
 
 def help_message():
@@ -98,17 +113,17 @@ if __name__ == "__main__":
     if len(sys.argv) > 1:
         match sys.argv[1]:
             case "cyberpunk":
-                urls = get_urls("cyberpunk")
+                urls = get_hop1_urls(get_hop0_urls("cyberpunk"))
             case "dishonored":
-                urls = get_urls("dishonored")
+                urls = get_hop1_urls(get_hop0_urls("dishonored"))
             case "dragonage":
-                urls = get_urls("dragonage")
+                urls = get_hop1_urls(get_hop0_urls("dragonage"))
             case "forgottenrealms":
-                urls = get_urls("forgottenrealms")
+                urls = get_hop1_urls(get_hop0_urls("forgottenrealms"))
             case "masseffect":
-                urls = get_urls("masseffect")
+                urls = get_hop1_urls(get_hop0_urls("masseffect"))
             case "residentevil":
-                urls = get_urls("residentevil")
+                urls = get_hop1_urls(get_hop0_urls("residentevil"))
             case _:
                 help_message()
 