mirror of
https://codeberg.org/hyperreal/bin
synced 2024-11-01 08:33:06 +01:00
Get all URLs in fandom page
This commit is contained in:
parent
39c7c68be7
commit
8c1e1e3afc
15
archive-to-megasync
Executable file
15
archive-to-megasync
Executable file
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env bash
#
# archive-to-megasync: archive sync directories that changed recently.
#
# For every directory listed in archive_maybe that contains at least one
# regular file matched by `find -mtime -1` (modified less than a day ago),
# run `create-archive` on it and move the resulting dated tarball into
# ${SYNC_DIR}/archived/.
#
# NOTE(review): `create-archive` is an external command not defined in this
# script; it is assumed to write "<dir>-YYYYMMDD.tar.gz" next to <dir> --
# confirm against that tool.

SYNC_DIR="${HOME}/sync"
ARCHIVE_DIR="${SYNC_DIR}/archived"

archive_maybe=(
    "${HOME}/sync/org"
    "${HOME}/sync/org-roam"
    "${HOME}/sync/sites"
)

for dir in "${archive_maybe[@]}"; do
    # A missing directory would only make find print an error; skip it
    # explicitly so the log says why nothing was archived.
    if [ ! -d "$dir" ]; then
        echo "archive-to-megasync: skipping missing directory: $dir" >&2
        continue
    fi

    # -print -quit stops at the first recently-modified file instead of
    # walking the whole tree just to count matches.
    if [ -n "$(find "$dir" -type f -mtime -1 -print -quit)" ]; then
        # Only move the tarball when the archive step actually succeeded.
        if create-archive "$dir"; then
            # mv into a non-existent directory fails, so make sure it exists.
            mkdir -p "${ARCHIVE_DIR}"
            mv -v "$dir-$(date '+%Y%m%d').tar.gz" "${ARCHIVE_DIR}/"
        else
            echo "archive-to-megasync: create-archive failed for: $dir" >&2
        fi
    fi
done
|
@ -50,9 +50,9 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def get_urls(fandom: str) -> list():
|
||||
def get_hop0_urls(fandom: str) -> list():
|
||||
starting_url = "https://" + fandom + ".fandom.com/wiki/Local_Sitemap"
|
||||
urls = [starting_url]
|
||||
hop0_urls = [starting_url]
|
||||
|
||||
while True:
|
||||
reqs = requests.get(starting_url)
|
||||
@ -80,9 +80,24 @@ def get_urls(fandom: str) -> list():
|
||||
+ mw_allpages_nav.find_all("a")[1].get("href")
|
||||
)
|
||||
|
||||
urls.append(starting_url)
|
||||
hop0_urls.append(starting_url)
|
||||
|
||||
return urls
|
||||
return hop0_urls
|
||||
|
||||
|
||||
def get_hop1_urls(hop0_urls: list) -> list[str]:
    """Collect the wiki links found on each of the given pages.

    Every URL in *hop0_urls* is fetched with ``requests.get`` and parsed with
    BeautifulSoup. Each ``<a>`` element whose ``href`` starts with ``/wiki``
    is turned into an absolute URL by prefixing the part of the page URL that
    precedes ``/wiki``.

    Args:
        hop0_urls: Page URLs to fetch and scan for links.

    Returns:
        The absolute URLs in first-seen order, each listed once. An empty
        input yields an empty list without any network access.
    """
    # A dict preserves insertion order while dropping repeats; the same link
    # can appear on several of the scanned pages.
    seen: dict[str, None] = {}

    for url in hop0_urls:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")
        # Everything before "/wiki" is the prefix that relative hrefs need.
        site_root = url.split(sep="/wiki")[0]

        for anchor in soup.find_all("a"):
            # Look the attribute up once; anchors without href yield None.
            href = anchor.get("href")
            if href and href.startswith("/wiki"):
                seen.setdefault(site_root + href, None)

    return list(seen)
|
||||
|
||||
|
||||
def help_message():
|
||||
@ -98,17 +113,17 @@ if __name__ == "__main__":
|
||||
if len(sys.argv) > 1:
|
||||
match sys.argv[1]:
|
||||
case "cyberpunk":
|
||||
urls = get_urls("cyberpunk")
|
||||
urls = get_hop1_urls(get_hop0_urls("cyberpunk"))
|
||||
case "dishonored":
|
||||
urls = get_urls("dishonored")
|
||||
urls = get_hop1_urls(get_hop0_urls("dishonored"))
|
||||
case "dragonage":
|
||||
urls = get_urls("dragonage")
|
||||
urls = get_hop1_urls(get_hop0_urls("dragonage"))
|
||||
case "forgottenrealms":
|
||||
urls = get_urls("forgottenrealms")
|
||||
urls = get_hop1_urls(get_hop0_urls("forgottenrealms"))
|
||||
case "masseffect":
|
||||
urls = get_urls("masseffect")
|
||||
urls = get_hop1_urls(get_hop0_urls("masseffect"))
|
||||
case "residentevil":
|
||||
urls = get_urls("residentevil")
|
||||
urls = get_hop1_urls(get_hop0_urls("residentevil"))
|
||||
case _:
|
||||
help_message()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user