mirror of
https://codeberg.org/hyperreal/bin
synced 2024-11-25 10:23:42 +01:00
Get all URLs in fandom page
This commit is contained in:
parent
39c7c68be7
commit
8c1e1e3afc
15
archive-to-megasync
Executable file
15
archive-to-megasync
Executable file
@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Archive recently modified sync directories.
#
# For every directory listed in archive_maybe: if at least one regular file
# under it was modified within the last 24 hours (find -mtime -1), run
# `create-archive` on it and move "<dir>-<YYYYMMDD>.tar.gz" into
# "${SYNC_DIR}/archived/".
#
# NOTE(review): `create-archive` is an external helper not shown here; this
# script assumes it writes "<dir>-<YYYYMMDD>.tar.gz" next to the directory
# -- confirm against that helper.

SYNC_DIR="${HOME}/sync"
ARCHIVE_DIR="${SYNC_DIR}/archived"

archive_maybe=(
    "${SYNC_DIR}/org"
    "${SYNC_DIR}/org-roam"
    "${SYNC_DIR}/sites"
)

# Without an existing target directory, `mv` would rename the tarball to a
# plain file called "archived" instead of moving it into a folder.
mkdir -p "${ARCHIVE_DIR}" || exit 1

for dir in "${archive_maybe[@]}"; do
    # Only existence matters, so stop at the first match instead of counting
    # every recently modified file in the tree.
    if [ -n "$(find "$dir" -type f -mtime -1 -print -quit)" ]; then
        # Move the tarball only when the archive step actually succeeded.
        if create-archive "$dir"; then
            mv -v "$dir-$(date '+%Y%m%d').tar.gz" "${ARCHIVE_DIR}/"
        else
            echo "create-archive failed for ${dir}; not moving archive" >&2
        fi
    fi
done
|
@ -50,9 +50,9 @@ import requests
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
def get_urls(fandom: str) -> list():
|
def get_hop0_urls(fandom: str) -> list():
|
||||||
starting_url = "https://" + fandom + ".fandom.com/wiki/Local_Sitemap"
|
starting_url = "https://" + fandom + ".fandom.com/wiki/Local_Sitemap"
|
||||||
urls = [starting_url]
|
hop0_urls = [starting_url]
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
reqs = requests.get(starting_url)
|
reqs = requests.get(starting_url)
|
||||||
@ -80,9 +80,24 @@ def get_urls(fandom: str) -> list():
|
|||||||
+ mw_allpages_nav.find_all("a")[1].get("href")
|
+ mw_allpages_nav.find_all("a")[1].get("href")
|
||||||
)
|
)
|
||||||
|
|
||||||
urls.append(starting_url)
|
hop0_urls.append(starting_url)
|
||||||
|
|
||||||
return urls
|
return hop0_urls
|
||||||
|
|
||||||
|
|
||||||
|
def get_hop1_urls(hop0_urls: list) -> list[str]:
    """Collect the wiki links found on each of the given pages.

    Every URL in *hop0_urls* is fetched with ``requests.get`` and parsed with
    BeautifulSoup. Each ``<a>`` element whose ``href`` starts with ``/wiki``
    is turned into an absolute URL by prefixing it with the part of the page
    URL that precedes the first ``/wiki``.

    Args:
        hop0_urls: URLs of the pages to scan for links.

    Returns:
        The absolute URLs in the order they were encountered. Duplicates are
        kept. An empty input yields an empty list without any network access.
    """
    hop1_urls = []

    for url in hop0_urls:
        reqs = requests.get(url)
        soup = BeautifulSoup(reqs.text, "html.parser")
        # Everything before the first "/wiki" is used as the prefix for the
        # site-relative hrefs found on this page.
        site_root = url.split(sep="/wiki")[0]

        for anchor in soup.find_all("a"):
            # Look the attribute up once; anchors without an href give None.
            href = anchor.get("href")
            if href and href.startswith("/wiki"):
                hop1_urls.append(site_root + href)

    return hop1_urls
|
||||||
|
|
||||||
|
|
||||||
def help_message():
|
def help_message():
|
||||||
@ -98,17 +113,17 @@ if __name__ == "__main__":
|
|||||||
if len(sys.argv) > 1:
|
if len(sys.argv) > 1:
|
||||||
match sys.argv[1]:
|
match sys.argv[1]:
|
||||||
case "cyberpunk":
|
case "cyberpunk":
|
||||||
urls = get_urls("cyberpunk")
|
urls = get_hop1_urls(get_hop0_urls("cyberpunk"))
|
||||||
case "dishonored":
|
case "dishonored":
|
||||||
urls = get_urls("dishonored")
|
urls = get_hop1_urls(get_hop0_urls("dishonored"))
|
||||||
case "dragonage":
|
case "dragonage":
|
||||||
urls = get_urls("dragonage")
|
urls = get_hop1_urls(get_hop0_urls("dragonage"))
|
||||||
case "forgottenrealms":
|
case "forgottenrealms":
|
||||||
urls = get_urls("forgottenrealms")
|
urls = get_hop1_urls(get_hop0_urls("forgottenrealms"))
|
||||||
case "masseffect":
|
case "masseffect":
|
||||||
urls = get_urls("masseffect")
|
urls = get_hop1_urls(get_hop0_urls("masseffect"))
|
||||||
case "residentevil":
|
case "residentevil":
|
||||||
urls = get_urls("residentevil")
|
urls = get_hop1_urls(get_hop0_urls("residentevil"))
|
||||||
case _:
|
case _:
|
||||||
help_message()
|
help_message()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user