print-wp-sources/print_wp_sources.py

22 lines
505 B
Python
Raw Normal View History

2024-12-04 19:34:45 +01:00
import sys
2024-12-04 19:49:49 +01:00
from urllib.parse import unquote
2024-12-04 19:34:45 +01:00
from urllib.request import urlopen
from bs4 import BeautifulSoup
def main():
    """Print the external links found in an English Wikipedia article.

    The article name is taken from the single command-line argument. The page
    ``https://en.wikipedia.org/wiki/<article>`` is downloaded, and the
    percent-decoded ``href`` of every ``<a>`` element whose class attribute is
    ``"external text"`` is printed, one per line.

    Raises:
        SystemExit: with a usage message when the script is not called with
            exactly one argument.

    Errors raised by ``urlopen`` (network failure, HTTP error status) are not
    caught and propagate to the caller.
    """
    # Imported here because only this function needs it; the module-level
    # imports are left untouched.
    from urllib.parse import quote

    if len(sys.argv) != 2:
        # sys.exit is used instead of the bare ``exit`` builtin, which is
        # injected by the ``site`` module and is not always available.
        # NOTE(review): the usage text says "dl_wp_pdf" although this file is
        # print_wp_sources.py -- confirm the intended command name.
        sys.exit("Usage: dl_wp_pdf ARTICLE_NAME")

    article = sys.argv[1]

    # Percent-encode characters that are illegal in a URL (spaces, non-ASCII
    # letters, ...), which would otherwise make urlopen fail. Reserved
    # characters and "%" are kept as-is so that names which already formed a
    # valid URL (including pre-encoded ones) produce the same request as before.
    url_safe = "!#$%&'()*+,/:;=?@[]~"
    url = f"https://en.wikipedia.org/wiki/{quote(article, safe=url_safe)}"

    # Close the HTTP response deterministically once the body has been read.
    with urlopen(url) as response:
        text = response.read()

    soup = BeautifulSoup(text, "html.parser")
    for link in soup.find_all("a", attrs={"class": "external text"}):
        href = link.get("href")
        if href is None:
            # An anchor without an href has nothing to print; unquote(None)
            # would raise a TypeError.
            continue
        print(unquote(href))
2024-12-04 19:34:45 +01:00
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()