From 9c331dbdda368e866c0bbc204ffb71abbc0f2437 Mon Sep 17 00:00:00 2001 From: Jeffrey Serio Date: Wed, 4 Dec 2024 12:49:49 -0600 Subject: [PATCH] Use unquote to decode URLs --- print_wp_sources.py | 3 ++- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/print_wp_sources.py b/print_wp_sources.py index 041d57e..1a6ac46 100644 --- a/print_wp_sources.py +++ b/print_wp_sources.py @@ -1,4 +1,5 @@ import sys +from urllib.parse import unquote from urllib.request import urlopen from bs4 import BeautifulSoup @@ -13,7 +14,7 @@ def main(): text = urlopen(url).read() soup = BeautifulSoup(text, "html.parser") for link in soup.find_all("a", attrs={"class": "external text"}): - print(link.get("href")) + print(unquote(link.get("href"))) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 3142f4b..1f9823b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "print-wp-sources" -version = "0.2" +version = "0.3" authors = [ { name="Jeffrey Serio", email="hyperreal@fedoraproject.org" }, ]