Use unquote to decode URLs

This commit is contained in:
Jeffrey Serio 2024-12-04 12:49:49 -06:00
parent a7f0b75a75
commit 9c331dbdda
2 changed files with 3 additions and 2 deletions

View File

@@ -1,4 +1,5 @@
import sys import sys
from urllib.parse import unquote
from urllib.request import urlopen from urllib.request import urlopen
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@@ -13,7 +14,7 @@ def main():
text = urlopen(url).read() text = urlopen(url).read()
soup = BeautifulSoup(text, "html.parser") soup = BeautifulSoup(text, "html.parser")
for link in soup.find_all("a", attrs={"class": "external text"}): for link in soup.find_all("a", attrs={"class": "external text"}):
print(link.get("href")) print(unquote(link.get("href")))
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "print-wp-sources" name = "print-wp-sources"
version = "0.2" version = "0.3"
authors = [ authors = [
{ name="Jeffrey Serio", email="hyperreal@fedoraproject.org" }, { name="Jeffrey Serio", email="hyperreal@fedoraproject.org" },
] ]