fix: pdf_url extraction

This commit is contained in:
Jeffrey Serio 2025-01-22 01:14:28 -06:00
parent 115855d365
commit 8bd44dfb42
3 changed files with 9 additions and 4 deletions

1
.gitignore vendored
View File

@@ -1,2 +1,3 @@
.venv .venv
dist dist
uv.lock

View File

@@ -18,10 +18,14 @@ def main():
print(":(") print(":(")
exit(f"Unfortunately, Sci-Hub doesn't have the requested document: {article}") exit(f"Unfortunately, Sci-Hub doesn't have the requested document: {article}")
elif soup.find("button"): elif soup.find("button"):
pdf_url = soup.find("button")["onclick"].split("//")[1].split("?")[0] pdf_url_stub = soup.find("button")["onclick"].split("location.href='")[1].split("?")[0] # type: ignore
article_url = f"https://{pdf_url}" if pdf_url_stub.startswith("/downloads/"):
pdf_url = f"{scihub}{pdf_url_stub}"
else:
pdf_url = f"https://{pdf_url_stub}"
filename = f"{os.getcwd()}/{os.path.basename(pdf_url)}" filename = f"{os.getcwd()}/{os.path.basename(pdf_url)}"
with requests.get(article_url, stream=True) as r: with requests.get(pdf_url, stream=True) as r:
r.raise_for_status() r.raise_for_status()
with open(filename, "wb") as f: with open(filename, "wb") as f:
for chunk in r.iter_content(chunk_size=8192): for chunk in r.iter_content(chunk_size=8192):

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "dl-sh-source" name = "dl-sh-source"
version = "0.3" version = "0.4"
authors = [ authors = [
{ name="Jeffrey Serio", email="hyperreal@fedoraproject.org" }, { name="Jeffrey Serio", email="hyperreal@fedoraproject.org" },
] ]