From 8bd44dfb425a486be9a6594fd9a13c17c5e70323 Mon Sep 17 00:00:00 2001 From: Jeffrey Serio Date: Wed, 22 Jan 2025 01:14:28 -0600 Subject: [PATCH] fix: pdf_url extraction --- .gitignore | 1 + dl_sh_source.py | 10 +++++++--- pyproject.toml | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index ceb0e87..36b9f1a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .venv dist +uv.lock diff --git a/dl_sh_source.py b/dl_sh_source.py index 58e9269..1a0741f 100644 --- a/dl_sh_source.py +++ b/dl_sh_source.py @@ -18,10 +18,14 @@ def main(): print(":(") exit(f"Unfortunately, Sci-Hub doesn't have the requested document: {article}") elif soup.find("button"): - pdf_url = soup.find("button")["onclick"].split("//")[1].split("?")[0] - article_url = f"https://{pdf_url}" + pdf_url_stub = soup.find("button")["onclick"].split("location.href='")[1].split("?")[0] # type: ignore + if pdf_url_stub.startswith("/downloads/"): + pdf_url = f"{scihub}{pdf_url_stub}" + else: + pdf_url = f"https://{pdf_url_stub}" + filename = f"{os.getcwd()}/{os.path.basename(pdf_url)}" - with requests.get(article_url, stream=True) as r: + with requests.get(pdf_url, stream=True) as r: r.raise_for_status() with open(filename, "wb") as f: for chunk in r.iter_content(chunk_size=8192): diff --git a/pyproject.toml b/pyproject.toml index daada53..cfcffa0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dl-sh-source" -version = "0.3" +version = "0.4" authors = [ { name="Jeffrey Serio", email="hyperreal@fedoraproject.org" }, ]