mirror of
https://codeberg.org/hyperreal/dl-sh-source
synced 2025-01-30 10:40:50 +01:00
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
import os
|
|
import sys
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
def _pdf_url_stub(button):
    """Return the link target embedded in *button*'s ``onclick`` handler.

    The handler is expected to contain ``location.href='<target>'``; the
    target is returned without any ``?query`` suffix. Returns ``None`` when
    the button has no such handler, so the caller can report a clean error
    instead of crashing with ``KeyError``/``IndexError``.
    """
    onclick = button.get("onclick") or ""
    _, marker, target = onclick.partition("location.href='")
    if not marker:
        return None
    # Cut at the closing quote first so a link without a query string does
    # not keep the trailing "'" of the JavaScript string literal.
    stub = target.split("'")[0].split("?")[0]
    return stub or None


def _resolve_pdf_url(scihub, stub):
    """Turn the link target scraped from the page into an absolute URL."""
    if stub.startswith(("http://", "https://")):
        return stub
    if stub.startswith("//"):
        # Protocol-relative link: only the scheme is missing.
        return f"https:{stub}"
    if stub.startswith("/"):
        # Site-relative path (e.g. /downloads/...): resolve against the mirror.
        return f"{scihub}{stub}"
    return f"https://{stub}"


def main():
    """Download the PDF that a Sci-Hub mirror offers for an article.

    Command line: ``dl-sh-source SCIHUB_URL ARTICLE_URL``.

    Fetches ``SCIHUB_URL/ARTICLE_URL``, locates the download button on the
    returned page, and streams the linked file into the current working
    directory, printing the saved path on success.

    Raises:
        SystemExit: on wrong usage, when the page reports that the document
            is unavailable, or when no download link can be found.
        requests.RequestException: on network failures, timeouts, or a
            non-success HTTP status for the PDF download.
    """
    if len(sys.argv) != 3:
        sys.exit("Usage: dl-sh-source SCIHUB_URL ARTICLE_URL")

    # Without a timeout a stalled server would hang the script forever.
    timeout = 60

    # Drop trailing slashes so joined URLs do not contain "//".
    scihub = sys.argv[1].rstrip("/")
    article = sys.argv[2]

    # The status code is deliberately not checked here: the page body is what
    # tells us whether the document is available.
    page = requests.get(f"{scihub}/{article}", timeout=timeout).text
    soup = BeautifulSoup(page, "html.parser")

    if str(soup.find("p")) == '<p id="smile">:(</p>':
        print(":(")
        sys.exit(f"Unfortunately, Sci-Hub doesn't have the requested document: {article}")

    button = soup.find("button")
    stub = _pdf_url_stub(button) if button is not None else None
    if stub is None:
        sys.exit("An as-yet unknown error occurred. Sorry for the inconvenience.")

    pdf_url = _resolve_pdf_url(scihub, stub)
    filename = os.path.join(os.getcwd(), os.path.basename(pdf_url))

    # Stream to disk in chunks so large PDFs are never held fully in memory.
    with requests.get(pdf_url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(filename, "wb") as out:
            for chunk in response.iter_content(chunk_size=8192):
                out.write(chunk)

    print(f"--> {filename}")
|
|
|
|
|
|
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|