Add scihub_knapsack.py

This commit is contained in:
Jeffrey Serio 2024-08-04 17:18:24 -05:00
parent 54ad29fab8
commit d469edbca1
4 changed files with 241 additions and 11 deletions

42
bin/fetch_scihub_infohashes.py Executable file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env python3
"""fetch_scihub_infohashes.py
Description:
This script fetches the infohashes of all Sci Hub torrents and writes them to a
plaintext file. The plaintext file is intended to be appended to a bittorrent
tracker whitelist. E.g., /etc/opentracker/whitelist.txt.
Optionally set the TORRENT_JSON_URL for the Sci Hub torrent health checker, or
run the script with no arguments to use the default.
Default health check URL:
https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json
Usage:
fetch_scihub_infohashes.py [TORRENT_JSON_URL]
fetch_scihub_infohashes.py -h
Options:
-h, --help show this help message and exit.
"""
import json
from pathlib import Path
import requests
from docopt import docopt
if __name__ == "__main__":
    # Script entry point: download the torrent health JSON and dump one
    # infohash per line into scihub_torrent_infohashes.txt in the current
    # working directory.
    args = docopt(__doc__)  # type: ignore

    # Fall back to the default health-check URL when no argument is given.
    url = (
        args["TORRENT_JSON_URL"]
        or "https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json"
    )

    response = requests.get(url, timeout=60)
    # Fail with a clear HTTP error instead of a confusing JSON decode error
    # when the server answers with a 4xx/5xx page.
    response.raise_for_status()
    json_data = json.loads(response.text)

    # Single quotes inside the replacement field keep this f-string valid on
    # Python versions older than 3.12.
    torrent_infohashes = [f"{x['infohash']}\n" for x in json_data]

    output_path = Path.cwd().joinpath("scihub_torrent_infohashes.txt")
    with open(output_path, "w", encoding="utf-8") as tf:
        tf.writelines(torrent_infohashes)

View File

@ -46,7 +46,7 @@ def human_bytes(bites: int) -> str:
if __name__ == "__main__": if __name__ == "__main__":
args = docopt(__doc__) args = docopt(__doc__) # type: ignore
# Initialize client and login # Initialize client and login
qb = Client(args["HOSTNAME"]) qb = Client(args["HOSTNAME"])
@ -55,13 +55,13 @@ if __name__ == "__main__":
# get total_completed_bytes # get total_completed_bytes
completed_torrent_sizes = [] completed_torrent_sizes = []
for torrent in qb.torrents(): for torrent in qb.torrents():
if torrent["state"] == "queuedUP" or torrent["state"] == "pausedUP": # type: ignore if torrent["state"] == "queuedUP" or torrent["state"] == "pausedUP": # type: ignore
completed_torrent_sizes.append(torrent["total_size"]) # type: ignore completed_torrent_sizes.append(torrent["total_size"]) # type: ignore
total_completed_bytes = sum(completed_torrent_sizes) total_completed_bytes = sum(completed_torrent_sizes)
# get total_added_bytes # get total_added_bytes
total_added_bytes = sum([torrent["total_size"] for torrent in torrents]) # type: ignore total_added_bytes = sum([torrent["total_size"] for torrent in qb.torrents()]) # type: ignore
# print the results # print the results
print(f"\nTotal completed size: {human_bytes(total_completed_bytes)}") print(f"\nTotal completed size: {human_bytes(total_completed_bytes)}")

180
bin/scihub_knapsack.py Executable file
View File

@ -0,0 +1,180 @@
#!/usr/bin/env python3
"""scihub_knapsack.py
Description:
This script will add torrents to a qBittorrent instance until a specified size
limit is reached.
By default, the larger torrents are prioritized in descending order, but the
script can be run with the --smaller flag to prioritize smaller torrents in
ascending order.
The script will select only torrents with at most <max_seeders> seeders.
Usage:
scihub_knapsack.py [--smaller] [--dry-run] -H <hostname> -U <username> -P <password> -S <size> -s <max_seeders>
scihub_knapsack.py -h
Examples:
scihub_knapsack.py -H http://localhost:8080 -U admin -P adminadmin -S 42T -s 4
scihub_knapsack.py --smaller -H https://qbt.hello.world -U admin -P adminadmin -S 2.2T -s 2
Options:
--smaller Prioritize from the smallest torrent sizes and work upward
to larger sizes. Default is to prioritize larger sizes.
--dry-run Only print the torrent names, total number of torrents, and
their total combined size instead of adding them to the
qBittorrent instance.
-H <hostname> Hostname of the server where the qBittorrent instance is
running.
-U <username> Username of the user to login to the qBittorrent instance.
-P <password> Password of the user to login to the qBittorrent instance.
-S <size> The maximum size, in GiB or TiB, of the knapsack to add Sci
Hub torrents to. Must be a positive integer or float. Must
have either G or T on the end, which represents GiB or TiB.
-s <max_seeders> Select torrents with <max_seeders> seeders. <max_seeders>
is a positive integer argument.
"""
import json
import requests
from docopt import docopt
from qbittorrent import Client
def get_torrent_health_data(
    url: str = "https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json",
) -> list[dict]:
    """
    Fetch Sci Hub torrent health checker data from the given URL. The URL
    should refer to a JSON-formatted file.

    Arguments:
        url: str -- Location of the JSON file (Default = the public torrent
                    health checker URL)

    Return value:
        The decoded JSON document.

    Raises:
        requests.HTTPError if the server responds with an error status.
    """
    response = requests.get(url, timeout=60)
    # Surface HTTP failures directly rather than letting an error page reach
    # the JSON parser.
    response.raise_for_status()
    return json.loads(response.text)
def convert_size_to_bytes(size: str) -> int:
    """
    Convert the given size string to bytes.

    The string must be a positive integer or float followed by "G" (GiB) or
    "T" (TiB).

    Example: 42G --> 45097156608 bytes
    Example: 2.2T --> 2418925581107 bytes (fractions of a byte are dropped)

    Arguments:
        size: str -- The size, e.g. "42G" or "2.2T"

    Return value:
        The size as a whole number of bytes.

    Raises:
        ValueError if the suffix is not G or T, or if the numeric part is
        missing, not a number, or not positive.
    """
    multipliers = {"G": 1024**3, "T": 1024**4}

    text = size.strip()
    multiplier = multipliers.get(text[-1:])
    if multiplier is None:
        raise ValueError(f"Size must end with G or T: {size!r}")

    number = text[:-1]
    try:
        # Prefer exact integer arithmetic; only fall back to float for
        # fractional sizes such as "2.2".
        try:
            amount = int(number)
        except ValueError:
            amount = float(number)
    except ValueError:
        raise ValueError(f"Size must be a number followed by G or T: {size!r}") from None

    # The chained comparison also rejects NaN and infinity.
    if not (0 < amount < float("inf")):
        raise ValueError(f"Size must be a positive number: {size!r}")

    return int(amount * multiplier)
def human_bytes(bites: int) -> str:
    """
    Convert a byte count to a human-readable string.

    Counts below 1 KiB are reported as a plain byte count ("1 byte",
    "512 bytes"); larger counts are scaled to KiB, MiB, GiB, or TiB and shown
    with two decimal places.

    Example: 45097156608 bytes -> 42.00 GiB

    Arguments:
        bites: int -- The number of bytes

    Return value:
        The formatted size string.
    """
    # Ordered from largest to smallest so the first match is the best unit.
    units = (
        ("TiB", 1024**4),
        ("GiB", 1024**3),
        ("MiB", 1024**2),
        ("KiB", 1024),
    )
    for suffix, factor in units:
        if bites >= factor:
            return "{0:.2f} {1}".format(bites / factor, suffix)

    # Only exactly one byte is singular; zero and everything else is plural.
    return "{0} {1}".format(bites, "byte" if bites == 1 else "bytes")
def get_knapsack_weight(knapsack: list[dict]) -> str:
    """
    Report the combined size of every torrent in the knapsack as a
    human-readable string.
    """
    total_bytes = 0
    for item in knapsack:
        total_bytes += item["size_bytes"]
    return human_bytes(total_bytes)
def fill_knapsack(
    max_seeders: int, knapsack_size: int, smaller: bool = False
) -> list[dict]:
    """
    Fill the knapsack.

    Torrents are considered in size order (largest first by default) and
    each one is added if it still fits in the remaining space. A torrent
    that does not fit is skipped so that later, smaller torrents can still
    be packed.

    Arguments:
        max_seeders: int   -- Select only torrents with at most this number
                              of seeders
        knapsack_size: int -- The size in bytes of the knapsack
        smaller: bool      -- Prioritize smaller sized torrents
                              (Default = False)

    Return value:
        A list of dictionaries that represent the torrents.
    """
    # Torrents with at most <max_seeders> seeders.
    candidates = [
        t for t in get_torrent_health_data() if t["seeders"] <= max_seeders
    ]

    # Ascending by size_bytes when smaller is set, otherwise descending.
    candidates.sort(key=lambda t: t["size_bytes"], reverse=not smaller)

    used_bytes = 0
    knapsack = []
    for torrent in candidates:
        size_bytes = torrent["size_bytes"]
        # A torrent that fills the knapsack exactly still fits; only skip
        # those that would overflow it.
        if used_bytes + size_bytes > knapsack_size:
            continue
        used_bytes += size_bytes
        knapsack.append(torrent)
    return knapsack
if __name__ == "__main__":
    # Script entry point: parse the command line, pick the torrents that fit
    # in the knapsack, then either list them (dry run) or add them to the
    # qBittorrent instance.
    args = docopt(__doc__)  # type: ignore
    hostname = args["-H"]
    username = args["-U"]
    password = args["-P"]
    max_seeders = int(args["-s"])
    knapsack_size = convert_size_to_bytes(args["-S"])
    smaller = args["--smaller"]
    dry_run = args["--dry-run"]

    # Initialize client and login. A dry run never adds anything, so it
    # does not need to contact the qBittorrent instance at all.
    qb = None
    if not dry_run:
        qb = Client(hostname)
        qb.login(username=username, password=password)

    # Fill the knapsack
    knapsack = fill_knapsack(max_seeders, knapsack_size, smaller)

    # If it's a dry run, only print the knapsack's contents. Otherwise,
    # add the knapsack's contents to the qBittorrent instance.
    if dry_run:
        for torrent in knapsack:
            print(torrent["name"])
    else:
        for torrent in knapsack:
            qb.download_from_link(torrent["link"], category="scihub")
            # Single quotes inside the replacement field keep this f-string
            # valid on Python versions older than 3.12.
            print(f"Added {torrent['name']}")

    # When finished, print the number of items and the combined weight of
    # all items in the knapsack.
    print("----------------")
    print(f"Count: {len(knapsack)} torrents")
    print(f"Total combined size: {get_knapsack_weight(knapsack)}")
    print("----------------")

View File

@ -9,14 +9,15 @@ instance.
MAX_SEEDERS is a positive integer argument. MAX_SEEDERS is a positive integer argument.
Usage: Usage:
seed_scihub_max_seeders.py (HOSTNAME) (USERNAME) (PASSWORD) (MAX_SEEDERS) seed_scihub_max_seeders.py [--only-count] (HOSTNAME) (USERNAME) (PASSWORD) (MAX_SEEDERS)
seed_scihub_max_seeders.py -h seed_scihub_max_seeders.py -h
Examples: Examples:
seed_scihub_max_seeders.py "http://localhost:8080" "admin" "adminadmin" 4 seed_scihub_max_seeders.py "http://localhost:8080" "admin" "adminadmin" 4
seed_scihub_max_seeders.py "https://cat.seedhost.eu/lol/qbittorrent" "lol" "pw" 3 seed_scihub_max_seeders.py --only-count "https://cat.seedhost.eu/lol/qbittorrent" "lol" "pw" 3
Options: Options:
--only-count do not add torrents, but only print the number of torrents with MAX_SEEDERS
-h, --help show this help message and exit -h, --help show this help message and exit
""" """
@ -29,7 +30,7 @@ from qbittorrent import Client
if __name__ == "__main__": if __name__ == "__main__":
args = docopt(__doc__) args = docopt(__doc__) # type: ignore
qb = Client(args["HOSTNAME"]) qb = Client(args["HOSTNAME"])
qb.login(username=args["USERNAME"], password=args["PASSWORD"]) qb.login(username=args["USERNAME"], password=args["PASSWORD"])
@ -39,7 +40,14 @@ if __name__ == "__main__":
response = requests.get(TORRENT_HEALTH_URL, timeout=60) response = requests.get(TORRENT_HEALTH_URL, timeout=60)
json_data = json.loads(response.text) json_data = json.loads(response.text)
for item in json_data: if args["--only-count"]:
if item["seeders"] <= int(args["MAX_SEEDERS"]): sum = 0
qb.download_from_link(item["link"], category="scihub") for item in json_data:
print(f"Added {os.path.basename(item["name"])}") if item["seeders"] <= int(args["MAX_SEEDERS"]):
sum += 1
print(f"Number of torrents with <= {int(args["MAX_SEEDERS"])} seeders: {sum}")
else:
for item in json_data:
if item["seeders"] <= int(args["MAX_SEEDERS"]):
qb.download_from_link(item["link"], category="scihub")
print(f"Added {os.path.basename(item["name"])}")