admin-scripts/scihub_knapsack.py

197 lines
6.8 KiB
Python
Raw Normal View History

2024-08-05 00:18:24 +02:00
#!/usr/bin/env python3
"""scihub_knapsack.py
Description:
This script will add torrents to a qBittorrent instance until a specified size
limit is reached.
By default, the larger torrents are prioritized in descending order, but the
script can be run with the --smaller flag to prioritize smaller torrents in
ascending order.
The script will select only torrents that have at most <max_seeders> seeders.
Usage:
scihub_knapsack.py [--smaller] [--dry-run] -H <hostname> -U <username> -P <password> -S <size> -s <max_seeders>
scihub_knapsack.py -h
Examples:
scihub_knapsack.py -H http://localhost:8080 -U admin -P adminadmin -S 42T -s 10
scihub_knapsack.py --smaller -H https://qbt.hello.world -U admin -P adminadmin -S 2.2T -s 10
Options:
--smaller Prioritize from the smallest torrent sizes and work upward
to larger sizes. Default is to prioritize larger sizes.
--dry-run Only print the torrent names, total number of torrents, and
their total combined size instead of adding them to the
qBittorrent instance.
-H <hostname> Hostname of the server where the qBittorrent instance is
running.
-U <username> Username of the user to login to the qBittorrent instance.
-P <password> Password of the user to login to the qBittorrent instance.
-S <size> The maximum size, in GiB or TiB, of the knapsack to add Sci
Hub torrents to. Must be a positive integer or float. Must
have either G or T on the end, which represents GiB or TiB.
-s <max_seeders> Select torrents with less than or equal to <max_seeders>
seeders. <max_seeders> is a positive integer.
"""
import json
import requests
from docopt import docopt
from qbittorrent import Client
def get_torrent_health_data() -> list[dict]:
    """
    Fetch the Sci Hub torrent health checker data.

    Downloads the JSON document published at TORRENT_HEALTH_URL and returns
    the decoded payload.

    Return value:
        The decoded JSON payload. The rest of this script reads the
        "seeders", "size_bytes" and "link" keys of each entry.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or a timeout.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    TORRENT_HEALTH_URL = (
        "https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json"
    )
    response = requests.get(TORRENT_HEALTH_URL, timeout=60)
    # Fail loudly on an HTTP error instead of trying to parse an error page
    # as JSON, which would surface as a confusing decode error.
    response.raise_for_status()
    return json.loads(response.text)
def convert_size_to_bytes(size: str) -> int:
    """
    Convert the given size string to bytes.

    The string must be a positive integer or float immediately followed by
    G (GiB) or T (TiB). Fractional byte counts are truncated.

    Example: 42G --> 45097156608 bytes
    Example: 2.2T --> 2418925581107 bytes

    Raises:
        ValueError: if the suffix is not G or T, or if the numeric part is
            missing, malformed, not finite, or not greater than zero.
    """
    multipliers = {"G": 1024**3, "T": 1024**4}

    text = size.strip()
    # Slicing (rather than indexing) keeps an empty string from raising
    # IndexError; it simply fails the suffix check below.
    unit = text[-1:]
    if unit not in multipliers:
        raise ValueError(
            f"Invalid size {size!r}: must end with G (GiB) or T (TiB)."
        )

    try:
        amount = float(text[:-1])
    except ValueError:
        raise ValueError(
            f"Invalid size {size!r}: the part before {unit} must be a number."
        ) from None

    # The chained comparison also rejects NaN (every comparison is False)
    # and infinity.
    if not 0 < amount < float("inf"):
        raise ValueError(f"Invalid size {size!r}: must be a positive number.")

    return int(amount * multipliers[unit])
def human_bytes(bites: int) -> str:
    """
    Convert a byte count to a human readable string.

    Values below 1 KiB are reported in bytes (singular only for exactly 1);
    larger values are reported in KiB, MiB, GiB or TiB with two decimals.
    TiB is the largest unit used.

    Example: 45097156608 bytes -> 42.00 GiB
    Example: 512 bytes -> 512 bytes
    Example: 1 byte -> 1 byte
    """
    size = float(bites)

    # NaN compares False against everything; keep returning an empty string
    # for it, as the original catch-all case did.
    if size != size:
        return ""

    if size < 1024:
        unit = "byte" if size == 1 else "bytes"
        # "g" drops the trailing ".0" so whole counts print as integers.
        return f"{size:g} {unit}"

    # Pick the first unit whose upper bound is above the value.
    for exponent, unit in ((1, "KiB"), (2, "MiB"), (3, "GiB")):
        if size < 1024.0 ** (exponent + 1):
            return f"{size / 1024.0**exponent:.2f} {unit}"

    return f"{size / 1024.0**4:.2f} TiB"
def get_knapsack_weight(knapsack: list[dict]) -> str:
    """
    Return the combined size of every torrent in the knapsack, formatted as
    a human readable string by human_bytes().
    """
    total_bytes = 0
    for item in knapsack:
        total_bytes += item["size_bytes"]
    return human_bytes(total_bytes)
def fill_knapsack(
    max_seeders: int, knapsack_size: int, smaller: bool = False
) -> list[dict]:
    """
    Fill the knapsack.

    Torrents are fetched with get_torrent_health_data(), filtered by seeder
    count, sorted by size and then added greedily: each torrent is taken if
    it still fits in the remaining space. A torrent that does not fit is
    skipped, so one oversized torrent no longer prevents smaller ones from
    being added. The combined size of the result never exceeds
    knapsack_size, but may be exactly equal to it.

    Arguments:
        max_seeders: int   -- Select only torrents with less than or equal to
                              this number of seeders
        knapsack_size: int -- The size in bytes of the knapsack
        smaller: bool      -- Prioritize smaller sized torrents
                              (Default = False)

    Return value:
        A list of dictionaries that represent the torrents, in the order
        they were selected.
    """
    # Torrents with less than or equal to <max_seeders> seeders.
    candidates = [
        t for t in get_torrent_health_data() if t["seeders"] <= max_seeders
    ]

    # Ascending by size when smaller is set, descending otherwise.
    candidates.sort(key=lambda t: t["size_bytes"], reverse=not smaller)

    knapsack = []
    used_bytes = 0
    for torrent in candidates:
        size = torrent["size_bytes"]
        if used_bytes + size > knapsack_size:
            if smaller:
                # Ascending order: every remaining torrent is at least this
                # large, so nothing else can fit either.
                break
            # Descending order: a smaller torrent further down may still fit.
            continue
        used_bytes += size
        knapsack.append(torrent)

    return knapsack
if __name__ == "__main__":
    # Parse the command line described by the module docstring.
    args = docopt(__doc__)  # type: ignore
    hostname = args["-H"]
    username = args["-U"]
    password = args["-P"]
    max_seeders = int(args["-s"])
    knapsack_size = convert_size_to_bytes(args["-S"])
    smaller = args["--smaller"]
    dry_run = args["--dry-run"]

    # Initialize client and login
    qb = Client(hostname)
    qb.login(username=username, password=password)

    # Fill the knapsack
    knapsack = fill_knapsack(max_seeders, knapsack_size, smaller)

    # If it's a dry run, only print the knapsack's contents. Otherwise, add
    # the knapsack's contents to the qBittorrent instance. Before attempting
    # to add anything, check that the libgen.is mirror is reachable: every
    # link is rewritten to that host, so if it is down no torrent can be
    # added and the script exits with a notice.
    if dry_run:
        for torrent in knapsack:
            print(torrent["link"])
    else:
        mirror = "libgen.is"
        down_notice = (
            f"It appears https://{mirror} is currently down. "
            "Please try again later."
        )
        try:
            # The timeout keeps an unresponsive mirror from hanging the
            # script forever.
            response = requests.get(f"https://{mirror}/", timeout=60)
        except requests.RequestException:
            # An unreachable host raises instead of returning a response;
            # report it the same way as an HTTP error status.
            raise SystemExit(down_notice) from None
        if not response.ok:
            raise SystemExit(down_notice)

        for torrent in knapsack:
            # Point links that use a legacy host at the working mirror.
            # Links that use neither legacy host are added unchanged, so the
            # count printed below matches what was actually submitted.
            link = torrent["link"]
            for legacy_host in ("gen.lib.rus.ec", "libgen.rs"):
                link = link.replace(legacy_host, mirror)
            qb.download_from_link(link, category="scihub")

    # When finished, print the number of items and the combined weight of
    # all items in the knapsack.
    print("----------------")
    print(f"Count: {len(knapsack)} torrents")
    print(f"Total combined size: {get_knapsack_weight(knapsack)}")
    print("----------------")