Add scihub_knapsack.py

2024-08-04 17:18:24 -05:00 · 2024-08-04 17:18:24 -05:00 · d469edbca1
commit d469edbca1
parent 54ad29fab8
4 changed files with 241 additions and 11 deletions
--- a/bin/fetch_scihub_infohashes.py
+++ b/bin/fetch_scihub_infohashes.py
@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+"""fetch_scihub_infohashes.py
+
+Description:
+This script fetches the infohashes of all Sci Hub torrents and writes them to a
+plaintext file. The plaintext file is intended to be appended to a bittorrent
+tracker whitelist. E.g., /etc/opentracker/whitelist.txt.
+
+Optionally set the TORRENT_JSON_URL for the Sci Hub torrent health checker, or
+run the script with no arguments to use the default.
+
+Default health check URL:
+https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json
+
+Usage:
+    fetch_scihub_infohashes.py [TORRENT_JSON_URL]
+    fetch_scihub_infohashes.py -h
+
+Options:
+    -h, --help      show this help message and exit.
+"""
+
+import json
+from pathlib import Path
+
+import requests
+from docopt import docopt
+
+if __name__ == "__main__":
+    # Entry point: download the torrent health JSON and write one infohash
+    # per line to scihub_torrent_infohashes.txt in the current directory.
+    args = docopt(__doc__)  # type: ignore
+    url = (
+        args["TORRENT_JSON_URL"]
+        or "https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json"
+    )
+    response = requests.get(url, timeout=60)
+    # Stop on an HTTP error instead of trying to parse an error page as JSON
+    # and overwriting the whitelist file with garbage.
+    response.raise_for_status()
+    json_data = json.loads(response.text)
+    torrent_infohashes = [f"{x["infohash"]}\n" for x in json_data]
+
+    output_path = Path.cwd().joinpath("scihub_torrent_infohashes.txt")
+    with open(output_path, "w", encoding="utf-8") as tf:
+        tf.writelines(torrent_infohashes)
--- a/bin/qbt_sum_size.py
+++ b/bin/qbt_sum_size.py
@ -46,7 +46,7 @@ def human_bytes(bites: int) -> str:


 if __name__ == "__main__":
-    args = docopt(__doc__)
+    args = docopt(__doc__)  # type: ignore

    # Initialize client and login
    qb = Client(args["HOSTNAME"])
@ -61,7 +61,7 @@ if __name__ == "__main__":
    total_completed_bytes = sum(completed_torrent_sizes)

    # get total_added_bytes
-    total_added_bytes = sum([torrent["total_size"] for torrent in torrents]) # type: ignore
+    total_added_bytes = sum([torrent["total_size"] for torrent in qb.torrents()])  # type: ignore

    # print the results
    print(f"\nTotal completed size: {human_bytes(total_completed_bytes)}")
--- a/bin/scihub_knapsack.py
+++ b/bin/scihub_knapsack.py
@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+
+"""scihub_knapsack.py
+
+Description:
+This script will add torrents to a qBittorrent instance until a specified size
+limit is reached.
+
+By default, the larger torrents are prioritized in descending order, but the
+script can be run with the --smaller flag to prioritize smaller torrents in
+ascending order.
+
+The script will select only torrents with at most <max_seeders> seeders.
+
+Usage:
+    scihub_knapsack.py [--smaller] [--dry-run] -H <hostname> -U <username> -P <password> -S <size> -s <max_seeders>
+    scihub_knapsack.py -h
+
+Examples:
+    scihub_knapsack.py -H http://localhost:8080 -U admin -P adminadmin -S 42T -s 4
+    scihub_knapsack.py --smaller -H https://qbt.hello.world -U admin -P adminadmin -S 2.2T -s 4
+
+Options:
+    --smaller           Prioritize from the smallest torrent sizes and work upward
+                        to larger sizes. Default is to prioritize larger sizes.
+    --dry-run           Only print the torrent names, total number of torrents, and
+                        their total combined size instead of adding them to the
+                        qBittorrent instance.
+    -H <hostname>       Hostname of the server where the qBittorrent instance is
+                        running.
+    -U <username>       Username of the user to login to the qBittorrent instance.
+    -P <password>       Password of the user to login to the qBittorrent instance.
+    -S <size>           The maximum size, in GiB or TiB, of the knapsack to add Sci
+                        Hub torrents to. Must be a positive integer or float. Must
+                        have either G or T on the end, which represents GiB or TiB.
+    -s <max_seeders>    Select torrents with at most <max_seeders> seeders.
+                        <max_seeders> is a positive integer argument.
+"""
+
+import json
+
+import requests
+from docopt import docopt
+from qbittorrent import Client
+
+
+def get_torrent_health_data(url: str | None = None) -> list[dict]:
+    """
+    Fetch Sci Hub torrent health checker data from the given URL. The URL
+    should refer to a JSON-formatted file.
+
+    Arguments:
+    url: str | None -- URL of the JSON file. When omitted, the default
+                       torrent health checker URL is used.
+
+    Return value:
+    The decoded JSON document.
+
+    Raises requests.HTTPError if the server answers with an error status.
+    """
+    TORRENT_HEALTH_URL = (
+        "https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json"
+    )
+    response = requests.get(url or TORRENT_HEALTH_URL, timeout=60)
+    # Fail loudly on 4xx/5xx instead of trying to decode an error page.
+    response.raise_for_status()
+    return json.loads(response.text)
+
+
+def convert_size_to_bytes(size: str) -> int:
+    """
+    Convert the given size string to bytes.
+
+    The string must be a positive integer or float followed by "G" (GiB) or
+    "T" (TiB). A fractional number of bytes is truncated.
+
+    Example: 42G --> 45097156608 bytes
+    Example: 2.2T --> 2418925581107 bytes
+
+    Raises ValueError if the suffix is not G or T, or if the number is not
+    a positive, finite value.
+    """
+    multipliers = {"G": 1024**3, "T": 1024**4}
+
+    text = size.strip()
+    unit = text[-1:]
+    if unit not in multipliers:
+        raise ValueError(f"Size must end with G or T, got {size!r}")
+
+    try:
+        number = float(text[:-1])
+    except ValueError:
+        raise ValueError(
+            f"Size must be a number followed by G or T, got {size!r}"
+        ) from None
+
+    # Written as a chained comparison so NaN and infinity are rejected too.
+    if not 0 < number < float("inf"):
+        raise ValueError(f"Size must be a positive number, got {size!r}")
+
+    return int(number * multipliers[unit])
+
+
+def human_bytes(bites: int) -> str:
+    """
+    Convert bytes to a human-readable string in bytes, KiB, MiB, GiB, or TiB.
+
+    Values below 1024 are printed as a plain byte count; larger values are
+    printed with two decimal places in the largest unit that fits, up to TiB.
+
+    Example: 45097156608 bytes -> 42.00 GiB
+    """
+    if bites < 1024:
+        return f"{bites} {'byte' if bites == 1 else 'bytes'}"
+
+    # Divide by 1024 one unit at a time until the value fits. Anything that
+    # is still 1024 GiB or more is reported in TiB, the largest unit used.
+    value = float(bites)
+    for unit in ("KiB", "MiB", "GiB"):
+        value /= 1024
+        if value < 1024:
+            return f"{value:.2f} {unit}"
+    return f"{value / 1024:.2f} TiB"
+
+
+def get_knapsack_weight(knapsack: list[dict]) -> str:
+    """
+    Return the combined size of every torrent in the knapsack, formatted as
+    a human-readable string by human_bytes().
+    """
+    total_bytes = 0
+    for item in knapsack:
+        total_bytes += item["size_bytes"]
+    return human_bytes(total_bytes)
+
+
+def fill_knapsack(
+    max_seeders: int, knapsack_size: int, smaller: bool = False
+) -> list[dict]:
+    """
+    Fill the knapsack.
+
+    Torrents are fetched with get_torrent_health_data(), filtered by seeder
+    count, sorted by size, and then added greedily while they still fit.
+
+    Arguments:
+    max_seeders: int    -- Select only torrents with at most this number of
+                           seeders
+    knapsack_size: int  -- The size in bytes of the knapsack
+    smaller: bool       -- Prioritize smaller sized torrents (Default = False)
+
+    Return value:
+    A list of dictionaries that represent the torrents. Their combined
+    size_bytes never exceeds knapsack_size.
+    """
+
+    # Torrents with at most <max_seeders> seeders.
+    candidates = [
+        t for t in get_torrent_health_data() if t["seeders"] <= max_seeders
+    ]
+
+    # Ascending by size_bytes when smaller is set, descending otherwise.
+    candidates.sort(key=lambda t: t["size_bytes"], reverse=not smaller)
+
+    total_bytes = 0
+    knapsack = []
+    for torrent in candidates:
+        # Skip a torrent that does not fit instead of stopping: in descending
+        # order the first torrents may be larger than the whole knapsack
+        # while later, smaller ones still fit. An exact fit is allowed.
+        if total_bytes + torrent["size_bytes"] > knapsack_size:
+            continue
+        total_bytes += torrent["size_bytes"]
+        knapsack.append(torrent)
+
+    return knapsack
+
+
+if __name__ == "__main__":
+    # Entry point: parse the command line, fill the knapsack, then either
+    # list its contents (--dry-run) or add them to qBittorrent.
+    args = docopt(__doc__)  # type: ignore
+    hostname = args["-H"]
+    username = args["-U"]
+    password = args["-P"]
+    max_seeders = int(args["-s"])
+    knapsack_size = convert_size_to_bytes(args["-S"])
+    smaller = args["--smaller"]
+    dry_run = args["--dry-run"]
+
+    # Initialize client and login. A dry run never adds anything, so it does
+    # not need to contact the qBittorrent instance at all.
+    qb = None
+    if not dry_run:
+        qb = Client(hostname)
+        qb.login(username=username, password=password)
+
+    # Fill the knapsack
+    knapsack = fill_knapsack(max_seeders, knapsack_size, smaller)
+
+    # If it's a dry run, only print the knapsack's contents. Otherwise,
+    # add the knapsack's contents to the qBittorrent instance.
+    # When finished, print the number of items and the combined weight of all
+    # items in the knapsack.
+    if qb is None:
+        for torrent in knapsack:
+            print(torrent["name"])
+    else:
+        for torrent in knapsack:
+            qb.download_from_link(torrent["link"], category="scihub")
+            print(f"Added {torrent["name"]}")
+
+    print("----------------")
+    print(f"Count: {len(knapsack)} torrents")
+    print(f"Total combined size: {get_knapsack_weight(knapsack)}")
+    print("----------------")
--- a/bin/seed_scihub_max_seeders.py
+++ b/bin/seed_scihub_max_seeders.py
@ -9,14 +9,15 @@ instance.
 MAX_SEEDERS is a positive integer argument.

 Usage:
-    seed_scihub_max_seeders.py (HOSTNAME) (USERNAME) (PASSWORD) (MAX_SEEDERS)
+    seed_scihub_max_seeders.py [--only-count] (HOSTNAME) (USERNAME) (PASSWORD) (MAX_SEEDERS)
    seed_scihub_max_seeders.py -h

 Examples:
    seed_scihub_max_seeders.py "http://localhost:8080" "admin" "adminadmin" 4
-    seed_scihub_max_seeders.py "https://cat.seedhost.eu/lol/qbittorrent" "lol" "pw" 3
+    seed_scihub_max_seeders.py --only-count "https://cat.seedhost.eu/lol/qbittorrent" "lol" "pw" 3

 Options:
+    --only-count    do not add torrents, but only print the number of torrents with MAX_SEEDERS
    -h, --help      show this help message and exit
 """

@ -29,7 +30,7 @@ from qbittorrent import Client


 if __name__ == "__main__":
-    args = docopt(__doc__)
+    args = docopt(__doc__)  # type: ignore
    qb = Client(args["HOSTNAME"])
    qb.login(username=args["USERNAME"], password=args["PASSWORD"])

@ -39,6 +40,13 @@ if __name__ == "__main__":
    response = requests.get(TORRENT_HEALTH_URL, timeout=60)
    json_data = json.loads(response.text)

+    if args["--only-count"]:
+        sum = 0
+        for item in json_data:
+            if item["seeders"] <= int(args["MAX_SEEDERS"]):
+                sum += 1
+        print(f"Number of torrents with <= {int(args["MAX_SEEDERS"])} seeders: {sum}")
+    else:
        for item in json_data:
            if item["seeders"] <= int(args["MAX_SEEDERS"]):
                qb.download_from_link(item["link"], category="scihub")