admin-scripts/bin/scihub_knapsack.py

#!/usr/bin/env python3

"""scihub_knapsack.py

Description:
This script will add torrents to a qBittorrent instance until a specified size
limit is reached.

By default, the larger torrents are prioritized in descending order, but the
script can be run with the --smaller flag to prioritize smaller torrents in
ascending order.

The script will select only torrents with less than or equal to <max_seeders>
seeders.

Usage:
    scihub_knapsack.py [--smaller] [--dry-run] -H <hostname> -U <username> -P <password> -S <size> -s <max_seeders>
    scihub_knapsack.py -h

Examples:
    scihub_knapsack.py -H http://localhost:8080 -U admin -P adminadmin -S 42T -s 10
    scihub_knapsack.py --smaller -H https://qbt.hello.world -U admin -P adminadmin -S 2.2T -s 10

Options:
    --smaller           Prioritize from the smallest torrent sizes and work upward
                        to larger sizes. Default is to prioritize larger sizes.
    --dry-run           Only print the torrent names, total number of torrents, and
                        their total combined size instead of adding them to the
                        qBittorrent instance.
    -H <hostname>       Hostname of the server where the qBittorrent instance is
                        running.
    -U <username>       Username of the user to login to the qBittorrent instance.
    -P <password>       Password of the user to login to the qBittorrent instance.
    -S <size>           The maximum size, in GiB or TiB, of the knapsack to add Sci
                        Hub torrents to. Must be a positive integer or float. Must
                        have either G or T on the end, which represents GiB or TiB.
    -s <max_seeders>    Select torrents with less than or equal to <max_seeders>
                        seeders. <max_seeders> is a positive integer.
"""

import json

import requests
from docopt import docopt
from qbittorrent import Client


def get_torrent_health_data() -> list[dict]:
    """
    Fetch the Sci Hub torrent health checker data.

    Downloads the JSON document published at TORRENT_HEALTH_URL and returns
    the decoded payload. The other functions in this file read the
    "seeders", "size_bytes", "name" and "link" keys of each entry.

    Raises:
    requests.HTTPError  -- the server answered with a 4xx/5xx status
    """
    TORRENT_HEALTH_URL = (
        "https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json"
    )
    response = requests.get(TORRENT_HEALTH_URL, timeout=60)
    # Fail loudly on an HTTP error instead of trying to decode an error page
    # as JSON, which would surface as a confusing JSONDecodeError.
    response.raise_for_status()
    return json.loads(response.text)


def convert_size_to_bytes(size: str) -> int:
    """
    Convert the given size string to bytes.

    The string is a positive integer or float followed by G (GiB) or
    T (TiB). Fractional byte counts are truncated.

    Example: 42G --> 45097156608 bytes
    Example: 0.5T --> 549755813888 bytes

    Raises:
    ValueError  -- the suffix is not G or T, or the number is missing,
                   malformed, or not a positive finite value
    """
    multipliers = {"G": 1024**3, "T": 1024**4}

    # size[-1:] is "" for an empty string, so the membership test below also
    # rejects empty input without an IndexError.
    unit = size[-1:]
    if unit not in multipliers:
        raise ValueError(f"Size must end with G or T, got {size!r}")

    # float() accepts both "42" and "2.2"; it raises ValueError itself when
    # the numeric part is empty or malformed.
    amount = float(size[:-1])

    # The chained comparison is False for NaN, zero, negatives and infinity.
    if not 0 < amount < float("inf"):
        raise ValueError(f"Size must be a positive number, got {size!r}")

    return int(amount * multipliers[unit])


def human_bytes(bites: int) -> str:
    """
    Convert a byte count to a human-readable string.

    Values below 1 KiB are printed as a plain byte count ("1 byte",
    "512 bytes"). Larger values are printed with two decimals in the largest
    of KiB, MiB, GiB or TiB that does not exceed them.

    Example: 45097156608 bytes -> 42.00 GiB
    """
    # Ordered from largest to smallest so the first match is the largest
    # unit that fits.
    units = (
        ("TiB", 1024**4),
        ("GiB", 1024**3),
        ("MiB", 1024**2),
        ("KiB", 1024),
    )

    for label, factor in units:
        if bites >= factor:
            return f"{bites / factor:.2f} {label}"

    # Below 1 KiB: only a count of exactly one is singular.
    noun = "byte" if bites == 1 else "bytes"
    return f"{bites} {noun}"


def get_knapsack_weight(knapsack: list[dict]) -> str:
    """
    Return the combined size of every torrent in the knapsack, formatted as
    a human-readable string by human_bytes().
    """
    total_bytes = sum(item["size_bytes"] for item in knapsack)
    return human_bytes(total_bytes)


def fill_knapsack(
    max_seeders: int, knapsack_size: int, smaller: bool = False
) -> list[dict]:
    """
    Fill the knapsack.

    Torrents are fetched with get_torrent_health_data(), filtered by seeder
    count, sorted by size, and then added greedily in that order. A torrent
    that does not fit in the remaining space is skipped, so an oversized
    torrent at the front of the list does not leave the knapsack empty when
    smaller ones would still fit. A torrent that fills the knapsack exactly
    is accepted.

    Arguments:
    max_seeders: int    -- Select only torrents with less than or equal to
                           this number of seeders
    knapsack_size: int  -- The size in bytes of the knapsack
    smaller: bool       -- Prioritize smaller sized torrents (Default = False)

    Return value:
    A list of dictionaries that represent the torrents, in the order they
    were added.
    """

    # Candidate torrents: those with less than or equal to <max_seeders>.
    candidates = [
        t for t in get_torrent_health_data() if t["seeders"] <= max_seeders
    ]

    # Ascending by size_bytes when smaller is set, descending otherwise.
    candidates.sort(key=lambda t: t["size_bytes"], reverse=not smaller)

    total_bytes = 0
    knapsack = []
    for torrent in candidates:
        size = torrent["size_bytes"]
        if total_bytes + size > knapsack_size:
            # Does not fit in the remaining space. Keep going: in descending
            # order a later, smaller torrent may still fit.
            continue
        total_bytes += size
        knapsack.append(torrent)

    return knapsack


def main() -> None:
    """
    Command-line entry point.

    Parses the arguments described in the module docstring, fills the
    knapsack, and then either prints the selected torrent names (--dry-run)
    or adds each torrent to the qBittorrent instance under the "scihub"
    category. A count and combined-size summary is printed in both cases.
    """
    args = docopt(__doc__)  # type: ignore
    hostname = args["-H"]
    username = args["-U"]
    password = args["-P"]
    max_seeders = int(args["-s"])
    knapsack_size = convert_size_to_bytes(args["-S"])
    smaller = args["--smaller"]
    dry_run = args["--dry-run"]

    # Fill the knapsack
    knapsack = fill_knapsack(max_seeders, knapsack_size, smaller)

    if dry_run:
        # A dry run only reports what would be added, so the qBittorrent
        # instance is never contacted.
        for torrent in knapsack:
            print(torrent["name"])
    else:
        # Initialize client and login
        qb = Client(hostname)
        qb.login(username=username, password=password)

        for torrent in knapsack:
            qb.download_from_link(torrent["link"], category="scihub")
            # Single quotes inside the f-string: reusing the enclosing quote
            # character is a syntax error before Python 3.12.
            print(f"Added {torrent['name']}")

    # Summary: number of items and the combined weight of all items.
    print("----------------")
    print(f"Count: {len(knapsack)} torrents")
    print(f"Total combined size: {get_knapsack_weight(knapsack)}")
    print("----------------")


if __name__ == "__main__":
    main()
Add scihub_knapsack.py 2024-08-05 00:18:24 +02:00			`#!/usr/bin/env python3`

			`"""scihub_knapsack.py`

			`Description:`
			`This script will add torrents to a qBittorrent instance until a specified size`
			`limit is reached.`

			`By default, the larger torrents are prioritized in descending order, but the`
			`script can be run with the --smaller flag to prioritize smaller torrents in`
			`ascending order.`

Correct docstrings 2024-08-06 03:28:43 +02:00			`The script will select only torrents with less than or equal to <max_seeders>.`
Add scihub_knapsack.py 2024-08-05 00:18:24 +02:00
			`Usage:`
			`scihub_knapsack.py [--smaller] [--dry-run] -H <hostname> -U <username> -P <password> -S <size> -s <max_seeders>`
			`scihub_knapsack.py -h`

			`Examples:`
			`scihub_knapsack.py -H http://localhost:8080 -U admin -P adminadmin -S 42T`
			`scihub_knapsack.py --smaller -H https://qbt.hello.world -U admin -P adminadmin -S 2.2T`

			`Options:`
			`--smaller Prioritize from the smallest torrent sizes and work upward`
			`to larger sizes. Default is to prioritize larger sizes.`
			`--dry-run Only print the torrent names, total number of torrents, and`
			`their total combined size instead of adding them to the`
			`qBittorrent instance.`
			`-H <hostname> Hostname of the server where the qBittorrent instance is`
			`running.`
			`-U <username> Username of the user to login to the qBittorrent instance.`
			`-P <password> Password of the user to login to the qBittorrent instance.`
			`-S <size> The maximum size, in GiB or TiB, of the knapsack to add Sci`
			`Hub torrents to. Must be a positive integer or float. Must`
			`have either G or T on the end, which represents GiB or TiB.`
Correct docstrings 2024-08-06 03:28:43 +02:00			`-s <max_seeders> Select torrents with less than or equal to <max_seeders>`
			`seeders. <max_seeders> is a positive integer.`
Add scihub_knapsack.py 2024-08-05 00:18:24 +02:00			`"""`

			`import json`

			`import requests`
			`from docopt import docopt`
			`from qbittorrent import Client`


			`def get_torrent_health_data() -> list[dict]:`
			`"""`
			`Fetch Sci Hub torrent health checker data from the given URL. The URL`
			`should refer to a JSON-formatted file.`
			`"""`
			`TORRENT_HEALTH_URL = (`
			`"https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json"`
			`)`
			`response = requests.get(TORRENT_HEALTH_URL, timeout=60)`
			`return json.loads(response.text)`


			`def convert_size_to_bytes(size: str) -> int:`
			`"""`
			`Convert the given size string to bytes.`

			`Example: 42G --> 45097156608 bytes`
			`"""`
			`if size.endswith("T"):`
			`total_bytes = int(size.split("T")[0]) * (1024**4)`

			`if size.endswith("G"):`
			`total_bytes = int(size.split("G")[0]) * (1024**3)`

			`return total_bytes`


			`def human_bytes(bites: int) -> str:`
			`"""`
			`Convert bytes to KiB, MiB, GiB, or TiB.`

			`Example: 45097156608 bytes -> 42 GiB`
			`"""`
			`B = float(bites)`
			`KiB = float(1024)`
			`MiB = float(KiB**2)`
			`GiB = float(KiB**3)`
			`TiB = float(KiB**4)`

			`match B:`
			`case B if B < KiB:`
			`return "{0} {1}".format(B, "bytes" if 0 == B > 1 else "byte")`
			`case B if KiB <= B < MiB:`
			`return "{0:.2f} KiB".format(B / KiB)`
			`case B if MiB <= B < GiB:`
			`return "{0:.2f} MiB".format(B / MiB)`
			`case B if GiB <= B < TiB:`
			`return "{0:.2f} GiB".format(B / GiB)`
			`case B if TiB <= B:`
			`return "{0:.2f} TiB".format(B / TiB)`
			`case _:`
			`return ""`


			`def get_knapsack_weight(knapsack: list[dict]) -> str:`
			`"""`
			`Get the weight of the given knapsack in GiB or TiB.`
			`"""`
			`return human_bytes(sum([torrent["size_bytes"] for torrent in knapsack]))`


			`def fill_knapsack(`
			`max_seeders: int, knapsack_size: int, smaller: bool = False`
			`) -> list[dict]:`
			`"""`
			`Fill the knapsack.`

			`Arguments:`
Correct docstrings 2024-08-06 03:28:43 +02:00			`max_seeders: int -- Select only torrents with less than or equal to`
			`this number of seeders`
Add scihub_knapsack.py 2024-08-05 00:18:24 +02:00			`knapsack_size: int -- The size in bytes of the knapsack`
			`smaller: bool -- Prioritize smaller sized torrents (Default = False)`

			`Return value:`
			`A list of dictionaries that represent the torrents.`
			`"""`

Correct docstrings 2024-08-06 03:28:43 +02:00			`# List of torrents with less than or equal to <max_seeders>`
Add scihub_knapsack.py 2024-08-05 00:18:24 +02:00			`torrents = [t for t in get_torrent_health_data() if t["seeders"] <= max_seeders]`

			`# Sorted list of torrents with <max_seeders>. If smaller == True, sort them`
			`# in ascending order by size_bytes. Else sort them in descending order by`
			`# size_bytes.`
			`sorted_torrents = (`
			`sorted(torrents, key=lambda d: d["size_bytes"])`
			`if smaller == True`
			`else sorted(torrents, key=lambda d: d["size_bytes"], reverse=True)`
			`)`

			`# Sum the sizes of each torrent in sorted_torrents and add them to the`
			`# knapsack until it is filled, then return the knapsack.`
			`sum = 0`
			`knapsack = []`
			`for torrent in sorted_torrents:`
			`if sum + torrent["size_bytes"] >= knapsack_size:`
			`break`
			`sum += torrent["size_bytes"]`
			`knapsack.append(torrent)`

			`return knapsack`


			`if __name__ == "__main__":`
			`args = docopt(__doc__) # type: ignore`
			`hostname = args["-H"]`
			`username = args["-U"]`
			`password = args["-P"]`
			`max_seeders = int(args["-s"])`
			`knapsack_size = convert_size_to_bytes(args["-S"])`
			`smaller = args["--smaller"]`
			`dry_run = args["--dry-run"]`

			`# Initialize client and login`
			`qb = Client(hostname)`
			`qb.login(username=username, password=password)`

			`# Fill the knapsack`
			`knapsack = fill_knapsack(max_seeders, knapsack_size, smaller)`

			`# If it's a dry run, only print the knapsack's contents. Otherwise,`
			`# add the knapsack's contents to the qBittorrent instance.`
			`# When finished, print the number of items and the combined weight of all`
			`# items in the knapsack.`
			`if args["--dry-run"]:`
			`for torrent in knapsack:`
			`print(torrent["name"])`
			`else:`
			`for torrent in knapsack:`
			`qb.download_from_link(torrent["link"], category="scihub")`
			`print(f"Added {torrent["name"]}")`

			`print("----------------")`
			`print(f"Count: {len(knapsack)} torrents")`
			`print(f"Total combined size: {get_knapsack_weight(knapsack)}")`
			`print("----------------")`