mirror of
https://codeberg.org/hyperreal/admin-scripts
synced 2024-11-25 09:03:41 +01:00
Add scihub_knapsack.py
This commit is contained in:
parent
54ad29fab8
commit
d469edbca1
42
bin/fetch_scihub_infohashes.py
Executable file
42
bin/fetch_scihub_infohashes.py
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
"""fetch_scihub_infohashes.py
|
||||||
|
|
||||||
|
Description:
|
||||||
|
This script fetches the infohashes of all Sci Hub torrents and writes them to a
|
||||||
|
plaintext file. The plaintext file is intended to be appended to a bittorrent
|
||||||
|
tracker whitelist. E.g., /etc/opentracker/whitelist.txt.
|
||||||
|
|
||||||
|
Optionally set the TORRENT_JSON_URL for the Sci Hub torrent health checker, or
|
||||||
|
run the script with no arguments to use the default.
|
||||||
|
|
||||||
|
Default health check URL:
|
||||||
|
https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
fetch_scihub_infohashes.py [TORRENT_JSON_URL]
|
||||||
|
fetch_scihub_infohashes.py -h
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-h, --help show this help message and exit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from docopt import docopt
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: download the torrent health JSON and write one
    # infohash per line to ./scihub_torrent_infohashes.txt.
    args = docopt(__doc__)  # type: ignore

    # Fall back to the default health-checker feed when no URL was supplied.
    url = (
        args["TORRENT_JSON_URL"]
        or "https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json"
    )

    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of trying to parse an error page
    # as JSON.
    response.raise_for_status()
    json_data = json.loads(response.text)

    # Single quotes inside the f-string keep this valid on Python < 3.12,
    # where reusing the enclosing quote character is a SyntaxError.
    torrent_infohashes = [f"{x['infohash']}\n" for x in json_data]

    output_path = Path.cwd().joinpath("scihub_torrent_infohashes.txt")
    with open(output_path, "w", encoding="utf-8") as tf:
        tf.writelines(torrent_infohashes)
|
@ -46,7 +46,7 @@ def human_bytes(bites: int) -> str:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
args = docopt(__doc__)
|
args = docopt(__doc__) # type: ignore
|
||||||
|
|
||||||
# Initialize client and login
|
# Initialize client and login
|
||||||
qb = Client(args["HOSTNAME"])
|
qb = Client(args["HOSTNAME"])
|
||||||
@ -61,7 +61,7 @@ if __name__ == "__main__":
|
|||||||
total_completed_bytes = sum(completed_torrent_sizes)
|
total_completed_bytes = sum(completed_torrent_sizes)
|
||||||
|
|
||||||
# get total_added_bytes
|
# get total_added_bytes
|
||||||
total_added_bytes = sum([torrent["total_size"] for torrent in torrents]) # type: ignore
|
total_added_bytes = sum([torrent["total_size"] for torrent in qb.torrents()]) # type: ignore
|
||||||
|
|
||||||
# print the results
|
# print the results
|
||||||
print(f"\nTotal completed size: {human_bytes(total_completed_bytes)}")
|
print(f"\nTotal completed size: {human_bytes(total_completed_bytes)}")
|
||||||
|
180
bin/scihub_knapsack.py
Executable file
180
bin/scihub_knapsack.py
Executable file
@ -0,0 +1,180 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
"""scihub_knapsack.py
|
||||||
|
|
||||||
|
Description:
|
||||||
|
This script will add torrents to a qBittorrent instance until a specified size
|
||||||
|
limit is reached.
|
||||||
|
|
||||||
|
By default, the larger torrents are prioritized in descending order, but the
|
||||||
|
script can be run with the --smaller flag to prioritize smaller torrents in
|
||||||
|
ascending order.
|
||||||
|
|
||||||
|
The script will select only torrents with at most <max_seeders> seeders.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
scihub_knapsack.py [--smaller] [--dry-run] -H <hostname> -U <username> -P <password> -S <size> -s <max_seeders>
|
||||||
|
scihub_knapsack.py -h
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
scihub_knapsack.py -H http://localhost:8080 -U admin -P adminadmin -S 42T -s 4
|
||||||
|
scihub_knapsack.py --smaller -H https://qbt.hello.world -U admin -P adminadmin -S 2.2T -s 3
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--smaller Prioritize from the smallest torrent sizes and work upward
|
||||||
|
to larger sizes. Default is to prioritize larger sizes.
|
||||||
|
--dry-run Only print the torrent names, total number of torrents, and
|
||||||
|
their total combined size instead of adding them to the
|
||||||
|
qBittorrent instance.
|
||||||
|
-H <hostname> Hostname of the server where the qBittorrent instance is
|
||||||
|
running.
|
||||||
|
-U <username> Username of the user to login to the qBittorrent instance.
|
||||||
|
-P <password> Password of the user to login to the qBittorrent instance.
|
||||||
|
-S <size> The maximum size, in GiB or TiB, of the knapsack to add Sci
|
||||||
|
Hub torrents to. Must be a positive integer or float. Must
|
||||||
|
have either G or T on the end, which represents GiB or TiB.
|
||||||
|
-s <max_seeders> Select torrents with <max_seeders> seeders. <max_seeders>
|
||||||
|
is a positive integer argument.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from docopt import docopt
|
||||||
|
from qbittorrent import Client
|
||||||
|
|
||||||
|
|
||||||
|
def get_torrent_health_data(
    url: str = "https://zrthstr.github.io/libgen_torrent_cardiography/torrent.json",
) -> list[dict]:
    """
    Fetch Sci Hub torrent health checker data from the given URL.

    Arguments:
        url: str -- Location of a JSON-formatted file. Defaults to the
                    torrent.json published by libgen_torrent_cardiography.

    Return value:
        The decoded JSON document.

    Raises:
        requests.HTTPError if the server answers with an error status.
    """
    response = requests.get(url, timeout=60)
    # Surface HTTP failures directly rather than as a confusing JSON decode
    # error on an error page.
    response.raise_for_status()
    return json.loads(response.text)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_size_to_bytes(size: str) -> int:
    """
    Convert a size string with a G (GiB) or T (TiB) suffix to bytes.

    The numeric part may be an integer or a float. A fractional number of
    bytes is truncated.

    Examples:
        42G  --> 45097156608 bytes
        2.2T --> 2418925581107 bytes

    Raises:
        ValueError if the suffix is not G or T, or if the numeric part is not
        a positive, finite number.
    """
    multipliers = {"G": 1024**3, "T": 1024**4}

    size = size.strip()
    number, suffix = size[:-1], size[-1:]
    if suffix not in multipliers:
        raise ValueError(f"Size must end with G or T, got {size!r}")

    try:
        # Keep whole numbers as int so the multiplication stays exact.
        value: int | float = int(number)
    except ValueError:
        # Fractional sizes such as "2.2"; float() raises ValueError on garbage.
        value = float(number)

    # The chained comparison also rejects NaN (every comparison is False).
    if not 0 < value < float("inf"):
        raise ValueError(f"Size must be a positive number, got {size!r}")

    return int(value * multipliers[suffix])
|
||||||
|
|
||||||
|
|
||||||
|
def human_bytes(bites: int) -> str:
    """
    Convert bytes to a human-readable string in bytes, KiB, MiB, GiB, or TiB.

    Values below 1 KiB are printed as a float byte count; larger values are
    printed with two decimals in the largest unit that does not exceed them.

    Examples:
        500         -> "500.0 bytes"
        45097156608 -> "42.00 GiB"
    """
    B = float(bites)
    KiB = float(1024)
    MiB = float(KiB**2)
    GiB = float(KiB**3)
    TiB = float(KiB**4)

    if B < KiB:
        # Singular only for exactly one byte. The previous chained comparison
        # `0 == B > 1` could never be true, so the unit was always "byte".
        return "{0} {1}".format(B, "byte" if B == 1 else "bytes")
    if B < MiB:
        return "{0:.2f} KiB".format(B / KiB)
    if B < GiB:
        return "{0:.2f} MiB".format(B / MiB)
    if B < TiB:
        return "{0:.2f} GiB".format(B / GiB)
    if B >= TiB:
        return "{0:.2f} TiB".format(B / TiB)

    # Only reachable when no comparison holds (a NaN input).
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_knapsack_weight(knapsack: list[dict]) -> str:
    """
    Report the combined size of every torrent in the given knapsack.

    The "size_bytes" values of all entries are added up and the total is
    rendered as a human-readable string by human_bytes().
    """
    total_bytes = 0
    for entry in knapsack:
        total_bytes += entry["size_bytes"]
    return human_bytes(total_bytes)
|
||||||
|
|
||||||
|
|
||||||
|
def fill_knapsack(
|
||||||
|
max_seeders: int, knapsack_size: int, smaller: bool = False
|
||||||
|
) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Fill the knapsack.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
max_seeders: int -- Select only torrents with this number of seeders
|
||||||
|
knapsack_size: int -- The size in bytes of the knapsack
|
||||||
|
smaller: bool -- Prioritize smaller sized torrents (Default = False)
|
||||||
|
|
||||||
|
Return value:
|
||||||
|
A list of dictionaries that represent the torrents.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# List of torrents with <max_seeders>
|
||||||
|
torrents = [t for t in get_torrent_health_data() if t["seeders"] <= max_seeders]
|
||||||
|
|
||||||
|
# Sorted list of torrents with <max_seeders>. If smaller == True, sort them
|
||||||
|
# in ascending order by size_bytes. Else sort them in descending order by
|
||||||
|
# size_bytes.
|
||||||
|
sorted_torrents = (
|
||||||
|
sorted(torrents, key=lambda d: d["size_bytes"])
|
||||||
|
if smaller == True
|
||||||
|
else sorted(torrents, key=lambda d: d["size_bytes"], reverse=True)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Sum the sizes of each torrent in sorted_torrents and add them to the
|
||||||
|
# knapsack until it is filled, then return the knapsack.
|
||||||
|
sum = 0
|
||||||
|
knapsack = []
|
||||||
|
for torrent in sorted_torrents:
|
||||||
|
if sum + torrent["size_bytes"] >= knapsack_size:
|
||||||
|
break
|
||||||
|
sum += torrent["size_bytes"]
|
||||||
|
knapsack.append(torrent)
|
||||||
|
|
||||||
|
return knapsack
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: parse the command line, pick the torrents, then
    # either list them (dry run) or add them to qBittorrent.
    args = docopt(__doc__)  # type: ignore
    hostname = args["-H"]
    username = args["-U"]
    password = args["-P"]
    max_seeders = int(args["-s"])
    knapsack_size = convert_size_to_bytes(args["-S"])
    smaller = args["--smaller"]
    dry_run = args["--dry-run"]

    # Fill the knapsack
    knapsack = fill_knapsack(max_seeders, knapsack_size, smaller)

    # If it's a dry run, only print the knapsack's contents. Otherwise,
    # add the knapsack's contents to the qBittorrent instance.
    if dry_run:
        for torrent in knapsack:
            print(torrent["name"])
    else:
        # Only connect and log in when torrents will actually be added; a dry
        # run does not touch the qBittorrent instance.
        qb = Client(hostname)
        qb.login(username=username, password=password)

        for torrent in knapsack:
            qb.download_from_link(torrent["link"], category="scihub")
            # Single quotes inside the f-string keep this valid on
            # Python 3.10/3.11.
            print(f"Added {torrent['name']}")

    # When finished, print the number of items and the combined weight of all
    # items in the knapsack.
    print("----------------")
    print(f"Count: {len(knapsack)} torrents")
    print(f"Total combined size: {get_knapsack_weight(knapsack)}")
    print("----------------")
|
@ -9,14 +9,15 @@ instance.
|
|||||||
MAX_SEEDERS is a positive integer argument.
|
MAX_SEEDERS is a positive integer argument.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
seed_scihub_max_seeders.py (HOSTNAME) (USERNAME) (PASSWORD) (MAX_SEEDERS)
|
seed_scihub_max_seeders.py [--only-count] (HOSTNAME) (USERNAME) (PASSWORD) (MAX_SEEDERS)
|
||||||
seed_scihub_max_seeders.py -h
|
seed_scihub_max_seeders.py -h
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
seed_scihub_max_seeders.py "http://localhost:8080" "admin" "adminadmin" 4
|
seed_scihub_max_seeders.py "http://localhost:8080" "admin" "adminadmin" 4
|
||||||
seed_scihub_max_seeders.py "https://cat.seedhost.eu/lol/qbittorrent" "lol" "pw" 3
|
seed_scihub_max_seeders.py --only-count "https://cat.seedhost.eu/lol/qbittorrent" "lol" "pw" 3
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
|
--only-count do not add torrents, but only print the number of torrents with MAX_SEEDERS
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -29,7 +30,7 @@ from qbittorrent import Client
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
args = docopt(__doc__)
|
args = docopt(__doc__) # type: ignore
|
||||||
qb = Client(args["HOSTNAME"])
|
qb = Client(args["HOSTNAME"])
|
||||||
qb.login(username=args["USERNAME"], password=args["PASSWORD"])
|
qb.login(username=args["USERNAME"], password=args["PASSWORD"])
|
||||||
|
|
||||||
@ -39,6 +40,13 @@ if __name__ == "__main__":
|
|||||||
response = requests.get(TORRENT_HEALTH_URL, timeout=60)
|
response = requests.get(TORRENT_HEALTH_URL, timeout=60)
|
||||||
json_data = json.loads(response.text)
|
json_data = json.loads(response.text)
|
||||||
|
|
||||||
|
if args["--only-count"]:
|
||||||
|
sum = 0
|
||||||
|
for item in json_data:
|
||||||
|
if item["seeders"] <= int(args["MAX_SEEDERS"]):
|
||||||
|
sum += 1
|
||||||
|
print(f"Number of torrents with <= {int(args["MAX_SEEDERS"])} seeders: {sum}")
|
||||||
|
else:
|
||||||
for item in json_data:
|
for item in json_data:
|
||||||
if item["seeders"] <= int(args["MAX_SEEDERS"]):
|
if item["seeders"] <= int(args["MAX_SEEDERS"]):
|
||||||
qb.download_from_link(item["link"], category="scihub")
|
qb.download_from_link(item["link"], category="scihub")
|
||||||
|
Loading…
Reference in New Issue
Block a user