Use f-strings instead of .format()

Jeffrey Serio 2023-09-16 11:48:54 -05:00
parent 35b5438371
commit ef29e741f9
2 changed files with 31 additions and 44 deletions
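The change is mechanical throughout the scraper: each positional str.format() call is rewritten as the equivalent f-string, with the value interpolated inline. A minimal sketch of the pattern, using the name/canonical_url pair from FandomWiki.__init__ (the example value is hypothetical):

    name = "dishonored"                                    # hypothetical wiki name
    canonical_url = "https://{}.fandom.com".format(name)   # before: positional .format()
    canonical_url = f"https://{name}.fandom.com"           # after: inline f-string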


@@ -16,7 +16,6 @@ import requests
 from bs4 import BeautifulSoup
 from rich.console import Console
 from rich.progress import Progress
-from rich.tree import Tree
 
 console = Console()
@@ -24,17 +23,11 @@ console = Console()
 class FandomWiki:
     def __init__(self, name: str):
         self.name = name
-        self.canonical_url = "https://{}.fandom.com".format(name)
-        self.breezewiki_url = "https://wiki.hyperreal.coffee/{}".format(name)
-        self.site_dir = Path.cwd().joinpath("{}.fandom.com".format(name))
+        self.canonical_url = f"https://{name}.fandom.com"
+        self.breezewiki_url = f"https://wiki.hyperreal.coffee/{name}"
+        self.site_dir = Path.cwd().joinpath(f"{name}.fandom.com")
         self.images_dir = self.site_dir.joinpath("images")
 
-        if not self.site_dir.exists():
-            self.site_dir.mkdir()
-
-        if not self.images_dir.exists():
-            self.images_dir.mkdir()
-
         try:
             response = requests.get(self.canonical_url)
             response.raise_for_status()
@@ -45,8 +38,14 @@ class FandomWiki:
             console.print("(3) The server hosting that wiki is down for some reason.\n")
             console.print(f"HTTP error: {http_err}")
 
+        if not self.site_dir.exists():
+            self.site_dir.mkdir()
+
+        if not self.images_dir.exists():
+            self.images_dir.mkdir()
+
     def get_hop0_urls(self) -> list:
-        starting_url = "{}/wiki/Local_Sitemap".format(self.canonical_url)
+        starting_url = f"{self.canonical_url}/wiki/Local_Sitemap"
         hop0_urls = list()
 
         while True:
@@ -62,18 +61,12 @@ class FandomWiki:
                 break
             else:
                 if len(mw_allpages_nav.find_all("a")) < 2:
-                    starting_url = "{}{}".format(
-                        self.canonical_url,
-                        mw_allpages_nav.find_all("a")[0].get("href"),
-                    )
+                    starting_url = f"{self.canonical_url}{mw_allpages_nav.find_all('a')[0].get('href')}"
                 else:
-                    starting_url = "{}{}".format(
-                        self.canonical_url,
-                        mw_allpages_nav.find_all("a")[1].get("href"),
-                    )
+                    starting_url = f"{self.canonical_url}{mw_allpages_nav.find_all('a')[1].get('href')}"
 
             hop0_urls.append(starting_url)
-            console.print("[[bold]HOP 0[/bold]] {}".format(starting_url))
+            console.print(f"[[bold]HOP 0[/bold]] {starting_url}")
 
         return hop0_urls
@@ -90,9 +83,9 @@ class FandomWiki:
                 if "Local_Sitemap" not in item.get(
                     "href"
                 ) and "Special:" not in item.get("href"):
-                    new_url = "{}{}".format(self.breezewiki_url, item.get("href"))
+                    new_url = f"{self.breezewiki_url}{item.get('href')}"
                     hop1_urls.append(new_url)
-                    console.print("[[bold]HOP 1[/bold]] {}".format(new_url))
+                    console.print(f"[[bold]HOP 1[/bold]] {new_url}")
 
         return hop1_urls
@@ -111,13 +104,11 @@ class FandomWiki:
             response = requests.get(page)
             response.raise_for_status()
 
-            css_filename = self.site_dir.joinpath(
-                "proxy{}.css".format(css_pages.index(page))
-            )
+            css_filename = self.site_dir.joinpath(f"proxy{css_pages.index(page)}.css")
             with open(css_filename, "wb") as outfile:
                 outfile.write(response.content)
 
-            console.print("[[bold green]CSS[/bold green]] {}".format(css_filename))
+            console.print(f"[[bold green]CSS[/bold green]] {css_filename}")
 
     def save_img(self, img_url: str):
         filename = self.images_dir.joinpath(Path(img_url.split("/revision")[0]).name)
@@ -129,11 +120,9 @@ class FandomWiki:
                     for chunk in response.iter_content(chunk_size=8192):
                         outfile.write(chunk)
 
-            console.print("[[bold green]IMG[/bold green]] {}".format(filename))
+            console.print(f"[[bold green]IMG[/bold green]] {filename}")
         else:
-            console.print(
-                "[[bold yellow]IMG (EXISTS)[/bold yellow]] {}".format(filename)
-            )
+            console.print(f"[[bold yellow]IMG (EXISTS)[/bold yellow]] {filename}")
 
     def fetch_all_images(self, page_url: str):
         response = requests.get(page_url)
@@ -150,7 +139,7 @@ class FandomWiki:
             self.save_img(img_url)
 
     def save_page(self, url: str):
-        filename = self.site_dir.joinpath("{}.html".format(url.split("/")[-1]))
+        filename = self.site_dir.joinpath(f"{url.split('/')[-1]}.html")
         if not filename.exists():
             response = requests.get(url)
             response.raise_for_status()
@@ -168,7 +157,7 @@ class FandomWiki:
                         "link",
                         rel="stylesheet",
                         type="text/css",
-                        href="proxy{}.css".format(i),
+                        href=f"proxy{i}.css",
                     )
                 )
@@ -178,19 +167,17 @@ class FandomWiki:
             for link in soup.find_all("a"):
                 if link.get("href") and link.get("href").startswith(
-                    "/{}/wiki".format(self.name)
+                    f"/{self.name}/wiki"
                 ):
                     link_basename = link.get("href").partition("/wiki/")[2]
-                    link["href"] = "{}/{}.html".format(self.site_dir, link_basename)
+                    link["href"] = f"{self.site_dir}/{link_basename}.html"
 
             with open(filename, "w") as outfile:
                 outfile.write(soup.prettify())
 
-            console.print("[[bold green]HTML[/bold green]] {}".format(filename))
+            console.print(f"[[bold green]HTML[/bold green]] {filename}")
         else:
-            console.print(
-                "[[bold yellow]HTML (EXISTS)[/bold yellow]] {}".format(filename)
-            )
+            console.print(f"[[bold yellow]HTML (EXISTS)[/bold yellow]] {filename}")
 
     def fetch_all_pages(self, hop1_urls: list):
         self.save_css()
@@ -207,7 +194,7 @@ class FandomWiki:
             if self.images_dir.joinpath(f).is_file()
         ]
 
-        img_archive_filename = "{}-{}.tar.xz".format(self.images_dir, timestamp)
+        img_archive_filename = f"{self.images_dir}-{timestamp}.tar.xz"
 
         with Progress() as progress:
             task = progress.add_task("[cyan]Archiving images...", total=len(img_files))
@@ -230,7 +217,7 @@ class FandomWiki:
             if self.site_dir.joinpath(f).is_file or self.site_dir.joinpath(f).is_dir()
         ]
 
-        web_archive_filename = "{}-{}.tar.gz".format(self.site_dir, timestamp)
+        web_archive_filename = f"{self.site_dir}-{timestamp}.tar.gz"
 
         with Progress() as progress:
             task = progress.add_task(
@@ -249,8 +236,8 @@ class FandomWiki:
         shutil.rmtree(self.site_dir, ignore_errors=True)
 
-        console.print("\nTotal web files scraped: {}".format(len(web_files)))
-        console.print("Total images scraped: {}".format(len(img_files)))
+        console.print(f"\nTotal web files scraped: {len(web_files)}")
+        console.print(f"Total images scraped: {len(img_files)}")
 
 
 def archive_site(name: str):


@@ -1,10 +1,10 @@
 [tool.poetry]
 name = "archive-fandom-wiki"
-version = "0.1.0"
+version = "0.1.1"
 description = "Archive fandom wikis"
 authors = ["Jeffrey Serio <hyperreal@fedoraproject.org>"]
 license = "GPL-3.0"
-readme = "README.md"
+readme = "README.org"
 packages = [{include = "archive_fandom_wiki"}]
 
 [tool.poetry.dependencies]