gen-alt-text/gen_alt_text.py

120 lines
3.1 KiB
Python

#!/usr/bin/env python3
"""gen-alt-text

Usage:
    gen-alt-text [-m MODEL] <path_to_image>
    gen-alt-text -h

Options:
    -h          show this help message and exit
    -m MODEL    Use MODEL as the model. If MODEL is not provided, it defaults
                to llama3.2-vision:11b.

Supported vision models:
    - llama3.2-vision:11b
    - llama3.2-vision:90b
    - llava:7b
    - llava:13b
    - llava:34b
    - llava-llama3:8b
    - bakllava:7b
    - moondream:1.8b
    - llava-phi3:3.8b
    - minicpm-v:8b

Examples:
    gen-alt-text -m llama3.2-vision:90b ~/pictures/rubber_duck.jpg
    gen-alt-text ~/pictures/coffee.jpg
"""
import os
import sys

from docopt import docopt
from ollama import Client, ResponseError
from rich.console import Console
from rich.markdown import Markdown
def main():
    """Generate alt-text for an image with an Ollama vision model.

    Parses the command line described by the module docstring, checks the
    requested model against the supported vision models, sends the image to
    the Ollama server (``OLLAMA_HOST`` if set, otherwise
    ``http://localhost:11434``) and prints the reply rendered as Markdown.

    Exits with status 1 when the model is not a supported vision model, the
    image file does not exist, or the server reports an error. When the
    server answers 404 for the chat request, the model is pulled and the
    process exits with status 0 so the user can run the command again.
    """
    args = docopt(__doc__)
    image_path = args["<path_to_image>"]

    default_model = "llama3.2-vision:11b"
    vision_models = [
        "llama3.2-vision:11b",
        "llama3.2-vision:90b",
        "llava:7b",
        "llava:13b",
        "llava:34b",
        "llava-llama3:8b",
        "bakllava:7b",
        "moondream:1.8b",
        "llava-phi3:3.8b",
        "minicpm-v:8b",
    ]

    print()
    console = Console()

    # Fall back to the default when -m is absent or empty.
    model = args["-m"] or default_model
    if model not in vision_models:
        console.print(
            "You must use a vision model to generate alt-text from images."
        )
        print()
        console.print("[bold magenta]Available vision models:[/bold magenta]")
        for name in vision_models:
            print(f"- {name}")
        print()
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit("Select a model above and run the command again.")

    # Fail early with a readable message instead of letting the client
    # library raise on a path it cannot read.
    if not os.path.isfile(image_path):
        sys.exit(f"Image file not found: {image_path}")

    client = Client(host=os.getenv("OLLAMA_HOST") or "http://localhost:11434")

    try:
        with console.status(
            f"[bold magenta] {model}[/bold magenta] is generating alt-text for "
            + image_path
            + "...",
            spinner="aesthetic",
        ):
            response = client.chat(
                model=model,
                messages=[
                    {
                        "role": "user",
                        "content": "Describe this image concisely",
                        "images": [image_path],
                    }
                ],
                stream=False,
            )
    except ResponseError as e:
        if e.status_code != 404:
            sys.exit(f"Error: {e.error}")

        # 404 is treated as "model not present on the server": pull it, then
        # ask the user to re-run rather than retrying automatically.
        try:
            with console.status(
                f"[bold magenta] {model}[/bold magenta] was not found on the ollama server. Pulling it now...",
                spinner="aesthetic",
            ):
                client.pull(model=model)
        except ResponseError as pull_error:
            # The pull itself can be rejected by the server; report it the
            # same way as any other server error.
            sys.exit(f"Error: {pull_error.error}")

        console.print(
            f"Successfully pulled [bold magenta]{model}[/bold magenta]. You may try running the command again."
        )
        sys.exit(0)

    md_content = Markdown(response["message"]["content"])
    console.print(md_content)
if __name__ == "__main__":
    # Script entry point: turn Ctrl-C into a short message instead of a
    # traceback.
    try:
        main()
    except KeyboardInterrupt:
        # sys.exit (rather than the site-provided exit) works even when the
        # site module is not loaded; a string argument is written to stderr
        # and the process exits with status 1.
        sys.exit("Keyboard interrupt detected. Exiting.")