#!/usr/bin/env python3
import argparse
from openai import OpenAI


def main() -> None:
    """Run a handful of fixed prompts against a chat-completions server and print the replies.

    Command-line flags (all optional):
        --base-url  API root handed to the OpenAI client (default: http://127.0.0.1:30000/v1).
        --model     Model name sent with every request (default: google/gemma-4-26B-A4B-it).
        --api-key   API key handed to the client (default: "EMPTY").

    Each prompt goes out as its own request, paired with a fixed system message. For every
    reply a numbered header is printed, followed by a "[reasoning present]" marker when the
    message has a truthy ``reasoning`` or ``reasoning_content`` attribute, and finally the
    message content. Replies are only printed; nothing is validated.
    """
    cli = argparse.ArgumentParser()
    for flag, fallback in (
        ("--base-url", "http://127.0.0.1:30000/v1"),
        ("--model", "google/gemma-4-26B-A4B-it"),
        ("--api-key", "EMPTY"),
    ):
        cli.add_argument(flag, default=fallback)
    opts = cli.parse_args()

    prompts = (
        "Say exactly: Gemma 4 smoke test ready.",
        "Which is larger, 9.11 or 9.9? Answer in one sentence.",
        "Write a Python function add(a, b) with no explanation.",
        "Return JSON only with keys ok=true and model='gemma4'.",
    )
    system_text = "You are Gemma, a helpful AI assistant from Google DeepMind."
    api = OpenAI(base_url=opts.base_url, api_key=opts.api_key, timeout=600)

    for number, question in enumerate(prompts, 1):
        conversation = [
            {"role": "system", "content": system_text},
            {"role": "user", "content": question},
        ]
        completion = api.chat.completions.create(
            model=opts.model,
            messages=conversation,
            temperature=0.0,
            max_tokens=512,
        )
        reply = completion.choices[0].message

        print(f"\n--- check {number} ---")
        # The reasoning text may be exposed under either attribute name; only its
        # presence is reported, the text itself is not printed.
        if any(getattr(reply, field, None) for field in ("reasoning", "reasoning_content")):
            print("[reasoning present]")
        print(reply.content)


# Run the smoke test only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
