{
  "metadata": {
    "model": "google/gemma-4-26B-A4B-it",
    "server": "127.0.0.1:30000",
    "timestamp": "2026-05-08T02:06:28.535725",
    "duration_per_test": 30.0,
    "max_tokens": 8192,
    "ignore_eos": true,
    "min_tokens": 8192,
    "max_total_tokens": 1036299,
    "concurrency_levels": [
      1,
      4,
      8
    ],
    "context_lengths": [
      0,
      8192,
      32768
    ]
  },
  "prefill": {
    "8192": {
      "ttft_seconds": 0.131,
      "tok_per_sec": 76424.0
    },
    "16384": {
      "ttft_seconds": 0.315,
      "tok_per_sec": 56335.0
    },
    "32768": {
      "ttft_seconds": 1.018,
      "tok_per_sec": 32949.0
    },
    "65536": {
      "ttft_seconds": 3.831,
      "tok_per_sec": 17215.0
    },
    "131072": {
      "ttft_seconds": 14.671,
      "tok_per_sec": 8949.0
    }
  },
  "results": [
    {
      "concurrency": 1,
      "context_tokens": 0,
      "aggregate_tps": 352.93802944187564,
      "per_request_avg_tps": 352.93802944187564,
      "ttft_avg": 0.029101418000209378,
      "ttft_p50": 0.029101418000209378,
      "ttft_p99": 0.029101418000209378,
      "total_tokens": 8192,
      "wall_time": 25.216509021000093,
      "num_completed": 1,
      "num_errors": 0,
      "server_gen_throughput": 352.93802944187564,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 1.0
    },
    {
      "concurrency": 4,
      "context_tokens": 0,
      "aggregate_tps": 1013.1814360661432,
      "per_request_avg_tps": 253.2953590165358,
      "ttft_avg": 0.04461581275018034,
      "ttft_p50": 0.05005782200032627,
      "ttft_p99": 0.05031635258025744,
      "total_tokens": 30829,
      "wall_time": 30.252316875999895,
      "num_completed": 4,
      "num_errors": 0,
      "server_gen_throughput": 1013.1814360661432,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.96
    },
    {
      "concurrency": 8,
      "context_tokens": 0,
      "aggregate_tps": 1785.9126189211445,
      "per_request_avg_tps": 223.23907736514306,
      "ttft_avg": 0.04010085312518186,
      "ttft_p50": 0.037587713500215614,
      "ttft_p99": 0.05607433368052625,
      "total_tokens": 85965,
      "wall_time": 30.248987627000133,
      "num_completed": 8,
      "num_errors": 0,
      "server_gen_throughput": 1785.9126189211445,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.843125
    },
    {
      "concurrency": 1,
      "context_tokens": 8192,
      "aggregate_tps": 288.54811885051345,
      "per_request_avg_tps": 288.54811885051345,
      "ttft_avg": 0.04423867700006667,
      "ttft_p50": 0.04423867700006667,
      "ttft_p99": 0.04423867700006667,
      "total_tokens": 9408,
      "wall_time": 30.267388837,
      "num_completed": 1,
      "num_errors": 0,
      "server_gen_throughput": 288.54811885051345,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.97
    },
    {
      "concurrency": 4,
      "context_tokens": 8192,
      "aggregate_tps": 792.9608607160002,
      "per_request_avg_tps": 198.24021517900005,
      "ttft_avg": 0.07702443800008041,
      "ttft_p50": 0.0783957945000111,
      "ttft_p99": 0.09065683220039318,
      "total_tokens": 24569,
      "wall_time": 30.246081969999977,
      "num_completed": 4,
      "num_errors": 0,
      "server_gen_throughput": 792.9608607160002,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.8875
    },
    {
      "concurrency": 8,
      "context_tokens": 8192,
      "aggregate_tps": 1784.4249237969261,
      "per_request_avg_tps": 223.05311547461577,
      "ttft_avg": 0.12794574100007594,
      "ttft_p50": 0.1193320790002872,
      "ttft_p99": 0.1557766627996807,
      "total_tokens": 57247,
      "wall_time": 30.265766356999848,
      "num_completed": 8,
      "num_errors": 0,
      "server_gen_throughput": 1784.4249237969261,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.93
    },
    {
      "concurrency": 1,
      "context_tokens": 32768,
      "aggregate_tps": 189.5407660294026,
      "per_request_avg_tps": 189.5407660294026,
      "ttft_avg": 0.09317934199953015,
      "ttft_p50": 0.09317934199953015,
      "ttft_p99": 0.09317934199953015,
      "total_tokens": 4058,
      "wall_time": 30.259028581000166,
      "num_completed": 1,
      "num_errors": 0,
      "server_gen_throughput": 189.5407660294026,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.995
    },
    {
      "concurrency": 4,
      "context_tokens": 32768,
      "aggregate_tps": 744.7501368886174,
      "per_request_avg_tps": 186.18753422215434,
      "ttft_avg": 0.2418611595001039,
      "ttft_p50": 0.2337203709998903,
      "ttft_p99": 0.2657406174004609,
      "total_tokens": 29951,
      "wall_time": 30.274750239999776,
      "num_completed": 4,
      "num_errors": 0,
      "server_gen_throughput": 744.7501368886174,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 1.0
    },
    {
      "concurrency": 8,
      "context_tokens": 32768,
      "aggregate_tps": 1284.232624033095,
      "per_request_avg_tps": 160.52907800413686,
      "ttft_avg": 0.48123524387483485,
      "ttft_p50": 0.47579310349965453,
      "ttft_p99": 0.5164283592197989,
      "total_tokens": 49331,
      "wall_time": 30.276235563999762,
      "num_completed": 8,
      "num_errors": 0,
      "server_gen_throughput": 1284.232624033095,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.90125
    }
  ],
  "summary_table": {
    "0": {
      "1": 352.93802944187564,
      "4": 1013.1814360661432,
      "8": 1785.9126189211445
    },
    "8192": {
      "1": 288.54811885051345,
      "4": 792.9608607160002,
      "8": 1784.4249237969261
    },
    "32768": {
      "1": 189.5407660294026,
      "4": 744.7501368886174,
      "8": 1284.232624033095
    }
  }
}