{
  "metadata": {
    "model": "google/gemma-4-26B-A4B-it",
    "server": "127.0.0.1:30000",
    "timestamp": "2026-05-08T02:00:18.532376",
    "duration_per_test": 30.0,
    "max_tokens": 8192,
    "ignore_eos": true,
    "min_tokens": 8192,
    "max_total_tokens": 1036299,
    "concurrency_levels": [
      1,
      4,
      8
    ],
    "context_lengths": [
      0,
      8192,
      32768
    ]
  },
  "prefill": {
    "8192": {
      "ttft_seconds": 0.137,
      "tok_per_sec": 73160.0
    },
    "16384": {
      "ttft_seconds": 0.319,
      "tok_per_sec": 55739.0
    },
    "32768": {
      "ttft_seconds": 1.027,
      "tok_per_sec": 32692.0
    },
    "65536": {
      "ttft_seconds": 3.839,
      "tok_per_sec": 17185.0
    },
    "131072": {
      "ttft_seconds": 14.717,
      "tok_per_sec": 8921.0
    }
  },
  "results": [
    {
      "concurrency": 1,
      "context_tokens": 0,
      "aggregate_tps": 188.96089952543758,
      "per_request_avg_tps": 188.96089952543758,
      "ttft_avg": 0.03405220900003769,
      "ttft_p50": 0.03405220900003769,
      "ttft_p99": 0.03405220900003769,
      "total_tokens": 6247,
      "wall_time": 30.25970693499994,
      "num_completed": 1,
      "num_errors": 0,
      "server_gen_throughput": 188.96089952543758,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    },
    {
      "concurrency": 4,
      "context_tokens": 0,
      "aggregate_tps": 592.781017918977,
      "per_request_avg_tps": 148.19525447974425,
      "ttft_avg": 0.04322580000007292,
      "ttft_p50": 0.043169183000145495,
      "ttft_p99": 0.047887410619882755,
      "total_tokens": 19950,
      "wall_time": 30.268116771999757,
      "num_completed": 4,
      "num_errors": 0,
      "server_gen_throughput": 592.781017918977,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    },
    {
      "concurrency": 8,
      "context_tokens": 0,
      "aggregate_tps": 1035.1662271233147,
      "per_request_avg_tps": 129.39577839041434,
      "ttft_avg": 0.0662936207499456,
      "ttft_p50": 0.06602608549997058,
      "ttft_p99": 0.06780767169011596,
      "total_tokens": 31506,
      "wall_time": 30.267299944000115,
      "num_completed": 8,
      "num_errors": 0,
      "server_gen_throughput": 1035.1662271233147,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    },
    {
      "concurrency": 1,
      "context_tokens": 8192,
      "aggregate_tps": 171.8619192939767,
      "per_request_avg_tps": 171.8619192939767,
      "ttft_avg": 0.04254893699999229,
      "ttft_p50": 0.04254893699999229,
      "ttft_p99": 0.04254893699999229,
      "total_tokens": 6082,
      "wall_time": 30.267170006000015,
      "num_completed": 1,
      "num_errors": 0,
      "server_gen_throughput": 171.8619192939767,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    },
    {
      "concurrency": 4,
      "context_tokens": 8192,
      "aggregate_tps": 560.4463666666987,
      "per_request_avg_tps": 140.11159166667468,
      "ttft_avg": 0.09561913824984458,
      "ttft_p50": 0.09947751199979393,
      "ttft_p99": 0.11171156729010817,
      "total_tokens": 18973,
      "wall_time": 30.262540280999474,
      "num_completed": 4,
      "num_errors": 0,
      "server_gen_throughput": 560.4463666666987,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    },
    {
      "concurrency": 8,
      "context_tokens": 8192,
      "aggregate_tps": 974.8406603152612,
      "per_request_avg_tps": 121.85508253940765,
      "ttft_avg": 0.14616520387505716,
      "ttft_p50": 0.1348592889999054,
      "ttft_p99": 0.17377883593963817,
      "total_tokens": 32129,
      "wall_time": 30.28203422399929,
      "num_completed": 8,
      "num_errors": 0,
      "server_gen_throughput": 974.8406603152612,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    },
    {
      "concurrency": 1,
      "context_tokens": 32768,
      "aggregate_tps": 135.1218009872816,
      "per_request_avg_tps": 135.1218009872816,
      "ttft_avg": 0.09343646700017416,
      "ttft_p50": 0.09343646700017416,
      "ttft_p99": 0.09343646700017416,
      "total_tokens": 6465,
      "wall_time": 30.260107817000062,
      "num_completed": 1,
      "num_errors": 0,
      "server_gen_throughput": 135.1218009872816,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    },
    {
      "concurrency": 4,
      "context_tokens": 32768,
      "aggregate_tps": 475.5006733074347,
      "per_request_avg_tps": 118.87516832685867,
      "ttft_avg": 0.3012089332503365,
      "ttft_p50": 0.29521170150019316,
      "ttft_p99": 0.32031201197048176,
      "total_tokens": 19855,
      "wall_time": 30.27386049799952,
      "num_completed": 4,
      "num_errors": 0,
      "server_gen_throughput": 475.5006733074347,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    },
    {
      "concurrency": 8,
      "context_tokens": 32768,
      "aggregate_tps": 881.7578583577364,
      "per_request_avg_tps": 110.21973229471705,
      "ttft_avg": 0.6406971699999531,
      "ttft_p50": 0.6377995929997269,
      "ttft_p99": 0.6593764942205598,
      "total_tokens": 38679,
      "wall_time": 30.435581372999877,
      "num_completed": 8,
      "num_errors": 0,
      "server_gen_throughput": 881.7578583577364,
      "server_utilization": 0.0,
      "server_spec_accept_rate": 0.0
    }
  ],
  "summary_table": {
    "0": {
      "1": 188.96089952543758,
      "4": 592.781017918977,
      "8": 1035.1662271233147
    },
    "8192": {
      "1": 171.8619192939767,
      "4": 560.4463666666987,
      "8": 974.8406603152612
    },
    "32768": {
      "1": 135.1218009872816,
      "4": 475.5006733074347,
      "8": 881.7578583577364
    }
  }
}