8:["$","div",null,{"className":"flex min-h-screen flex-col bg-background font-sans","children":[["$","$L13",null,{"logoHref":"/"}],["$","$L14",null,{"benchmark":{"id":"6a1e919efbc373592c18a2ee","slug":"llm-inference-on-qwen3-8b-workload-c-512-on-rtx-pro-6000","name":"LLM Inference on Qwen3-8B workload (c=512) on RTX PRO 6000","description":"Evaluation of LLM inference performance for the Qwen3-8B model running on an RTX PRO 6000 GPU, specifically measuring Service Level Objective (SLO) attainment under a workload characterized by a 512 context length.","taskName":"LLM Inference","taskSlug":"llm-inference","datasetName":"Qwen3-8B workload (c=512) on RTX PRO 6000","datasetSlug":"qwen3-8b-workload-c-512-on-rtx-pro-6000","datasetFamilyName":"Qwen workload","datasetFamilySlug":"qwen-workload","sotaPaperTitle":"Threshold-Based Exclusive Batching for LLM Inference","papers":0,"results":18,"lastUpdate":"2026-06-02T08:25:56.553Z","primaryMetricKey":"SLO Attainment (%)","primaryMetricValue":80.3,"trendPreview":[],"sotaMethod":"EB+","sotaPaperId":"6a1e63bfdbd7150506f8534e","sotaPaperSource":"method_origin","metrics":[{"name":"SLO Attainment (%)","unit":"PERCENT","direction":"HIGHER_IS_BETTER","isHigherBetter":true,"isPrimary":true}]},"initialResults":{"benchmarkId":"6a1e919efbc373592c18a2ee","primaryMetricKey":"SLO Attainment (%)","secondaryMetricKeys":[],"metrics":[{"name":"SLO Attainment (%)","unit":"PERCENT","direction":"HIGHER_IS_BETTER","isHigherBetter":true,"isPrimary":true}],"rows":[{"id":"6a1e92c4fbc373592c18a918","methodName":"EB+","primaryMetricValue":80.3,"metricValues":{"SLO Attainment (%)":80.3},"inferenceConfig":{"TTFT target":"<10 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a917","methodName":"EB(k*)","primaryMetricValue":77.3,"metricValues":{"SLO Attainment (%)":77.3},"inferenceConfig":{"TTFT target":"<10 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a915","methodName":"EB+","primaryMetricValue":48.4,"metricValues":{"SLO Attainment (%)":48.4},"inferenceConfig":{"TTFT target":"<5 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a914","methodName":"EB(k*)","primaryMetricValue":6.2,"metricValues":{"SLO Attainment (%)":6.2},"inferenceConfig":{"TTFT target":"<5 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a916","methodName":"v1","primaryMetricValue":5.8,"metricValues":{"SLO Attainment (%)":5.8},"inferenceConfig":{"TTFT target":"<10 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"68fa708c1e209131cea4ba94","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2023-09-12T12:50:04.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a913","methodName":"v1","primaryMetricValue":5,"metricValues":{"SLO Attainment (%)":5},"inferenceConfig":{"TTFT target":"<5 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"68fa708c1e209131cea4ba94","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2023-09-12T12:50:04.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a910","methodName":"v1","primaryMetricValue":4.9,"metricValues":{"SLO Attainment (%)":4.9},"inferenceConfig":{"TTFT target":"<2 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"68fa708c1e209131cea4ba94","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2023-09-12T12:50:04.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a90f","methodName":"EB+","primaryMetricValue":1.3,"metricValues":{"SLO Attainment (%)":1.3},"inferenceConfig":{"TTFT target":"<10 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a912","methodName":"EB+","primaryMetricValue":1.2,"metricValues":{"SLO Attainment (%)":1.2},"inferenceConfig":{"TTFT target":"<2 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a90c","methodName":"EB+","primaryMetricValue":1.1,"metricValues":{"SLO Attainment (%)":1.1},"inferenceConfig":{"TTFT target":"<5 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a907","methodName":"v1","primaryMetricValue":0.6,"metricValues":{"SLO Attainment (%)":0.6},"inferenceConfig":{"TTFT target":"<2 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"68fa708c1e209131cea4ba94","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2023-09-12T12:50:04.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a90a","methodName":"v1","primaryMetricValue":0.6,"metricValues":{"SLO Attainment (%)":0.6},"inferenceConfig":{"TTFT target":"<5 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"68fa708c1e209131cea4ba94","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2023-09-12T12:50:04.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a90d","methodName":"v1","primaryMetricValue":0.6,"metricValues":{"SLO Attainment (%)":0.6},"inferenceConfig":{"TTFT target":"<10 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"68fa708c1e209131cea4ba94","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2023-09-12T12:50:04.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a90e","methodName":"EB(k*)","primaryMetricValue":0.5,"metricValues":{"SLO Attainment (%)":0.5},"inferenceConfig":{"TTFT target":"<10 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a908","methodName":"EB(k*)","primaryMetricValue":0,"metricValues":{"SLO Attainment (%)":0},"inferenceConfig":{"TTFT target":"<2 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a909","methodName":"EB+","primaryMetricValue":0,"metricValues":{"SLO Attainment (%)":0},"inferenceConfig":{"TTFT target":"<2 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a90b","methodName":"EB(k*)","primaryMetricValue":0,"metricValues":{"SLO Attainment (%)":0},"inferenceConfig":{"TTFT target":"<5 s","TPOT target":"< 50 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"},{"id":"6a1e92c4fbc373592c18a911","methodName":"EB(k*)","primaryMetricValue":0,"metricValues":{"SLO Attainment (%)":0},"inferenceConfig":{"TTFT target":"<2 s","TPOT target":"< 100 ms"},"methodOriginPaperId":"6a1e63bfdbd7150506f8534e","methodOriginGithubRepo":null,"methodOriginImplementationUrl":null,"methodOriginPaperPublishedAt":"2026-05-30T04:11:08.000Z","reportPaperId":"6a1e63bfdbd7150506f8534e","reportPaperPublishedAt":"2026-05-30T04:11:08.000Z"}],"total":18,"page":1,"limit":5000,"totalPages":1},"relatedBenchmarks":[{"id":"6a1e9195fbc373592c18a2e5","slug":"llm-inference-on-qwen3-8b-2k-prompts-decode-heavy-workload","name":"LLM Inference on Qwen3-8B 2k prompts Decode-heavy workload","primaryMetricResultCount":30},{"id":"6a1e9191fbc373592c18a254","slug":"llm-inference-on-qwen3-8b-2k-prompts-balanced-workload","name":"LLM Inference on Qwen3-8B (2k prompts Balanced workload)","primaryMetricResultCount":28},{"id":"6a1e918dfbc373592c18a250","slug":"llm-inference-on-qwen3-8b-2k-prompts-prefill-heavy-workload","name":"LLM Inference on Qwen3-8B 2k prompts Prefill-heavy workload","primaryMetricResultCount":26},{"id":"6a1e91a3fbc373592c18a36e","slug":"llm-inference-on-qwen3-8b-synthetic-workload-mu_l-512-mu_o-256","name":"LLM Inference on Qwen3-8B synthetic workload (mu_L=512, mu_O=256)","primaryMetricResultCount":16},{"id":"6a1e919afbc373592c18a2ea","slug":"llm-inference-on-wildchat","name":"LLM Inference on WildChat","primaryMetricResultCount":11}]}],"$L15"]}]

Method	Links
EB+ 2026.05		80.3
EB(k*) 2026.05		77.3
EB+ 2026.05		48.4
EB(k*) 2026.05		6.2
v1 2026.05		5.8
v1 2026.05		5
v1 2026.05		4.9
EB+ 2026.05		1.3
EB+ 2026.05		1.2
EB+ 2026.05		1.1
v1 2026.05		0.6
v1 2026.05		0.6
v1 2026.05		0.6
EB(k*) 2026.05		0.5
EB(k*) 2026.05		0
EB+ 2026.05		0
EB(k*) 2026.05		0
EB(k*) 2026.05		0

LLM Inference on Qwen3-8B workload (c=512) on RTX PRO 6000

Evaluation Results