2b:["$","div",null,{"className":"space-y-4 max-w-4xl mx-auto","children":[["$","div",null,{"className":"text-center","children":[["$","h2",null,{"className":"text-2xl font-bold","children":"Performance Metrics"}],["$","p",null,{"className":"text-muted-foreground","children":"Context window and performance specifications"}]]}],["$","$L31",null,{"modelA":{"modelId":"gpt-4.1-2025-04-14","name":"GPT-4.1","organizationId":"openai","fineTunedFromModelId":null,"description":"GPT-4.1 is OpenAI's latest and most advanced flagship model, significantly improving upon GPT-4 Turbo in performance across benchmarks, speed, and cost-effectiveness.","releaseDate":"2025-04-14","announcementDate":"2025-04-14","licenseId":"proprietary","multimodal":true,"knowledgeCutoff":"2024-06-01","paramCount":null,"trainingTokens":null,"contextWindow":null,"availableInZeroeval":true,"sourceApiRef":"https://platform.openai.com/docs/models/gpt-4.1","sourcePlayground":"https://platform.openai.com/playground?mode=chat&model=gpt-4.1","sourcePaper":null,"sourceScorecardBlogLink":"https://openai.com/index/gpt-4-1/","sourceRepoLink":null,"sourceWeightsLink":null,"modelFamilyId":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.841Z","updatedAt":"$D2025-07-19T19:49:05.841Z","organization":{"organizationId":"openai","name":"OpenAI","website":"https://openai.com","description":"Leading AI research company","country":"US","manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.815Z","updatedAt":"$D2025-07-19T19:49:05.815Z"},"license":{"licenseId":"proprietary","name":"Proprietary","allowCommercial":false,"description":"Proprietary license - usage restrictions apply","createdAt":"$D2025-07-19T19:49:05.425Z","updatedAt":"$D2025-07-19T19:49:05.425Z"},"benchmarks":[{"modelBenchmarkId":55589621,"benchmarkId":"aider-polyglot","modelId":"gpt-4.1-2025-04-14","score":0.516,"normalizedScore":0.516,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.389Z","updatedAt":"$D2025-07-19T19:56:12.389Z","benchmark":{"benchmarkId":"aider-polyglot","name":"Aider-Polyglot","category":null}},{"modelBenchmarkId":56917012,"benchmarkId":"aider-polyglot-edit","modelId":"gpt-4.1-2025-04-14","score":0.529,"normalizedScore":0.529,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.808Z","updatedAt":"$D2025-07-19T19:56:13.808Z","benchmark":{"benchmarkId":"aider-polyglot-edit","name":"Aider-Polyglot Edit","category":null}},{"modelBenchmarkId":37824432,"benchmarkId":"aime-2024","modelId":"gpt-4.1-2025-04-14","score":0.481,"normalizedScore":0.481,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.019Z","updatedAt":"$D2025-07-19T19:56:12.019Z","benchmark":{"benchmarkId":"aime-2024","name":"AIME 2024","category":null}},{"modelBenchmarkId":5293672,"benchmarkId":"charxiv-d","modelId":"gpt-4.1-2025-04-14","score":0.879,"normalizedScore":0.879,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.330Z","updatedAt":"$D2025-07-19T19:56:15.330Z","benchmark":{"benchmarkId":"charxiv-d","name":"CharXiv-D","category":null}},{"modelBenchmarkId":86304190,"benchmarkId":"charxiv-r","modelId":"gpt-4.1-2025-04-14","score":0.567,"normalizedScore":0.567,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.201Z","updatedAt":"$D2025-07-19T19:56:15.201Z","benchmark":{"benchmarkId":"charxiv-r","name":"CharXiv-R","category":null}},{"modelBenchmarkId":80102217,"benchmarkId":"collie","modelId":"gpt-4.1-2025-04-14","score":0.658,"normalizedScore":0.658,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.261Z","updatedAt":"$D2025-07-19T19:56:15.261Z","benchmark":{"benchmarkId":"collie","name":"COLLIE","category":null}},{"modelBenchmarkId":73555218,"benchmarkId":"complexfuncbench","modelId":"gpt-4.1-2025-04-14","score":0.655,"normalizedScore":0.655,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.348Z","updatedAt":"$D2025-07-19T19:56:15.348Z","benchmark":{"benchmarkId":"complexfuncbench","name":"ComplexFuncBench","category":null}},{"modelBenchmarkId":95261107,"benchmarkId":"gpqa","modelId":"gpt-4.1-2025-04-14","score":0.663,"normalizedScore":0.663,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Diamond","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.761Z","updatedAt":"$D2025-07-19T19:56:11.761Z","benchmark":{"benchmarkId":"gpqa","name":"GPQA","category":null}},{"modelBenchmarkId":35902349,"benchmarkId":"graphwalks-bfs-<128k","modelId":"gpt-4.1-2025-04-14","score":0.617,"normalizedScore":0.617,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.294Z","updatedAt":"$D2025-07-19T19:56:15.294Z","benchmark":{"benchmarkId":"graphwalks-bfs-<128k","name":"Graphwalks BFS <128k","category":null}},{"modelBenchmarkId":47168125,"benchmarkId":"graphwalks-bfs->128k","modelId":"gpt-4.1-2025-04-14","score":0.19,"normalizedScore":0.19,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Internal benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.302Z","updatedAt":"$D2025-07-19T19:56:15.302Z","benchmark":{"benchmarkId":"graphwalks-bfs->128k","name":"Graphwalks BFS >128k","category":null}},{"modelBenchmarkId":41422121,"benchmarkId":"graphwalks-parents-<128k","modelId":"gpt-4.1-2025-04-14","score":0.58,"normalizedScore":0.58,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Internal benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.312Z","updatedAt":"$D2025-07-19T19:56:15.312Z","benchmark":{"benchmarkId":"graphwalks-parents-<128k","name":"Graphwalks parents <128k","category":null}},{"modelBenchmarkId":65669206,"benchmarkId":"graphwalks-parents->128k","modelId":"gpt-4.1-2025-04-14","score":0.25,"normalizedScore":0.25,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Internal benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.324Z","updatedAt":"$D2025-07-19T19:56:15.324Z","benchmark":{"benchmarkId":"graphwalks-parents->128k","name":"Graphwalks parents >128k","category":null}},{"modelBenchmarkId":99813914,"benchmarkId":"ifeval","modelId":"gpt-4.1-2025-04-14","score":0.874,"normalizedScore":0.874,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.304Z","updatedAt":"$D2025-07-19T19:56:12.304Z","benchmark":{"benchmarkId":"ifeval","name":"IFEval","category":null}},{"modelBenchmarkId":82267192,"benchmarkId":"internal-api-instruction-following-(hard)","modelId":"gpt-4.1-2025-04-14","score":0.491,"normalizedScore":0.491,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Internal benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.230Z","updatedAt":"$D2025-07-19T19:56:15.230Z","benchmark":{"benchmarkId":"internal-api-instruction-following-(hard)","name":"Internal API instruction following (hard)","category":null}},{"modelBenchmarkId":52438477,"benchmarkId":"mathvista","modelId":"gpt-4.1-2025-04-14","score":0.722,"normalizedScore":0.722,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.121Z","updatedAt":"$D2025-07-19T19:56:12.121Z","benchmark":{"benchmarkId":"mathvista","name":"MathVista","category":null}},{"modelBenchmarkId":11924741,"benchmarkId":"mmlu","modelId":"gpt-4.1-2025-04-14","score":0.902,"normalizedScore":0.902,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.323Z","updatedAt":"$D2025-07-19T19:56:11.323Z","benchmark":{"benchmarkId":"mmlu","name":"MMLU","category":null}},{"modelBenchmarkId":80654818,"benchmarkId":"mmmlu","modelId":"gpt-4.1-2025-04-14","score":0.873,"normalizedScore":0.873,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.161Z","updatedAt":"$D2025-07-19T19:56:14.161Z","benchmark":{"benchmarkId":"mmmlu","name":"MMMLU","category":null}},{"modelBenchmarkId":1832985,"benchmarkId":"mmmu","modelId":"gpt-4.1-2025-04-14","score":0.748,"normalizedScore":0.748,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.222Z","updatedAt":"$D2025-07-19T19:56:12.222Z","benchmark":{"benchmarkId":"mmmu","name":"MMMU","category":null}},{"modelBenchmarkId":59518485,"benchmarkId":"multichallenge","modelId":"gpt-4.1-2025-04-14","score":0.383,"normalizedScore":0.383,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark (GPT-4o grader)","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.561Z","updatedAt":"$D2025-07-19T19:56:12.561Z","benchmark":{"benchmarkId":"multichallenge","name":"Multi-Challenge","category":null}},{"modelBenchmarkId":34747078,"benchmarkId":"multichallenge-(o3-mini-grader)","modelId":"gpt-4.1-2025-04-14","score":0.462,"normalizedScore":0.462,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark (o3-mini grader, see footnote [3])","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.244Z","updatedAt":"$D2025-07-19T19:56:15.244Z","benchmark":{"benchmarkId":"multichallenge-(o3-mini-grader)","name":"MultiChallenge (o3-mini grader)","category":null}},{"modelBenchmarkId":69016177,"benchmarkId":"multi-if","modelId":"gpt-4.1-2025-04-14","score":0.708,"normalizedScore":0.708,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.648Z","updatedAt":"$D2025-07-19T19:56:14.648Z","benchmark":{"benchmarkId":"multi-if","name":"Multi-IF","category":null}},{"modelBenchmarkId":34158059,"benchmarkId":"openai-mrcr:-2-needle-128k","modelId":"gpt-4.1-2025-04-14","score":0.572,"normalizedScore":0.572,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Internal benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.275Z","updatedAt":"$D2025-07-19T19:56:15.275Z","benchmark":{"benchmarkId":"openai-mrcr:-2-needle-128k","name":"OpenAI-MRCR: 2 needle 128k","category":null}},{"modelBenchmarkId":73667433,"benchmarkId":"openai-mrcr:-2-needle-1m","modelId":"gpt-4.1-2025-04-14","score":0.463,"normalizedScore":0.463,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Internal benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.286Z","updatedAt":"$D2025-07-19T19:56:15.286Z","benchmark":{"benchmarkId":"openai-mrcr:-2-needle-1m","name":"OpenAI-MRCR: 2 needle 1M","category":null}},{"modelBenchmarkId":94084540,"benchmarkId":"swe-bench-verified","modelId":"gpt-4.1-2025-04-14","score":0.546,"normalizedScore":0.546,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Internal methodology, see source footnote [2]","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.858Z","updatedAt":"$D2025-07-19T19:56:13.858Z","benchmark":{"benchmarkId":"swe-bench-verified","name":"SWE-Bench Verified","category":null}},{"modelBenchmarkId":1444282,"benchmarkId":"tau-bench-airline","modelId":"gpt-4.1-2025-04-14","score":0.494,"normalizedScore":0.494,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Avg 5 runs, no custom tools/prompting (footnote [4])","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.015Z","updatedAt":"$D2025-07-19T19:56:15.015Z","benchmark":{"benchmarkId":"tau-bench-airline","name":"TAU-bench Airline","category":null}},{"modelBenchmarkId":75696188,"benchmarkId":"tau-bench-retail","modelId":"gpt-4.1-2025-04-14","score":0.68,"normalizedScore":0.68,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Avg 5 runs, no custom tools/prompting (footnote [4], GPT-4o user model)","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.986Z","updatedAt":"$D2025-07-19T19:56:14.986Z","benchmark":{"benchmarkId":"tau-bench-retail","name":"TAU-bench Retail","category":null}},{"modelBenchmarkId":63707943,"benchmarkId":"video-mme-(long,-no-subtitles)","modelId":"gpt-4.1-2025-04-14","score":0.72,"normalizedScore":0.72,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"Standard benchmark","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.377Z","updatedAt":"$D2025-07-19T19:56:15.377Z","benchmark":{"benchmarkId":"video-mme-(long,-no-subtitles)","name":"Video-MME (long, no subtitles)","category":null}},{"modelBenchmarkId":55412790,"benchmarkId":"aime-2025","modelId":"gpt-4.1-2025-04-14","score":0.464,"normalizedScore":0.464,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"GPT-4.1 with no tools - Competition mathematics (AIME 2025).","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"aime-2025","name":"AIME 2025","category":null}},{"modelBenchmarkId":83362682,"benchmarkId":"humanity's-last-exam","modelId":"gpt-4.1-2025-04-14","score":0.054,"normalizedScore":0.054,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"GPT-4.1 with no tools - Expert-level questions across subjects.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"humanity's-last-exam","name":"Humanity's Last Exam","category":null}},{"modelBenchmarkId":23806600,"benchmarkId":"hmmt-2025","modelId":"gpt-4.1-2025-04-14","score":0.289,"normalizedScore":0.289,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-5-for-developers/","verifiedByLlmstats":false,"analysisMethod":"GPT-4.1 with no tools - Harvard-MIT Mathematics Tournament.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"hmmt-2025","name":"HMMT 2025","category":null}}],"providers":[{"modelProviderId":8852789,"modelId":"gpt-4.1-2025-04-14","providerId":"openai","providerModelIdUsed":"gpt-4.1-2025-04-14","deprecatedAt":null,"inputCentsPerMillionTokens":200,"outputCentsPerMillionTokens":800,"quantization":null,"maxInputTokens":1047576,"maxOutputTokens":32768,"throughput":100,"latency":10,"featureWebSearch":false,"featureFunctionCalling":true,"featureStructuredOutput":true,"featureCodeExecution":false,"featureBatchInference":true,"featureFinetuning":false,"inputModalityText":true,"inputModalityImage":true,"inputModalityAudio":false,"inputModalityVideo":false,"outputModalityText":true,"outputModalityImage":false,"outputModalityAudio":false,"outputModalityVideo":false,"createdAt":"$D2025-07-19T19:49:17.150Z","updatedAt":"$D2025-07-19T19:49:17.150Z","modelName":"GPT-4.1","organizationId":"openai","provider":{"providerId":"openai","name":"OpenAI","website":"https://openai.com","createdAt":"$D2025-07-19T19:49:17.121Z","updatedAt":"$D2025-07-19T19:49:17.121Z"}}]},"modelB":{"modelId":"llama-3.1-nemotron-ultra-253b-v1","name":"Llama 3.1 Nemotron Ultra 253B v1","organizationId":"nvidia","fineTunedFromModelId":null,"description":"A 253B parameter derivative of Meta Llama 3.1 405B Instruct, developed by NVIDIA using Neural Architecture Search (NAS) and vertical compression. It underwent multi-phase post-training (SFT for Math, Code, Reasoning, Chat, Tool Calling; RL with GRPO) to enhance reasoning and instruction-following. Optimized for accuracy/efficiency tradeoff on NVIDIA GPUs. Supports 128k context.","releaseDate":"2025-04-07","announcementDate":"2025-04-07","licenseId":"llama_3_1_community_license","multimodal":false,"knowledgeCutoff":"2023-12-01","paramCount":"$n253000000000","trainingTokens":null,"contextWindow":null,"availableInZeroeval":true,"sourceApiRef":null,"sourcePlayground":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-ultra-253b-v1","sourcePaper":"https://arxiv.org/abs/2502.00203","sourceScorecardBlogLink":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-ultra-253b-v1/modelcard","sourceRepoLink":null,"sourceWeightsLink":"https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1","modelFamilyId":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.735Z","updatedAt":"$D2025-07-19T19:49:05.735Z","organization":{"organizationId":"nvidia","name":"NVIDIA","website":"https://nvidia.com","description":"GPU and AI company","country":"US","manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.728Z","updatedAt":"$D2025-07-19T19:49:05.728Z"},"license":{"licenseId":"llama_3_1_community_license","name":"Llama 3.1 Community License","allowCommercial":false,"description":"Llama 3.1 Community License license","createdAt":"$D2025-07-19T19:49:05.574Z","updatedAt":"$D2025-07-19T19:49:05.574Z"},"benchmarks":[{"modelBenchmarkId":80149885,"benchmarkId":"aime-2025","modelId":"llama-3.1-nemotron-ultra-253b-v1","score":0.725,"normalizedScore":0.725,"isSelfReported":true,"selfReportedSourceLink":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-ultra-253b-v1/modelcard","verifiedByLlmstats":false,"analysisMethod":"Pass@1, Reasoning","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.463Z","updatedAt":"$D2025-07-19T19:56:12.463Z","benchmark":{"benchmarkId":"aime-2025","name":"AIME 2025","category":null}},{"modelBenchmarkId":41076873,"benchmarkId":"bfcl-v2","modelId":"llama-3.1-nemotron-ultra-253b-v1","score":0.741,"normalizedScore":0.741,"isSelfReported":true,"selfReportedSourceLink":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-ultra-253b-v1/modelcard","verifiedByLlmstats":false,"analysisMethod":"Score, Reasoning","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.456Z","updatedAt":"$D2025-07-19T19:56:14.456Z","benchmark":{"benchmarkId":"bfcl-v2","name":"BFCL v2","category":null}},{"modelBenchmarkId":98579832,"benchmarkId":"gpqa","modelId":"llama-3.1-nemotron-ultra-253b-v1","score":0.7601,"normalizedScore":0.7601,"isSelfReported":true,"selfReportedSourceLink":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-ultra-253b-v1/modelcard","verifiedByLlmstats":false,"analysisMethod":"Pass@1, Reasoning","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.721Z","updatedAt":"$D2025-07-19T19:56:11.721Z","benchmark":{"benchmarkId":"gpqa","name":"GPQA","category":null}},{"modelBenchmarkId":23138265,"benchmarkId":"ifeval","modelId":"llama-3.1-nemotron-ultra-253b-v1","score":0.8945,"normalizedScore":0.8945,"isSelfReported":true,"selfReportedSourceLink":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-ultra-253b-v1/modelcard","verifiedByLlmstats":false,"analysisMethod":"Strict Accuracy, Reasoning","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.292Z","updatedAt":"$D2025-07-19T19:56:12.292Z","benchmark":{"benchmarkId":"ifeval","name":"IFEval","category":null}},{"modelBenchmarkId":80021917,"benchmarkId":"livecodebench","modelId":"llama-3.1-nemotron-ultra-253b-v1","score":0.6631,"normalizedScore":0.6631,"isSelfReported":true,"selfReportedSourceLink":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-ultra-253b-v1/modelcard","verifiedByLlmstats":false,"analysisMethod":"Pass@1, Reasoning","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.404Z","updatedAt":"$D2025-07-19T19:56:13.404Z","benchmark":{"benchmarkId":"livecodebench","name":"LiveCodeBench","category":null}},{"modelBenchmarkId":59587030,"benchmarkId":"math-500","modelId":"llama-3.1-nemotron-ultra-253b-v1","score":0.97,"normalizedScore":0.97,"isSelfReported":true,"selfReportedSourceLink":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-ultra-253b-v1/modelcard","verifiedByLlmstats":false,"analysisMethod":"Pass@1, Reasoning","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.061Z","updatedAt":"$D2025-07-19T19:56:12.061Z","benchmark":{"benchmarkId":"math-500","name":"MATH-500","category":null}}],"providers":[]},"commonBenchmarks":[{"benchmarkId":"aider-polyglot","name":"Aider-Polyglot","category":null,"modelA":0.516,"modelB":0},{"benchmarkId":"aider-polyglot-edit","name":"Aider-Polyglot Edit","category":null,"modelA":0.529,"modelB":0},{"benchmarkId":"aime-2024","name":"AIME 2024","category":null,"modelA":0.481,"modelB":0},{"benchmarkId":"charxiv-d","name":"CharXiv-D","category":null,"modelA":0.879,"modelB":0},{"benchmarkId":"charxiv-r","name":"CharXiv-R","category":null,"modelA":0.567,"modelB":0},{"benchmarkId":"collie","name":"COLLIE","category":null,"modelA":0.658,"modelB":0},{"benchmarkId":"complexfuncbench","name":"ComplexFuncBench","category":null,"modelA":0.655,"modelB":0},{"benchmarkId":"gpqa","name":"GPQA","category":null,"modelA":0.663,"modelB":0.7601},{"benchmarkId":"graphwalks-bfs-<128k","name":"Graphwalks BFS <128k","category":null,"modelA":0.617,"modelB":0},{"benchmarkId":"graphwalks-bfs->128k","name":"Graphwalks BFS >128k","category":null,"modelA":0.19,"modelB":0},{"benchmarkId":"graphwalks-parents-<128k","name":"Graphwalks parents <128k","category":null,"modelA":0.58,"modelB":0},{"benchmarkId":"graphwalks-parents->128k","name":"Graphwalks parents >128k","category":null,"modelA":0.25,"modelB":0},{"benchmarkId":"ifeval","name":"IFEval","category":null,"modelA":0.874,"modelB":0.8945},{"benchmarkId":"internal-api-instruction-following-(hard)","name":"Internal API instruction following (hard)","category":null,"modelA":0.491,"modelB":0},{"benchmarkId":"mathvista","name":"MathVista","category":null,"modelA":0.722,"modelB":0},{"benchmarkId":"mmlu","name":"MMLU","category":null,"modelA":0.902,"modelB":0},{"benchmarkId":"mmmlu","name":"MMMLU","category":null,"modelA":0.873,"modelB":0},{"benchmarkId":"mmmu","name":"MMMU","category":null,"modelA":0.748,"modelB":0},{"benchmarkId":"multichallenge","name":"Multi-Challenge","category":null,"modelA":0.383,"modelB":0},{"benchmarkId":"multichallenge-(o3-mini-grader)","name":"MultiChallenge (o3-mini grader)","category":null,"modelA":0.462,"modelB":0},{"benchmarkId":"multi-if","name":"Multi-IF","category":null,"modelA":0.708,"modelB":0},{"benchmarkId":"openai-mrcr:-2-needle-128k","name":"OpenAI-MRCR: 2 needle 128k","category":null,"modelA":0.572,"modelB":0},{"benchmarkId":"openai-mrcr:-2-needle-1m","name":"OpenAI-MRCR: 2 needle 1M","category":null,"modelA":0.463,"modelB":0},{"benchmarkId":"swe-bench-verified","name":"SWE-Bench Verified","category":null,"modelA":0.546,"modelB":0},{"benchmarkId":"tau-bench-airline","name":"TAU-bench Airline","category":null,"modelA":0.494,"modelB":0},{"benchmarkId":"tau-bench-retail","name":"TAU-bench Retail","category":null,"modelA":0.68,"modelB":0},{"benchmarkId":"video-mme-(long,-no-subtitles)","name":"Video-MME (long, no subtitles)","category":null,"modelA":0.72,"modelB":0},{"benchmarkId":"aime-2025","name":"AIME 2025","category":null,"modelA":0.464,"modelB":0.725},{"benchmarkId":"humanity's-last-exam","name":"Humanity's Last Exam","category":null,"modelA":0.054,"modelB":0},{"benchmarkId":"hmmt-2025","name":"HMMT 2025","category":null,"modelA":0.289,"modelB":0},{"benchmarkId":"bfcl-v2","name":"BFCL v2","category":null,"modelA":0,"modelB":0.741},{"benchmarkId":"livecodebench","name":"LiveCodeBench","category":null,"modelA":0,"modelB":0.6631},{"benchmarkId":"math-500","name":"MATH-500","category":null,"modelA":0,"modelB":0.97}]}],"$L32"]}]

GPT-4.1 vs Llama 3.1 Nemotron Ultra 253B v1

Performance Metrics

Provider Availability & Performance