2a:["$","div",null,{"className":"space-y-4 max-w-4xl mx-auto","children":[["$","div",null,{"className":"text-center","children":[["$","h2",null,{"className":"text-2xl font-bold","children":"Performance Metrics"}],["$","p",null,{"className":"text-muted-foreground","children":"Context window and performance specifications"}]]}],["$","$L30",null,{"modelA":{"modelId":"deepseek-r1-distill-qwen-1.5b","name":"DeepSeek R1 Distill Qwen 1.5B","organizationId":"deepseek","fineTunedFromModelId":null,"description":"DeepSeek-R1 is the first-generation reasoning model built atop DeepSeek-V3 (671B total parameters, 37B activated per token). It incorporates large-scale reinforcement learning (RL) to enhance its chain-of-thought and reasoning capabilities, delivering strong performance in math, code, and multi-step reasoning tasks.","releaseDate":"2025-01-20","announcementDate":"2025-01-20","licenseId":"mit","multimodal":false,"knowledgeCutoff":null,"paramCount":"$n1780000000","trainingTokens":"$n14800000000000","contextWindow":null,"availableInZeroeval":true,"sourceApiRef":"https://api-docs.deepseek.com/news/news250120","sourcePlayground":"https://chat.deepseek.com","sourcePaper":"https://arxiv.org/pdf/2501.12948","sourceScorecardBlogLink":null,"sourceRepoLink":"https://github.com/deepseek-ai/DeepSeek-R1","sourceWeightsLink":"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B","modelFamilyId":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.672Z","updatedAt":"$D2025-07-19T19:49:05.672Z","organization":{"organizationId":"deepseek","name":"DeepSeek","website":"https://deepseek.com","description":"Chinese AI company developing state-of-the-art large language models including the DeepSeek-V3 series with mixture-of-experts architecture and hybrid thinking/non-thinking capabilities","country":"CN","manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.655Z","updatedAt":"$D2025-09-15T00:00:00.000Z"},"license":{"licenseId":"mit","name":"MIT","allowCommercial":true,"description":"MIT License - allows commercial use","createdAt":"$D2025-07-19T19:49:05.544Z","updatedAt":"$D2025-07-19T19:49:05.544Z"},"benchmarks":[{"modelBenchmarkId":94496097,"benchmarkId":"aime-2024","modelId":"deepseek-r1-distill-qwen-1.5b","score":0.527,"normalizedScore":0.527,"isSelfReported":true,"selfReportedSourceLink":"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B","verifiedByLlmstats":false,"analysisMethod":"Cons@64","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.976Z","updatedAt":"$D2025-07-19T19:56:11.978Z","benchmark":{"benchmarkId":"aime-2024","name":"AIME 2024","category":null}},{"modelBenchmarkId":39489094,"benchmarkId":"gpqa","modelId":"deepseek-r1-distill-qwen-1.5b","score":0.338,"normalizedScore":0.338,"isSelfReported":true,"selfReportedSourceLink":"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B","verifiedByLlmstats":false,"analysisMethod":"Diamond, Pass@1","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.694Z","updatedAt":"$D2025-07-19T19:56:11.694Z","benchmark":{"benchmarkId":"gpqa","name":"GPQA","category":null}},{"modelBenchmarkId":23476313,"benchmarkId":"livecodebench","modelId":"deepseek-r1-distill-qwen-1.5b","score":0.169,"normalizedScore":0.169,"isSelfReported":true,"selfReportedSourceLink":"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B","verifiedByLlmstats":false,"analysisMethod":"Pass@1","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.362Z","updatedAt":"$D2025-07-19T19:56:13.362Z","benchmark":{"benchmarkId":"livecodebench","name":"LiveCodeBench","category":null}},{"modelBenchmarkId":73271501,"benchmarkId":"math-500","modelId":"deepseek-r1-distill-qwen-1.5b","score":0.839,"normalizedScore":0.839,"isSelfReported":true,"selfReportedSourceLink":"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B","verifiedByLlmstats":false,"analysisMethod":"Pass@1","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.041Z","updatedAt":"$D2025-07-19T19:56:12.041Z","benchmark":{"benchmarkId":"math-500","name":"MATH-500","category":null}}],"providers":[]},"modelB":{"modelId":"gpt-4.5","name":"GPT-4.5","organizationId":"openai","fineTunedFromModelId":null,"description":"GPT-4.5 is OpenAI's most advanced model, offering improved reasoning, coding, and creative capabilities with faster performance and longer context handling than GPT-4. It features enhanced instruction following, reduced hallucinations, and better factual accuracy.","releaseDate":"2025-02-27","announcementDate":"2025-02-27","licenseId":"proprietary","multimodal":true,"knowledgeCutoff":null,"paramCount":null,"trainingTokens":null,"contextWindow":null,"availableInZeroeval":true,"sourceApiRef":"https://platform.openai.com/docs/models/gpt-4-5#gpt-4-5","sourcePlayground":"https://platform.openai.com/playground","sourcePaper":null,"sourceScorecardBlogLink":"https://openai.com/index/introducing-gpt-4-5/","sourceRepoLink":"https://github.com/openai","sourceWeightsLink":null,"modelFamilyId":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.852Z","updatedAt":"$D2025-07-19T19:49:05.852Z","organization":{"organizationId":"openai","name":"OpenAI","website":"https://openai.com","description":"Leading AI research company","country":"US","manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.815Z","updatedAt":"$D2025-07-19T19:49:05.815Z"},"license":{"licenseId":"proprietary","name":"Proprietary","allowCommercial":false,"description":"Proprietary license - usage restrictions apply","createdAt":"$D2025-07-19T19:49:05.425Z","updatedAt":"$D2025-07-19T19:49:05.425Z"},"benchmarks":[{"modelBenchmarkId":14816551,"benchmarkId":"aider-polyglot-edit","modelId":"gpt-4.5","score":0.449,"normalizedScore":0.449,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.811Z","updatedAt":"$D2025-07-19T19:56:13.811Z","benchmark":{"benchmarkId":"aider-polyglot-edit","name":"Aider-Polyglot Edit","category":null}},{"modelBenchmarkId":95204512,"benchmarkId":"aime-2024","modelId":"gpt-4.5","score":0.367,"normalizedScore":0.367,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.024Z","updatedAt":"$D2025-07-19T19:56:12.024Z","benchmark":{"benchmarkId":"aime-2024","name":"AIME 2024","category":null}},{"modelBenchmarkId":92965140,"benchmarkId":"charxiv-d","modelId":"gpt-4.5","score":0.9,"normalizedScore":0.9,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.335Z","updatedAt":"$D2025-07-19T19:56:15.335Z","benchmark":{"benchmarkId":"charxiv-d","name":"CharXiv-D","category":null}},{"modelBenchmarkId":47093547,"benchmarkId":"charxiv-r","modelId":"gpt-4.5","score":0.554,"normalizedScore":0.554,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.204Z","updatedAt":"$D2025-07-19T19:56:15.204Z","benchmark":{"benchmarkId":"charxiv-r","name":"CharXiv-R","category":null}},{"modelBenchmarkId":64150326,"benchmarkId":"collie","modelId":"gpt-4.5","score":0.723,"normalizedScore":0.723,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.265Z","updatedAt":"$D2025-07-19T19:56:15.265Z","benchmark":{"benchmarkId":"collie","name":"COLLIE","category":null}},{"modelBenchmarkId":92515032,"benchmarkId":"complexfuncbench","modelId":"gpt-4.5","score":0.63,"normalizedScore":0.63,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.351Z","updatedAt":"$D2025-07-19T19:56:15.351Z","benchmark":{"benchmarkId":"complexfuncbench","name":"ComplexFuncBench","category":null}},{"modelBenchmarkId":50627060,"benchmarkId":"gpqa","modelId":"gpt-4.5","score":0.695,"normalizedScore":0.695,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy (Diamond)","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.767Z","updatedAt":"$D2025-07-19T19:56:11.767Z","benchmark":{"benchmarkId":"gpqa","name":"GPQA","category":null}},{"modelBenchmarkId":59542837,"benchmarkId":"graphwalks-bfs-<128k","modelId":"gpt-4.5","score":0.723,"normalizedScore":0.723,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.372Z","updatedAt":"$D2025-07-19T19:56:15.372Z","benchmark":{"benchmarkId":"graphwalks-bfs-<128k","name":"Graphwalks BFS <128k","category":null}},{"modelBenchmarkId":8093949,"benchmarkId":"graphwalks-parents-<128k","modelId":"gpt-4.5","score":0.726,"normalizedScore":0.726,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.315Z","updatedAt":"$D2025-07-19T19:56:15.315Z","benchmark":{"benchmarkId":"graphwalks-parents-<128k","name":"Graphwalks parents <128k","category":null}},{"modelBenchmarkId":15876659,"benchmarkId":"gsm8k","modelId":"gpt-4.5","score":0.97,"normalizedScore":0.97,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-4-5/","verifiedByLlmstats":false,"analysisMethod":"Answer accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.114Z","updatedAt":"$D2025-07-19T19:56:13.114Z","benchmark":{"benchmarkId":"gsm8k","name":"GSM8k","category":null}},{"modelBenchmarkId":91580885,"benchmarkId":"humaneval","modelId":"gpt-4.5","score":0.88,"normalizedScore":0.88,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-4-5/","verifiedByLlmstats":false,"analysisMethod":"Pass@1","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.694Z","updatedAt":"$D2025-07-19T19:56:12.694Z","benchmark":{"benchmarkId":"humaneval","name":"HumanEval","category":null}},{"modelBenchmarkId":48322518,"benchmarkId":"ifeval","modelId":"gpt-4.5","score":0.882,"normalizedScore":0.882,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.307Z","updatedAt":"$D2025-07-19T19:56:12.307Z","benchmark":{"benchmarkId":"ifeval","name":"IFEval","category":null}},{"modelBenchmarkId":20635464,"benchmarkId":"internal-api-instruction-following-(hard)","modelId":"gpt-4.5","score":0.54,"normalizedScore":0.54,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.234Z","updatedAt":"$D2025-07-19T19:56:15.234Z","benchmark":{"benchmarkId":"internal-api-instruction-following-(hard)","name":"Internal API instruction following (hard)","category":null}},{"modelBenchmarkId":90050962,"benchmarkId":"mathvista","modelId":"gpt-4.5","score":0.723,"normalizedScore":0.723,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.124Z","updatedAt":"$D2025-07-19T19:56:12.124Z","benchmark":{"benchmarkId":"mathvista","name":"MathVista","category":null}},{"modelBenchmarkId":31341070,"benchmarkId":"mmlu","modelId":"gpt-4.5","score":0.908,"normalizedScore":0.908,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Multiple-choice accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.328Z","updatedAt":"$D2025-07-19T19:56:11.328Z","benchmark":{"benchmarkId":"mmlu","name":"MMLU","category":null}},{"modelBenchmarkId":28258338,"benchmarkId":"mmmlu","modelId":"gpt-4.5","score":0.851,"normalizedScore":0.851,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.164Z","updatedAt":"$D2025-07-19T19:56:14.164Z","benchmark":{"benchmarkId":"mmmlu","name":"MMMLU","category":null}},{"modelBenchmarkId":39306676,"benchmarkId":"mmmu","modelId":"gpt-4.5","score":0.752,"normalizedScore":0.752,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.226Z","updatedAt":"$D2025-07-19T19:56:12.226Z","benchmark":{"benchmarkId":"mmmu","name":"MMMU","category":null}},{"modelBenchmarkId":31632380,"benchmarkId":"multichallenge","modelId":"gpt-4.5","score":0.438,"normalizedScore":0.438,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.563Z","updatedAt":"$D2025-07-19T19:56:12.563Z","benchmark":{"benchmarkId":"multichallenge","name":"Multi-Challenge","category":null}},{"modelBenchmarkId":72811645,"benchmarkId":"multichallenge-(o3-mini-grader)","modelId":"gpt-4.5","score":0.501,"normalizedScore":0.501,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.249Z","updatedAt":"$D2025-07-19T19:56:15.249Z","benchmark":{"benchmarkId":"multichallenge-(o3-mini-grader)","name":"MultiChallenge (o3-mini grader)","category":null}},{"modelBenchmarkId":33695047,"benchmarkId":"multi-if","modelId":"gpt-4.5","score":0.708,"normalizedScore":0.708,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.652Z","updatedAt":"$D2025-07-19T19:56:14.652Z","benchmark":{"benchmarkId":"multi-if","name":"Multi-IF","category":null}},{"modelBenchmarkId":94056303,"benchmarkId":"openai-mrcr:-2-needle-128k","modelId":"gpt-4.5","score":0.385,"normalizedScore":0.385,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.279Z","updatedAt":"$D2025-07-19T19:56:15.279Z","benchmark":{"benchmarkId":"openai-mrcr:-2-needle-128k","name":"OpenAI-MRCR: 2 needle 128k","category":null}},{"modelBenchmarkId":65314037,"benchmarkId":"simpleqa","modelId":"gpt-4.5","score":0.625,"normalizedScore":0.625,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-4-5/","verifiedByLlmstats":false,"analysisMethod":"accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.559Z","updatedAt":"$D2025-07-19T19:56:11.559Z","benchmark":{"benchmarkId":"simpleqa","name":"SimpleQA","category":null}},{"modelBenchmarkId":67188828,"benchmarkId":"swe-bench-verified","modelId":"gpt-4.5","score":0.38,"normalizedScore":0.38,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Success rate","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.863Z","updatedAt":"$D2025-07-19T19:56:13.863Z","benchmark":{"benchmarkId":"swe-bench-verified","name":"SWE-Bench Verified","category":null}},{"modelBenchmarkId":47543870,"benchmarkId":"swe-lancer","modelId":"gpt-4.5","score":0.373,"normalizedScore":0.373,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Success rate ($186K equivalent)","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.358Z","updatedAt":"$D2025-07-19T19:56:15.358Z","benchmark":{"benchmarkId":"swe-lancer","name":"SWE-Lancer","category":null}},{"modelBenchmarkId":11119247,"benchmarkId":"swe-lancer-(ic-diamond-subset)","modelId":"gpt-4.5","score":0.174,"normalizedScore":0.174,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Success rate ($41K equivalent)","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.365Z","updatedAt":"$D2025-07-19T19:56:15.365Z","benchmark":{"benchmarkId":"swe-lancer-(ic-diamond-subset)","name":"SWE-Lancer (IC-Diamond subset)","category":null}},{"modelBenchmarkId":93657025,"benchmarkId":"tau-bench-airline","modelId":"gpt-4.5","score":0.5,"normalizedScore":0.5,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.020Z","updatedAt":"$D2025-07-19T19:56:15.020Z","benchmark":{"benchmarkId":"tau-bench-airline","name":"TAU-bench Airline","category":null}},{"modelBenchmarkId":95800472,"benchmarkId":"tau-bench-retail","modelId":"gpt-4.5","score":0.684,"normalizedScore":0.684,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.989Z","updatedAt":"$D2025-07-19T19:56:14.989Z","benchmark":{"benchmarkId":"tau-bench-retail","name":"TAU-bench Retail","category":null}}],"providers":[{"modelProviderId":15186586,"modelId":"gpt-4.5","providerId":"openai","providerModelIdUsed":"gpt-4.5","deprecatedAt":null,"inputCentsPerMillionTokens":7500,"outputCentsPerMillionTokens":15000,"quantization":null,"maxInputTokens":128000,"maxOutputTokens":4096,"throughput":50,"latency":20,"featureWebSearch":false,"featureFunctionCalling":true,"featureStructuredOutput":true,"featureCodeExecution":false,"featureBatchInference":true,"featureFinetuning":false,"inputModalityText":true,"inputModalityImage":true,"inputModalityAudio":false,"inputModalityVideo":false,"outputModalityText":true,"outputModalityImage":false,"outputModalityAudio":false,"outputModalityVideo":false,"createdAt":"$D2025-07-19T19:49:17.148Z","updatedAt":"$D2025-07-19T19:49:17.148Z","modelName":"GPT-4.5","organizationId":"openai","provider":{"providerId":"openai","name":"OpenAI","website":"https://openai.com","createdAt":"$D2025-07-19T19:49:17.121Z","updatedAt":"$D2025-07-19T19:49:17.121Z"}}]},"commonBenchmarks":[{"benchmarkId":"aime-2024","name":"AIME 2024","category":null,"modelA":0.527,"modelB":0.367},{"benchmarkId":"gpqa","name":"GPQA","category":null,"modelA":0.338,"modelB":0.695},{"benchmarkId":"livecodebench","name":"LiveCodeBench","category":null,"modelA":0.169,"modelB":0},{"benchmarkId":"math-500","name":"MATH-500","category":null,"modelA":0.839,"modelB":0},{"benchmarkId":"aider-polyglot-edit","name":"Aider-Polyglot Edit","category":null,"modelA":0,"modelB":0.449},{"benchmarkId":"charxiv-d","name":"CharXiv-D","category":null,"modelA":0,"modelB":0.9},{"benchmarkId":"charxiv-r","name":"CharXiv-R","category":null,"modelA":0,"modelB":0.554},{"benchmarkId":"collie","name":"COLLIE","category":null,"modelA":0,"modelB":0.723},{"benchmarkId":"complexfuncbench","name":"ComplexFuncBench","category":null,"modelA":0,"modelB":0.63},{"benchmarkId":"graphwalks-bfs-<128k","name":"Graphwalks BFS <128k","category":null,"modelA":0,"modelB":0.723},{"benchmarkId":"graphwalks-parents-<128k","name":"Graphwalks parents <128k","category":null,"modelA":0,"modelB":0.726},{"benchmarkId":"gsm8k","name":"GSM8k","category":null,"modelA":0,"modelB":0.97},{"benchmarkId":"humaneval","name":"HumanEval","category":null,"modelA":0,"modelB":0.88},{"benchmarkId":"ifeval","name":"IFEval","category":null,"modelA":0,"modelB":0.882},{"benchmarkId":"internal-api-instruction-following-(hard)","name":"Internal API instruction following (hard)","category":null,"modelA":0,"modelB":0.54},{"benchmarkId":"mathvista","name":"MathVista","category":null,"modelA":0,"modelB":0.723},{"benchmarkId":"mmlu","name":"MMLU","category":null,"modelA":0,"modelB":0.908},{"benchmarkId":"mmmlu","name":"MMMLU","category":null,"modelA":0,"modelB":0.851},{"benchmarkId":"mmmu","name":"MMMU","category":null,"modelA":0,"modelB":0.752},{"benchmarkId":"multichallenge","name":"Multi-Challenge","category":null,"modelA":0,"modelB":0.438},{"benchmarkId":"multichallenge-(o3-mini-grader)","name":"MultiChallenge (o3-mini grader)","category":null,"modelA":0,"modelB":0.501},{"benchmarkId":"multi-if","name":"Multi-IF","category":null,"modelA":0,"modelB":0.708},{"benchmarkId":"openai-mrcr:-2-needle-128k","name":"OpenAI-MRCR: 2 needle 128k","category":null,"modelA":0,"modelB":0.385},{"benchmarkId":"simpleqa","name":"SimpleQA","category":null,"modelA":0,"modelB":0.625},{"benchmarkId":"swe-bench-verified","name":"SWE-Bench Verified","category":null,"modelA":0,"modelB":0.38},{"benchmarkId":"swe-lancer","name":"SWE-Lancer","category":null,"modelA":0,"modelB":0.373},{"benchmarkId":"swe-lancer-(ic-diamond-subset)","name":"SWE-Lancer (IC-Diamond subset)","category":null,"modelA":0,"modelB":0.174},{"benchmarkId":"tau-bench-airline","name":"TAU-bench Airline","category":null,"modelA":0,"modelB":0.5},{"benchmarkId":"tau-bench-retail","name":"TAU-bench Retail","category":null,"modelA":0,"modelB":0.684}]}],"$L31"]}]

DeepSeek R1 Distill Qwen 1.5B vs GPT-4.5

Performance Metrics

Provider Availability & Performance