2b:["$","div",null,{"className":"space-y-4 max-w-4xl mx-auto","children":[["$","div",null,{"className":"text-center","children":[["$","h2",null,{"className":"text-2xl font-bold","children":"Performance Metrics"}],["$","p",null,{"className":"text-muted-foreground","children":"Context window and performance specifications"}]]}],["$","$L31",null,{"modelA":{"modelId":"gpt-4o-2024-08-06","name":"GPT-4o","organizationId":"openai","fineTunedFromModelId":null,"description":"GPT-4o ('o' for 'omni') is a multimodal AI model that accepts text, audio, image, and video inputs, and generates text, audio, and image outputs. It matches GPT-4 Turbo performance on text and code, with improvements in non-English languages, vision, and audio understanding.","releaseDate":"2024-08-06","announcementDate":"2024-08-06","licenseId":"proprietary","multimodal":true,"knowledgeCutoff":null,"paramCount":null,"trainingTokens":null,"contextWindow":null,"availableInZeroeval":true,"sourceApiRef":"https://platform.openai.com/docs/api-reference","sourcePlayground":"https://chat.openai.com/","sourcePaper":null,"sourceScorecardBlogLink":"https://openai.com/index/hello-gpt-4o/","sourceRepoLink":null,"sourceWeightsLink":null,"modelFamilyId":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.847Z","updatedAt":"$D2025-07-19T19:49:05.847Z","organization":{"organizationId":"openai","name":"OpenAI","website":"https://openai.com","description":"Leading AI research company","country":"US","manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.815Z","updatedAt":"$D2025-07-19T19:49:05.815Z"},"license":{"licenseId":"proprietary","name":"Proprietary","allowCommercial":false,"description":"Proprietary license - usage restrictions apply","createdAt":"$D2025-07-19T19:49:05.425Z","updatedAt":"$D2025-07-19T19:49:05.425Z"},"benchmarks":[{"modelBenchmarkId":82027458,"benchmarkId":"activitynet","modelId":"gpt-4o-2024-08-06","score":0.619,"normalizedScore":0.619,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/hello-gpt-4o/","verifiedByLlmstats":false,"analysisMethod":"test set evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.381Z","updatedAt":"$D2025-07-19T19:56:15.381Z","benchmark":{"benchmarkId":"activitynet","name":"ActivityNet","category":null}},{"modelBenchmarkId":9901310,"benchmarkId":"ai2d","modelId":"gpt-4o-2024-08-06","score":0.942,"normalizedScore":0.942,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/hello-gpt-4o/","verifiedByLlmstats":false,"analysisMethod":"test set evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.646Z","updatedAt":"$D2025-07-19T19:56:13.646Z","benchmark":{"benchmarkId":"ai2d","name":"AI2D","category":null}},{"modelBenchmarkId":75858158,"benchmarkId":"aider-polyglot","modelId":"gpt-4o-2024-08-06","score":0.307,"normalizedScore":0.307,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.391Z","updatedAt":"$D2025-07-19T19:56:12.391Z","benchmark":{"benchmarkId":"aider-polyglot","name":"Aider-Polyglot","category":null}},{"modelBenchmarkId":86790031,"benchmarkId":"aider-polyglot-edit","modelId":"gpt-4o-2024-08-06","score":0.182,"normalizedScore":0.182,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.810Z","updatedAt":"$D2025-07-19T19:56:13.810Z","benchmark":{"benchmarkId":"aider-polyglot-edit","name":"Aider-Polyglot Edit","category":null}},{"modelBenchmarkId":72525798,"benchmarkId":"aime-2024","modelId":"gpt-4o-2024-08-06","score":0.131,"normalizedScore":0.131,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.022Z","updatedAt":"$D2025-07-19T19:56:12.022Z","benchmark":{"benchmarkId":"aime-2024","name":"AIME 2024","category":null}},{"modelBenchmarkId":30270052,"benchmarkId":"chartqa","modelId":"gpt-4o-2024-08-06","score":0.857,"normalizedScore":0.857,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/hello-gpt-4o/","verifiedByLlmstats":false,"analysisMethod":"test set evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.824Z","updatedAt":"$D2025-07-19T19:56:12.824Z","benchmark":{"benchmarkId":"chartqa","name":"ChartQA","category":null}},{"modelBenchmarkId":85278020,"benchmarkId":"charxiv-d","modelId":"gpt-4o-2024-08-06","score":0.853,"normalizedScore":0.853,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.333Z","updatedAt":"$D2025-07-19T19:56:15.333Z","benchmark":{"benchmarkId":"charxiv-d","name":"CharXiv-D","category":null}},{"modelBenchmarkId":69532534,"benchmarkId":"charxiv-r","modelId":"gpt-4o-2024-08-06","score":0.588,"normalizedScore":0.588,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Scientific figure reasoning and interpretation.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.203Z","updatedAt":"$D2025-07-19T19:56:15.203Z","benchmark":{"benchmarkId":"charxiv-r","name":"CharXiv-R","category":null}},{"modelBenchmarkId":68527199,"benchmarkId":"collie","modelId":"gpt-4o-2024-08-06","score":0.61,"normalizedScore":0.61,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Instruction-following in freeform writing.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.262Z","updatedAt":"$D2025-07-19T19:56:15.262Z","benchmark":{"benchmarkId":"collie","name":"COLLIE","category":null}},{"modelBenchmarkId":10214599,"benchmarkId":"tau2-airline","modelId":"gpt-4o-2024-08-06","score":0.455,"normalizedScore":0.455,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Function calling benchmark (airline domain).","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"tau2-airline","name":"Tau2 Airline","category":null}},{"modelBenchmarkId":77281637,"benchmarkId":"tau2-retail","modelId":"gpt-4o-2024-08-06","score":0.634,"normalizedScore":0.634,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Function calling benchmark (retail domain).","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"tau2-retail","name":"Tau2 Retail","category":null}},{"modelBenchmarkId":48213237,"benchmarkId":"tau2-telecom","modelId":"gpt-4o-2024-08-06","score":0.235,"normalizedScore":0.235,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Function calling benchmark (telecom domain).","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"tau2-telecom","name":"Tau2 Telecom","category":null}},{"modelBenchmarkId":11898115,"benchmarkId":"mmmu-pro","modelId":"gpt-4o-2024-08-06","score":0.599,"normalizedScore":0.599,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Graduate-level visual problem-solving with advanced multimodal reasoning.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"mmmu-pro","name":"MMMU-Pro","category":null}},{"modelBenchmarkId":90257702,"benchmarkId":"videommmu","modelId":"gpt-4o-2024-08-06","score":0.612,"normalizedScore":0.612,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Video-based multimodal reasoning (max frame 256).","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"videommmu","name":"VideoMMMU","category":null}},{"modelBenchmarkId":4735901,"benchmarkId":"erqa","modelId":"gpt-4o-2024-08-06","score":0.352,"normalizedScore":0.352,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Multimodal spatial reasoning.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"erqa","name":"ERQA","category":null}},{"modelBenchmarkId":71881930,"benchmarkId":"complexfuncbench","modelId":"gpt-4o-2024-08-06","score":0.665,"normalizedScore":0.665,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.349Z","updatedAt":"$D2025-07-19T19:56:15.349Z","benchmark":{"benchmarkId":"complexfuncbench","name":"ComplexFuncBench","category":null}},{"modelBenchmarkId":49407182,"benchmarkId":"docvqa","modelId":"gpt-4o-2024-08-06","score":0.928,"normalizedScore":0.928,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/hello-gpt-4o/","verifiedByLlmstats":false,"analysisMethod":"test set evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.873Z","updatedAt":"$D2025-07-19T19:56:12.873Z","benchmark":{"benchmarkId":"docvqa","name":"DocVQA","category":null}},{"modelBenchmarkId":58201271,"benchmarkId":"egoschema","modelId":"gpt-4o-2024-08-06","score":0.722,"normalizedScore":0.722,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/hello-gpt-4o/","verifiedByLlmstats":false,"analysisMethod":"test set evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.935Z","updatedAt":"$D2025-07-19T19:56:12.935Z","benchmark":{"benchmarkId":"egoschema","name":"EgoSchema","category":null}},{"modelBenchmarkId":50137716,"benchmarkId":"gpqa","modelId":"gpt-4o-2024-08-06","score":0.701,"normalizedScore":0.701,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o - Diamond no thinking no tools","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.764Z","updatedAt":"$D2025-07-19T19:56:11.764Z","benchmark":{"benchmarkId":"gpqa","name":"GPQA","category":null}},{"modelBenchmarkId":42366721,"benchmarkId":"graphwalks-bfs-<128k","modelId":"gpt-4o-2024-08-06","score":0.417,"normalizedScore":0.417,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.370Z","updatedAt":"$D2025-07-19T19:56:15.370Z","benchmark":{"benchmarkId":"graphwalks-bfs-<128k","name":"Graphwalks BFS <128k","category":null}},{"modelBenchmarkId":86688494,"benchmarkId":"graphwalks-parents-<128k","modelId":"gpt-4o-2024-08-06","score":0.354,"normalizedScore":0.354,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.314Z","updatedAt":"$D2025-07-19T19:56:15.314Z","benchmark":{"benchmarkId":"graphwalks-parents-<128k","name":"Graphwalks parents <128k","category":null}},{"modelBenchmarkId":29013731,"benchmarkId":"ifeval","modelId":"gpt-4o-2024-08-06","score":0.81,"normalizedScore":0.81,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.306Z","updatedAt":"$D2025-07-19T19:56:12.306Z","benchmark":{"benchmarkId":"ifeval","name":"IFEval","category":null}},{"modelBenchmarkId":27887959,"benchmarkId":"internal-api-instruction-following-(hard)","modelId":"gpt-4o-2024-08-06","score":0.292,"normalizedScore":0.292,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.232Z","updatedAt":"$D2025-07-19T19:56:15.232Z","benchmark":{"benchmarkId":"internal-api-instruction-following-(hard)","name":"Internal API instruction following (hard)","category":null}},{"modelBenchmarkId":12644384,"benchmarkId":"mathvista","modelId":"gpt-4o-2024-08-06","score":0.614,"normalizedScore":0.614,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.122Z","updatedAt":"$D2025-07-19T19:56:12.122Z","benchmark":{"benchmarkId":"mathvista","name":"MathVista","category":null}},{"modelBenchmarkId":36299037,"benchmarkId":"mmlu","modelId":"gpt-4o-2024-08-06","score":0.857,"normalizedScore":0.857,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.325Z","updatedAt":"$D2025-07-19T19:56:11.325Z","benchmark":{"benchmarkId":"mmlu","name":"MMLU","category":null}},{"modelBenchmarkId":94531393,"benchmarkId":"mmlu-pro","modelId":"gpt-4o-2024-08-06","score":0.747,"normalizedScore":0.747,"isSelfReported":true,"selfReportedSourceLink":"https://huggingface.co/spaces/TIGER-Lab/MMLU-Pro","verifiedByLlmstats":false,"analysisMethod":"0-shot CoT","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.517Z","updatedAt":"$D2025-07-19T19:56:11.517Z","benchmark":{"benchmarkId":"mmlu-pro","name":"MMLU-Pro","category":null}},{"modelBenchmarkId":1300524,"benchmarkId":"mmmlu","modelId":"gpt-4o-2024-08-06","score":0.814,"normalizedScore":0.814,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.162Z","updatedAt":"$D2025-07-19T19:56:14.162Z","benchmark":{"benchmarkId":"mmmlu","name":"MMMLU","category":null}},{"modelBenchmarkId":5818007,"benchmarkId":"mmmu","modelId":"gpt-4o-2024-08-06","score":0.722,"normalizedScore":0.722,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - College-level visual problem-solving with multimodal reasoning.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:12.224Z","updatedAt":"$D2025-07-19T19:56:12.224Z","benchmark":{"benchmarkId":"mmmu","name":"MMMU","category":null}},{"modelBenchmarkId":77155674,"benchmarkId":"multichallenge-(o3-mini-grader)","modelId":"gpt-4o-2024-08-06","score":0.399,"normalizedScore":0.399,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.246Z","updatedAt":"$D2025-07-19T19:56:15.246Z","benchmark":{"benchmarkId":"multichallenge-(o3-mini-grader)","name":"MultiChallenge (o3-mini grader)","category":null}},{"modelBenchmarkId":41553531,"benchmarkId":"multi-if","modelId":"gpt-4o-2024-08-06","score":0.609,"normalizedScore":0.609,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.650Z","updatedAt":"$D2025-07-19T19:56:14.650Z","benchmark":{"benchmarkId":"multi-if","name":"Multi-IF","category":null}},{"modelBenchmarkId":5002339,"benchmarkId":"openai-mrcr:-2-needle-128k","modelId":"gpt-4o-2024-08-06","score":0.319,"normalizedScore":0.319,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.277Z","updatedAt":"$D2025-07-19T19:56:15.277Z","benchmark":{"benchmarkId":"openai-mrcr:-2-needle-128k","name":"OpenAI-MRCR: 2 needle 128k","category":null}},{"modelBenchmarkId":90472802,"benchmarkId":"simpleqa","modelId":"gpt-4o-2024-08-06","score":0.382,"normalizedScore":0.382,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/introducing-gpt-4-5/","verifiedByLlmstats":false,"analysisMethod":"accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.557Z","updatedAt":"$D2025-07-19T19:56:11.557Z","benchmark":{"benchmarkId":"simpleqa","name":"SimpleQA","category":null}},{"modelBenchmarkId":41052645,"benchmarkId":"swe-bench-verified","modelId":"gpt-4o-2024-08-06","score":0.332,"normalizedScore":0.332,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.861Z","updatedAt":"$D2025-07-19T19:56:13.861Z","benchmark":{"benchmarkId":"swe-bench-verified","name":"SWE-Bench Verified","category":null}},{"modelBenchmarkId":57894005,"benchmarkId":"swe-lancer","modelId":"gpt-4o-2024-08-06","score":0.326,"normalizedScore":0.326,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"percentage score","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.356Z","updatedAt":"$D2025-07-19T19:56:15.356Z","benchmark":{"benchmarkId":"swe-lancer","name":"SWE-Lancer","category":null}},{"modelBenchmarkId":98505218,"benchmarkId":"swe-lancer-(ic-diamond-subset)","modelId":"gpt-4o-2024-08-06","score":0.124,"normalizedScore":0.124,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"percentage score","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.363Z","updatedAt":"$D2025-07-19T19:56:15.363Z","benchmark":{"benchmarkId":"swe-lancer-(ic-diamond-subset)","name":"SWE-Lancer (IC-Diamond subset)","category":null}},{"modelBenchmarkId":35765279,"benchmarkId":"tau-bench-airline","modelId":"gpt-4o-2024-08-06","score":0.428,"normalizedScore":0.428,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.017Z","updatedAt":"$D2025-07-19T19:56:15.017Z","benchmark":{"benchmarkId":"tau-bench-airline","name":"TAU-bench Airline","category":null}},{"modelBenchmarkId":84446480,"benchmarkId":"tau-bench-retail","modelId":"gpt-4o-2024-08-06","score":0.603,"normalizedScore":0.603,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-4-1/","verifiedByLlmstats":false,"analysisMethod":"Accuracy","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.988Z","updatedAt":"$D2025-07-19T19:56:14.988Z","benchmark":{"benchmarkId":"tau-bench-retail","name":"TAU-bench Retail","category":null}},{"modelBenchmarkId":57070510,"benchmarkId":"humanity's-last-exam","modelId":"gpt-4o-2024-08-06","score":0.053,"normalizedScore":0.053,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode (no tools) - Full set of expert-level questions across subjects.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"humanity's-last-exam","name":"Humanity's Last Exam","category":null}},{"modelBenchmarkId":71970825,"benchmarkId":"scale-multichallenge","modelId":"gpt-4o-2024-08-06","score":0.403,"normalizedScore":0.403,"isSelfReported":true,"selfReportedSourceLink":"https://openai.com/index/gpt-5/","verifiedByLlmstats":false,"analysisMethod":"GPT-4o without thinking mode - Multi-turn instruction following benchmark.","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-24T12:00:00.000Z","updatedAt":"$D2025-07-24T12:00:00.000Z","benchmark":{"benchmarkId":"scale-multichallenge","name":"Scale MultiChallenge","category":null}}],"providers":[{"modelProviderId":46482582,"modelId":"gpt-4o-2024-08-06","providerId":"azure","providerModelIdUsed":"gpt-4o-2024-08-06","deprecatedAt":null,"inputCentsPerMillionTokens":250,"outputCentsPerMillionTokens":1000,"quantization":null,"maxInputTokens":128000,"maxOutputTokens":16384,"throughput":99,"latency":0.53,"featureWebSearch":false,"featureFunctionCalling":true,"featureStructuredOutput":true,"featureCodeExecution":false,"featureBatchInference":true,"featureFinetuning":false,"inputModalityText":true,"inputModalityImage":true,"inputModalityAudio":false,"inputModalityVideo":false,"outputModalityText":true,"outputModalityImage":false,"outputModalityAudio":false,"outputModalityVideo":false,"createdAt":"$D2025-07-19T19:49:16.765Z","updatedAt":"$D2025-07-19T19:49:16.765Z","modelName":"GPT-4o","organizationId":"openai","provider":{"providerId":"azure","name":"Azure","website":"https://azure.microsoft.com","createdAt":"$D2025-07-19T19:49:16.749Z","updatedAt":"$D2025-07-19T19:49:16.749Z"}},{"modelProviderId":90523067,"modelId":"gpt-4o-2024-08-06","providerId":"openai","providerModelIdUsed":"gpt-4o-2024-08-06","deprecatedAt":null,"inputCentsPerMillionTokens":250,"outputCentsPerMillionTokens":1000,"quantization":null,"maxInputTokens":128000,"maxOutputTokens":16384,"throughput":132,"latency":0.5,"featureWebSearch":false,"featureFunctionCalling":true,"featureStructuredOutput":true,"featureCodeExecution":false,"featureBatchInference":true,"featureFinetuning":false,"inputModalityText":true,"inputModalityImage":true,"inputModalityAudio":false,"inputModalityVideo":false,"outputModalityText":true,"outputModalityImage":false,"outputModalityAudio":false,"outputModalityVideo":false,"createdAt":"$D2025-07-19T19:49:17.130Z","updatedAt":"$D2025-07-19T19:49:17.130Z","modelName":"GPT-4o","organizationId":"openai","provider":{"providerId":"openai","name":"OpenAI","website":"https://openai.com","createdAt":"$D2025-07-19T19:49:17.121Z","updatedAt":"$D2025-07-19T19:49:17.121Z"}}]},"modelB":{"modelId":"llama-3.1-nemotron-70b-instruct","name":"Llama 3.1 Nemotron 70B Instruct","organizationId":"nvidia","fineTunedFromModelId":"llama-3.1-70b-instruct","description":"A large language model customized by NVIDIA to improve the helpfulness of LLM generated responses. It is a fine-tuned version of Llama 3.1 70B Instruct. The model was trained using RLHF (REINFORCE) with HelpSteer2-Preference prompts.","releaseDate":"2024-10-01","announcementDate":"2024-10-01","licenseId":"llama_3_1_community_license","multimodal":false,"knowledgeCutoff":"2023-12-01","paramCount":"$n70000000000","trainingTokens":null,"contextWindow":null,"availableInZeroeval":true,"sourceApiRef":"https://build.nvidia.com/nvidia/llama-3_1-nemotron-70b-instruct","sourcePlayground":null,"sourcePaper":"https://arxiv.org/abs/2410.01257","sourceScorecardBlogLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","sourceRepoLink":null,"sourceWeightsLink":"https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct","modelFamilyId":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.908Z","updatedAt":"$D2025-10-03T17:22:49.553Z","organization":{"organizationId":"nvidia","name":"NVIDIA","website":"https://nvidia.com","description":"GPU and AI company","country":"US","manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:49:05.728Z","updatedAt":"$D2025-07-19T19:49:05.728Z"},"license":{"licenseId":"llama_3_1_community_license","name":"Llama 3.1 Community License","allowCommercial":false,"description":"Llama 3.1 Community License license","createdAt":"$D2025-07-19T19:49:05.574Z","updatedAt":"$D2025-07-19T19:49:05.574Z"},"benchmarks":[{"modelBenchmarkId":43218499,"benchmarkId":"arc-c","modelId":"llama-3.1-nemotron-70b-instruct","score":0.692,"normalizedScore":0.692,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Standard evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.133Z","updatedAt":"$D2025-07-19T19:56:11.133Z","benchmark":{"benchmarkId":"arc-c","name":"ARC-C","category":null}},{"modelBenchmarkId":79448422,"benchmarkId":"gsm8k","modelId":"llama-3.1-nemotron-70b-instruct","score":0.9143,"normalizedScore":0.9143,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Standard evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:13.099Z","updatedAt":"$D2025-07-19T19:56:13.099Z","benchmark":{"benchmarkId":"gsm8k","name":"GSM8k","category":null}},{"modelBenchmarkId":847742,"benchmarkId":"gsm8k-chat","modelId":"llama-3.1-nemotron-70b-instruct","score":0.8188,"normalizedScore":0.8188,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Chat evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.104Z","updatedAt":"$D2025-07-19T19:56:15.104Z","benchmark":{"benchmarkId":"gsm8k-chat","name":"GSM8K Chat","category":null}},{"modelBenchmarkId":71450147,"benchmarkId":"hellaswag","modelId":"llama-3.1-nemotron-70b-instruct","score":0.8558,"normalizedScore":0.8558,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Standard evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.188Z","updatedAt":"$D2025-07-19T19:56:11.188Z","benchmark":{"benchmarkId":"hellaswag","name":"HellaSwag","category":null}},{"modelBenchmarkId":26064337,"benchmarkId":"instruct-humaneval","modelId":"llama-3.1-nemotron-70b-instruct","score":0.7384,"normalizedScore":0.7384,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Code evaluation (n=20)","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.108Z","updatedAt":"$D2025-07-19T19:56:15.108Z","benchmark":{"benchmarkId":"instruct-humaneval","name":"Instruct HumanEval","category":null}},{"modelBenchmarkId":21630144,"benchmarkId":"mmlu","modelId":"llama-3.1-nemotron-70b-instruct","score":0.802,"normalizedScore":0.802,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Standard evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.292Z","updatedAt":"$D2025-07-19T19:56:11.292Z","benchmark":{"benchmarkId":"mmlu","name":"MMLU","category":null}},{"modelBenchmarkId":43719922,"benchmarkId":"mmlu-chat","modelId":"llama-3.1-nemotron-70b-instruct","score":0.8058,"normalizedScore":0.8058,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Chat evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.100Z","updatedAt":"$D2025-07-19T19:56:15.100Z","benchmark":{"benchmarkId":"mmlu-chat","name":"MMLU Chat","category":null}},{"modelBenchmarkId":74516605,"benchmarkId":"mt-bench","modelId":"llama-3.1-nemotron-70b-instruct","score":0.0899,"normalizedScore":0.0899,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Chat evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:14.532Z","updatedAt":"$D2025-07-19T19:56:14.532Z","benchmark":{"benchmarkId":"mt-bench","name":"MT-Bench","category":null}},{"modelBenchmarkId":44931101,"benchmarkId":"truthfulqa","modelId":"llama-3.1-nemotron-70b-instruct","score":0.5863,"normalizedScore":0.5863,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Standard evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.363Z","updatedAt":"$D2025-07-19T19:56:11.363Z","benchmark":{"benchmarkId":"truthfulqa","name":"TruthfulQA","category":null}},{"modelBenchmarkId":16921630,"benchmarkId":"winogrande","modelId":"llama-3.1-nemotron-70b-instruct","score":0.8453,"normalizedScore":0.8453,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Standard evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:11.390Z","updatedAt":"$D2025-07-19T19:56:11.390Z","benchmark":{"benchmarkId":"winogrande","name":"Winogrande","category":null}},{"modelBenchmarkId":38184373,"benchmarkId":"xlsum-english","modelId":"llama-3.1-nemotron-70b-instruct","score":0.3161,"normalizedScore":0.3161,"isSelfReported":true,"selfReportedSourceLink":"https://developer.nvidia.com/blog/advancing-the-accuracy-efficiency-frontier-with-llama-3-1-nemotron-51b/","verifiedByLlmstats":false,"analysisMethod":"Standard evaluation","verificationProviderId":null,"verificationHardware":null,"verificationDate":null,"verificationNotes":null,"manualEdits":null,"editedBy":null,"editedAt":null,"createdAt":"$D2025-07-19T19:56:15.094Z","updatedAt":"$D2025-07-19T19:56:15.094Z","benchmark":{"benchmarkId":"xlsum-english","name":"XLSum English","category":null}}],"providers":[]},"commonBenchmarks":[{"benchmarkId":"activitynet","name":"ActivityNet","category":null,"modelA":0.619,"modelB":0},{"benchmarkId":"ai2d","name":"AI2D","category":null,"modelA":0.942,"modelB":0},{"benchmarkId":"aider-polyglot","name":"Aider-Polyglot","category":null,"modelA":0.307,"modelB":0},{"benchmarkId":"aider-polyglot-edit","name":"Aider-Polyglot Edit","category":null,"modelA":0.182,"modelB":0},{"benchmarkId":"aime-2024","name":"AIME 2024","category":null,"modelA":0.131,"modelB":0},{"benchmarkId":"chartqa","name":"ChartQA","category":null,"modelA":0.857,"modelB":0},{"benchmarkId":"charxiv-d","name":"CharXiv-D","category":null,"modelA":0.853,"modelB":0},{"benchmarkId":"charxiv-r","name":"CharXiv-R","category":null,"modelA":0.588,"modelB":0},{"benchmarkId":"collie","name":"COLLIE","category":null,"modelA":0.61,"modelB":0},{"benchmarkId":"tau2-airline","name":"Tau2 Airline","category":null,"modelA":0.455,"modelB":0},{"benchmarkId":"tau2-retail","name":"Tau2 Retail","category":null,"modelA":0.634,"modelB":0},{"benchmarkId":"tau2-telecom","name":"Tau2 Telecom","category":null,"modelA":0.235,"modelB":0},{"benchmarkId":"mmmu-pro","name":"MMMU-Pro","category":null,"modelA":0.599,"modelB":0},{"benchmarkId":"videommmu","name":"VideoMMMU","category":null,"modelA":0.612,"modelB":0},{"benchmarkId":"erqa","name":"ERQA","category":null,"modelA":0.352,"modelB":0},{"benchmarkId":"complexfuncbench","name":"ComplexFuncBench","category":null,"modelA":0.665,"modelB":0},{"benchmarkId":"docvqa","name":"DocVQA","category":null,"modelA":0.928,"modelB":0},{"benchmarkId":"egoschema","name":"EgoSchema","category":null,"modelA":0.722,"modelB":0},{"benchmarkId":"gpqa","name":"GPQA","category":null,"modelA":0.701,"modelB":0},{"benchmarkId":"graphwalks-bfs-<128k","name":"Graphwalks BFS <128k","category":null,"modelA":0.417,"modelB":0},{"benchmarkId":"graphwalks-parents-<128k","name":"Graphwalks parents <128k","category":null,"modelA":0.354,"modelB":0},{"benchmarkId":"ifeval","name":"IFEval","category":null,"modelA":0.81,"modelB":0},{"benchmarkId":"internal-api-instruction-following-(hard)","name":"Internal API instruction following (hard)","category":null,"modelA":0.292,"modelB":0},{"benchmarkId":"mathvista","name":"MathVista","category":null,"modelA":0.614,"modelB":0},{"benchmarkId":"mmlu","name":"MMLU","category":null,"modelA":0.857,"modelB":0.802},{"benchmarkId":"mmlu-pro","name":"MMLU-Pro","category":null,"modelA":0.747,"modelB":0},{"benchmarkId":"mmmlu","name":"MMMLU","category":null,"modelA":0.814,"modelB":0},{"benchmarkId":"mmmu","name":"MMMU","category":null,"modelA":0.722,"modelB":0},{"benchmarkId":"multichallenge-(o3-mini-grader)","name":"MultiChallenge (o3-mini grader)","category":null,"modelA":0.399,"modelB":0},{"benchmarkId":"multi-if","name":"Multi-IF","category":null,"modelA":0.609,"modelB":0},{"benchmarkId":"openai-mrcr:-2-needle-128k","name":"OpenAI-MRCR: 2 needle 128k","category":null,"modelA":0.319,"modelB":0},{"benchmarkId":"simpleqa","name":"SimpleQA","category":null,"modelA":0.382,"modelB":0},{"benchmarkId":"swe-bench-verified","name":"SWE-Bench Verified","category":null,"modelA":0.332,"modelB":0},{"benchmarkId":"swe-lancer","name":"SWE-Lancer","category":null,"modelA":0.326,"modelB":0},{"benchmarkId":"swe-lancer-(ic-diamond-subset)","name":"SWE-Lancer (IC-Diamond subset)","category":null,"modelA":0.124,"modelB":0},{"benchmarkId":"tau-bench-airline","name":"TAU-bench Airline","category":null,"modelA":0.428,"modelB":0},{"benchmarkId":"tau-bench-retail","name":"TAU-bench Retail","category":null,"modelA":0.603,"modelB":0},{"benchmarkId":"humanity's-last-exam","name":"Humanity's Last Exam","category":null,"modelA":0.053,"modelB":0},{"benchmarkId":"scale-multichallenge","name":"Scale MultiChallenge","category":null,"modelA":0.403,"modelB":0},{"benchmarkId":"arc-c","name":"ARC-C","category":null,"modelA":0,"modelB":0.692},{"benchmarkId":"gsm8k","name":"GSM8k","category":null,"modelA":0,"modelB":0.9143},{"benchmarkId":"gsm8k-chat","name":"GSM8K Chat","category":null,"modelA":0,"modelB":0.8188},{"benchmarkId":"hellaswag","name":"HellaSwag","category":null,"modelA":0,"modelB":0.8558},{"benchmarkId":"instruct-humaneval","name":"Instruct HumanEval","category":null,"modelA":0,"modelB":0.7384},{"benchmarkId":"mmlu-chat","name":"MMLU Chat","category":null,"modelA":0,"modelB":0.8058},{"benchmarkId":"mt-bench","name":"MT-Bench","category":null,"modelA":0,"modelB":0.0899},{"benchmarkId":"truthfulqa","name":"TruthfulQA","category":null,"modelA":0,"modelB":0.5863},{"benchmarkId":"winogrande","name":"Winogrande","category":null,"modelA":0,"modelB":0.8453},{"benchmarkId":"xlsum-english","name":"XLSum English","category":null,"modelA":0,"modelB":0.3161}]}],"$L32"]}]

GPT-4o vs Llama 3.1 Nemotron 70B Instruct

Performance Metrics

Provider Availability & Performance