Ru Arena General

{
  • "headers": [
    • "model",
    • "score",
    • "95% CI",
    • "lower",
    • "upper",
    • "avg_tokens",
    • "std_tokens",
    • "lc_score"
    ],
  • "data": [
    • [
      • "DeepSeek - DeepSeek-V3-Chat",
      • 96.3,
      • "+0.7 / -0.8",
      • 95.51,
      • 97.02,
      • 665.97,
      • 504.83,
      • 56.62
      ],
    • [
      • "-chatgpt-4o-latest",
      • 94.75,
      • "+0.8 / -0.7",
      • 94.03,
      • 95.53,
      • 693.15,
      • 634.2,
      • 56.4
      ],
    • [
      • "yi-lightning",
      • 93.46,
      • "+0.9 / -0.9",
      • 92.51,
      • 94.33,
      • 636.68,
      • 469.74,
      • 56.22
      ],
    • [
      • "o1-mini",
      • 93.45,
      • "+0.7 / -0.8",
      • 92.61,
      • 94.19,
      • 791.18,
      • 647.74,
      • 56.22
      ],
    • [
      • "RefalMachine-RuadaptQwen-32B-Pro_v1",
      • 92.18,
      • "+1.2 / -1.2",
      • 90.99,
      • 93.43,
      • 563.43,
      • 387.83,
      • 56.04
      ],
    • [
      • "claude-3-opus-20240229",
      • 91.3,
      • "+1.0 / -1.1",
      • 90.17,
      • 92.34,
      • 468.69,
      • 254.1,
      • 55.92
      ],
    • [
      • "gpt-4-1106-preview",
      • 90.89,
      • "+1.1 / -1.1",
      • 89.78,
      • 91.97,
      • 541.66,
      • 346.59,
      • 55.86
      ],
    • [
      • "T-Tech-T-pro-it-1.0",
      • 90.87,
      • "+1.1 / -1.2",
      • 89.69,
      • 92.02,
      • 502,
      • 380.68,
      • 55.85
      ],
    • [
      • "o1-preview",
      • 90.8,
      • "+0.9 / -1.0",
      • 89.84,
      • 91.73,
      • 664.89,
      • 601.34,
      • 55.84
      ],
    • [
      • "RefalMachine-RuadaptQwen2.5-32B-instruct-v1",
      • 90.47,
      • "+1.1 / -1.0",
      • 89.48,
      • 91.55,
      • 527.86,
      • 366.54,
      • 55.8
      ],
    • [
      • "SberDevices-GigaChatMaxWithoutFilter",
      • 89.96,
      • "+1.2 / -1.4",
      • 88.52,
      • 91.14,
      • 523.95,
      • 421.87,
      • 55.73
      ],
    • [
      • "DeepSeek, Inc.-DeepSeek-V2-Chat-0628",
      • 89.67,
      • "+1.3 / -1.1",
      • 88.61,
      • 90.96,
      • 514.79,
      • 340.79,
      • 55.68
      ],
    • [
      • "gemini-1.5-pro-002",
      • 89.08,
      • "+1.3 / -1.0",
      • 88.12,
      • 90.34,
      • 639.51,
      • 493.3,
      • 55.6
      ],
    • [
      • "gemini-1.5-pro-exp-0801",
      • 88.89,
      • "+1.0 / -1.0",
      • 87.93,
      • 89.85,
      • 547.91,
      • 411.58,
      • 55.57
      ],
    • [
      • "RefalMachine-RuadaptQwen2.5-14B-Instruct-v1",
      • 88.63,
      • "+1.4 / -1.0",
      • 87.59,
      • 90.04,
      • 572.22,
      • 397.38,
      • 55.54
      ],
    • [
      • "RefalMachine-RuadaptQwen2.5-7B-Lite-v1",
      • 88.6,
      • "+1.0 / -1.2",
      • 87.4,
      • 89.57,
      • 580.95,
      • 402.27,
      • 55.53
      ],
    • [
      • "Qwen-Qwen2.5-72B-Instruct",
      • 88.25,
      • "+1.0 / -1.5",
      • 86.77,
      • 89.25,
      • 557.41,
      • 437.32,
      • 55.48
      ],
    • [
      • "claude-3-5-sonnet-20240620",
      • 88.17,
      • "+1.4 / -1.1",
      • 87.04,
      • 89.55,
      • 387.42,
      • 248.97,
      • 55.47
      ],
    • [
      • "ZeroAgency.ru-Zero-Mistral-Small-24B-Instruct-2501",
      • 87.43,
      • "+1.4 / -1.2",
      • 86.25,
      • 88.85,
      • 565.19,
      • 339.27,
      • 55.37
      ],
    • [
      • "Vikhrmodels-vikhr-nemo-12b-instruct-r-21-09-24",
      • 87.32,
      • "+1.1 / -1.4",
      • 85.87,
      • 88.45,
      • 627,
      • 416.72,
      • 55.35
      ],
    • [
      • "Google-gemma-2-27b-it",
      • 86.87,
      • "+1.3 / -1.7",
      • 85.16,
      • 88.2,
      • 472.79,
      • 336.56,
      • 55.29
      ],
    • [
      • "SberDevices-GigaChatMaxDefault",
      • 86.7,
      • "+1.7 / -1.3",
      • 85.38,
      • 88.36,
      • 515.48,
      • 429.13,
      • 55.26
      ],
    • [
      • "IlyaGusev-IlyaGusevvikhr_nemo_orpo_dostoevsky_12b_slerp",
      • 86.64,
      • "+1.2 / -1.3",
      • 85.33,
      • 87.81,
      • 634.57,
      • 405.24,
      • 55.25
      ],
    • [
      • "-Phi-4",
      • 86.59,
      • "+1.1 / -1.3",
      • 85.28,
      • 87.66,
      • 641.7,
      • 439.8,
      • 55.25
      ],
    • [
      • "IlyaGusev-IlyaGusevsaiga_nemo_12b_sft_m9_d14_simpo_m19_d31",
      • 85.58,
      • "+1.4 / -1.1",
      • 84.51,
      • 86.95,
      • 649.12,
      • 450.98,
      • 55.1
      ],
    • [
      • "T-Tech-T-lite-it-1.0",
      • 84.98,
      • "+1.3 / -1.3",
      • 83.68,
      • 86.24,
      • 544.92,
      • 454.22,
      • 55.02
      ],
    • [
      • "claude-3-sonnet-20240229",
      • 84.27,
      • "+1.5 / -1.5",
      • 82.77,
      • 85.81,
      • 432.07,
      • 248.67,
      • 54.92
      ],
    • [
      • "gpt-4o-mini",
      • 83.9,
      • "+1.7 / -1.6",
      • 82.26,
      • 85.63,
      • 448.12,
      • 367.72,
      • 54.86
      ],
    • [
      • "IlyaGusev-IlyaGusevsaiga_nemo_12b_sft_m9_d14_simpo_m22_d34",
      • 83.85,
      • "+1.2 / -1.4",
      • 82.48,
      • 85.06,
      • 552.85,
      • 431.47,
      • 54.86
      ],
    • [
      • "llama-3.1-70b-instruct",
      • 83.26,
      • "+1.4 / -1.1",
      • 82.17,
      • 84.71,
      • 537.53,
      • 428.6,
      • 54.77
      ],
    • [
      • "RefalMachine-ruadapt_qwen2.5_7B_ext_u48_instruct_v3",
      • 81.92,
      • "+1.6 / -1.3",
      • 80.59,
      • 83.52,
      • 555.9,
      • 383.32,
      • 54.58
      ],
    • [
      • "claude-3-haiku-20240307",
      • 80.74,
      • "+1.5 / -1.1",
      • 79.62,
      • 82.24,
      • 394.8,
      • 221.79,
      • 54.41
      ],
    • [
      • "mistral-large-2407",
      • 80.39,
      • "+2.1 / -1.5",
      • 78.87,
      • 82.52,
      • 422.42,
      • 320.72,
      • 54.36
      ],
    • [
      • "gemini-1.5-flash-8b-exp-0827",
      • 80.17,
      • "+1.6 / -1.5",
      • 78.66,
      • 81.8,
      • 596.56,
      • 527.68,
      • 54.33
      ],
    • [
      • "IlyaGusev-saiga-nemo_v3",
      • 79.89,
      • "+1.8 / -1.3",
      • 78.55,
      • 81.71,
      • 550.91,
      • 463.81,
      • 54.29
      ],
    • [
      • "RefalMachine-ruadapt_qwen2.5_7B_ext_u48_instruct_v1",
      • 78.67,
      • "+1.3 / -2.0",
      • 76.67,
      • 79.94,
      • 539.24,
      • 377.67,
      • 54.12
      ],
    • [
      • "RefalMachine-ruadapt_qwen2.5_7B_ext_u48_instruct_v2",
      • 78.56,
      • "+1.2 / -1.8",
      • 76.72,
      • 79.73,
      • 547.74,
      • 380.85,
      • 54.1
      ],
    • [
      • "DeepSeek - DeepSeek-R1-Distill-Qwen-32B--temp-0.6--think-excluded",
      • 77.28,
      • "+1.6 / -2.0",
      • 75.32,
      • 78.84,
      • 452.09,
      • 402.34,
      • 53.92
      ],
    • [
      • "command-r-plus",
      • 77.17,
      • "+1.5 / -1.5",
      • 75.63,
      • 78.62,
      • 560.83,
      • 424.07,
      • 53.9
      ],
    • [
      • "gemma-2-9b-it",
      • 76.5,
      • "+1.3 / -1.2",
      • 75.25,
      • 77.8,
      • 459.15,
      • 312.59,
      • 53.81
      ],
    • [
      • "Qwen-Qwen2.5-7B-Instruct",
      • 76.03,
      • "+1.4 / -1.9",
      • 74.16,
      • 77.41,
      • 484.87,
      • 383.86,
      • 53.74
      ],
    • [
      • "SberDevices-GigaChat-20B-A3B-instruct-v1.5 ",
      • 74.43,
      • "+1.6 / -1.7",
      • 72.76,
      • 76.04,
      • 538.06,
      • 447.81,
      • 53.51
      ],
    • [
      • "SberDevices-GigaChat-20B-A3B-instruct-v1.5",
      • 73.81,
      • "+2.0 / -1.6",
      • 72.21,
      • 75.8,
      • 538.06,
      • 447.81,
      • 53.42
      ],
    • [
      • "gemma-2-9b-it-sppo-iter3",
      • 73.61,
      • "+1.9 / -2.1",
      • 71.47,
      • 75.48,
      • 509.66,
      • 336.7,
      • 53.39
      ],
    • [
      • "llama-3.1-405b-instruct",
      • 73.39,
      • "+2.0 / -2.1",
      • 71.32,
      • 75.35,
      • 435.44,
      • 372.72,
      • 53.36
      ],
    • [
      • "Attention Signs-Watari-7b-v1",
      • 69.49,
      • "+1.8 / -1.7",
      • 67.78,
      • 71.24,
      • 616.8,
      • 449.25,
      • 52.8
      ],
    • [
      • "IlyaGusev-IlyaGusevsaiga_llama3_8b_v7",
      • 67.69,
      • "+1.9 / -2.1",
      • 65.56,
      • 69.59,
      • 503.54,
      • 360.83,
      • 52.54
      ],
    • [
      • "IlyaGusev-saiga_llama3_8b_v7_no_system",
      • 66.14,
      • "+2.1 / -2.0",
      • 64.09,
      • 68.26,
      • 492.28,
      • 355.62,
      • 52.32
      ],
    • [
      • "RefalMachine-ruadapt_qwen2.5_3B_ext_u48_instruct_v4",
      • 66.1,
      • "+1.7 / -1.9",
      • 64.15,
      • 67.77,
      • 531.37,
      • 384.03,
      • 52.32
      ],
    • [
      • "-gemini-pro",
      • 65.35,
      • "+2.6 / -1.5",
      • 63.81,
      • 67.92,
      • 396.96,
      • 366.17,
      • 52.21
      ],
    • [
      • "t-lite-instruct-0.1",
      • 64.66,
      • "+1.8 / -2.0",
      • 62.68,
      • 66.45,
      • 810.27,
      • 445.23,
      • 52.11
      ],
    • [
      • "-ruadapt_llama_saiga_kto_ablitirated_ru_arena_hard_rep_pen_1.1",
      • 63.3,
      • "+1.9 / -2.2",
      • 61.12,
      • 65.15,
      • 512.17,
      • 366.2,
      • 51.91
      ],
    • [
      • "DeepSeek - DeepSeek-R1-Qwen-14b-t0.0",
      • 63.25,
      • "+1.9 / -2.1",
      • 61.17,
      • 65.15,
      • 446.69,
      • 569.64,
      • 51.91
      ],
    • [
      • "RefalMachine-ruadapt_qwen2.5_3B_ext_u32_instruct_v3",
      • 62.34,
      • "+2.1 / -2.2",
      • 60.14,
      • 64.42,
      • 517.16,
      • 330.79,
      • 51.78
      ],
    • [
      • "RefalMachine-RuadaptQwen2.5-3B-instruct-v6",
      • 62.21,
      • "+1.9 / -1.6",
      • 60.65,
      • 64.16,
      • 579.01,
      • 420.59,
      • 51.76
      ],
    • [
      • "DeepSeek - DeepSeek-R1-Qwen-14B-t0.6",
      • 61.92,
      • "+1.9 / -2.3",
      • 59.62,
      • 63.83,
      • 414.79,
      • 346.68,
      • 51.71
      ],
    • [
      • "-ruadapt_llama3_instruct_lep_saiga_ablitirated_gm_kto",
      • 60.04,
      • "+1.5 / -1.9",
      • 58.19,
      • 61.56,
      • 493.09,
      • 359.95,
      • 51.44
      ],
    • [
      • "-saiga_llama3_8b_recalc_bench_infer",
      • 59.93,
      • "+1.9 / -1.6",
      • 58.28,
      • 61.78,
      • 519.15,
      • 430.42,
      • 51.43
      ],
    • [
      • "Yandex-yandexgpt-4-pro",
      • 59.23,
      • "+2.1 / -1.9",
      • 57.31,
      • 61.37,
      • 383.8,
      • 306.97,
      • 51.33
      ],
    • [
      • "RefalMachine-ruadapt_qwen_2.5_3B_ext_u32_lep_ft_sft_kto_v2",
      • 58.89,
      • "+1.8 / -1.5",
      • 57.34,
      • 60.67,
      • 541.56,
      • 376.48,
      • 51.28
      ],
    • [
      • "suzume-llama-3-8B-multilingual-orpo-borda-half",
      • 57.13,
      • "+2.1 / -1.8",
      • 55.32,
      • 59.26,
      • 682.81,
      • 378.3,
      • 51.03
      ],
    • [
      • "-ruadapt_qwen_2.5_3B_ext_u32_lep_ft_sft_kto",
      • 56.85,
      • "+1.9 / -2.3",
      • 54.52,
      • 58.77,
      • 536.33,
      • 387.74,
      • 50.99
      ],
    • [
      • "-ruadapt_llama_saiga_kto_ablitirated_ru_arena_hard_external_infer",
      • 56.76,
      • "+1.9 / -2.1",
      • 54.65,
      • 58.64,
      • 526.43,
      • 385.11,
      • 50.97
      ],
    • [
      • "-ruadapt_llama3_extended_gm_ft_v4d1_external_infer",
      • 56.5,
      • "+2.1 / -2.3",
      • 54.19,
      • 58.61,
      • 546.01,
      • 352.63,
      • 50.94
      ],
    • [
      • "-ruadapt_llama3_extended_gm_ft_v5d1_external_infer",
      • 55.7,
      • "+1.5 / -1.8",
      • 53.91,
      • 57.22,
      • 591.59,
      • 383.26,
      • 50.82
      ],
    • [
      • "-ruadapt_llama_instruct_lep_saiga_ablitirated_gm_d1_v6",
      • 55.56,
      • "+2.2 / -1.7",
      • 53.82,
      • 57.76,
      • 579.29,
      • 501.64,
      • 50.8
      ],
    • [
      • "phi-3-medium-4k-instruct",
      • 55.15,
      • "+1.7 / -2.3",
      • 52.81,
      • 56.85,
      • 566.47,
      • 485.71,
      • 50.74
      ],
    • [
      • "llama-3-sonar-large-32k-online",
      • 54.99,
      • "+1.9 / -2.3",
      • 52.65,
      • 56.9,
      • 419.91,
      • 369.08,
      • 50.72
      ],
    • [
      • "Vikhrmodels-QVikhr-2.5-1.5B-Instruct-SMPO",
      • 53.67,
      • "+2.3 / -1.9",
      • 51.78,
      • 55.98,
      • 535.71,
      • 390.6,
      • 50.53
      ],
    • [
      • "Vikhrmodels-VikhrmodelsQVikhr-2.5-1.5B-Instruct-r",
      • 52.82,
      • "+1.7 / -1.8",
      • 50.98,
      • 54.53,
      • 548.43,
      • 423.81,
      • 50.41
      ],
    • [
      • "-ruadapt_llama3_extended_gm_ft_v4d1",
      • 52.51,
      • "+1.8 / -1.8",
      • 50.73,
      • 54.31,
      • 689.29,
      • 732.82,
      • 50.36
      ],
    • [
      • "RefalMachine-RefalMachineruadapt_mistral_7b_openchat_extended_lep_ft_external_infer",
      • 51.87,
      • "+1.8 / -2.0",
      • 49.82,
      • 53.69,
      • 484.7,
      • 372.78,
      • 50.27
      ],
    • [
      • "Vikhrmodels-vikhr-2b-grndm",
      • 50.72,
      • "+1.9 / -1.9",
      • 48.82,
      • 52.63,
      • 691.59,
      • 630.15,
      • 50.1
      ],
    • [
      • "google-gemma-2-2b-it",
      • 50.55,
      • "+2.3 / -2.0",
      • 48.59,
      • 52.85,
      • 483.57,
      • 367.72,
      • 50.08
      ],
    • [
      • "mistral-nemo-instruct-2407",
      • 50.52,
      • "+2.6 / -2.4",
      • 48.09,
      • 53.08,
      • 403.17,
      • 321.53,
      • 50.07
      ],
    • [
      • "MTSAIR-Cotype-Nano-Uncensored ",
      • 50.51,
      • "+1.9 / -1.4",
      • 49.16,
      • 52.43,
      • 567.34,
      • 435.47,
      • 50.07
      ],
    • [
      • "sfr-iterative-dpo-llama-3-8b-r",
      • 50.06,
      • "+2.4 / -1.6",
      • 48.44,
      • 52.48,
      • 516.74,
      • 316.84,
      • 50.01
      ],
    • [
      • "gpt-3.5-turbo-0125",
      • 50,
      • "+0.0 / 0.0",
      • 50,
      • 50,
      • 220.83,
      • 170.3,
      • 50
      ],
    • [
      • "glm-4-9b-chat",
      • 49.75,
      • "+1.6 / -2.1",
      • 47.65,
      • 51.39,
      • 568.81,
      • 448.76,
      • 49.96
      ],
    • [
      • "c4ai-command-r-v01",
      • 48.95,
      • "+3.0 / -2.1",
      • 46.81,
      • 51.98,
      • 529.34,
      • 368.98,
      • 49.85
      ],
    • [
      • "-ruadapt_llama3_extended_gm_ft_v5d1",
      • 48.17,
      • "+2.1 / -1.9",
      • 46.24,
      • 50.23,
      • 759.44,
      • 729.04,
      • 49.74
      ],
    • [
      • "-kolibri-mistral-0427-upd",
      • 47.92,
      • "+2.1 / -2.6",
      • 45.32,
      • 50.06,
      • 551.33,
      • 497.89,
      • 49.7
      ],
    • [
      • "-ruadapt_llama3_8b_instruct_extended_lep_ft-external_infer",
      • 47.81,
      • "+2.2 / -2.1",
      • 45.68,
      • 49.99,
      • 465.39,
      • 429.25,
      • 49.68
      ],
    • [
      • "MTSAIR-Cotype-Nano",
      • 47.74,
      • "+1.9 / -1.6",
      • 46.16,
      • 49.61,
      • 542.49,
      • 409.34,
      • 49.67
      ],
    • [
      • "llama-3-instruct-8b-sppo-iter3",
      • 47.45,
      • "+2.3 / -2.0",
      • 45.44,
      • 49.72,
      • 502.27,
      • 304.27,
      • 49.63
      ],
    • [
      • "-ruadapt_saiga_v7_lep_ft_external_infer",
      • 47.36,
      • "+2.3 / -2.2",
      • 45.12,
      • 49.7,
      • 482.73,
      • 428.93,
      • 49.62
      ],
    • [
      • "Vikhrmodels-Vikhr-Qwen-2.5-1.5b-Instruct",
      • 47.23,
      • "+2.1 / -1.8",
      • 45.38,
      • 49.34,
      • 536.05,
      • 418.23,
      • 49.6
      ],
    • [
      • "MTSAIR-Cotype-Nano-1B",
      • 47.21,
      • "+2.0 / -2.0",
      • 45.18,
      • 49.22,
      • 542.49,
      • 409.34,
      • 49.6
      ],
    • [
      • "-openchat_3.5_0106_external_infer",
      • 47.06,
      • "+2.2 / -1.8",
      • 45.29,
      • 49.23,
      • 430.01,
      • 302.31,
      • 49.58
      ],
    • [
      • "mixtral-8x7b-original",
      • 46.94,
      • "+1.7 / -1.8",
      • 45.14,
      • 48.63,
      • 371.3,
      • 278.71,
      • 49.56
      ],
    • [
      • "-ruadapt_qwen_2.5_3B_ext_u32_lep_ft_sft_v1",
      • 46.47,
      • "+1.9 / -1.6",
      • 44.89,
      • 48.33,
      • 497.51,
      • 471.7,
      • 49.49
      ],
    • [
      • "Vikhrmodels-vikhr-gemma-2b-it",
      • 45.82,
      • "+2.2 / -2.4",
      • 43.42,
      • 48.01,
      • 722.83,
      • 710.71,
      • 49.4
      ],
    • [
      • "suzume-llama-3-8b-multilingual",
      • 45.71,
      • "+2.3 / -2.6",
      • 43.13,
      • 48.01,
      • 641.18,
      • 858.96,
      • 49.38
      ],
    • [
      • "yandex_gpt_pro",
      • 45.11,
      • "+1.8 / -2.2",
      • 42.89,
      • 46.87,
      • 345.3,
      • 277.64,
      • 49.3
      ],
    • [
      • "attn-signs-zariman-reason-7b-v0",
      • 44.84,
      • "+1.9 / -2.5",
      • 42.33,
      • 46.76,
      • 1748.19,
      • 1925.13,
      • 49.26
      ],
    • [
      • "Vikhrmodels-Vikhr-Gemma-2B-instruct-v1.0",
      • 44.1,
      • "+1.9 / -1.9",
      • 42.16,
      • 46.04,
      • 701.48,
      • 681.22,
      • 49.15
      ],
    • [
      • "hermes-2-theta-llama-3-8b",
      • 44.07,
      • "+1.9 / -2.0",
      • 42.05,
      • 45.97,
      • 485.99,
      • 390.85,
      • 49.15
      ],
    • [
      • "gpt-3.5-turbo-1106",
      • 41.47,
      • "+1.8 / -2.1",
      • 39.41,
      • 43.26,
      • 191.19,
      • 177.31,
      • 48.77
      ],
    • [
      • "RefalMachine-RuadaptQwen2.5-1.5B-instruct-v1",
      • 41.03,
      • "+2.0 / -1.9",
      • 39.12,
      • 43.04,
      • 445.03,
      • 391.78,
      • 48.71
      ],
    • [
      • "llama-3-smaug-8b",
      • 40.8,
      • "+2.5 / -2.1",
      • 38.65,
      • 43.25,
      • 524.02,
      • 480.56,
      • 48.68
      ],
    • [
      • "RefalMachine-RuadaptQwen-2.5-1.5B-instruct-v2",
      • 40.31,
      • "+1.6 / -1.9",
      • 38.4,
      • 41.95,
      • 553.42,
      • 464.04,
      • 48.61
      ],
    • [
      • "-ruadapt_llama3_8b_instruct_extended_led_ft",
      • 40.13,
      • "+1.6 / -2.2",
      • 37.9,
      • 41.74,
      • 604.91,
      • 796.85,
      • 48.58
      ],
    • [
      • "llama-3-8b-saiga-suzume-ties",
      • 39.94,
      • "+2.0 / -2.7",
      • 37.27,
      • 41.89,
      • 763.27,
      • 699.39,
      • 48.55
      ],
    • [
      • "starling-lm-7b-beta",
      • 39.76,
      • "+2.0 / -2.2",
      • 37.56,
      • 41.75,
      • 629.68,
      • 465.08,
      • 48.53
      ],
    • [
      • "vikhr-it-5.4-fp16-orpo-v2",
      • 39.33,
      • "+2.0 / -1.9",
      • 37.38,
      • 41.32,
      • 379.23,
      • 558.81,
      • 48.46
      ],
    • [
      • "saiga_llama3_8b_v6",
      • 39.17,
      • "+2.1 / -1.6",
      • 37.57,
      • 41.23,
      • 471.51,
      • 463.62,
      • 48.44
      ],
    • [
      • "llama-3-instruct-8b-simpo",
      • 38.01,
      • "+2.0 / -2.1",
      • 35.9,
      • 40.05,
      • 417.5,
      • 262.37,
      • 48.28
      ],
    • [
      • "MTSAIR-Cotype-Nano-4bit",
      • 37.64,
      • "+1.9 / -1.9",
      • 35.77,
      • 39.58,
      • 582.7,
      • 472.43,
      • 48.22
      ],
    • [
      • "qwen2-7b-instruct",
      • 37.53,
      • "+1.9 / -2.1",
      • 35.46,
      • 39.4,
      • 340.65,
      • 288.17,
      • 48.21
      ],
    • [
      • "paralex-llama-3-8b-sft",
      • 37.36,
      • "+2.5 / -2.2",
      • 35.11,
      • 39.85,
      • 688.57,
      • 632.87,
      • 48.18
      ],
    • [
      • "MTSAIR-Cotype-Nano-1B-v2",
      • 36.77,
      • "+1.8 / -2.1",
      • 34.66,
      • 38.53,
      • 616.55,
      • 532.28,
      • 48.1
      ],
    • [
      • "aya-23-8b",
      • 36.26,
      • "+1.9 / -2.2",
      • 34.06,
      • 38.19,
      • 554.34,
      • 433.51,
      • 48.02
      ],
    • [
      • "meta-llama-3-8b-instruct",
      • 35.06,
      • "+1.7 / -2.0",
      • 33.11,
      • 36.79,
      • 450.85,
      • 317.66,
      • 47.85
      ],
    • [
      • "openchat-3.5-0106",
      • 33.79,
      • "+2.2 / -2.1",
      • 31.69,
      • 36.04,
      • 492.47,
      • 690.73,
      • 47.67
      ],
    • [
      • "mistral-7b-instruct-v0.3",
      • 32.92,
      • "+2.1 / -2.1",
      • 30.83,
      • 34.99,
      • 469.38,
      • 455.43,
      • 47.54
      ],
    • [
      • "vikhr-it-5.2-fp16-cp",
      • 31.73,
      • "+2.0 / -1.9",
      • 29.78,
      • 33.73,
      • 543.44,
      • 441.71,
      • 47.37
      ],
    • [
      • "gigachat_pro",
      • 31.37,
      • "+2.2 / -2.0",
      • 29.41,
      • 33.53,
      • 294.33,
      • 242.61,
      • 47.32
      ],
    • [
      • "hermes-2-pro-llama-3-8b",
      • 30.78,
      • "+2.3 / -2.2",
      • 28.53,
      • 33.09,
      • 463.45,
      • 559.96,
      • 47.24
      ],
    • [
      • "openchat-3.6-8b-20240522",
      • 30.28,
      • "+1.4 / -2.2",
      • 28.09,
      • 31.71,
      • 428.7,
      • 400.82,
      • 47.17
      ],
    • [
      • "vikhr-it-5.3-fp16-32k",
      • 27.81,
      • "+1.7 / -2.3",
      • 25.46,
      • 29.46,
      • 519.71,
      • 516.09,
      • 46.81
      ],
    • [
      • "vikhr-it-5.3-fp16",
      • 22.73,
      • "+2.1 / -1.8",
      • 20.95,
      • 24.83,
      • 523.45,
      • 543.91,
      • 46.08
      ],
    • [
      • "snorkel-mistral-pairrm-dpo",
      • 22.41,
      • "+1.7 / -1.6",
      • 20.78,
      • 24.11,
      • 773.8,
      • 950.3,
      • 46.04
      ],
    • [
      • "kolibri-vikhr-mistral-0427",
      • 22.41,
      • "+1.8 / -1.5",
      • 20.9,
      • 24.22,
      • 489.89,
      • 566.29,
      • 46.04
      ],
    • [
      • "storm-7b",
      • 20.62,
      • "+1.8 / -1.4",
      • 19.22,
      • 22.41,
      • 419.32,
      • 190.85,
      • 45.78
      ],
    • [
      • "Vikhrmodels-Vikhr-Llama-3.2-1B-instruct",
      • 19.04,
      • "+1.6 / -1.3",
      • 17.7,
      • 20.63,
      • 958.63,
      • 1297.33,
      • 45.56
      ],
    • [
      • "neural-chat-7b-v3-3",
      • 19.03,
      • "+1.6 / -1.5",
      • 17.52,
      • 20.61,
      • 927.21,
      • 1211.62,
      • 45.55
      ],
    • [
      • "gigachat_lite",
      • 17.2,
      • "+1.3 / -1.4",
      • 15.81,
      • 18.5,
      • 276.81,
      • 329.66,
      • 45.29
      ],
    • [
      • "Vikhrmodels-Vikhr-Qwen-2.5-0.5b-Instruct",
      • 16.5,
      • "+1.0 / -1.3",
      • 15.15,
      • 17.5,
      • 583.5,
      • 506.76,
      • 45.19
      ],
    • [
      • "Qwen-Qwen2.5-1.5B-Instruct",
      • 16.46,
      • "+1.6 / -1.4",
      • 15.11,
      • 18.03,
      • 483.67,
      • 674.11,
      • 45.19
      ],
    • [
      • "Vikhrmodels-vikhr-qwen-1.5b-it",
      • 13.18,
      • "+1.0 / -1.3",
      • 11.92,
      • 14.18,
      • 2495.38,
      • 741.45,
      • 44.72
      ],
    • [
      • "meta-llama-Llama-3.2-1B-Instruct",
      • 4.04,
      • "+0.6 / -0.7",
      • 3.3,
      • 4.68,
      • 1240.53,
      • 1783.08,
      • 43.42
      ],
    • [
      • "Qwen-Qwen2.5-0.5B-Instruct",
      • 4.02,
      • "+0.7 / -0.6",
      • 3.39,
      • 4.76,
      • 829.87,
      • 931.51,
      • 43.42
      ],
    • [
      • "HuggingFaceTB-SmolLM2-1.7B-instruct",
      • 1.75,
      • "+0.5 / -0.5",
      • 1.28,
      • 2.24,
      • 486.11,
      • 473.23,
      • 43.1
      ],
    • [
      • "HuggingFaceTB-SmolLM2-1.7B-Instruct",
      • 1.72,
      • "+0.6 / -0.4",
      • 1.29,
      • 2.27,
      • 486.11,
      • 473.23,
      • 43.1
      ],
    • [
      • "HuggingFaceTB-SmolLM2-135M-Instruct",
      • 0.64,
      • "+0.3 / -0.2",
      • 0.4,
      • 0.92,
      • 524.63,
      • 589.35,
      • 42.94
      ],
    • [
      • "HuggingFaceTB-SmolLM2-135M-instruct",
      • 0.55,
      • "+0.3 / -0.2",
      • 0.34,
      • 0.83,
      • 524.63,
      • 589.35,
      • 42.93
      ],
    • [
      • "HuggingFaceTB-SmolLM2-360M-Instruct",
      • 0.15,
      • "+0.2 / -0.1",
      • 0.02,
      • 0.31,
      • 312.11,
      • 481.48,
      • 42.87
      ]
    ],
  • "metadata": null
}