|
8 | 8 | from taoverse.model.competition.epsilon import LinearDecay
|
9 | 9 | from taoverse.model.eval.normalization import NormalizationId
|
10 | 10 | from taoverse.model.eval.task import EvalTask
|
| 11 | +from taoverse.model.tts.e2tts import E2TTS |
| 12 | + |
11 | 13 | from transformers import (
|
12 | 14 | BartForCausalLM,
|
13 | 15 | FalconForCausalLM,
|
|
32 | 34 | # ---------------------------------
|
33 | 35 |
|
34 | 36 | # Release
|
35 |
| -__version__ = "5.1.1" |
| 37 | +__version__ = "6.0.0" |
36 | 38 |
|
37 | 39 | # Validator schema version
|
38 | 40 | __validator_version__ = "4.6.0"
|
|
52 | 54 | # The root directory of this project.
|
53 | 55 | ROOT_DIR = Path(__file__).parent.parent
|
54 | 56 |
|
| 57 | +# Block at which the TTS competition starts. |
| 58 | +BLOCK_TTS = 5_177_981 |
| 59 | + |
55 | 60 | # Minimum stake to consider a validator when checking for miners with weights.
|
56 | 61 | # This corresponded to a top-10 validator on July 31st, 2024.
|
57 | 62 | WEIGHT_SYNC_VALI_MIN_STAKE = 200_000
|
|
62 | 67 |
|
63 | 68 | # Validator eval batch size.
|
64 | 69 | BATCH_SIZE = 1
|
| 70 | +BATCH_SIZE_TTS = 1 |
65 | 71 | # Number of pages validators evaluate over miners on each step.
|
66 | 72 |
|
67 | 73 | # This will be used before activation block BLOCK_MULTI_DATASETS
|
|
75 | 81 | PAGES_PER_EVAL_PES2OX = 2
|
76 | 82 | PAGES_PER_EVAL_FINEMATH3P = 6
|
77 | 83 | PAGES_PER_EVAL_WEBMATH3P = 6
|
| 84 | +PAGES_PER_EVAL_PPLSPEECH = 1 |
78 | 85 |
|
79 | 86 | # Maximum number of batches to use for evaluation per dataset.
|
80 | 87 | MAX_BATCHES_PER_DATASET = 50
|
|
101 | 108 | Gemma2ForCausalLM,
|
102 | 109 | Qwen2ForCausalLM,
|
103 | 110 | }
|
| 111 | +ALLOWED_MODEL_TYPES_TTS = { |
| 112 | + E2TTS |
| 113 | +} |
104 | 114 |
|
105 | 115 |
|
106 | 116 | # Synchronize on blocks roughly every 30 minutes.
|
107 | 117 | SYNC_BLOCK_CADENCE = 150
|
108 | 118 | # Delay at least as long as the sync block cadence with an additional buffer.
|
109 | 119 | EVAL_BLOCK_DELAY = SYNC_BLOCK_CADENCE + 100
|
110 | 120 |
|
111 |
| -MODEL_CONSTRAINTS_BY_COMPETITION_ID: Dict[CompetitionId, ModelConstraints] = { |
| 121 | +MODEL_CONSTRAINTS_BY_COMPETITION_ID_TMP: Dict[CompetitionId, ModelConstraints] = { |
112 | 122 | CompetitionId.B3_MODEL: ModelConstraints(
|
113 | 123 | max_model_parameter_size=3_400_000_000,
|
114 | 124 | min_model_parameter_size=3_200_000_000,
|
|
120 | 130 | "attn_implementation": "flash_attention_2",
|
121 | 131 | },
|
122 | 132 | eval_block_delay=EVAL_BLOCK_DELAY,
|
123 |
| - epsilon_func=LinearDecay(0.005, 0.0005, 50400), |
| 133 | + epsilon_func=LinearDecay(0.005, 0.0005, 7200 * 7), |
124 | 134 | max_bytes=15 * 1024 * 1024 * 1024,
|
125 | 135 | ),
|
126 | 136 | CompetitionId.B14_MODEL: ModelConstraints(
|
|
134 | 144 | "attn_implementation": "flash_attention_2",
|
135 | 145 | },
|
136 | 146 | eval_block_delay=EVAL_BLOCK_DELAY,
|
137 |
| - epsilon_func=LinearDecay(0.005, 0.0005, 72000), |
| 147 | + epsilon_func=LinearDecay(0.005, 0.0005, 7200 * 10), |
138 | 148 | max_bytes=29 * 1024 * 1024 * 1024,
|
139 | 149 | ),
|
140 | 150 | }
|
141 | 151 |
|
| 152 | +MODEL_CONSTRAINTS_BY_COMPETITION_ID: Dict[CompetitionId, ModelConstraints] = { |
| 153 | + CompetitionId.B3_MODEL: ModelConstraints( |
| 154 | + max_model_parameter_size=3_400_000_000, |
| 155 | + min_model_parameter_size=3_200_000_000, |
| 156 | + sequence_length=4096, |
| 157 | + allowed_architectures=ALLOWED_MODEL_TYPES_2, |
| 158 | + tokenizer="Xenova/gpt-4", |
| 159 | + kwargs={ |
| 160 | + "torch_dtype": torch.bfloat16, |
| 161 | + "attn_implementation": "flash_attention_2", |
| 162 | + }, |
| 163 | + eval_block_delay=EVAL_BLOCK_DELAY, |
| 164 | + epsilon_func=LinearDecay(0.005, 0.0001, 7200 * 2), |
| 165 | + max_bytes=15 * 1024 * 1024 * 1024, |
| 166 | + ), |
| 167 | + CompetitionId.B14_MODEL: ModelConstraints( |
| 168 | + max_model_parameter_size=13_900_000_000, |
| 169 | + min_model_parameter_size=13_700_000_000, |
| 170 | + sequence_length=4096, |
| 171 | + allowed_architectures=ALLOWED_MODEL_TYPES_2, |
| 172 | + tokenizer="Xenova/gpt-4", |
| 173 | + kwargs={ |
| 174 | + "torch_dtype": torch.bfloat16, |
| 175 | + "attn_implementation": "flash_attention_2", |
| 176 | + }, |
| 177 | + eval_block_delay=EVAL_BLOCK_DELAY, |
| 178 | + epsilon_func=LinearDecay(0.005, 0.0001, 7200 * 2), |
| 179 | + max_bytes=29 * 1024 * 1024 * 1024, |
| 180 | + ), |
| 181 | + CompetitionId.TTS_V0: ModelConstraints( |
| 182 | + max_model_parameter_size=400_000_000, |
| 183 | + min_model_parameter_size=350_000_000, |
| 184 | + sequence_length=None, |
| 185 | + allowed_architectures=ALLOWED_MODEL_TYPES_TTS, |
| 186 | + tokenizer="e2tts", |
| 187 | + eval_block_delay=EVAL_BLOCK_DELAY, |
| 188 | + epsilon_func=LinearDecay(0.005, 0.0005, 7200 * 7), |
| 189 | + max_bytes=2 * 1024 * 1024 * 1024, |
| 190 | + ), |
| 191 | + |
| 192 | +} |
142 | 193 | # Schedule of competitions by block.
|
143 | 194 | COMPETITION_SCHEDULE_BY_BLOCK: List[Tuple[int, List[Competition]]] = [
|
144 | 195 | (
|
145 | 196 | 0,
|
146 | 197 | [
|
147 | 198 | Competition(
|
148 | 199 | CompetitionId.B3_MODEL,
|
149 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B3_MODEL], |
| 200 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_TMP[CompetitionId.B3_MODEL], |
150 | 201 | 0.3,
|
151 | 202 | eval_tasks=[
|
152 | 203 | EvalTask(
|
|
219 | 270 | ),
|
220 | 271 | Competition(
|
221 | 272 | CompetitionId.B14_MODEL,
|
222 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B14_MODEL], |
| 273 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_TMP[CompetitionId.B14_MODEL], |
223 | 274 | 0.7,
|
224 | 275 | eval_tasks=[
|
225 | 276 | EvalTask(
|
|
292 | 343 | ),
|
293 | 344 | ],
|
294 | 345 | ),
|
| 346 | + ( |
| 347 | + BLOCK_TTS, |
| 348 | + [ |
| 349 | + Competition( |
| 350 | + CompetitionId.TTS_V0, |
| 351 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.TTS_V0], |
| 352 | + 0.55, |
| 353 | + eval_tasks=[ |
| 354 | + EvalTask( |
| 355 | + name="PPL_SPEECH", |
| 356 | + method_id=EvalMethodId.WER, |
| 357 | + dataset_id=DatasetId.PPLSPEECH, |
| 358 | + normalization_id=NormalizationId.NONE, |
| 359 | + dataset_kwargs={ |
| 360 | + "batch_size": BATCH_SIZE_TTS, |
| 361 | + "num_pages": PAGES_PER_EVAL_PPLSPEECH, |
| 362 | + "target_sr": 24000, |
| 363 | + "target_rms": 0.1, |
| 364 | + "ref_audio_max_duration": 15, |
| 365 | + "hop_length": 256 |
| 366 | + }, |
| 367 | + weight=1.0, |
| 368 | + ), |
| 369 | + ], |
| 370 | + ), |
| 371 | + Competition( |
| 372 | + CompetitionId.B3_MODEL, |
| 373 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B3_MODEL], |
| 374 | + 0.15, |
| 375 | + eval_tasks=[ |
| 376 | + EvalTask( |
| 377 | + name="FINEWEB", |
| 378 | + method_id=EvalMethodId.TEXT_LOSS, |
| 379 | + dataset_id=DatasetId.FINEWEB, |
| 380 | + normalization_id=NormalizationId.NONE, |
| 381 | + dataset_kwargs={ |
| 382 | + "batch_size": BATCH_SIZE, |
| 383 | + "num_pages": PAGES_PER_EVAL_FINEWEB, |
| 384 | + }, |
| 385 | + weight=0.3, |
| 386 | + ), |
| 387 | + EvalTask( |
| 388 | + name="FINEWEB_EDU2", |
| 389 | + method_id=EvalMethodId.TEXT_LOSS, |
| 390 | + dataset_id=DatasetId.FINEWEB2, |
| 391 | + normalization_id=NormalizationId.NONE, |
| 392 | + dataset_kwargs={ |
| 393 | + "batch_size": BATCH_SIZE, |
| 394 | + "num_pages": PAGES_PER_EVAL_FINEWEB2, |
| 395 | + }, |
| 396 | + weight=0.25, |
| 397 | + ), |
| 398 | + EvalTask( |
| 399 | + name="STACKV2_DEDUP", |
| 400 | + method_id=EvalMethodId.TEXT_LOSS, |
| 401 | + dataset_id=DatasetId.STACK2, |
| 402 | + normalization_id=NormalizationId.NONE, |
| 403 | + dataset_kwargs={ |
| 404 | + "batch_size": BATCH_SIZE, |
| 405 | + "num_pages": PAGES_PER_EVAL_STACK2, |
| 406 | + }, |
| 407 | + weight=0.35, |
| 408 | + ), |
| 409 | + EvalTask( |
| 410 | + name="PES2OX", |
| 411 | + method_id=EvalMethodId.TEXT_LOSS, |
| 412 | + dataset_id=DatasetId.PES2OX, |
| 413 | + normalization_id=NormalizationId.NONE, |
| 414 | + dataset_kwargs={ |
| 415 | + "batch_size": BATCH_SIZE, |
| 416 | + "num_pages": PAGES_PER_EVAL_PES2OX, |
| 417 | + }, |
| 418 | + weight=0.05, |
| 419 | + ), |
| 420 | + EvalTask( |
| 421 | + name="FINEMATH_3P", |
| 422 | + method_id=EvalMethodId.TEXT_LOSS, |
| 423 | + dataset_id=DatasetId.FINEMATH3P, |
| 424 | + normalization_id=NormalizationId.NONE, |
| 425 | + dataset_kwargs={ |
| 426 | + "batch_size": BATCH_SIZE, |
| 427 | + "num_pages": PAGES_PER_EVAL_FINEMATH3P, |
| 428 | + }, |
| 429 | + weight=0.03, |
| 430 | + ), |
| 431 | + EvalTask( |
| 432 | + name="INFIWEBMATH_3P", |
| 433 | + method_id=EvalMethodId.TEXT_LOSS, |
| 434 | + dataset_id=DatasetId.WEBMATH3P, |
| 435 | + normalization_id=NormalizationId.NONE, |
| 436 | + dataset_kwargs={ |
| 437 | + "batch_size": BATCH_SIZE, |
| 438 | + "num_pages": PAGES_PER_EVAL_WEBMATH3P, |
| 439 | + }, |
| 440 | + weight=0.02, |
| 441 | + ), |
| 442 | + ], |
| 443 | + ), |
| 444 | + Competition( |
| 445 | + CompetitionId.B14_MODEL, |
| 446 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B14_MODEL], |
| 447 | + 0.3, |
| 448 | + eval_tasks=[ |
| 449 | + EvalTask( |
| 450 | + name="FINEWEB", |
| 451 | + method_id=EvalMethodId.TEXT_LOSS, |
| 452 | + dataset_id=DatasetId.FINEWEB, |
| 453 | + normalization_id=NormalizationId.NONE, |
| 454 | + dataset_kwargs={ |
| 455 | + "batch_size": BATCH_SIZE, |
| 456 | + "num_pages": PAGES_PER_EVAL_FINEWEB, |
| 457 | + }, |
| 458 | + weight=0.3, |
| 459 | + ), |
| 460 | + EvalTask( |
| 461 | + name="FINEWEB_EDU2", |
| 462 | + method_id=EvalMethodId.TEXT_LOSS, |
| 463 | + dataset_id=DatasetId.FINEWEB2, |
| 464 | + normalization_id=NormalizationId.NONE, |
| 465 | + dataset_kwargs={ |
| 466 | + "batch_size": BATCH_SIZE, |
| 467 | + "num_pages": PAGES_PER_EVAL_FINEWEB2, |
| 468 | + }, |
| 469 | + weight=0.25, |
| 470 | + ), |
| 471 | + EvalTask( |
| 472 | + name="STACKV2_DEDUP", |
| 473 | + method_id=EvalMethodId.TEXT_LOSS, |
| 474 | + dataset_id=DatasetId.STACK2, |
| 475 | + normalization_id=NormalizationId.NONE, |
| 476 | + dataset_kwargs={ |
| 477 | + "batch_size": BATCH_SIZE, |
| 478 | + "num_pages": PAGES_PER_EVAL_STACK2, |
| 479 | + }, |
| 480 | + weight=0.35, |
| 481 | + ), |
| 482 | + EvalTask( |
| 483 | + name="PES2OX", |
| 484 | + method_id=EvalMethodId.TEXT_LOSS, |
| 485 | + dataset_id=DatasetId.PES2OX, |
| 486 | + normalization_id=NormalizationId.NONE, |
| 487 | + dataset_kwargs={ |
| 488 | + "batch_size": BATCH_SIZE, |
| 489 | + "num_pages": PAGES_PER_EVAL_PES2OX, |
| 490 | + }, |
| 491 | + weight=0.05, |
| 492 | + ), |
| 493 | + EvalTask( |
| 494 | + name="FINEMATH_3P", |
| 495 | + method_id=EvalMethodId.TEXT_LOSS, |
| 496 | + dataset_id=DatasetId.FINEMATH3P, |
| 497 | + normalization_id=NormalizationId.NONE, |
| 498 | + dataset_kwargs={ |
| 499 | + "batch_size": BATCH_SIZE, |
| 500 | + "num_pages": PAGES_PER_EVAL_FINEMATH3P, |
| 501 | + }, |
| 502 | + weight=0.03, |
| 503 | + ), |
| 504 | + EvalTask( |
| 505 | + name="INFIWEBMATH_3P", |
| 506 | + method_id=EvalMethodId.TEXT_LOSS, |
| 507 | + dataset_id=DatasetId.WEBMATH3P, |
| 508 | + normalization_id=NormalizationId.NONE, |
| 509 | + dataset_kwargs={ |
| 510 | + "batch_size": BATCH_SIZE, |
| 511 | + "num_pages": PAGES_PER_EVAL_WEBMATH3P, |
| 512 | + }, |
| 513 | + weight=0.02, |
| 514 | + ), |
| 515 | + ], |
| 516 | + ), |
| 517 | + ], |
| 518 | + ), |
295 | 519 | ]
|
296 | 520 |
|
297 | 521 | for block_and_competitions in COMPETITION_SCHEDULE_BY_BLOCK:
|
|
0 commit comments