|
24 | 24 | ModelConstraints,
|
25 | 25 | NormValidationConstraints,
|
26 | 26 | )
|
27 |
| -from taoverse.model.competition.epsilon import ( |
28 |
| - FixedEpsilon, |
29 |
| - LinearDecay |
30 |
| -) |
| 27 | +from taoverse.model.competition.epsilon import FixedEpsilon, LinearDecay |
31 | 28 | from competitions.data import CompetitionId
|
32 | 29 |
|
33 | 30 | from typing import Dict, List, Tuple
|
|
37 | 34 | # ---------------------------------
|
38 | 35 |
|
39 | 36 | # Release
|
40 |
| -__version__ = "4.5.0" |
| 37 | +__version__ = "4.5.1" |
41 | 38 |
|
42 | 39 | # Validator schema version
|
43 | 40 | __validator_version__ = "3.2.0"
|
|
98 | 95 | DATASET_BY_COMPETITION_ID: Dict[CompetitionId, str] = {
|
99 | 96 | CompetitionId.M772_MODEL: pt.dataset.SubsetFalconLoader,
|
100 | 97 | CompetitionId.B3_MODEL: pt.dataset.SubsetFalconLoader,
|
101 |
| - CompetitionId.B7_MODEL: pt.dataset.SubsetFineWebEdu2Loader, |
| 98 | + CompetitionId.B7_MODEL: pt.dataset.SubsetFineWebEdu2Loader, |
102 | 99 | CompetitionId.B14_MODEL: pt.dataset.SubsetFineWebEdu2Loader,
|
103 | 100 | }
|
104 | 101 |
|
|
159 | 156 | }
|
160 | 157 |
|
161 | 158 | # Defined model constraints by competition id with decaying epsilon
|
162 |
| -MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY: Dict[CompetitionId, ModelConstraints] = { |
| 159 | +MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY: Dict[ |
| 160 | + CompetitionId, ModelConstraints |
| 161 | +] = { |
163 | 162 | CompetitionId.M772_MODEL: ModelConstraints(
|
164 | 163 | max_model_parameter_size=772_000_000,
|
165 | 164 | min_model_parameter_size=572_000_000,
|
|
215 | 214 | }
|
216 | 215 |
|
217 | 216 | # Defined model constraints by competition id with decaying epsilon
|
218 |
| -MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2: Dict[CompetitionId, ModelConstraints] = { |
| 217 | +MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2: Dict[ |
| 218 | + CompetitionId, ModelConstraints |
| 219 | +] = { |
219 | 220 | CompetitionId.M772_MODEL: ModelConstraints(
|
220 | 221 | max_model_parameter_size=772_000_000,
|
221 | 222 | min_model_parameter_size=572_000_000,
|
|
251 | 252 | "attn_implementation": "flash_attention_2",
|
252 | 253 | },
|
253 | 254 | eval_block_delay=0,
|
254 |
| - epsilon_func=LinearDecay(0.005, 0.0001, 100800), |
| 255 | + epsilon_func=LinearDecay(0.005, 0.0001, 50400), |
255 | 256 | max_bytes=29 * 1024 * 1024 * 1024,
|
256 | 257 | ),
|
257 | 258 | }
|
|
309 | 310 | [
|
310 | 311 | Competition(
|
311 | 312 | CompetitionId.M772_MODEL,
|
312 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.M772_MODEL], |
| 313 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ |
| 314 | + CompetitionId.M772_MODEL |
| 315 | + ], |
313 | 316 | 0.14,
|
314 | 317 | ),
|
315 | 318 | Competition(
|
316 | 319 | CompetitionId.B3_MODEL,
|
317 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B3_MODEL], |
| 320 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ |
| 321 | + CompetitionId.B3_MODEL |
| 322 | + ], |
318 | 323 | 0.29,
|
319 | 324 | ),
|
320 | 325 | Competition(
|
321 | 326 | CompetitionId.B7_MODEL,
|
322 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B7_MODEL], |
| 327 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ |
| 328 | + CompetitionId.B7_MODEL |
| 329 | + ], |
323 | 330 | 0.15,
|
324 | 331 | ),
|
325 | 332 | Competition(
|
326 | 333 | CompetitionId.B14_MODEL,
|
327 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B14_MODEL], |
| 334 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ |
| 335 | + CompetitionId.B14_MODEL |
| 336 | + ], |
328 | 337 | 0.42,
|
329 | 338 | ),
|
330 | 339 | ],
|
|
334 | 343 | [
|
335 | 344 | Competition(
|
336 | 345 | CompetitionId.M772_MODEL,
|
337 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.M772_MODEL], |
| 346 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ |
| 347 | + CompetitionId.M772_MODEL |
| 348 | + ], |
338 | 349 | 0.14,
|
339 | 350 | ),
|
340 | 351 | Competition(
|
341 | 352 | CompetitionId.B3_MODEL,
|
342 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B3_MODEL], |
| 353 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ |
| 354 | + CompetitionId.B3_MODEL |
| 355 | + ], |
343 | 356 | 0.29,
|
344 | 357 | ),
|
345 | 358 | Competition(
|
346 | 359 | CompetitionId.B14_MODEL,
|
347 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B14_MODEL], |
| 360 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ |
| 361 | + CompetitionId.B14_MODEL |
| 362 | + ], |
348 | 363 | 0.57,
|
349 | 364 | ),
|
350 | 365 | ],
|
|
354 | 369 | [
|
355 | 370 | Competition(
|
356 | 371 | CompetitionId.M772_MODEL,
|
357 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.M772_MODEL], |
| 372 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[ |
| 373 | + CompetitionId.M772_MODEL |
| 374 | + ], |
358 | 375 | 0.14,
|
359 | 376 | ),
|
360 | 377 | Competition(
|
361 | 378 | CompetitionId.B3_MODEL,
|
362 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.B3_MODEL], |
| 379 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[ |
| 380 | + CompetitionId.B3_MODEL |
| 381 | + ], |
363 | 382 | 0.29,
|
364 | 383 | ),
|
365 | 384 | Competition(
|
366 | 385 | CompetitionId.B14_MODEL,
|
367 |
| - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.B14_MODEL], |
| 386 | + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[ |
| 387 | + CompetitionId.B14_MODEL |
| 388 | + ], |
368 | 389 | 0.57,
|
369 | 390 | ),
|
370 | 391 | ],
|
371 | 392 | ),
|
372 |
| - |
373 |
| - |
374 | 393 | ]
|
375 | 394 |
|
376 | 395 | for block_and_competitions in COMPETITION_SCHEDULE_BY_BLOCK:
|
|
400 | 419 |
|
401 | 420 | # validators number of pages to eval over miners on each step.
|
402 | 421 | pages_per_eval_unpack = 5 # With sample unpacking
|
403 |
| -pages_per_eval_pack = 18 |
| 422 | +pages_per_eval_pack = 11 |
404 | 423 |
|
405 | 424 | # validator eval batch size.
|
406 | 425 | batch_size = 1
|
407 | 426 | # validator eval batch min to keep for next loop.
|
408 | 427 | sample_min = 5
|
409 | 428 | # Max number of uids that can be either pending eval or currently being evaluated.
|
410 | 429 | # We allow the sample_min per competition + 10 additional models to be held at any one time.
|
411 |
| -updated_models_limit = sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2) + 10 |
| 430 | +updated_models_limit = ( |
| 431 | + sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2) + 10 |
| 432 | +) |
412 | 433 | # time required between updates to the chain.
|
413 | 434 | chain_update_cadence = dt.timedelta(minutes=20)
|
414 | 435 | # Number of blocks required between retrying evaluation of a model.
|
|
0 commit comments