
Commit 8f1aff2

Merge pull request #233 from macrocosm-os/dev
Release 6.0.0
2 parents 707aeb0 + 6ee2120

File tree

14 files changed, +780 -41 lines changed


competitions/data.py

+2

@@ -16,6 +16,8 @@ class CompetitionId(IntEnum):
 
     B14_MODEL_MULTI_DATASET = 5
 
+    TTS_V0 = 6
+
     # Overwrite the default __repr__, which doesn't work with
     # bt.logging for some unknown reason.
     def __repr__(self) -> str:
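For context, a minimal sketch of how the extended enum behaves. The __repr__ body below is hypothetical (the diff truncates it), and the real enum defines more members than the two shown here:

    from enum import IntEnum

    class CompetitionId(IntEnum):
        # Only the members visible in this diff; the real enum has more.
        B14_MODEL_MULTI_DATASET = 5
        TTS_V0 = 6

        # Hypothetical body: format as a plain string so members render
        # cleanly under bt.logging.
        def __repr__(self) -> str:
            return f"{self.__class__.__name__}.{self.name}"

    assert CompetitionId.TTS_V0 == 6   # IntEnum members compare as ints
    print(repr(CompetitionId.TTS_V0))  # CompetitionId.TTS_V0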

constants/__init__.py

+230 -6
@@ -8,6 +8,8 @@
 from taoverse.model.competition.epsilon import LinearDecay
 from taoverse.model.eval.normalization import NormalizationId
 from taoverse.model.eval.task import EvalTask
+from taoverse.model.tts.e2tts import E2TTS
+
 from transformers import (
     BartForCausalLM,
     FalconForCausalLM,
@@ -32,7 +34,7 @@
 # ---------------------------------
 
 # Release
-__version__ = "5.1.1"
+__version__ = "6.0.0"
 
 # Validator schema version
 __validator_version__ = "4.6.0"
@@ -52,6 +54,9 @@
 # The root directory of this project.
 ROOT_DIR = Path(__file__).parent.parent
 
+# block to start the tts competition
+BLOCK_TTS = 5_177_981
+
 # Minimum stake to consider a validator when checking for miners with weights.
 # This corresponded to top-10 validator on july 31st, 2024
 WEIGHT_SYNC_VALI_MIN_STAKE = 200_000
@@ -62,6 +67,7 @@
 
 # Validator eval batch size.
 BATCH_SIZE = 1
+BATCH_SIZE_TTS = 1
 # Validators number of pages to eval over miners on each step.
 
 # This will be used before activation block BLOCK_MULTI_DATASETS
@@ -75,6 +81,7 @@
 PAGES_PER_EVAL_PES2OX = 2
 PAGES_PER_EVAL_FINEMATH3P = 6
 PAGES_PER_EVAL_WEBMATH3P = 6
+PAGES_PER_EVAL_PPLSPEECH = 1
 
 # Maximum number of batches to use for evaluation per dataset.
 MAX_BATCHES_PER_DATASET = 50
@@ -101,14 +108,17 @@
     Gemma2ForCausalLM,
     Qwen2ForCausalLM,
 }
+ALLOWED_MODEL_TYPES_TTS = {
+    E2TTS
+}
 
 
 # Synchronize on blocks roughly every 30 minutes.
 SYNC_BLOCK_CADENCE = 150
 # Delay at least as long as the sync block cadence with an additional buffer.
 EVAL_BLOCK_DELAY = SYNC_BLOCK_CADENCE + 100
 
-MODEL_CONSTRAINTS_BY_COMPETITION_ID: Dict[CompetitionId, ModelConstraints] = {
+MODEL_CONSTRAINTS_BY_COMPETITION_ID_TMP: Dict[CompetitionId, ModelConstraints] = {
     CompetitionId.B3_MODEL: ModelConstraints(
         max_model_parameter_size=3_400_000_000,
         min_model_parameter_size=3_200_000_000,

@@ -120,7 +130,7 @@
             "attn_implementation": "flash_attention_2",
         },
         eval_block_delay=EVAL_BLOCK_DELAY,
-        epsilon_func=LinearDecay(0.005, 0.0005, 50400),
+        epsilon_func=LinearDecay(0.005, 0.0005, 7200 * 7),
         max_bytes=15 * 1024 * 1024 * 1024,
     ),
     CompetitionId.B14_MODEL: ModelConstraints(
@@ -134,19 +144,60 @@
             "attn_implementation": "flash_attention_2",
         },
         eval_block_delay=EVAL_BLOCK_DELAY,
-        epsilon_func=LinearDecay(0.005, 0.0005, 72000),
+        epsilon_func=LinearDecay(0.005, 0.0005, 7200 * 10),
         max_bytes=29 * 1024 * 1024 * 1024,
     ),
 }
 
+MODEL_CONSTRAINTS_BY_COMPETITION_ID: Dict[CompetitionId, ModelConstraints] = {
+    CompetitionId.B3_MODEL: ModelConstraints(
+        max_model_parameter_size=3_400_000_000,
+        min_model_parameter_size=3_200_000_000,
+        sequence_length=4096,
+        allowed_architectures=ALLOWED_MODEL_TYPES_2,
+        tokenizer="Xenova/gpt-4",
+        kwargs={
+            "torch_dtype": torch.bfloat16,
+            "attn_implementation": "flash_attention_2",
+        },
+        eval_block_delay=EVAL_BLOCK_DELAY,
+        epsilon_func=LinearDecay(0.005, 0.0001, 7200 * 2),
+        max_bytes=15 * 1024 * 1024 * 1024,
+    ),
+    CompetitionId.B14_MODEL: ModelConstraints(
+        max_model_parameter_size=13_900_000_000,
+        min_model_parameter_size=13_700_000_000,
+        sequence_length=4096,
+        allowed_architectures=ALLOWED_MODEL_TYPES_2,
+        tokenizer="Xenova/gpt-4",
+        kwargs={
+            "torch_dtype": torch.bfloat16,
+            "attn_implementation": "flash_attention_2",
+        },
+        eval_block_delay=EVAL_BLOCK_DELAY,
+        epsilon_func=LinearDecay(0.005, 0.0001, 7200 * 2),
+        max_bytes=29 * 1024 * 1024 * 1024,
+    ),
+    CompetitionId.TTS_V0: ModelConstraints(
+        max_model_parameter_size=400_000_000,
+        min_model_parameter_size=350_000_000,
+        sequence_length=None,
+        allowed_architectures=ALLOWED_MODEL_TYPES_TTS,
+        tokenizer="e2tts",
+        eval_block_delay=EVAL_BLOCK_DELAY,
+        epsilon_func=LinearDecay(0.005, 0.0005, 7200 * 7),
+        max_bytes=2 * 1024 * 1024 * 1024,
+    ),
+
+}
 # Schedule of competitions by block.
 COMPETITION_SCHEDULE_BY_BLOCK: List[Tuple[int, List[Competition]]] = [
     (
         0,
         [
             Competition(
                 CompetitionId.B3_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B3_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_TMP[CompetitionId.B3_MODEL],
                 0.3,
                 eval_tasks=[
                     EvalTask(
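The epsilon windows above are expressed in blocks: with Bittensor's roughly 12-second block time, 7200 blocks is about one day, so the new decay windows run about 2, 7, or 10 days. A minimal sketch of the interpolation that LinearDecay(start, end, decay_blocks) presumably performs (the actual class lives upstream in taoverse.model.competition.epsilon):

    def linear_decay(start: float, end: float, decay_blocks: int, blocks_elapsed: int) -> float:
        # Clamp at the endpoint, interpolate linearly before it.
        if blocks_elapsed >= decay_blocks:
            return end
        return start + (end - start) * (blocks_elapsed / decay_blocks)

    # Example: the TTS constraint LinearDecay(0.005, 0.0005, 7200 * 7)
    # halfway through its ~one-week window:
    print(linear_decay(0.005, 0.0005, 7200 * 7, 7200 * 7 // 2))  # 0.00275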
@@ -219,7 +270,7 @@
             ),
             Competition(
                 CompetitionId.B14_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B14_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_TMP[CompetitionId.B14_MODEL],
                 0.7,
                 eval_tasks=[
                     EvalTask(
@@ -292,6 +343,179 @@
             ),
         ],
     ),
+    (
+        BLOCK_TTS,
+        [
+            Competition(
+                CompetitionId.TTS_V0,
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.TTS_V0],
+                0.55,
+                eval_tasks=[
+                    EvalTask(
+                        name="PPL_SPEECH",
+                        method_id=EvalMethodId.WER,
+                        dataset_id=DatasetId.PPLSPEECH,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE_TTS,
+                            "num_pages": PAGES_PER_EVAL_PPLSPEECH,
+                            "target_sr": 24000,
+                            "target_rms": 0.1,
+                            "ref_audio_max_duration": 15,
+                            "hop_length": 256
+                        },
+                        weight=1.0,
+                    ),
+                ],
+            ),
+            Competition(
+                CompetitionId.B3_MODEL,
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B3_MODEL],
+                0.15,
+                eval_tasks=[
+                    EvalTask(
+                        name="FINEWEB",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.FINEWEB,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_FINEWEB,
+                        },
+                        weight=0.3,
+                    ),
+                    EvalTask(
+                        name="FINEWEB_EDU2",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.FINEWEB2,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_FINEWEB2,
+                        },
+                        weight=0.25,
+                    ),
+                    EvalTask(
+                        name="STACKV2_DEDUP",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.STACK2,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_STACK2,
+                        },
+                        weight=0.35,
+                    ),
+                    EvalTask(
+                        name="PES2OX",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.PES2OX,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_PES2OX,
+                        },
+                        weight=0.05,
+                    ),
+                    EvalTask(
+                        name="FINEMATH_3P",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.FINEMATH3P,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_FINEMATH3P,
+                        },
+                        weight=0.03,
+                    ),
+                    EvalTask(
+                        name="INFIWEBMATH_3P",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.WEBMATH3P,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_WEBMATH3P,
+                        },
+                        weight=0.02,
+                    ),
+                ],
+            ),
+            Competition(
+                CompetitionId.B14_MODEL,
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B14_MODEL],
+                0.3,
+                eval_tasks=[
+                    EvalTask(
+                        name="FINEWEB",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.FINEWEB,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_FINEWEB,
+                        },
+                        weight=0.3,
+                    ),
+                    EvalTask(
+                        name="FINEWEB_EDU2",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.FINEWEB2,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_FINEWEB2,
+                        },
+                        weight=0.25,
+                    ),
+                    EvalTask(
+                        name="STACKV2_DEDUP",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.STACK2,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_STACK2,
+                        },
+                        weight=0.35,
+                    ),
+                    EvalTask(
+                        name="PES2OX",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.PES2OX,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_PES2OX,
+                        },
+                        weight=0.05,
+                    ),
+                    EvalTask(
+                        name="FINEMATH_3P",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.FINEMATH3P,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_FINEMATH3P,
+                        },
+                        weight=0.03,
+                    ),
+                    EvalTask(
+                        name="INFIWEBMATH_3P",
+                        method_id=EvalMethodId.TEXT_LOSS,
+                        dataset_id=DatasetId.WEBMATH3P,
+                        normalization_id=NormalizationId.NONE,
+                        dataset_kwargs={
+                            "batch_size": BATCH_SIZE,
+                            "num_pages": PAGES_PER_EVAL_WEBMATH3P,
+                        },
+                        weight=0.02,
+                    ),
+                ],
+            ),
+        ],
+    ),
 ]
 
 for block_and_competitions in COMPETITION_SCHEDULE_BY_BLOCK:
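Useful context for the schedule above: entries are (start_block, competitions) pairs in ascending block order, so the active list is presumably the last entry whose start block is at or below the current chain block. A hypothetical selector illustrating that lookup (the real logic lives in the validator/taoverse code, not this commit):

    from bisect import bisect_right

    def competitions_for_block(schedule, block: int):
        # schedule: [(start_block, [Competition, ...]), ...] sorted ascending.
        starts = [start for start, _ in schedule]
        idx = bisect_right(starts, block) - 1
        return schedule[idx][1] if idx >= 0 else []

Before block 5_177_981 this yields the original B3/B14 split (weights 0.3/0.7); from BLOCK_TTS onward it yields TTS_V0/B3/B14 (weights 0.55/0.15/0.3).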

neurons/validator.py

+3 -2
@@ -73,8 +73,8 @@
 import constants
 import pretrain as pt
 from competitions.data import CompetitionId
-from model.retry import should_retry_model
 from neurons import config
+from pretrain.models.retry import should_retry_model
 from pretrain.dataset import SubsetLoader
 from pretrain.datasets.factory import DatasetLoaderFactory
 from pretrain.eval.sample import EvalSample

@@ -1006,7 +1006,7 @@ async def run_step(self):
                     samples,
                     self.config.device,
                 ),
-                ttl=430,
+                ttl=480,  # 8 minutes
                 mode="spawn",
             )
             del model_i

@@ -1315,6 +1315,7 @@ def log_step(
         "PES2OX": "laion/Pes2oX-fulltext",
         "FINEMATH_3P": "HuggingFaceTB/finemath:finemath-3p",
         "INFIWEBMATH_3P": "HuggingFaceTB/finemath:infiwebmath-3p",
+        "PPL_SPEECH": "MLCommons/peoples_speech"
     }
     dataset_name = (
         task_to_dataset_name[task_name]
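The last hunk truncates the dataset_name expression, so here is a hedged sketch of the lookup around the new "PPL_SPEECH" entry; the fallback branch is an assumption, not confirmed by the diff:

    task_to_dataset_name = {
        "PES2OX": "laion/Pes2oX-fulltext",
        "FINEMATH_3P": "HuggingFaceTB/finemath:finemath-3p",
        "INFIWEBMATH_3P": "HuggingFaceTB/finemath:infiwebmath-3p",
        "PPL_SPEECH": "MLCommons/peoples_speech",
    }

    task_name = "PPL_SPEECH"
    dataset_name = (
        task_to_dataset_name[task_name]
        if task_name in task_to_dataset_name
        else task_name  # assumed fallback; the diff cuts off here
    )
    print(dataset_name)  # MLCommons/peoples_speech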

0 commit comments
