Merge pull request #181 from macrocosm-os/dev

cryptal-mc · web-flow · commit 8468a0495ab3 · 2024-10-10T21:22:18.000+02:00
Release 4.5.1
diff --git a/constants/__init__.py b/constants/__init__.py
@@ -24,10 +24,7 @@
     ModelConstraints,
     NormValidationConstraints,
 )
-from taoverse.model.competition.epsilon import (
-    FixedEpsilon,
-    LinearDecay
-)
+from taoverse.model.competition.epsilon import FixedEpsilon, LinearDecay
 from competitions.data import CompetitionId
 
 from typing import Dict, List, Tuple
@@ -37,7 +34,7 @@
 # ---------------------------------
 
 # Release
-__version__ = "4.5.0"
+__version__ = "4.5.1"
 
 # Validator schema version
 __validator_version__ = "3.2.0"
@@ -98,7 +95,7 @@
 DATASET_BY_COMPETITION_ID: Dict[CompetitionId, str] = {
     CompetitionId.M772_MODEL: pt.dataset.SubsetFalconLoader,
     CompetitionId.B3_MODEL: pt.dataset.SubsetFalconLoader,
-    CompetitionId.B7_MODEL: pt.dataset.SubsetFineWebEdu2Loader,    
+    CompetitionId.B7_MODEL: pt.dataset.SubsetFineWebEdu2Loader,
     CompetitionId.B14_MODEL: pt.dataset.SubsetFineWebEdu2Loader,
 }
 
@@ -159,7 +156,9 @@
 }
 
 # Defined model constraints by competition id with decaying epsilon
-MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY: Dict[CompetitionId, ModelConstraints] = {
+MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY: Dict[
+    CompetitionId, ModelConstraints
+] = {
     CompetitionId.M772_MODEL: ModelConstraints(
         max_model_parameter_size=772_000_000,
         min_model_parameter_size=572_000_000,
@@ -215,7 +214,9 @@
 }
 
 # Defined model constraints by competition id with decaying epsilon
-MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2: Dict[CompetitionId, ModelConstraints] = {
+MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2: Dict[
+    CompetitionId, ModelConstraints
+] = {
     CompetitionId.M772_MODEL: ModelConstraints(
         max_model_parameter_size=772_000_000,
         min_model_parameter_size=572_000_000,
@@ -251,7 +252,7 @@
             "attn_implementation": "flash_attention_2",
         },
         eval_block_delay=0,
-        epsilon_func=LinearDecay(0.005, 0.0001, 100800),
+        epsilon_func=LinearDecay(0.005, 0.0001, 50400),
         max_bytes=29 * 1024 * 1024 * 1024,
     ),
 }
@@ -309,22 +310,30 @@
         [
             Competition(
                 CompetitionId.M772_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.M772_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
+                    CompetitionId.M772_MODEL
+                ],
                 0.14,
             ),
             Competition(
                 CompetitionId.B3_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B3_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
+                    CompetitionId.B3_MODEL
+                ],
                 0.29,
             ),
             Competition(
                 CompetitionId.B7_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B7_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
+                    CompetitionId.B7_MODEL
+                ],
                 0.15,
             ),
             Competition(
                 CompetitionId.B14_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B14_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
+                    CompetitionId.B14_MODEL
+                ],
                 0.42,
             ),
         ],
@@ -334,17 +343,23 @@
         [
             Competition(
                 CompetitionId.M772_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.M772_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
+                    CompetitionId.M772_MODEL
+                ],
                 0.14,
             ),
             Competition(
                 CompetitionId.B3_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B3_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
+                    CompetitionId.B3_MODEL
+                ],
                 0.29,
             ),
             Competition(
                 CompetitionId.B14_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B14_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
+                    CompetitionId.B14_MODEL
+                ],
                 0.57,
             ),
         ],
@@ -354,23 +369,27 @@
         [
             Competition(
                 CompetitionId.M772_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.M772_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[
+                    CompetitionId.M772_MODEL
+                ],
                 0.14,
             ),
             Competition(
                 CompetitionId.B3_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.B3_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[
+                    CompetitionId.B3_MODEL
+                ],
                 0.29,
             ),
             Competition(
                 CompetitionId.B14_MODEL,
-                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.B14_MODEL],
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[
+                    CompetitionId.B14_MODEL
+                ],
                 0.57,
             ),
         ],
     ),
-    
-
 ]
 
 for block_and_competitions in COMPETITION_SCHEDULE_BY_BLOCK:
@@ -400,15 +419,17 @@
 
 # validators number of pages to eval over miners on each step.
 pages_per_eval_unpack = 5  # With sample unpacking
-pages_per_eval_pack = 18
+pages_per_eval_pack = 11
 
 # validator eval batch size.
 batch_size = 1
 # validator eval batch min to keep for next loop.
 sample_min = 5
 # Max number of uids that can be either pending eval or currently being evaluated.
 # We allow the sample_min per competition + 10 additional models to be held at any one time.
-updated_models_limit = sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2) + 10
+updated_models_limit = (
+    sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2) + 10
+)
 # time required between updates to the chain.
 chain_update_cadence = dt.timedelta(minutes=20)
 # Number of blocks required between retrying evaluation of a model.
diff --git a/neurons/miner.py b/neurons/miner.py
@@ -278,7 +278,7 @@ async def main(config: bt.config):
             
     # Init model.
     # Init model.
-    tokenizer = ft.model.load_tokenizer(model_constraints, cache_dir=config.model_dir)
+    tokenizer = pt.model.load_tokenizer(model_constraints, cache_dir=config.model_dir)
     model = await load_starting_model(config, metagraph, chain_metadata_store, kwargs)
     model = model.train()
     model = model.to(config.device)
@@ -410,11 +410,11 @@ async def main(config: bt.config):
                 )
 
                 # First, reload the best model from the training run.
-                model_to_upload = ft.mining.load_local_model(
+                model_to_upload = pt.mining.load_local_model(
                     model_dir, model_constraints.kwargs
                 )
                 
-                await ft.mining.push(
+                await pt.mining.push(
                     model_to_upload,
                     config.hf_repo_id,
                     wallet,                    
diff --git a/neurons/validator.py b/neurons/validator.py
@@ -648,7 +648,7 @@ def clean_models(self):
 
                 self.local_store.delete_unreferenced_models(
                     valid_models_by_hotkey=evaluated_hotkeys_to_model_id,
-                    grace_period_seconds=300,
+                    grace_period_seconds=600,
                 )
             except Exception as e:
                 bt.logging.error(f"Error in clean loop: {e}")
@@ -891,11 +891,12 @@ async def run_step(self):
                                 tokenizer.eos_token_id,
                                 pack_samples,
                             ),
-                            ttl=400,
+                            ttl=430,
                             mode="spawn",
                         )
 
                     del model_i
+                    
                 except Exception as e:
                     bt.logging.error(
                         f"Error in eval loop: {e}. Setting losses for uid: {uid_i} to infinity."
diff --git a/pretrain/mining.py b/pretrain/mining.py
@@ -46,6 +46,7 @@
 
 from competitions.data import CompetitionId
 
+
 def model_path(base_dir: str, run_id: str) -> str:
     """
     Constructs a file path for storing the model relating to a training run.
@@ -96,7 +97,8 @@ async def push(
 
     bt.logging.debug("Started uploading model to hugging face...")
     model_id = await remote_model_store.upload_model(
-        Model(id=model_id, pt_model=model), model_constraints)
+        Model(id=model_id, pt_model=model), model_constraints
+    )
 
     bt.logging.success("Uploaded model to hugging face.")
 
@@ -190,6 +192,7 @@ def load_local_model(model_dir: str, kwargs: Dict[str, Any]) -> PreTrainedModel:
         **kwargs,
     )
 
+
 async def load_remote_model(
     uid: int,
     download_dir: str,
@@ -245,7 +248,7 @@ async def load_best_model(
     remote_model_store: Optional[RemoteModelStore] = None,
 ) -> PreTrainedModel:
     """Loads the model from the best performing miner to download_dir"""
-    best_uid = ft.graph.best_uid(competition_id=competition_id)
+    best_uid = pt.graph.best_uid(competition_id=competition_id)
     if best_uid is None:
         raise ValueError(f"No best models found for {competition_id}")
 
diff --git a/requirements.txt b/requirements.txt
@@ -11,4 +11,4 @@ transformers==4.44.1
 wandb
 datasets
 flash-attn
-taoverse==1.0.5
+taoverse==1.0.6