diff --git a/.gitignore b/.gitignore
index c8d67582a..9f603bf02 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,4 +50,5 @@ dataset/**
 !dataset/**/.gitkeep
 models
 data
-config.toml
\ No newline at end of file
+config.toml
+sd-scripts
\ No newline at end of file
diff --git a/.release b/.release
index ea068bb28..f242ab11b 100644
--- a/.release
+++ b/.release
@@ -1 +1 @@
-v23.0.14
\ No newline at end of file
+v23.0.15
\ No newline at end of file
diff --git a/README.md b/README.md
index 26d742bb1..e818fe406 100644
--- a/README.md
+++ b/README.md
@@ -38,8 +38,9 @@ The GUI allows you to set the training parameters and generate and run the requi
   - [No module called tkinter](#no-module-called-tkinter)
   - [SDXL training](#sdxl-training)
   - [Change History](#change-history)
-    - [2024/03/13 (v23.0.14)](#20240313-v23014)
-    - [2024/03/13 (v23.0.13)](#20240313-v23013)
+    - [2024/03/21 (v23.0.15)](#20240321-v23015)
+    - [2024/03/19 (v23.0.14)](#20240319-v23014)
+    - [2024/03/19 (v23.0.13)](#20240319-v23013)
     - [2024/03/16 (v23.0.12)](#20240316-v23012)
       - [New Features \& Improvements](#new-features--improvements)
       - [Software Updates](#software-updates)
@@ -381,11 +382,19 @@ The documentation in this section will be moved to a separate document later.

 ## Change History

-### 2024/03/13 (v23.0.14)
+### 2024/03/21 (v23.0.15)
+
+- Add support for toml dataset configuration file to all trainers
+- Add new setup menu option to install Triton 2.1.0 for Windows
+- Add support for LyCORIS BOFT and DoRA, and QLyCORIS options for LoHa, LoKr and LoCon
+- Fix issue with vae path validation
+- Other fixes
+
+### 2024/03/19 (v23.0.14)

 - Fix blip caption issue
--
-### 2024/03/13 (v23.0.13)
+
+### 2024/03/19 (v23.0.13)

 - Fix issue with image samples.
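Context for the first changelog item: the new `--dataset_config` option points the trainers at a TOML file in the layout sd-scripts documents for its dataset config. A minimal sketch, assuming the `toml` package is available; section and key names follow sd-scripts' documented schema, while the paths and values here are purely illustrative:

```python
import toml  # assumption: the `toml` package is installed

# Hypothetical minimal dataset config; keys follow sd-scripts' dataset schema.
example = """
[general]
caption_extension = ".txt"
keep_tokens = 1

[[datasets]]
resolution = 512
batch_size = 4

  [[datasets.subsets]]
  image_dir = "dataset/images/10_subject"
  num_repeats = 10
"""

config = toml.loads(example)
print(config["datasets"][0]["subsets"][0]["num_repeats"])  # 10
```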
Default: 8", @@ -437,7 +433,7 @@ def list_log_tracker_config_files(path): "open_folder_small", ) self.log_tracker_config_button = gr.Button( - "📂", elem_id="open_folder_small", visible=(not headless) + document_symbol, elem_id="open_folder_small", visible=(not headless) ) self.log_tracker_config_button.click( get_any_file_path, diff --git a/kohya_gui/class_folders.py b/kohya_gui/class_folders.py index 6d206abd0..845c165bf 100644 --- a/kohya_gui/class_folders.py +++ b/kohya_gui/class_folders.py @@ -1,6 +1,6 @@ import gradio as gr import os -from .common_gui import get_folder_path, scriptdir, list_dirs, create_refresh_button +from .common_gui import get_folder_path, scriptdir, list_dirs, list_files, create_refresh_button class Folders: """ diff --git a/kohya_gui/class_source_model.py b/kohya_gui/class_source_model.py index 218b7a680..0c92905b9 100644 --- a/kohya_gui/class_source_model.py +++ b/kohya_gui/class_source_model.py @@ -2,7 +2,7 @@ import os from .common_gui import ( - get_any_file_path, + get_file_path, get_folder_path, set_pretrained_model_name_or_path_input, scriptdir, @@ -61,6 +61,8 @@ def __init__( self.current_train_data_dir = self.config.get( "train_data_dir", os.path.join(scriptdir, "data") ) + self.current_dataset_config_dir = self.config.get('dataset_config_dir', os.path.join(scriptdir, "dataset_config")) + model_checkpoints = list( list_files( @@ -79,6 +81,21 @@ def list_models(path): def list_train_data_dirs(path): self.current_train_data_dir = path if not path == "" else "." return list(list_dirs(path)) + + def list_dataset_config_dirs(path: str) -> list: + """ + List directories and toml files in the dataset_config directory. + + Parameters: + - path (str): The path to list directories and files from. + + Returns: + - list: A list of directories and files. + """ + current_dataset_config_dir = path if not path == "" else "." + # Lists all .json files in the current configuration directory, used for populating dropdown choices. + return list(list_files(current_dataset_config_dir, exts=[".toml"], all=True)) + with gr.Column(), gr.Group(): # Define the input elements @@ -107,7 +124,7 @@ def list_train_data_dirs(path): visible=(not headless), ) self.pretrained_model_name_or_path_file.click( - get_any_file_path, + get_file_path, inputs=self.pretrained_model_name_or_path, outputs=self.pretrained_model_name_or_path, show_progress=False, @@ -124,7 +141,15 @@ def list_train_data_dirs(path): outputs=self.pretrained_model_name_or_path, show_progress=False, ) - + + with gr.Column(), gr.Row(): + self.output_name = gr.Textbox( + label="Trained Model output name", + placeholder="(Name of the model to output)", + value="last", + interactive=True, + ) + with gr.Row(): with gr.Column(), gr.Row(): self.train_data_dir = gr.Dropdown( label=( @@ -158,6 +183,36 @@ def list_train_data_dirs(path): outputs=self.train_data_dir, show_progress=False, ) + with gr.Column(), gr.Row(): + # Toml directory dropdown + self.dataset_config = gr.Dropdown( + label='Dataset config file (Optional. 
         with gr.Column(), gr.Group():
             # Define the input elements
@@ -107,7 +124,7 @@ def list_train_data_dirs(path):
                     visible=(not headless),
                 )
                 self.pretrained_model_name_or_path_file.click(
-                    get_any_file_path,
+                    get_file_path,
                     inputs=self.pretrained_model_name_or_path,
                     outputs=self.pretrained_model_name_or_path,
                     show_progress=False,
@@ -124,7 +141,15 @@ def list_train_data_dirs(path):
                     outputs=self.pretrained_model_name_or_path,
                     show_progress=False,
                 )
-
+
+            with gr.Column(), gr.Row():
+                self.output_name = gr.Textbox(
+                    label="Trained Model output name",
+                    placeholder="(Name of the model to output)",
+                    value="last",
+                    interactive=True,
+                )
+
         with gr.Row():
             with gr.Column(), gr.Row():
                 self.train_data_dir = gr.Dropdown(
                     label=(
@@ -158,6 +183,36 @@ def list_train_data_dirs(path):
                     outputs=self.train_data_dir,
                     show_progress=False,
                 )
+            with gr.Column(), gr.Row():
+                # Dataset config file dropdown (lists .toml files)
+                self.dataset_config = gr.Dropdown(
+                    label='Dataset config file (Optional. Select the toml configuration file to use for the dataset)',
+                    choices=[""] + list_dataset_config_dirs(self.current_dataset_config_dir),
+                    value="",
+                    interactive=True,
+                    allow_custom_value=True,
+                )
+                # Refresh button for the dataset config dropdown
+                create_refresh_button(self.dataset_config, lambda: None, lambda: {"choices": [""] + list_dataset_config_dirs(self.current_dataset_config_dir)}, "open_folder_small")
+                # Dataset config file picker button
+                self.dataset_config_folder = gr.Button(
+                    document_symbol, elem_id='open_folder_small', elem_classes=["tool"], visible=(not self.headless)
+                )
+
+                # Dataset config file picker click event
+                self.dataset_config_folder.click(
+                    get_file_path,
+                    inputs=[self.dataset_config, gr.Textbox(value='*.toml', visible=False), gr.Textbox(value='Dataset config types', visible=False)],
+                    outputs=self.dataset_config,
+                    show_progress=False,
+                )
+                # Change event for the dataset config dropdown
+                self.dataset_config.change(
+                    fn=lambda path: gr.Dropdown(choices=[""] + list_dataset_config_dirs(path)),
+                    inputs=self.dataset_config,
+                    outputs=self.dataset_config,
+                    show_progress=False,
+                )

         with gr.Row():
             with gr.Column():
@@ -181,12 +236,6 @@ def list_train_data_dirs(path):
                 gr.Box(visible=False)

         with gr.Row():
-            self.output_name = gr.Textbox(
-                label="Trained Model output name",
-                placeholder="(Name of the model to output)",
-                value="last",
-                interactive=True,
-            )
             self.training_comment = gr.Textbox(
                 label="Training comment",
                 placeholder="(Optional) Add training comment to be included in metadata",
diff --git a/kohya_gui/common_gui.py b/kohya_gui/common_gui.py
index bf0f74275..b2dee8c9b 100644
--- a/kohya_gui/common_gui.py
+++ b/kohya_gui/common_gui.py
@@ -54,6 +54,23 @@
 ENV_EXCLUSION = ["COLAB_GPU", "RUNPOD_POD_ID"]

+def calculate_max_train_steps(
+    total_steps: int,
+    train_batch_size: int,
+    gradient_accumulation_steps: int,
+    epoch: int,
+    reg_factor: int,
+):
+    return int(
+        math.ceil(
+            float(total_steps)
+            / int(train_batch_size)
+            / int(gradient_accumulation_steps)
+            * int(epoch)
+            * int(reg_factor)
+        )
+    )
+
 def check_if_model_exist(
     output_name: str, output_dir: str, save_model_as: str, headless: bool = False
 ) -> bool:
@@ -1077,6 +1094,11 @@ def run_cmd_advanced_training(**kwargs):
     if color_aug:
         run_cmd += " --color_aug"

+    dataset_config = kwargs.get("dataset_config")
+    if dataset_config:
+        dataset_config = os.path.abspath(os.path.normpath(dataset_config))
+        run_cmd += f' --dataset_config="{dataset_config}"'
+
     dataset_repeats = kwargs.get("dataset_repeats")
     if dataset_repeats:
         run_cmd += f' --dataset_repeats="{dataset_repeats}"'
@@ -1753,6 +1775,13 @@ def validate_path(
         if key in ["output_dir", "logging_dir"]:
             if not validate_path(value, key, create_if_missing=True):
                 return False
+        elif key in ["vae"]:
+            # Check if it matches the Hugging Face model pattern
+            if re.match(r"^[\w-]+\/[\w-]+$", value):
+                log.info("Checking vae... huggingface.co model, skipping validation")
+            else:
+                if not validate_path(value, key):
+                    return False
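For reference, a small standalone sketch of which `vae` values the pattern above treats as Hugging Face model IDs (a minimal check only; the example values are invented):

```python
import re

HF_MODEL_RE = re.compile(r"^[\w-]+\/[\w-]+$")

# "user/repo" style Hugging Face IDs match, so path validation is skipped:
assert HF_MODEL_RE.match("stabilityai/sd-vae-ft-mse")
# Local file paths do not match, so they go through normal path validation:
assert not HF_MODEL_RE.match("models/vae/kl-f8.safetensors")
```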
         else:
             if key not in ["pretrained_model_name_or_path"]:
                 if not validate_path(value, key):
                     return False
diff --git a/kohya_gui/dreambooth_gui.py b/kohya_gui/dreambooth_gui.py
index a40e829f8..3ac9f8a87 100644
--- a/kohya_gui/dreambooth_gui.py
+++ b/kohya_gui/dreambooth_gui.py
@@ -58,6 +58,7 @@ def save_configuration(
     train_data_dir,
     reg_data_dir,
     output_dir,
+    dataset_config,
     max_resolution,
     learning_rate,
     learning_rate_te,
@@ -189,6 +190,7 @@ def open_configuration(
     train_data_dir,
     reg_data_dir,
     output_dir,
+    dataset_config,
     max_resolution,
     learning_rate,
     learning_rate_te,
@@ -315,6 +317,7 @@ def train_model(
     train_data_dir,
     reg_data_dir,
     output_dir,
+    dataset_config,
     max_resolution,
     learning_rate,
     learning_rate_te,
@@ -421,6 +424,7 @@ def train_model(
         log_tracker_config=log_tracker_config,
         resume=resume,
         vae=vae,
+        dataset_config=dataset_config,
     ):
         return
@@ -429,102 +433,90 @@ def train_model(
     ):
         return

-    # if sdxl:
-    #     output_message(
-    #         msg='Dreambooth training is not compatible with SDXL models yet..',
-    #         headless=headless_bool,
-    #     )
-    #     return
-
-    # if optimizer == 'Adafactor' and lr_warmup != '0':
-    #     output_message(
-    #         msg="Warning: lr_scheduler is set to 'Adafactor', so 'LR warmup (% of steps)' will be considered 0.",
-    #         title='Warning',
-    #         headless=headless_bool,
-    #     )
-    #     lr_warmup = '0'
-
-    # Get a list of all subfolders in train_data_dir, excluding hidden folders
-    subfolders = [
-        f
-        for f in os.listdir(train_data_dir)
-        if os.path.isdir(os.path.join(train_data_dir, f)) and not f.startswith(".")
-    ]
-
-    # Check if subfolders are present. If not let the user know and return
-    if not subfolders:
-        log.info(f"No {subfolders} were found in train_data_dir can't train...")
-        return
-
-    total_steps = 0
-
-    # Loop through each subfolder and extract the number of repeats
-    for folder in subfolders:
-        # Extract the number of repeats from the folder name
-        try:
-            repeats = int(folder.split("_")[0])
-        except ValueError:
-            log.info(
-                f"Subfolder {folder} does not have a proper repeat value, please correct the name or remove it... can't train..."
-            )
-            continue
-
-        # Count the number of images in the folder
-        num_images = len(
-            [
-                f
-                for f, lower_f in (
-                    (file, file.lower())
-                    for file in os.listdir(os.path.join(train_data_dir, folder))
-                )
-                if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp"))
-            ]
-        )
-
-        if num_images == 0:
-            log.info(f"{folder} folder contain no images, skipping...")
-        else:
-            # Calculate the total number of steps for this folder
-            steps = repeats * num_images
-            total_steps += steps
-
-            # Print the result
-            log.info(f"Folder {folder} : steps {steps}")
-
-    if total_steps == 0:
-        log.info(f"No images were found in folder {train_data_dir}... please rectify!")
-        return
-
-    # Print the result
-    # log.info(f"{total_steps} total steps")
-
-    if reg_data_dir == "":
-        reg_factor = 1
-    else:
-        log.info(
-            f"Regularisation images are used... Will double the number of steps required..."
-        )
-        reg_factor = 2
-
-    if max_train_steps == "" or max_train_steps == "0":
-        # calculate max_train_steps
-        max_train_steps = int(
-            math.ceil(
-                float(total_steps)
-                / int(train_batch_size)
-                / int(gradient_accumulation_steps)
-                * int(epoch)
-                * int(reg_factor)
-            )
-        )
-        log.info(
-            f"max_train_steps ({total_steps} / {train_batch_size} / {gradient_accumulation_steps} * {epoch} * {reg_factor}) = {max_train_steps}"
-        )
+    if dataset_config:
+        log.info("Dataset config toml file used, skipping total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps calculations...")
+    else:
+        # Get a list of all subfolders in train_data_dir, excluding hidden folders
+        subfolders = [
+            f
+            for f in os.listdir(train_data_dir)
+            if os.path.isdir(os.path.join(train_data_dir, f)) and not f.startswith(".")
+        ]
+
+        # Check if subfolders are present. If not, let the user know and return
+        if not subfolders:
+            log.info("No subfolders were found in train_data_dir, can't train...")
+            return
+
+        total_steps = 0
+
+        # Loop through each subfolder and extract the number of repeats
+        for folder in subfolders:
+            # Extract the number of repeats from the folder name
+            try:
+                repeats = int(folder.split("_")[0])
+            except ValueError:
+                log.info(
+                    f"Subfolder {folder} does not have a proper repeat value, please correct the name or remove it... can't train..."
+                )
+                continue
+
+            # Count the number of images in the folder
+            num_images = len(
+                [
+                    f
+                    for f, lower_f in (
+                        (file, file.lower())
+                        for file in os.listdir(os.path.join(train_data_dir, folder))
+                    )
+                    if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp"))
+                ]
+            )
+
+            if num_images == 0:
+                log.info(f"{folder} folder contains no images, skipping...")
+            else:
+                # Calculate the total number of steps for this folder
+                steps = repeats * num_images
+                total_steps += steps
+
+                # Print the result
+                log.info(f"Folder {folder} : steps {steps}")
+
+        if total_steps == 0:
+            log.info(f"No images were found in folder {train_data_dir}... please rectify!")
+            return
+
+        # Print the result
+        # log.info(f"{total_steps} total steps")
+
+        if reg_data_dir == "":
+            reg_factor = 1
+        else:
+            log.info(
+                f"Regularisation images are used... Will double the number of steps required..."
+            )
+            reg_factor = 2
+
+        if max_train_steps == "" or max_train_steps == "0":
+            # calculate max_train_steps
+            max_train_steps = int(
+                math.ceil(
+                    float(total_steps)
+                    / int(train_batch_size)
+                    / int(gradient_accumulation_steps)
+                    * int(epoch)
+                    * int(reg_factor)
+                )
+            )
+            log.info(
+                f"max_train_steps ({total_steps} / {train_batch_size} / {gradient_accumulation_steps} * {epoch} * {reg_factor}) = {max_train_steps}"
+            )
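To make the `<repeats>_<concept>` folder convention and the step arithmetic above concrete, a small self-contained sketch; the folder names and image counts are invented for illustration:

```python
import math

# Folders follow the "<repeats>_<concept>" naming convention:
folders = {"10_subject": 150, "5_style": 40}  # name -> image count (illustrative)

total_steps = sum(int(name.split("_")[0]) * n for name, n in folders.items())
# 10*150 + 5*40 = 1700

train_batch_size = 2
gradient_accumulation_steps = 1
epoch = 10
reg_factor = 2  # regularisation images double the required steps

max_train_steps = int(math.ceil(
    float(total_steps) / train_batch_size / gradient_accumulation_steps
    * epoch * reg_factor
))
print(max_train_steps)  # ceil(1700 / 2 / 1 * 10 * 2) = 17000
```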
     # calculate stop encoder training
     if int(stop_text_encoder_training_pct) == -1:
         stop_text_encoder_training = -1
-    elif stop_text_encoder_training_pct == None:
+    elif stop_text_encoder_training_pct == None or max_train_steps == "" or max_train_steps == "0":
         stop_text_encoder_training = 0
     else:
         stop_text_encoder_training = math.ceil(
@@ -532,7 +524,10 @@ def train_model(
     )
     log.info(f"stop_text_encoder_training = {stop_text_encoder_training}")

-    lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
+    if not max_train_steps == "":
+        lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
+    else:
+        lr_warmup_steps = 0
     log.info(f"lr_warmup_steps = {lr_warmup_steps}")

     # run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_db.py"'
@@ -564,6 +559,7 @@ def train_model(
         "caption_extension": caption_extension,
         "clip_skip": clip_skip,
         "color_aug": color_aug,
+        "dataset_config": dataset_config,
         "enable_bucket": enable_bucket,
         "epoch": epoch,
         "flip_aug": flip_aug,
@@ -788,6 +784,7 @@ def dreambooth_tab(
         source_model.train_data_dir,
         folders.reg_data_dir,
         folders.output_dir,
+        source_model.dataset_config,
         basic_training.max_resolution,
         basic_training.learning_rate,
         basic_training.learning_rate_te,
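When the new `dataset_config` key is set, `run_cmd_advanced_training` in common_gui.py (above) normalizes the path and appends the flag. A rough sketch of the resulting command fragment; the path is illustrative:

```python
import os

dataset_config = "dataset_config/my_dataset.toml"  # illustrative value
dataset_config = os.path.abspath(os.path.normpath(dataset_config))
fragment = f' --dataset_config="{dataset_config}"'
# e.g. ' --dataset_config="/home/user/kohya_ss/dataset_config/my_dataset.toml"'
```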
diff --git a/kohya_gui/finetune_gui.py b/kohya_gui/finetune_gui.py
index 4ca796faa..a7dc033a4 100644
--- a/kohya_gui/finetune_gui.py
+++ b/kohya_gui/finetune_gui.py
@@ -62,6 +62,7 @@ def save_configuration(
     train_dir,
     image_folder,
     output_dir,
+    dataset_config,
     logging_dir,
     max_resolution,
     min_bucket_reso,
@@ -108,6 +109,7 @@ def save_configuration(
     output_name,
     max_token_length,
     max_train_epochs,
+    max_train_steps,
     max_data_loader_n_workers,
     full_fp16,
     color_aug,
@@ -200,6 +202,7 @@ def open_configuration(
     train_dir,
     image_folder,
     output_dir,
+    dataset_config,
     logging_dir,
     max_resolution,
     min_bucket_reso,
@@ -246,6 +249,7 @@ def open_configuration(
     output_name,
     max_token_length,
     max_train_epochs,
+    max_train_steps,
     max_data_loader_n_workers,
     full_fp16,
     color_aug,
@@ -345,6 +349,7 @@ def train_model(
     train_dir,
     image_folder,
     output_dir,
+    dataset_config,
     logging_dir,
     max_resolution,
     min_bucket_reso,
@@ -391,6 +396,7 @@ def train_model(
     output_name,
     max_token_length,
     max_train_epochs,
+    max_train_steps,
     max_data_loader_n_workers,
     full_fp16,
     color_aug,
@@ -455,93 +461,99 @@ def train_model(
         logging_dir=logging_dir,
         log_tracker_config=log_tracker_config,
         resume=resume,
+        dataset_config=dataset_config
     ):
         return

     if not print_only_bool and check_if_model_exist(output_name, output_dir, save_model_as, headless_bool):
         return

-    # create caption json file
-    if generate_caption_database:
-        run_cmd = fr'"{PYTHON}" "{scriptdir}/sd-scripts/finetune/merge_captions_to_metadata.py"'
-        if caption_extension == "":
-            run_cmd += f' --caption_extension=".caption"'
-        else:
-            run_cmd += f" --caption_extension={caption_extension}"
-        run_cmd += fr' "{image_folder}"'
-        run_cmd += 
fr' "{train_dir}/{caption_metadata_filename}"' - if full_path: - run_cmd += f" --full_path" - - log.info(run_cmd) - - env = os.environ.copy() - env['PYTHONPATH'] = fr"{scriptdir}{os.pathsep}{scriptdir}/sd-scripts{os.pathsep}{env.get('PYTHONPATH', '')}" - - if not print_only_bool: - # Run the command - subprocess.run(run_cmd, shell=True, env=env) - - # create images buckets - if generate_image_buckets: - run_cmd = fr'"{PYTHON}" "{scriptdir}/sd-scripts/finetune/prepare_buckets_latents.py"' - run_cmd += fr' "{image_folder}"' - run_cmd += fr' "{train_dir}/{caption_metadata_filename}"' - run_cmd += fr' "{train_dir}/{latent_metadata_filename}"' - run_cmd += fr' "{pretrained_model_name_or_path}"' - run_cmd += f" --batch_size={batch_size}" - run_cmd += f" --max_resolution={max_resolution}" - run_cmd += f" --min_bucket_reso={min_bucket_reso}" - run_cmd += f" --max_bucket_reso={max_bucket_reso}" - run_cmd += f" --mixed_precision={mixed_precision}" - # if flip_aug: - # run_cmd += f' --flip_aug' - if full_path: - run_cmd += f" --full_path" - if sdxl_checkbox and sdxl_no_half_vae: - log.info("Using mixed_precision = no because no half vae is selected...") - run_cmd += f' --mixed_precision="no"' - - log.info(run_cmd) - - env = os.environ.copy() - env['PYTHONPATH'] = fr"{scriptdir}{os.pathsep}{scriptdir}/sd-scripts{os.pathsep}{env.get('PYTHONPATH', '')}" - - if not print_only_bool: - # Run the command - subprocess.run(run_cmd, shell=True, env=env) - - image_num = len( - [ - f - for f, lower_f in ( - (file, file.lower()) for file in os.listdir(image_folder) + if dataset_config: + log.info("Dataset config toml file used, skipping caption json file, image buckets, total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps creation...") + else: + # create caption json file + if generate_caption_database: + run_cmd = fr'"{PYTHON}" "{scriptdir}/sd-scripts/finetune/merge_captions_to_metadata.py"' + if caption_extension == "": + run_cmd += f' --caption_extension=".caption"' + else: + run_cmd += f" --caption_extension={caption_extension}" + run_cmd += fr' "{image_folder}"' + run_cmd += fr' "{train_dir}/{caption_metadata_filename}"' + if full_path: + run_cmd += f" --full_path" + + log.info(run_cmd) + + env = os.environ.copy() + env['PYTHONPATH'] = fr"{scriptdir}{os.pathsep}{scriptdir}/sd-scripts{os.pathsep}{env.get('PYTHONPATH', '')}" + + if not print_only_bool: + # Run the command + subprocess.run(run_cmd, shell=True, env=env) + + # create images buckets + if generate_image_buckets: + run_cmd = fr'"{PYTHON}" "{scriptdir}/sd-scripts/finetune/prepare_buckets_latents.py"' + run_cmd += fr' "{image_folder}"' + run_cmd += fr' "{train_dir}/{caption_metadata_filename}"' + run_cmd += fr' "{train_dir}/{latent_metadata_filename}"' + run_cmd += fr' "{pretrained_model_name_or_path}"' + run_cmd += f" --batch_size={batch_size}" + run_cmd += f" --max_resolution={max_resolution}" + run_cmd += f" --min_bucket_reso={min_bucket_reso}" + run_cmd += f" --max_bucket_reso={max_bucket_reso}" + run_cmd += f" --mixed_precision={mixed_precision}" + # if flip_aug: + # run_cmd += f' --flip_aug' + if full_path: + run_cmd += f" --full_path" + if sdxl_checkbox and sdxl_no_half_vae: + log.info("Using mixed_precision = no because no half vae is selected...") + run_cmd += f' --mixed_precision="no"' + + log.info(run_cmd) + + env = os.environ.copy() + env['PYTHONPATH'] = fr"{scriptdir}{os.pathsep}{scriptdir}/sd-scripts{os.pathsep}{env.get('PYTHONPATH', '')}" + + if not print_only_bool: + # Run the command + 
subprocess.run(run_cmd, shell=True, env=env)
+
+        image_num = len(
+            [
+                f
+                for f, lower_f in (
+                    (file, file.lower()) for file in os.listdir(image_folder)
+                )
+                if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp"))
+            ]
+        )
+        log.info(f"image_num = {image_num}")
+
+        repeats = int(image_num) * int(dataset_repeats)
+        log.info(f"repeats = {str(repeats)}")
+
+        # calculate max_train_steps
+        max_train_steps = int(
+            math.ceil(
+                float(repeats)
+                / int(train_batch_size)
+                / int(gradient_accumulation_steps)
+                * int(epoch)
+            )
+        )
-        if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp"))
-        ]
-    )
-    log.info(f"image_num = {image_num}")
-
-    repeats = int(image_num) * int(dataset_repeats)
-    log.info(f"repeats = {str(repeats)}")
-
-    # calculate max_train_steps
-    max_train_steps = int(
-        math.ceil(
-            float(repeats)
-            / int(train_batch_size)
-            / int(gradient_accumulation_steps)
-            * int(epoch)
         )
-    )

-    # Divide by two because flip augmentation create two copied of the source images
-    if flip_aug:
-        max_train_steps = int(math.ceil(float(max_train_steps) / 2))
+        # Divide by two because flip augmentation creates two copies of the source images
+        if flip_aug and max_train_steps:
+            max_train_steps = int(math.ceil(float(max_train_steps) / 2))

-    log.info(f"max_train_steps = {max_train_steps}")
-
-    lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
+    if max_train_steps != "":
+        log.info(f"max_train_steps = {max_train_steps}")
+        lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
+    else:
+        lr_warmup_steps = 0
     log.info(f"lr_warmup_steps = {lr_warmup_steps}")

     run_cmd = "accelerate launch"
@@ -581,6 +593,7 @@ def train_model(
         "caption_extension": caption_extension,
         "clip_skip": clip_skip,
         "color_aug": color_aug,
+        "dataset_config": dataset_config,
         "dataset_repeats": dataset_repeats,
         "enable_bucket": True,
         "flip_aug": flip_aug,
@@ -861,6 +874,7 @@ def list_presets(path):
         train_dir,
         image_folder,
         output_dir,
+        source_model.dataset_config,
         logging_dir,
         max_resolution,
         min_bucket_reso,
@@ -906,6 +920,7 @@ def list_presets(path):
         output_name,
         advanced_training.max_token_length,
         basic_training.max_train_epochs,
+        basic_training.max_train_steps,
         advanced_training.max_data_loader_n_workers,
         advanced_training.full_fp16,
         advanced_training.color_aug,
diff --git a/kohya_gui/lora_gui.py b/kohya_gui/lora_gui.py
index 82ea563ba..69ca484d0 100644
--- a/kohya_gui/lora_gui.py
+++ b/kohya_gui/lora_gui.py
@@ -55,6 +55,32 @@
 presets_dir = rf"{scriptdir}/presets"

+def update_network_args_with_kohya_lora_vars(
+    network_args: str, kohya_lora_var_list: list, vars: dict
+) -> str:
+    """
+    Update network arguments with Kohya LoRA variables.
+
+    Args:
+        network_args (str): The network arguments.
+        kohya_lora_var_list (list): The list of Kohya LoRA variables.
+        vars (iterable): An iterable of (name, value) pairs, typically vars().items().
+
+    Returns:
+        str: The updated network arguments.
+    """
+    # Filter out variables that are in the Kohya LoRA variable list and have a value
+    kohya_lora_vars = {
+        key: value for key, value in vars if key in kohya_lora_var_list and value
+    }
+
+    # Iterate over the Kohya LoRA variables and append them to the network arguments
+    for key, value in kohya_lora_vars.items():
+        # Append each variable as a key-value pair to the network_args
+        network_args += f' {key}="{value}"'
+    return network_args
+
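A hypothetical call mirroring how the trainers use the helper above; the variable names and values here are invented for illustration:

```python
args = update_network_args_with_kohya_lora_vars(
    network_args="",
    kohya_lora_var_list=["conv_dim", "conv_alpha", "rank_dropout"],
    vars={"conv_dim": 4, "conv_alpha": 1, "rank_dropout": None}.items(),
)
print(args)  # ' conv_dim="4" conv_alpha="1"'  (None/empty values are dropped)
```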
+ """ + # Filter out variables that are in the Kohya LoRA variable list and have a value + kohya_lora_vars = { + key: value for key, value in vars if key in kohya_lora_var_list and value + } + + # Iterate over the Kohya LoRA variables and append them to the network arguments + for key, value in kohya_lora_vars.items(): + # Append each variable as a key-value pair to the network_args + network_args += f' {key}="{value}"' + return network_args + + def save_configuration( save_as, file_path, @@ -66,6 +92,7 @@ def save_configuration( train_data_dir, reg_data_dir, output_dir, + dataset_config, max_resolution, learning_rate, lr_scheduler, @@ -138,6 +165,8 @@ def save_configuration( multires_noise_discount, LoRA_type, factor, + bypass_mode, + dora_wd, use_cp, use_tucker, use_scalar, @@ -193,18 +222,24 @@ def save_configuration( original_file_path = file_path + # Determine whether to save as a new file or overwrite the existing file save_as_bool = True if save_as.get("label") == "True" else False + # If saving as a new file, get the file path for saving if save_as_bool: log.info("Save as...") file_path = get_saveasfile_path(file_path) + # If not saving as a new file, check if a file path was provided else: log.info("Save...") + # If no file path was provided, get the file path for saving if file_path == None or file_path == "": file_path = get_saveasfile_path(file_path) - # log.info(file_path) + # Log the file path for debugging purposes + log.debug(file_path) + # If no file path was provided, return the original file path if file_path == None or file_path == "": return original_file_path # In case a file_path was provided and the user decide to cancel the open action @@ -215,12 +250,14 @@ def save_configuration( if not os.path.exists(destination_directory): os.makedirs(destination_directory) + # Save the configuration file SaveConfigFile( parameters=parameters, file_path=file_path, exclusion=["file_path", "save_as"], ) + # Return the file path of the saved configuration return file_path @@ -236,6 +273,7 @@ def open_configuration( train_data_dir, reg_data_dir, output_dir, + dataset_config, max_resolution, learning_rate, lr_scheduler, @@ -308,6 +346,8 @@ def open_configuration( multires_noise_discount, LoRA_type, factor, + bypass_mode, + dora_wd, use_cp, use_tucker, use_scalar, @@ -362,10 +402,13 @@ def open_configuration( # Get list of function parameters and values parameters = list(locals().items()) + # Convert 'ask_for_file' and 'apply_preset' from string to boolean based on their 'label' value + # This corrects a critical oversight in the original code, where `.get("label")` method calls were + # made on boolean variables instead of dictionaries ask_for_file = True if ask_for_file.get("label") == "True" else False apply_preset = True if apply_preset.get("label") == "True" else False - # Check if we are "applying" a preset or a config + # Determines if a preset configuration is being applied if apply_preset: if training_preset != "none": log.info(f"Applying preset {training_preset}...") @@ -378,11 +421,14 @@ def open_configuration( # Update the value of `training_preset` by directly assigning an empty string value parameters[training_preset_index] = ("training_preset", "none") + # Store the original file path for potential reuse original_file_path = file_path + # Request a file path from the user if required if ask_for_file: file_path = get_file_path(file_path) + # Proceed if the file path is valid (not empty or None) if not file_path == "" and not file_path == None: # Load variables from JSON 
file with open(file_path, "r") as f: @@ -392,22 +438,20 @@ def open_configuration( # Update values to fix deprecated options, set appropriate optimizer if it is set to True, etc. my_data = update_my_data(my_data) else: + # Reset the file path to the original if the operation was cancelled or invalid file_path = original_file_path # In case a file_path was provided and the user decides to cancel the open action - my_data = {} + my_data = {} # Initialize an empty dict if no data was loaded values = [file_path] + # Iterate over parameters to set their values from `my_data` or use default if not found for key, value in parameters: - # Set the value in the dictionary to the corresponding value in `my_data`, or the default value if not found if not key in ["ask_for_file", "apply_preset", "file_path"]: json_value = my_data.get(key) - # if isinstance(json_value, str) and json_value == '': - # # If the JSON value is an empty string, use the default value - # values.append(value) - # else: - # Otherwise, use the JSON value if not None, otherwise use the default value + # Append the value from JSON if present; otherwise, use the parameter's default value values.append(json_value if json_value is not None else value) - # This next section is about making the LoCon parameters visible if LoRA_type = 'Standard' + # Display LoCon parameters based on the 'LoRA_type' from the loaded data + # This section dynamically adjusts visibility of certain parameters in the UI if my_data.get("LoRA_type", "Standard") in { "LoCon", "Kohya DyLoRA", @@ -438,6 +482,7 @@ def train_model( train_data_dir, reg_data_dir, output_dir, + dataset_config, max_resolution, learning_rate, lr_scheduler, @@ -510,6 +555,8 @@ def train_model( multires_noise_discount, LoRA_type, factor, + bypass_mode, + dora_wd, use_cp, use_tucker, use_scalar, @@ -579,6 +626,7 @@ def train_model( resume=resume, vae=vae, lora_network_weights=lora_network_weights, + dataset_config=dataset_config, ): return @@ -599,8 +647,9 @@ def train_model( ) return - if not os.path.exists(output_dir): - os.makedirs(output_dir) + if output_dir != "": + if not os.path.exists(output_dir): + os.makedirs(output_dir) if stop_text_encoder_training_pct > 0: output_message( @@ -620,78 +669,87 @@ def train_model( if unet_lr == "": unet_lr = 0 - # Get a list of all subfolders in train_data_dir - subfolders = [ - f - for f in os.listdir(train_data_dir) - if os.path.isdir(os.path.join(train_data_dir, f)) - ] - - total_steps = 0 - - # Loop through each subfolder and extract the number of repeats - for folder in subfolders: - try: - # Extract the number of repeats from the folder name - repeats = int(folder.split("_")[0]) - - # Count the number of images in the folder - num_images = len( - [ - f - for f, lower_f in ( - (file, file.lower()) - for file in os.listdir(os.path.join(train_data_dir, folder)) - ) - if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp")) - ] - ) + if dataset_config: + log.info( + "Dataset config toml file used, skipping total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps calculations..." 
+ ) + else: + # Get a list of all subfolders in train_data_dir + subfolders = [ + f + for f in os.listdir(train_data_dir) + if os.path.isdir(os.path.join(train_data_dir, f)) + ] + + total_steps = 0 + + # Loop through each subfolder and extract the number of repeats + for folder in subfolders: + try: + # Extract the number of repeats from the folder name + repeats = int(folder.split("_")[0]) + + # Count the number of images in the folder + num_images = len( + [ + f + for f, lower_f in ( + (file, file.lower()) + for file in os.listdir(os.path.join(train_data_dir, folder)) + ) + if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp")) + ] + ) - log.info(f"Folder {folder}: {num_images} images found") + log.info(f"Folder {folder}: {num_images} images found") - # Calculate the total number of steps for this folder - steps = repeats * num_images + # Calculate the total number of steps for this folder + steps = repeats * num_images - # log.info the result - log.info(f"Folder {folder}: {steps} steps") + # log.info the result + log.info(f"Folder {folder}: {steps} steps") - total_steps += steps + total_steps += steps - except ValueError: - # Handle the case where the folder name does not contain an underscore - log.info(f"Error: '{folder}' does not contain an underscore, skipping...") + except ValueError: + # Handle the case where the folder name does not contain an underscore + log.info( + f"Error: '{folder}' does not contain an underscore, skipping..." + ) - if reg_data_dir == "": - reg_factor = 1 - else: - log.warning( - "Regularisation images are used... Will double the number of steps required..." - ) - reg_factor = 2 - - log.info(f"Total steps: {total_steps}") - log.info(f"Train batch size: {train_batch_size}") - log.info(f"Gradient accumulation steps: {gradient_accumulation_steps}") - log.info(f"Epoch: {epoch}") - log.info(f"Regulatization factor: {reg_factor}") - - if max_train_steps == "" or max_train_steps == "0": - # calculate max_train_steps - max_train_steps = int( - math.ceil( - float(total_steps) - / int(train_batch_size) - / int(gradient_accumulation_steps) - * int(epoch) - * int(reg_factor) + if reg_data_dir == "": + reg_factor = 1 + else: + log.warning( + "Regularisation images are used... Will double the number of steps required..." 
+            )
+            reg_factor = 2
+
+        log.info(f"Total steps: {total_steps}")
+        log.info(f"Train batch size: {train_batch_size}")
+        log.info(f"Gradient accumulation steps: {gradient_accumulation_steps}")
+        log.info(f"Epoch: {epoch}")
+        log.info(f"Regularization factor: {reg_factor}")
+
+        if max_train_steps == "" or max_train_steps == "0":
+            # calculate max_train_steps
+            max_train_steps = int(
+                math.ceil(
+                    float(total_steps)
+                    / int(train_batch_size)
+                    / int(gradient_accumulation_steps)
+                    * int(epoch)
+                    * int(reg_factor)
+                )
+            )
+            log.info(
+                f"max_train_steps ({total_steps} / {train_batch_size} / {gradient_accumulation_steps} * {epoch} * {reg_factor}) = {max_train_steps}"
+            )

     # calculate stop encoder training
-    if stop_text_encoder_training_pct == None:
+    if stop_text_encoder_training_pct == None or max_train_steps == "" or max_train_steps == "0":
         stop_text_encoder_training = 0
     else:
         stop_text_encoder_training = math.ceil(
@@ -699,7 +757,10 @@ def train_model(
     )
     log.info(f"stop_text_encoder_training = {stop_text_encoder_training}")

-    lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
+    if not max_train_steps == "":
+        lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
+    else:
+        lr_warmup_steps = 0
     log.info(f"lr_warmup_steps = {lr_warmup_steps}")

     run_cmd = "accelerate launch"
@@ -717,37 +778,43 @@ def train_model(
     else:
         run_cmd += rf' "{scriptdir}/sd-scripts/train_network.py"'

+    network_args = ""
+
+    if LoRA_type == "LyCORIS/BOFT":
+        network_module = "lycoris.kohya"
+        network_args = f' preset="{LyCORIS_preset}" conv_dim="{conv_dim}" conv_alpha="{conv_alpha}" module_dropout="{module_dropout}" use_tucker="{use_tucker}" use_scalar="{use_scalar}" rank_dropout="{rank_dropout}" rank_dropout_scale="{rank_dropout_scale}" constrain="{constrain}" rescaled="{rescaled}" algo="boft" train_norm="{train_norm}"'
+
     if LoRA_type == "LyCORIS/Diag-OFT":
         network_module = "lycoris.kohya"
-        network_args = f' "preset={LyCORIS_preset}" "conv_dim={conv_dim}" "conv_alpha={conv_alpha}" "module_dropout={module_dropout}" "use_tucker={use_tucker}" "use_scalar={use_scalar}" "rank_dropout_scale={rank_dropout_scale}" "constrain={constrain}" "rescaled={rescaled}" "algo=diag-oft" "train_norm={train_norm}"'
+        network_args = f' preset="{LyCORIS_preset}" conv_dim="{conv_dim}" conv_alpha="{conv_alpha}" module_dropout="{module_dropout}" use_tucker="{use_tucker}" use_scalar="{use_scalar}" rank_dropout="{rank_dropout}" rank_dropout_scale="{rank_dropout_scale}" constrain="{constrain}" rescaled="{rescaled}" algo="diag-oft" train_norm="{train_norm}"'

     if LoRA_type == "LyCORIS/DyLoRA":
         network_module = "lycoris.kohya"
-        network_args = f' "preset={LyCORIS_preset}" "conv_dim={conv_dim}" "conv_alpha={conv_alpha}" "use_tucker={use_tucker}" "block_size={unit}" "rank_dropout={rank_dropout}" "module_dropout={module_dropout}" "algo=dylora" "train_norm={train_norm}"'
+        network_args = f' preset="{LyCORIS_preset}" conv_dim="{conv_dim}" conv_alpha="{conv_alpha}" use_tucker="{use_tucker}" block_size="{unit}" rank_dropout="{rank_dropout}" module_dropout="{module_dropout}" algo="dylora" train_norm="{train_norm}"'

     if LoRA_type == "LyCORIS/GLoRA":
         network_module = "lycoris.kohya"
-        network_args = f' "preset={LyCORIS_preset}" "conv_dim={conv_dim}" "conv_alpha={conv_alpha}" "rank_dropout={rank_dropout}" "module_dropout={module_dropout}" 
"rank_dropout_scale={rank_dropout_scale}" "algo=glora" "train_norm={train_norm}"' + network_args = f' preset="{LyCORIS_preset}" conv_dim="{conv_dim}" conv_alpha="{conv_alpha}" rank_dropout="{rank_dropout}" module_dropout="{module_dropout}" rank_dropout_scale="{rank_dropout_scale}" algo="glora" train_norm="{train_norm}"' if LoRA_type == "LyCORIS/iA3": network_module = "lycoris.kohya" - network_args = f' "preset={LyCORIS_preset}" "conv_dim={conv_dim}" "conv_alpha={conv_alpha}" "train_on_input={train_on_input}" "algo=ia3"' + network_args = f' preset="{LyCORIS_preset}" conv_dim="{conv_dim}" conv_alpha="{conv_alpha}" train_on_input="{train_on_input}" algo="ia3"' if LoRA_type == "LoCon" or LoRA_type == "LyCORIS/LoCon": network_module = "lycoris.kohya" - network_args = f' "preset={LyCORIS_preset}" "conv_dim={conv_dim}" "conv_alpha={conv_alpha}" "rank_dropout={rank_dropout}" "module_dropout={module_dropout}" "use_tucker={use_tucker}" "use_scalar={use_scalar}" "rank_dropout_scale={rank_dropout_scale}" "algo=locon" "train_norm={train_norm}"' + network_args = f' preset="{LyCORIS_preset}" conv_dim="{conv_dim}" conv_alpha="{conv_alpha}" rank_dropout="{rank_dropout}" bypass_mode="{bypass_mode}" dora_wd="{dora_wd}" module_dropout="{module_dropout}" use_tucker="{use_tucker}" use_scalar="{use_scalar}" rank_dropout_scale="{rank_dropout_scale}" algo="locon" train_norm="{train_norm}"' if LoRA_type == "LyCORIS/LoHa": network_module = "lycoris.kohya" - network_args = f' "preset={LyCORIS_preset}" "conv_dim={conv_dim}" "conv_alpha={conv_alpha}" "rank_dropout={rank_dropout}" "module_dropout={module_dropout}" "use_tucker={use_tucker}" "use_scalar={use_scalar}" "rank_dropout_scale={rank_dropout_scale}" "algo=loha" "train_norm={train_norm}"' + network_args = f' preset="{LyCORIS_preset}" conv_dim="{conv_dim}" conv_alpha="{conv_alpha}" rank_dropout="{rank_dropout}" bypass_mode="{bypass_mode}" dora_wd="{dora_wd}" module_dropout="{module_dropout}" use_tucker="{use_tucker}" use_scalar="{use_scalar}" rank_dropout_scale="{rank_dropout_scale}" algo="loha" train_norm="{train_norm}"' if LoRA_type == "LyCORIS/LoKr": network_module = "lycoris.kohya" - network_args = f' "preset={LyCORIS_preset}" "conv_dim={conv_dim}" "conv_alpha={conv_alpha}" "rank_dropout={rank_dropout}" "module_dropout={module_dropout}" "factor={factor}" "use_cp={use_cp}" "use_scalar={use_scalar}" "decompose_both={decompose_both}" "rank_dropout_scale={rank_dropout_scale}" "algo=lokr" "train_norm={train_norm}"' + network_args = f' preset="{LyCORIS_preset}" conv_dim="{conv_dim}" conv_alpha="{conv_alpha}" rank_dropout="{rank_dropout}" bypass_mode="{bypass_mode}" dora_wd="{dora_wd}" module_dropout="{module_dropout}" factor="{factor}" use_cp="{use_cp}" use_scalar="{use_scalar}" decompose_both="{decompose_both}" rank_dropout_scale="{rank_dropout_scale}" algo="lokr" train_norm="{train_norm}"' if LoRA_type == "LyCORIS/Native Fine-Tuning": network_module = "lycoris.kohya" - network_args = f' "preset={LyCORIS_preset}" "rank_dropout={rank_dropout}" "module_dropout={module_dropout}" "use_tucker={use_tucker}" "use_scalar={use_scalar}" "rank_dropout_scale={rank_dropout_scale}" "algo=full" "train_norm={train_norm}"' + network_args = f' preset="{LyCORIS_preset}" rank_dropout="{rank_dropout}" module_dropout="{module_dropout}" use_tucker="{use_tucker}" use_scalar="{use_scalar}" rank_dropout_scale="{rank_dropout_scale}" algo="full" train_norm="{train_norm}"' if LoRA_type in ["Kohya LoCon", "Standard"]: kohya_lora_var_list = [ @@ -762,25 +829,14 @@ def train_model( "rank_dropout", 
"module_dropout", ] - network_module = "networks.lora" - kohya_lora_vars = { - key: value - for key, value in vars().items() - if key in kohya_lora_var_list and value - } - - network_args = "" - if LoRA_type == "Kohya LoCon": - network_args += f' conv_dim="{conv_dim}" conv_alpha="{conv_alpha}"' - - for key, value in kohya_lora_vars.items(): - if value: - network_args += f' {key}="{value}"' + network_args += update_network_args_with_kohya_lora_vars( + network_args=network_args, + kohya_lora_var_list=kohya_lora_var_list, + vars=vars().items(), + ) - if LoRA_type in [ - "LoRA-FA", - ]: + if LoRA_type in ["LoRA-FA"]: kohya_lora_var_list = [ "down_lr_weight", "mid_lr_weight", @@ -793,21 +849,12 @@ def train_model( "rank_dropout", "module_dropout", ] - network_module = "networks.lora_fa" - kohya_lora_vars = { - key: value - for key, value in vars().items() - if key in kohya_lora_var_list and value - } - - network_args = "" - if LoRA_type == "Kohya LoCon": - network_args += f' conv_dim="{conv_dim}" conv_alpha="{conv_alpha}"' - - for key, value in kohya_lora_vars.items(): - if value: - network_args += f' {key}="{value}"' + network_args += update_network_args_with_kohya_lora_vars( + network_args=network_args, + kohya_lora_var_list=kohya_lora_var_list, + vars=vars().items(), + ) if LoRA_type in ["Kohya DyLoRA"]: kohya_lora_var_list = [ @@ -825,153 +872,134 @@ def train_model( "module_dropout", "unit", ] - network_module = "networks.dylora" - kohya_lora_vars = { - key: value - for key, value in vars().items() - if key in kohya_lora_var_list and value - } - - network_args = "" - - for key, value in kohya_lora_vars.items(): - if value: - network_args += f' {key}="{value}"' - - network_train_text_encoder_only = False - network_train_unet_only = False - - # Convert learning rates to float once and store the result for re-use - if text_encoder_lr is None: - output_message( - msg="Please input valid Text Encoder learning rate (between 0 and 1)", - headless=headless_bool, + network_args += update_network_args_with_kohya_lora_vars( + network_args=network_args, + kohya_lora_var_list=kohya_lora_var_list, + vars=vars().items(), ) - return - if unet_lr is None: + # Convert learning rates to float once and store the result for re-use + learning_rate = float(learning_rate) if learning_rate is not None else 0.0 + text_encoder_lr_float = float(text_encoder_lr) if text_encoder_lr is not None else 0.0 + unet_lr_float = float(unet_lr) if unet_lr is not None else 0.0 + + # Determine the training configuration based on learning rate values + # Sets flags for training specific components based on the provided learning rates. 
+ if float(learning_rate) == unet_lr_float == text_encoder_lr_float == 0: output_message( - msg="Please input valid Unet learning rate (between 0 and 1)", - headless=headless_bool, + msg="Please input learning rate values.", headless=headless_bool ) return - text_encoder_lr_float = float(text_encoder_lr) - unet_lr_float = float(unet_lr) - - - # Determine the training configuration based on learning rate values - if text_encoder_lr_float == 0 and unet_lr_float == 0: - if float(learning_rate) == 0: - output_message( - msg="Please input learning rate values.", headless=headless_bool - ) - return - elif text_encoder_lr_float != 0 and unet_lr_float == 0: - network_train_text_encoder_only = True - elif text_encoder_lr_float == 0 and unet_lr_float != 0: - network_train_unet_only = True - # If both learning rates are non-zero, no specific flags need to be set - - run_cmd += run_cmd_advanced_training( - adaptive_noise_scale=adaptive_noise_scale, - additional_parameters=additional_parameters, - bucket_no_upscale=bucket_no_upscale, - bucket_reso_steps=bucket_reso_steps, - cache_latents=cache_latents, - cache_latents_to_disk=cache_latents_to_disk, - cache_text_encoder_outputs=( + # Flag to train text encoder only if its learning rate is non-zero and unet's is zero. + network_train_text_encoder_only = text_encoder_lr_float != 0 and unet_lr_float == 0 + # Flag to train unet only if its learning rate is non-zero and text encoder's is zero. + network_train_unet_only = text_encoder_lr_float == 0 and unet_lr_float != 0 + + + # Define a dictionary of parameters + run_cmd_params = { + "adaptive_noise_scale": adaptive_noise_scale, + "additional_parameters": additional_parameters, + "bucket_no_upscale": bucket_no_upscale, + "bucket_reso_steps": bucket_reso_steps, + "cache_latents": cache_latents, + "cache_latents_to_disk": cache_latents_to_disk, + "cache_text_encoder_outputs": ( True if sdxl and sdxl_cache_text_encoder_outputs else None ), - caption_dropout_every_n_epochs=caption_dropout_every_n_epochs, - caption_dropout_rate=caption_dropout_rate, - caption_extension=caption_extension, - clip_skip=clip_skip, - color_aug=color_aug, - debiased_estimation_loss=debiased_estimation_loss, - dim_from_weights=dim_from_weights, - enable_bucket=enable_bucket, - epoch=epoch, - flip_aug=flip_aug, - fp8_base=fp8_base, - full_bf16=full_bf16, - full_fp16=full_fp16, - gradient_accumulation_steps=gradient_accumulation_steps, - gradient_checkpointing=gradient_checkpointing, - keep_tokens=keep_tokens, - learning_rate=learning_rate, - logging_dir=logging_dir, - log_tracker_name=log_tracker_name, - log_tracker_config=log_tracker_config, - lora_network_weights=lora_network_weights, - lr_scheduler=lr_scheduler, - lr_scheduler_args=lr_scheduler_args, - lr_scheduler_num_cycles=lr_scheduler_num_cycles, - lr_scheduler_power=lr_scheduler_power, - lr_warmup_steps=lr_warmup_steps, - max_bucket_reso=max_bucket_reso, - max_data_loader_n_workers=max_data_loader_n_workers, - max_grad_norm=max_grad_norm, - max_resolution=max_resolution, - max_timestep=max_timestep, - max_token_length=max_token_length, - max_train_epochs=max_train_epochs, - max_train_steps=max_train_steps, - mem_eff_attn=mem_eff_attn, - min_bucket_reso=min_bucket_reso, - min_snr_gamma=min_snr_gamma, - min_timestep=min_timestep, - mixed_precision=mixed_precision, - multires_noise_discount=multires_noise_discount, - multires_noise_iterations=multires_noise_iterations, - network_alpha=network_alpha, - network_args=network_args, - network_dim=network_dim, - 
network_dropout=network_dropout, - network_module=network_module, - network_train_unet_only=network_train_unet_only, - network_train_text_encoder_only=network_train_text_encoder_only, - no_half_vae=True if sdxl and sdxl_no_half_vae else None, - # no_token_padding=no_token_padding, - noise_offset=noise_offset, - noise_offset_type=noise_offset_type, - optimizer=optimizer, - optimizer_args=optimizer_args, - output_dir=output_dir, - output_name=output_name, - persistent_data_loader_workers=persistent_data_loader_workers, - pretrained_model_name_or_path=pretrained_model_name_or_path, - prior_loss_weight=prior_loss_weight, - random_crop=random_crop, - reg_data_dir=reg_data_dir, - resume=resume, - save_every_n_epochs=save_every_n_epochs, - save_every_n_steps=save_every_n_steps, - save_last_n_steps=save_last_n_steps, - save_last_n_steps_state=save_last_n_steps_state, - save_model_as=save_model_as, - save_precision=save_precision, - save_state=save_state, - scale_v_pred_loss_like_noise_pred=scale_v_pred_loss_like_noise_pred, - scale_weight_norms=scale_weight_norms, - seed=seed, - shuffle_caption=shuffle_caption, - stop_text_encoder_training=stop_text_encoder_training, - text_encoder_lr=text_encoder_lr, - train_batch_size=train_batch_size, - train_data_dir=train_data_dir, - training_comment=training_comment, - unet_lr=unet_lr, - use_wandb=use_wandb, - v2=v2, - v_parameterization=v_parameterization, - v_pred_like_loss=v_pred_like_loss, - vae=vae, - vae_batch_size=vae_batch_size, - wandb_api_key=wandb_api_key, - wandb_run_name=wandb_run_name, - weighted_captions=weighted_captions, - xformers=xformers, - ) + "caption_dropout_every_n_epochs": caption_dropout_every_n_epochs, + "caption_dropout_rate": caption_dropout_rate, + "caption_extension": caption_extension, + "clip_skip": clip_skip, + "color_aug": color_aug, + "dataset_config": dataset_config, + "debiased_estimation_loss": debiased_estimation_loss, + "dim_from_weights": dim_from_weights, + "enable_bucket": enable_bucket, + "epoch": epoch, + "flip_aug": flip_aug, + "fp8_base": fp8_base, + "full_bf16": full_bf16, + "full_fp16": full_fp16, + "gradient_accumulation_steps": gradient_accumulation_steps, + "gradient_checkpointing": gradient_checkpointing, + "keep_tokens": keep_tokens, + "learning_rate": learning_rate, + "logging_dir": logging_dir, + "log_tracker_name": log_tracker_name, + "log_tracker_config": log_tracker_config, + "lora_network_weights": lora_network_weights, + "lr_scheduler": lr_scheduler, + "lr_scheduler_args": lr_scheduler_args, + "lr_scheduler_num_cycles": lr_scheduler_num_cycles, + "lr_scheduler_power": lr_scheduler_power, + "lr_warmup_steps": lr_warmup_steps, + "max_bucket_reso": max_bucket_reso, + "max_data_loader_n_workers": max_data_loader_n_workers, + "max_grad_norm": max_grad_norm, + "max_resolution": max_resolution, + "max_timestep": max_timestep, + "max_token_length": max_token_length, + "max_train_epochs": max_train_epochs, + "max_train_steps": max_train_steps, + "mem_eff_attn": mem_eff_attn, + "min_bucket_reso": min_bucket_reso, + "min_snr_gamma": min_snr_gamma, + "min_timestep": min_timestep, + "mixed_precision": mixed_precision, + "multires_noise_discount": multires_noise_discount, + "multires_noise_iterations": multires_noise_iterations, + "network_alpha": network_alpha, + "network_args": network_args, + "network_dim": network_dim, + "network_dropout": network_dropout, + "network_module": network_module, + "network_train_unet_only": network_train_unet_only, + "network_train_text_encoder_only": 
network_train_text_encoder_only, + "no_half_vae": True if sdxl and sdxl_no_half_vae else None, + "noise_offset": noise_offset, + "noise_offset_type": noise_offset_type, + "optimizer": optimizer, + "optimizer_args": optimizer_args, + "output_dir": output_dir, + "output_name": output_name, + "persistent_data_loader_workers": persistent_data_loader_workers, + "pretrained_model_name_or_path": pretrained_model_name_or_path, + "prior_loss_weight": prior_loss_weight, + "random_crop": random_crop, + "reg_data_dir": reg_data_dir, + "resume": resume, + "save_every_n_epochs": save_every_n_epochs, + "save_every_n_steps": save_every_n_steps, + "save_last_n_steps": save_last_n_steps, + "save_last_n_steps_state": save_last_n_steps_state, + "save_model_as": save_model_as, + "save_precision": save_precision, + "save_state": save_state, + "scale_v_pred_loss_like_noise_pred": scale_v_pred_loss_like_noise_pred, + "scale_weight_norms": scale_weight_norms, + "seed": seed, + "shuffle_caption": shuffle_caption, + "stop_text_encoder_training": stop_text_encoder_training, + "text_encoder_lr": text_encoder_lr, + "train_batch_size": train_batch_size, + "train_data_dir": train_data_dir, + "training_comment": training_comment, + "unet_lr": unet_lr, + "use_wandb": use_wandb, + "v2": v2, + "v_parameterization": v_parameterization, + "v_pred_like_loss": v_pred_like_loss, + "vae": vae, + "vae_batch_size": vae_batch_size, + "wandb_api_key": wandb_api_key, + "wandb_run_name": wandb_run_name, + "weighted_captions": weighted_captions, + "xformers": xformers, + } + + # Use the ** syntax to unpack the dictionary when calling the function + run_cmd += run_cmd_advanced_training(**run_cmd_params) run_cmd += run_cmd_sample( sample_every_n_steps, @@ -1012,15 +1040,6 @@ def train_model( # Run the command executor.execute_command(run_cmd=run_cmd, env=env) - # # check if output_dir/last is a folder... 
therefore it is a diffuser model - # last_dir = pathlib.Path(f'{output_dir}/{output_name}') - - # if not last_dir.is_dir(): - # # Copy inference model for v2 if required - # save_inference_file( - # output_dir, v2, v_parameterization, output_name - # ) - def lora_tab( train_data_dir_input=gr.Dropdown(), @@ -1087,9 +1106,10 @@ def list_presets(path): "Kohya DyLoRA", "Kohya LoCon", "LoRA-FA", - "LyCORIS/DyLoRA", "LyCORIS/iA3", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", + "LyCORIS/DyLoRA", "LyCORIS/GLoRA", "LyCORIS/LoCon", "LyCORIS/LoHa", @@ -1165,69 +1185,84 @@ def list_presets(path): # Add SDXL Parameters sdxl_params = SDXLParameters(source_model.sdxl_checkbox) - with gr.Row(): - factor = gr.Slider( - label="LoKr factor", - value=-1, - minimum=-1, - maximum=64, - step=1, - visible=False, - ) - use_cp = gr.Checkbox( - value=False, - label="Use CP decomposition", - info="A two-step approach utilizing tensor decomposition and fine-tuning to accelerate convolution layers in large neural networks, resulting in significant CPU speedups with minor accuracy drops.", - visible=False, - ) - use_tucker = gr.Checkbox( - value=False, - label="Use Tucker decomposition", - info="Efficiently decompose tensor shapes, resulting in a sequence of convolution layers with varying dimensions and Hadamard product implementation through multiplication of two distinct tensors.", - visible=False, - ) - use_scalar = gr.Checkbox( - value=False, - label="Use Scalar", - info="Train an additional scalar in front of the weight difference, use a different weight initialization strategy.", - visible=False, - ) - rank_dropout_scale = gr.Checkbox( - value=False, - label="Rank Dropout Scale", - info="Adjusts the scale of the rank dropout to maintain the average dropout rate, ensuring more consistent regularization across different layers.", - visible=False, - ) - constrain = gr.Number( - value="0.0", - label="Constrain OFT", - info="Limits the norm of the oft_blocks, ensuring that their magnitude does not exceed a specified threshold, thus controlling the extent of the transformation applied.", - visible=False, - ) - rescaled = gr.Checkbox( - value=False, - label="Rescaled OFT", - info="applies an additional scaling factor to the oft_blocks, allowing for further adjustment of their impact on the model's transformations.", - visible=False, - ) - train_norm = gr.Checkbox( - value=False, - label="Train Norm", - info="Selects trainable layers in a network, but trains normalization layers identically across methods as they lack matrix decomposition.", - visible=False, - ) - decompose_both = gr.Checkbox( - value=False, - label="LoKr decompose both", - info="Controls whether both input and output dimensions of the layer's weights are decomposed into smaller matrices for reparameterization.", - visible=False, - ) - train_on_input = gr.Checkbox( - value=True, - label="iA3 train on input", - info="Set if we change the information going into the system (True) or the information coming out of it (False).", - visible=False, - ) + # LyCORIS Specific parameters + with gr.Accordion("LyCORIS", visible=False) as lycoris_accordion: + with gr.Row(): + factor = gr.Slider( + label="LoKr factor", + value=-1, + minimum=-1, + maximum=64, + step=1, + visible=False, + ) + bypass_mode = gr.Checkbox( + value=False, + label="Bypass mode", + info="Designed for bnb 8bit/4bit linear layer. 
(QLyCORIS)", + visible=False, + ) + dora_wd = gr.Checkbox( + value=False, + label="DoRA Weight Decompose", + info="Enable the DoRA method for these algorithms", + visible=False, + ) + use_cp = gr.Checkbox( + value=False, + label="Use CP decomposition", + info="A two-step approach utilizing tensor decomposition and fine-tuning to accelerate convolution layers in large neural networks, resulting in significant CPU speedups with minor accuracy drops.", + visible=False, + ) + use_tucker = gr.Checkbox( + value=False, + label="Use Tucker decomposition", + info="Efficiently decompose tensor shapes, resulting in a sequence of convolution layers with varying dimensions and Hadamard product implementation through multiplication of two distinct tensors.", + visible=False, + ) + use_scalar = gr.Checkbox( + value=False, + label="Use Scalar", + info="Train an additional scalar in front of the weight difference, use a different weight initialization strategy.", + visible=False, + ) + with gr.Row(): + rank_dropout_scale = gr.Checkbox( + value=False, + label="Rank Dropout Scale", + info="Adjusts the scale of the rank dropout to maintain the average dropout rate, ensuring more consistent regularization across different layers.", + visible=False, + ) + constrain = gr.Number( + value="0.0", + label="Constrain OFT", + info="Limits the norm of the oft_blocks, ensuring that their magnitude does not exceed a specified threshold, thus controlling the extent of the transformation applied.", + visible=False, + ) + rescaled = gr.Checkbox( + value=False, + label="Rescaled OFT", + info="applies an additional scaling factor to the oft_blocks, allowing for further adjustment of their impact on the model's transformations.", + visible=False, + ) + train_norm = gr.Checkbox( + value=False, + label="Train Norm", + info="Selects trainable layers in a network, but trains normalization layers identically across methods as they lack matrix decomposition.", + visible=False, + ) + decompose_both = gr.Checkbox( + value=False, + label="LoKr decompose both", + info="Controls whether both input and output dimensions of the layer's weights are decomposed into smaller matrices for reparameterization.", + visible=False, + ) + train_on_input = gr.Checkbox( + value=True, + label="iA3 train on input", + info="Set if we change the information going into the system (True) or the information coming out of it (False).", + visible=False, + ) with gr.Row() as network_row: network_dim = gr.Slider( @@ -1297,7 +1332,7 @@ def list_presets(path): step=0.01, info="can specify `module_dropout` to dropout each rank with specified probability. 
Recommended range 0.1 to 0.3", ) - with gr.Row(visible=False) as kohya_dylora: + with gr.Row(visible=False): unit = gr.Slider( minimum=1, maximum=64, @@ -1324,6 +1359,7 @@ def update_LoRA_settings( "Kohya DyLoRA", "Kohya LoCon", "LoRA-FA", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/DyLoRA", "LyCORIS/GLoRA", @@ -1343,6 +1379,7 @@ def update_LoRA_settings( "Kohya DyLoRA", "Kohya LoCon", "LoRA-FA", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/DyLoRA", "LyCORIS/LoHa", @@ -1364,16 +1401,6 @@ def update_LoRA_settings( }, }, }, - "kohya_dylora": { - "gr_type": gr.Row, - "update_params": { - "visible": LoRA_type - in { - "Kohya DyLoRA", - "LyCORIS/DyLoRA", - }, - }, - }, "lora_network_weights": { "gr_type": gr.Textbox, "update_params": { @@ -1384,6 +1411,7 @@ def update_LoRA_settings( "Kohya DyLoRA", "Kohya LoCon", "LoRA-FA", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/DyLoRA", "LyCORIS/GLoRA", @@ -1403,6 +1431,7 @@ def update_LoRA_settings( "Kohya DyLoRA", "Kohya LoCon", "LoRA-FA", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/DyLoRA", "LyCORIS/GLoRA", @@ -1422,6 +1451,7 @@ def update_LoRA_settings( "Kohya DyLoRA", "Kohya LoCon", "LoRA-FA", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/DyLoRA", "LyCORIS/GLoRA", @@ -1449,6 +1479,7 @@ def update_LoRA_settings( in { "LyCORIS/LoHa", "LyCORIS/LoKr", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", } else 512 @@ -1465,6 +1496,7 @@ def update_LoRA_settings( in { "LyCORIS/LoHa", "LyCORIS/LoKr", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", } else 512 @@ -1472,6 +1504,28 @@ def update_LoRA_settings( "value": network_dim, # if network_dim > 512 else network_dim, }, }, + "bypass_mode": { + "gr_type": gr.Checkbox, + "update_params": { + "visible": LoRA_type + in { + "LyCORIS/LoCon", + "LyCORIS/LoHa", + "LyCORIS/LoKr", + }, + }, + }, + "dora_wd": { + "gr_type": gr.Checkbox, + "update_params": { + "visible": LoRA_type + in { + "LyCORIS/LoCon", + "LyCORIS/LoHa", + "LyCORIS/LoKr", + }, + }, + }, "use_cp": { "gr_type": gr.Checkbox, "update_params": { @@ -1486,6 +1540,7 @@ def update_LoRA_settings( "update_params": { "visible": LoRA_type in { + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/DyLoRA", "LyCORIS/LoCon", @@ -1499,6 +1554,7 @@ def update_LoRA_settings( "update_params": { "visible": LoRA_type in { + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/LoCon", "LyCORIS/LoHa", @@ -1512,6 +1568,7 @@ def update_LoRA_settings( "update_params": { "visible": LoRA_type in { + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/GLoRA", "LyCORIS/LoCon", @@ -1526,6 +1583,7 @@ def update_LoRA_settings( "update_params": { "visible": LoRA_type in { + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", }, }, @@ -1535,6 +1593,7 @@ def update_LoRA_settings( "update_params": { "visible": LoRA_type in { + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", }, }, @@ -1545,6 +1604,7 @@ def update_LoRA_settings( "visible": LoRA_type in { "LyCORIS/DyLoRA", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/GLoRA", "LyCORIS/LoCon", @@ -1593,6 +1653,7 @@ def update_LoRA_settings( "Kohya DyLoRA", "Kohya LoCon", "LoRA-FA", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/DyLoRA", "LyCORIS/GLoRA", @@ -1611,6 +1672,8 @@ def update_LoRA_settings( in { "LoCon", "Kohya DyLoRA", + "LyCORIS/BOFT", + "LyCORIS/Diag-OFT", "LyCORIS/GLoRA", "LyCORIS/LoCon", "LyCORIS/LoHa", @@ -1628,6 +1691,7 @@ def update_LoRA_settings( "visible": LoRA_type in { "LoCon", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "Kohya DyLoRA", "LyCORIS/GLoRA", @@ -1648,6 +1712,34 @@ def update_LoRA_settings( in { "LyCORIS/DyLoRA", "LyCORIS/iA3", + "LyCORIS/BOFT", + "LyCORIS/Diag-OFT", + 
"LyCORIS/GLoRA", + "LyCORIS/LoCon", + "LyCORIS/LoHa", + "LyCORIS/LoKr", + "LyCORIS/Native Fine-Tuning", + }, + }, + }, + "unit": { + "gr_type": gr.Slider, + "update_params": { + "visible": LoRA_type + in { + "Kohya DyLoRA", + "LyCORIS/DyLoRA", + }, + }, + }, + "lycoris_accordion": { + "gr_type": gr.Accordion, + "update_params": { + "visible": LoRA_type + in { + "LyCORIS/DyLoRA", + "LyCORIS/iA3", + "LyCORIS/BOFT", "LyCORIS/Diag-OFT", "LyCORIS/GLoRA", "LyCORIS/LoCon", @@ -1739,13 +1831,14 @@ def update_LoRA_settings( network_row, convolution_row, kohya_advanced_lora, - kohya_dylora, lora_network_weights, lora_network_weights_file, dim_from_weights, factor, conv_dim, network_dim, + bypass_mode, + dora_wd, use_cp, use_tucker, use_scalar, @@ -1760,6 +1853,8 @@ def update_LoRA_settings( rank_dropout, module_dropout, LyCORIS_preset, + unit, + lycoris_accordion, ], ) @@ -1815,6 +1910,7 @@ def update_LoRA_settings( source_model.train_data_dir, folders.reg_data_dir, folders.output_dir, + source_model.dataset_config, basic_training.max_resolution, basic_training.learning_rate, basic_training.lr_scheduler, @@ -1886,6 +1982,8 @@ def update_LoRA_settings( advanced_training.multires_noise_discount, LoRA_type, factor, + bypass_mode, + dora_wd, use_cp, use_tucker, use_scalar, diff --git a/kohya_gui/textual_inversion_gui.py b/kohya_gui/textual_inversion_gui.py index a9e94408e..f4b59613b 100644 --- a/kohya_gui/textual_inversion_gui.py +++ b/kohya_gui/textual_inversion_gui.py @@ -58,6 +58,7 @@ def save_configuration( train_data_dir, reg_data_dir, output_dir, + dataset_config, max_resolution, learning_rate, lr_scheduler, @@ -192,6 +193,7 @@ def open_configuration( train_data_dir, reg_data_dir, output_dir, + dataset_config, max_resolution, learning_rate, lr_scheduler, @@ -319,6 +321,7 @@ def train_model( train_data_dir, reg_data_dir, output_dir, + dataset_config, max_resolution, learning_rate, lr_scheduler, @@ -424,6 +427,7 @@ def train_model( log_tracker_config=log_tracker_config, resume=resume, vae=vae, + dataset_config=dataset_config, ): return @@ -440,68 +444,71 @@ def train_model( ): return - # Get a list of all subfolders in train_data_dir - subfolders = [ - f - for f in os.listdir(train_data_dir) - if os.path.isdir(os.path.join(train_data_dir, f)) - ] - - total_steps = 0 - - # Loop through each subfolder and extract the number of repeats - for folder in subfolders: - # Extract the number of repeats from the folder name - repeats = int(folder.split("_")[0]) - - # Count the number of images in the folder - num_images = len( - [ - f - for f, lower_f in ( - (file, file.lower()) - for file in os.listdir(os.path.join(train_data_dir, folder)) - ) - if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp")) - ] - ) + if dataset_config: + log.info("Dataset config toml file used, skipping total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps calculations...") + else: + # Get a list of all subfolders in train_data_dir + subfolders = [ + f + for f in os.listdir(train_data_dir) + if os.path.isdir(os.path.join(train_data_dir, f)) + ] - # Calculate the total number of steps for this folder - steps = repeats * num_images - total_steps += steps + total_steps = 0 - # Print the result - log.info(f"Folder {folder}: {steps} steps") + # Loop through each subfolder and extract the number of repeats + for folder in subfolders: + # Extract the number of repeats from the folder name + repeats = int(folder.split("_")[0]) - # Print the result - # log.info(f"{total_steps} total steps") + # Count 
the number of images in the folder
+            num_images = len(
+                [
+                    f
+                    for f, lower_f in (
+                        (file, file.lower())
+                        for file in os.listdir(os.path.join(train_data_dir, folder))
+                    )
+                    if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp"))
+                ]
+            )
 
-    if reg_data_dir == "":
-        reg_factor = 1
-    else:
-        log.info(
-            "Regularisation images are used... Will double the number of steps required..."
-        )
-        reg_factor = 2
-
-    # calculate max_train_steps
-    if max_train_steps == "" or max_train_steps == "0":
-        max_train_steps = int(
-            math.ceil(
-                float(total_steps)
-                / int(train_batch_size)
-                / int(gradient_accumulation_steps)
-                * int(epoch)
-                * int(reg_factor)
+            # Calculate the total number of steps for this folder
+            steps = repeats * num_images
+            total_steps += steps
+
+            # Print the result
+            log.info(f"Folder {folder}: {steps} steps")
+
+        # Print the result
+        # log.info(f"{total_steps} total steps")
+
+        if reg_data_dir == "":
+            reg_factor = 1
+        else:
+            log.info(
+                "Regularisation images are used... Will double the number of steps required..."
+            )
-            )
-        )
-    else:
-        max_train_steps = int(max_train_steps)
+            reg_factor = 2
+
+        # calculate max_train_steps
+        if max_train_steps == "" or max_train_steps == "0":
+            max_train_steps = int(
+                math.ceil(
+                    float(total_steps)
+                    / int(train_batch_size)
+                    / int(gradient_accumulation_steps)
+                    * int(epoch)
+                    * int(reg_factor)
+                )
+            )
+        else:
+            max_train_steps = int(max_train_steps)
 
-    log.info(f"max_train_steps = {max_train_steps}")
+        log.info(f"max_train_steps = {max_train_steps}")
 
     # calculate stop encoder training
-    if stop_text_encoder_training_pct == None:
+    if stop_text_encoder_training_pct is None or max_train_steps == "" or max_train_steps == "0":
         stop_text_encoder_training = 0
     else:
         stop_text_encoder_training = math.ceil(
@@ -509,7 +516,10 @@ def train_model(
     )
     log.info(f"stop_text_encoder_training = {stop_text_encoder_training}")
 
-    lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
+    if max_train_steps != "":
+        lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
+    else:
+        lr_warmup_steps = 0
     log.info(f"lr_warmup_steps = {lr_warmup_steps}")
 
     run_cmd = "accelerate launch"
@@ -538,6 +548,7 @@ def train_model(
         caption_extension=caption_extension,
         clip_skip=clip_skip,
         color_aug=color_aug,
+        dataset_config=dataset_config,
         enable_bucket=enable_bucket,
         epoch=epoch,
         flip_aug=flip_aug,
@@ -843,6 +854,7 @@ def list_embedding_files(path):
         source_model.train_data_dir,
         folders.reg_data_dir,
         folders.output_dir,
+        source_model.dataset_config,
         basic_training.max_resolution,
         basic_training.learning_rate,
         basic_training.lr_scheduler,
diff --git a/setup/setup_windows.py b/setup/setup_windows.py
index b91f027e2..7fe9307a7 100644
--- a/setup/setup_windows.py
+++ b/setup/setup_windows.py
@@ -22,9 +22,12 @@ def cudnn_install():
         "nvidia-cudnn-cu11 8.9.5.29",
         reinstall=True,
     )
-    
+
     # Original path with "..\\venv"
-    original_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..\\venv\\Lib\\site-packages\\nvidia\\cudnn\\bin")
+    original_path = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        "..\\venv\\Lib\\site-packages\\nvidia\\cudnn\\bin",
+    )
     # Normalize the path to resolve "..\\venv"
     cudnn_src = os.path.abspath(original_path)
     cudnn_dest = os.path.join(sysconfig.get_paths()["purelib"], "torch", "lib")
@@ -35,7 +38,7 @@ def cudnn_install():
     # check for different files
     filecmp.clear_cache()
     for file in os.listdir(cudnn_src):
-        if file.lower().endswith('.dll'): # Check if the file is a .dll file
+        if file.lower().endswith(".dll"):  # Check if the file is a .dll file
             src_file = os.path.join(cudnn_src, file)
             dest_file = os.path.join(cudnn_dest, file)
             # if dest file exists, check if it's different
@@ -110,16 +113,18 @@ def install_kohya_ss_torch2(headless: bool = False):
     setup_common.check_repo_version()
     if not setup_common.check_python_version():
         exit(1)
-    
+
     setup_common.update_submodule()
-    
+
     setup_common.install("pip")
 
     setup_common.install_requirements(
         "requirements_windows_torch2.txt", check_no_verify_flag=False
     )
-    
-    setup_common.configure_accelerate(run_accelerate=not headless) # False if headless is True and vice versa
+
+    setup_common.configure_accelerate(
+        run_accelerate=not headless
+    )  # False if headless is True and vice versa
 
 
 def install_bitsandbytes_0_35_0():
@@ -147,6 +152,7 @@ def install_bitsandbytes_0_41_1():
         reinstall=True,
     )
 
+
 def install_bitsandbytes_0_41_2():
     log.info("Installing bitsandbytes 0.41.2...")
     setup_common.install(
@@ -155,21 +161,34 @@ def install_bitsandbytes_0_41_2():
         reinstall=True,
     )
 
+
+def install_triton_2_1_0():
+    log.info("Installing triton 2.1.0...")
+    setup_common.install(
+        "--upgrade https://huggingface.co/Rodeszones/CogVLM-grounding-generalist-hf-quant4/resolve/main/triton-2.1.0-cp310-cp310-win_amd64.whl?download=true",
+        "triton 2.1.0",
+        reinstall=True,
+    )
+
+
 def main_menu(headless: bool = False):
     if headless:
         install_kohya_ss_torch2(headless=headless)
     else:
         setup_common.clear_screen()
         while True:
-            print("\nKohya_ss GUI setup menu:\n")
-            print("1. Install kohya_ss gui")
-            print("2. (Optional) Install cudnn files (if you want to use latest supported cudnn version)")
-            print("3. (Optional) Install specific bitsandbytes versions")
-            print("4. (Optional) Manually configure accelerate")
-            print("5. (Optional) Start Kohya_ss GUI in browser")
-            print("6. Quit")
-
-            choice = input("\nEnter your choice: ")
+            print("\nKohya_ss setup menu:\n")
+            print("1. Install kohya_ss GUI")
+            print(
+                "2. (Optional) Install CuDNN files (to use the latest supported CuDNN version)"
+            )
+            print("3. (Optional) Install Triton 2.1.0 for Windows")
+            print("4. (Optional) Install specific version of bitsandbytes")
+            print("5. (Optional) Manually configure Accelerate")
+            print("6. (Optional) Launch Kohya_ss GUI in browser")
+            print("7. Exit Setup")
+
+            choice = input("\nSelect an option: ")
             print("")
 
             if choice == "1":
@@ -177,22 +196,25 @@ def main_menu(headless: bool = False):
             elif choice == "2":
                 cudnn_install()
             elif choice == "3":
+                install_triton_2_1_0()
+            elif choice == "4":
                 while True:
-                    print("1. (Optional) Force installation of bitsandbytes 0.35.0")
+                    print("\nBitsandBytes Installation Menu:")
+                    print("1. Force install Bitsandbytes 0.35.0")
                     print(
-                        "2. (Optional) Force installation of bitsandbytes 0.40.1 for new optimizer options support and pre-bugfix results"
+                        "2. Force install Bitsandbytes 0.40.1 (supports new optimizer options, pre-bugfix results)"
                    )
                    print(
-                        "3. (Optional) Force installation of bitsandbytes 0.41.1 for new optimizer options support"
+                        "3. Force install Bitsandbytes 0.41.1 (supports new optimizer options)"
                    )
                    print(
-                        "4. (Recommended) Force installation of bitsandbytes 0.41.2 for new optimizer options support"
+                        "4. (Recommended) Force install Bitsandbytes 0.41.2 (supports new optimizer options)"
                    )
                    print(
-                        "5. (Danger) Install bitsandbytes-windows (this package has been reported to cause issues for most... avoid...)"
+                        "5. (Warning) Install bitsandbytes-windows (may cause issues, use with caution)"
                    )
-                    print("6. Exit")
-                    choice_torch = input("\nEnter your choice: ")
+                    print("6. Return to Previous Menu")
+                    choice_torch = input("\nSelect an option: ")
                     print("")
 
                     if choice_torch == "1":
@@ -215,29 +237,29 @@ def main_menu(headless: bool = False):
                     elif choice_torch == "6":
                         break
                     else:
-                        print("Invalid choice. Please enter a number between 1-3.")
-            elif choice == "4":
-                setup_common.run_cmd("accelerate config")
+                        print("Invalid choice. Please choose an option between 1-6.")
             elif choice == "5":
+                setup_common.run_cmd("accelerate config")
+            elif choice == "6":
                 subprocess.Popen(
                     "start cmd /k .\gui.bat --inbrowser", shell=True
                 )  # /k keep the terminal open on quit. /c would close the terminal instead
-            elif choice == "6":
-                print("Quitting the program.")
+            elif choice == "7":
+                print("Exiting setup.")
                 break
             else:
-                print("Invalid choice. Please enter a number between 1-5.")
+                print("Invalid selection. Please choose an option between 1-7.")
 
 
 if __name__ == "__main__":
     setup_common.ensure_base_requirements()
     setup_common.setup_logging()
-    
+
     # Setup argument parser
     parser = argparse.ArgumentParser(description="Your Script Description")
-    parser.add_argument('--headless', action='store_true', help='Run in headless mode')
+    parser.add_argument("--headless", action="store_true", help="Run in headless mode")
 
     # Parse arguments
     args = parser.parse_args()
-    
+
     main_menu(headless=args.headless)
diff --git a/test/config/Diag-OFT-AdamW8bit-toml.json b/test/config/Diag-OFT-AdamW8bit-toml.json
new file mode 100644
index 000000000..228a53bda
--- /dev/null
+++ b/test/config/Diag-OFT-AdamW8bit-toml.json
@@ -0,0 +1,131 @@
+{
+    "LoRA_type": "LyCORIS/Diag-OFT",
+    "LyCORIS_preset": "full",
+    "adaptive_noise_scale": 0,
+    "additional_parameters": "",
+    "block_alphas": "",
+    "block_dims": "",
+    "block_lr_zero_threshold": "",
+    "bucket_no_upscale": true,
+    "bucket_reso_steps": 64,
+    "bypass_mode": true,
+    "cache_latents": true,
+    "cache_latents_to_disk": false,
+    "caption_dropout_every_n_epochs": 0.0,
+    "caption_dropout_rate": 0.05,
+    "caption_extension": "",
+    "clip_skip": 2,
+    "color_aug": false,
+    "constrain": 0.0,
+    "conv_alpha": 4,
+    "conv_block_alphas": "",
+    "conv_block_dims": "",
+    "conv_dim": 8,
+    "dataset_config": "D:/kohya_ss/test/config/dataset.toml",
+    "debiased_estimation_loss": false,
+    "decompose_both": false,
+    "dim_from_weights": false,
+    "down_lr_weight": "",
+    "enable_bucket": true,
+    "epoch": 1,
+    "factor": -1,
+    "flip_aug": false,
+    "fp8_base": false,
+    "full_bf16": false,
+    "full_fp16": false,
+    "gpu_ids": "",
+    "gradient_accumulation_steps": 1,
+    "gradient_checkpointing": false,
+    "keep_tokens": "0",
+    "learning_rate": 0.0005,
+    "log_tracker_config": "",
+    "log_tracker_name": "",
+    "logging_dir": "./test/logs",
+    "lora_network_weights": "",
+    "lr_scheduler": "constant",
+    "lr_scheduler_args": "",
+    "lr_scheduler_num_cycles": "",
+    "lr_scheduler_power": "",
+    "lr_warmup": 0,
+    "max_bucket_reso": 2048,
+    "max_data_loader_n_workers": "0",
+    "max_grad_norm": 1,
+    "max_resolution": "512,512",
+    "max_timestep": 1000,
+    "max_token_length": "75",
+    "max_train_epochs": "",
+    "max_train_steps": "",
+    "mem_eff_attn": false,
+    "mid_lr_weight": "",
+    "min_bucket_reso": 256,
+    "min_snr_gamma": 0,
+    "min_timestep": 0,
+    "mixed_precision": "bf16",
+    "model_list": "runwayml/stable-diffusion-v1-5",
+    "module_dropout": 0,
+    "multi_gpu": false,
+    "multires_noise_discount": 0,
+    "multires_noise_iterations": 0,
+    "network_alpha": 16,
+    "network_dim": 32,
+    "network_dropout": 0,
+    "noise_offset": 0.05,
+    "noise_offset_type": 
"Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "Diag-OFT-AdamW8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 4, + "train_data_dir": "", + "train_norm": false, + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/LoKR-AdamW8bit-toml.json b/test/config/LoKR-AdamW8bit-toml.json new file mode 100644 index 000000000..d99201cda --- /dev/null +++ b/test/config/LoKR-AdamW8bit-toml.json @@ -0,0 +1,132 @@ +{ + "LoRA_type": "LyCORIS/LoKr", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "--lr_scheduler_type \"CosineAnnealingLR\" --lr_scheduler_args \"T_max=1000\" \"eta_min=0e-0\"", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "bypass_mode": false, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.1, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 100000, + "dataset_config": "D:/kohya_ss/test/config/dataset.toml", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "dora_wd": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 150, + "factor": 6, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": 1, + "learning_rate": 1.0, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + 
"min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "custom", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0.1, + "multires_noise_iterations": 6, + "network_alpha": 1, + "network_dim": 100000, + "network_dropout": 0, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "Prodigy", + "optimizer_args": "\"d0=1e-5\" \"d_coef=1.0\" \"weight_decay=0.4\" \"decouple=True\" \"safeguard_warmup=True\" \"use_bias_correction=True\"", + "output_dir": "./test/output", + "output_name": "LoKR-AdamW8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 15, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "bf16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": true, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 2, + "train_data_dir": "", + "train_norm": false, + "train_on_input": false, + "training_comment": "KoopaTroopa", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/TI-AdamW8bit-toml.json b/test/config/TI-AdamW8bit-toml.json new file mode 100644 index 000000000..e583a5392 --- /dev/null +++ b/test/config/TI-AdamW8bit-toml.json @@ -0,0 +1,96 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "D:/kohya_ss/test/config/dataset.toml", + "enable_bucket": true, + "epoch": 4, + "flip_aug": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "init_word": "*", + "keep_tokens": "0", + "learning_rate": 0.0001, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "80", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0.2, + 
"multires_noise_iterations": 8, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "num_vectors_per_token": 8, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "TI-Adamw8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 20, + "sample_prompts": "a painting of man wearing a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "template": "style template", + "token_string": "zxc", + "train_batch_size": 4, + "train_data_dir": "", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weights": "", + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/TI-AdamW8bit.json b/test/config/TI-AdamW8bit.json new file mode 100644 index 000000000..9785c01f8 --- /dev/null +++ b/test/config/TI-AdamW8bit.json @@ -0,0 +1,96 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "", + "enable_bucket": true, + "epoch": 4, + "flip_aug": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "init_word": "*", + "keep_tokens": "0", + "learning_rate": 0.0001, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "num_vectors_per_token": 8, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "TI-Adamw8bit", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 20, + "sample_prompts": "a painting of man wearing a gas mask , by darius 
kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "template": "style template", + "token_string": "zxc", + "train_batch_size": 4, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weights": "", + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dataset-finetune.toml b/test/config/dataset-finetune.toml new file mode 100644 index 000000000..702663f8f --- /dev/null +++ b/test/config/dataset-finetune.toml @@ -0,0 +1,14 @@ +[[datasets]] +resolution = 512 +batch_size = 4 +keep_tokens = 1 +enable_bucket = true +min_bucket_reso = 64 +max_bucket_reso = 1024 +bucket_reso_steps = 32 +bucket_no_upscale = false + + [[datasets.subsets]] + image_dir = '.\test\img\10_darius kawasaki person' + num_repeats = 10 + metadata_file = '.\test\config\meta-1_lat.json' \ No newline at end of file diff --git a/test/config/dataset.toml b/test/config/dataset.toml new file mode 100644 index 000000000..2f90028a2 --- /dev/null +++ b/test/config/dataset.toml @@ -0,0 +1,15 @@ +[[datasets]] +resolution = 512 +batch_size = 4 +keep_tokens = 1 +enable_bucket = true +min_bucket_reso = 64 +max_bucket_reso = 1024 +bucket_reso_steps = 32 +bucket_no_upscale = true + + [[datasets.subsets]] + image_dir = '.\test\img\10_darius kawasaki person' + num_repeats = 10 + class_tokens = 'darius kawasaki person' + caption_extension = '.txt' \ No newline at end of file diff --git a/test/config/dreambooth-AdamW8bit-toml.json b/test/config/dreambooth-AdamW8bit-toml.json new file mode 100644 index 000000000..3dfeb3dd9 --- /dev/null +++ b/test/config/dreambooth-AdamW8bit-toml.json @@ -0,0 +1,95 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "D:/kohya_ss/test/config/dataset.toml", + "enable_bucket": true, + "epoch": 1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 5e-05, + "learning_rate_te": 1e-05, + "learning_rate_te1": 1e-05, + "learning_rate_te2": 1e-05, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "no_token_padding": false, + 
"noise_offset": 0.05, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "db-AdamW8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 4, + "train_data_dir": "", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dreambooth-AdamW8bit.json b/test/config/dreambooth-AdamW8bit.json index 1771c8a79..2b4ae0187 100644 --- a/test/config/dreambooth-AdamW8bit.json +++ b/test/config/dreambooth-AdamW8bit.json @@ -1,11 +1,6 @@ { - "LoRA_type": "Standard", - "LyCORIS_preset": "full", "adaptive_noise_scale": 0, "additional_parameters": "", - "block_alphas": "", - "block_dims": "", - "block_lr_zero_threshold": "", "bucket_no_upscale": true, "bucket_reso_steps": 64, "cache_latents": true, @@ -15,20 +10,10 @@ "caption_extension": "", "clip_skip": 2, "color_aug": false, - "constrain": 0.0, - "conv_alpha": 1, - "conv_block_alphas": "", - "conv_block_dims": "", - "conv_dim": 1, - "debiased_estimation_loss": false, - "decompose_both": false, - "dim_from_weights": false, - "down_lr_weight": "", + "dataset_config": "", "enable_bucket": true, "epoch": 1, - "factor": -1, "flip_aug": false, - "fp8_base": false, "full_bf16": false, "full_fp16": false, "gpu_ids": "", @@ -36,10 +21,12 @@ "gradient_checkpointing": false, "keep_tokens": "0", "learning_rate": 5e-05, + "learning_rate_te": 1e-05, + "learning_rate_te1": 1e-05, + "learning_rate_te2": 1e-05, "log_tracker_config": "", "log_tracker_name": "", "logging_dir": "./test/logs", - "lora_network_weights": "", "lr_scheduler": "constant", "lr_scheduler_args": "", "lr_scheduler_num_cycles": "", @@ -47,26 +34,21 @@ "lr_warmup": 0, "max_bucket_reso": 2048, "max_data_loader_n_workers": "0", - "max_grad_norm": 1, "max_resolution": "512,512", "max_timestep": 1000, "max_token_length": "75", "max_train_epochs": "", "max_train_steps": "", "mem_eff_attn": false, - "mid_lr_weight": "", "min_bucket_reso": 256, "min_snr_gamma": 0, "min_timestep": 0, "mixed_precision": "bf16", "model_list": "runwayml/stable-diffusion-v1-5", - "module_dropout": 0, "multi_gpu": false, "multires_noise_discount": 0, "multires_noise_iterations": 0, - "network_alpha": 1, - "network_dim": 8, - "network_dropout": 0, + "no_token_padding": false, "noise_offset": 0.05, "noise_offset_type": "Original", "num_cpu_threads_per_process": 2, @@ -80,10 +62,7 @@ "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", "prior_loss_weight": 1.0, "random_crop": false, - "rank_dropout": 0, - "rank_dropout_scale": false, "reg_data_dir": "", - 
"rescaled": false, "resume": "", "sample_every_n_epochs": 0, "sample_every_n_steps": 25, @@ -97,30 +76,17 @@ "save_precision": "fp16", "save_state": false, "scale_v_pred_loss_like_noise_pred": false, - "scale_weight_norms": 0, "sdxl": false, - "sdxl_cache_text_encoder_outputs": false, - "sdxl_no_half_vae": true, "seed": "1234", "shuffle_caption": false, "stop_text_encoder_training": 0, - "text_encoder_lr": 0.0, "train_batch_size": 4, "train_data_dir": "./test/img", - "train_norm": false, - "train_on_input": true, - "training_comment": "", - "unet_lr": 0.0, - "unit": 1, - "up_lr_weight": "", - "use_cp": false, - "use_scalar": false, - "use_tucker": false, "use_wandb": false, "v2": false, "v_parameterization": false, "v_pred_like_loss": 0, - "vae": "", + "vae": "stabilityai/vae", "vae_batch_size": 0, "wandb_api_key": "", "wandb_run_name": "", diff --git a/test/config/finetune-AdamW-toml.json b/test/config/finetune-AdamW-toml.json new file mode 100644 index 000000000..9a818ebbc --- /dev/null +++ b/test/config/finetune-AdamW-toml.json @@ -0,0 +1,100 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "batch_size": "8", + "block_lr": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "caption_metadata_filename": "meta-1_cap.json", + "clip_skip": 1, + "color_aug": false, + "create_buckets": false, + "create_caption": true, + "dataset_config": "D:/kohya_ss/test/config/dataset-finetune.toml", + "dataset_repeats": "50", + "epoch": 2, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "full_path": true, + "gpu_ids": "", + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": false, + "image_folder": "", + "keep_tokens": 0, + "latent_metadata_filename": "meta-1_lat.json", + "learning_rate": 1e-05, + "learning_rate_te": 5e-06, + "learning_rate_te1": 5e-06, + "learning_rate_te2": 0.0, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/ft", + "lr_scheduler": "cosine_with_restarts", + "lr_scheduler_args": "", + "lr_warmup": 10, + "max_bucket_reso": "1024", + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "20", + "mem_eff_attn": false, + "min_bucket_reso": "256", + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "test_ft-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "random_crop": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 0, + "sample_prompts": "", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "bf16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_checkbox": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": 
false, + "train_batch_size": 4, + "train_dir": "./test", + "train_text_encoder": true, + "use_latent_files": "No", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/finetune-AdamW.json b/test/config/finetune-AdamW.json index eabb4bf58..36a5fff31 100644 --- a/test/config/finetune-AdamW.json +++ b/test/config/finetune-AdamW.json @@ -15,12 +15,14 @@ "color_aug": false, "create_buckets": false, "create_caption": true, + "dataset_config": "", "dataset_repeats": "50", "epoch": 2, "flip_aug": false, "full_bf16": false, "full_fp16": false, "full_path": true, + "gpu_ids": "", "gradient_accumulation_steps": 1.0, "gradient_checkpointing": false, "image_folder": ".\\test\\img\\10_darius kawasaki person", @@ -30,6 +32,8 @@ "learning_rate_te": 5e-06, "learning_rate_te1": 5e-06, "learning_rate_te2": 0.0, + "log_tracker_config": "", + "log_tracker_name": "", "logging_dir": "./test/ft", "lr_scheduler": "cosine_with_restarts", "lr_scheduler_args": "", @@ -40,17 +44,21 @@ "max_timestep": 1000, "max_token_length": "75", "max_train_epochs": "", + "max_train_steps": "", "mem_eff_attn": false, "min_bucket_reso": "256", "min_snr_gamma": 0, "min_timestep": 0, "mixed_precision": "bf16", "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, "multires_noise_discount": 0, "multires_noise_iterations": 0, "noise_offset": 0, "noise_offset_type": "Original", "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, "optimizer": "AdamW", "optimizer_args": "", "output_dir": "./test/output", @@ -86,6 +94,7 @@ "v_pred_like_loss": 0, "vae_batch_size": 0, "wandb_api_key": "", + "wandb_run_name": "", "weighted_captions": false, "xformers": "xformers" } \ No newline at end of file diff --git a/test/config/locon-AdamW8bit-toml.json b/test/config/locon-AdamW8bit-toml.json new file mode 100644 index 000000000..e3cbf033b --- /dev/null +++ b/test/config/locon-AdamW8bit-toml.json @@ -0,0 +1,130 @@ +{ + "LoRA_type": "Standard", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 1, + "dataset_config": "D:/kohya_ss/test/config/dataset.toml", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0005, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + 
"max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 1, + "network_dim": 8, + "network_dropout": 0, + "noise_offset": 0.05, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "locon-AdamW8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 4, + "train_data_dir": "", + "train_norm": false, + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/meta-1_lat.json b/test/config/meta-1_lat.json new file mode 100644 index 000000000..ead4d1e14 --- /dev/null +++ b/test/config/meta-1_lat.json @@ -0,0 +1,58 @@ +{ + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki.jpg": { + "caption": "a painting of a steam punk skull with a gas mask , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_2.jpg": { + "caption": "a painting of a man with a skull on his head , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_3.jpg": { + "caption": "a painting of a woman with a helmet on her head , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_4.jpg": { + "caption": "a painting of a horned man with a goat head , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_5.jpg": { + "caption": "a painting of a man playing a piano , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_6.jpg": { + "caption": "a painting of a robot sitting on a rock , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_7.jpg": { + 
"caption": "a painting of a soldier with a helmet on , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_8.jpg": { + "caption": "a painting of a giant crab with a large body , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + } +} \ No newline at end of file