Updates

bmaltais · bmaltais · commit 3e6438684dd3 · 2023-08-05T13:35:42.000-04:00
diff --git a/README.md b/README.md
@@ -502,3 +502,6 @@ If you come across a `FileNotFoundError`, it is likely due to an installation is
   - Fix finetuning latent caching issue when doing SDXL models in fp16
   - Add SDXL merge lora support. You can now merge LoRAs into an SDXL checkpoint.
   - Add SDPA CrossAttention option to trainers.
+  - Merge latest kohya_ss sd-scripts code
+  - Fix Dreambooth support for SDXL training
+  - Update to latest bitsandbytes release
diff --git a/dreambooth_gui.py b/dreambooth_gui.py
@@ -79,6 +79,7 @@ def save_configuration(
     enable_bucket,
     gradient_checkpointing,
     full_fp16,
+    full_bf16,
     no_token_padding,
     stop_text_encoder_training,
     min_bucket_reso,
@@ -192,6 +193,7 @@ def open_configuration(
     enable_bucket,
     gradient_checkpointing,
     full_fp16,
+    full_bf16,
     no_token_padding,
     stop_text_encoder_training,
     min_bucket_reso,
@@ -304,6 +306,7 @@ def train_model(
     enable_bucket,
     gradient_checkpointing,
     full_fp16,
+    full_bf16,
     no_token_padding,
     stop_text_encoder_training_pct,
     min_bucket_reso,
@@ -520,7 +523,13 @@ def train_model(
     lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
     log.info(f'lr_warmup_steps = {lr_warmup_steps}')
 
-    run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_db.py"'
+    # run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_db.py"'
+    run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process}'
+    if sdxl:  # SDXL checkpoints are trained by the dedicated SDXL script
+        run_cmd += ' "./sdxl_train.py"'
+    else:
+        run_cmd += ' "./train_db.py"'
+        
     if v2:
         run_cmd += ' --v2'
     if v_parameterization:
@@ -551,6 +560,8 @@ def train_model(
     #     run_cmd += f' --resume={resume}'
     if not float(prior_loss_weight) == 1.0:
         run_cmd += f' --prior_loss_weight={prior_loss_weight}'
+    if full_bf16:
+        run_cmd += ' --full_bf16'
     if not vae == '':
         run_cmd += f' --vae="{vae}"'
     if not output_name == '':
@@ -696,6 +707,9 @@ def dreambooth_tab(
                 lr_scheduler_value='cosine',
                 lr_warmup_value='10',
             )
+            full_bf16 = gr.Checkbox(
+                label='Full bf16', value = False
+            )
             with gr.Accordion('Advanced Configuration', open=False):
                 advanced_training = AdvancedTraining(headless=headless)
                 advanced_training.color_aug.change(
@@ -765,6 +779,7 @@ def dreambooth_tab(
             basic_training.enable_bucket,
             advanced_training.gradient_checkpointing,
             advanced_training.full_fp16,
+            full_bf16,
             advanced_training.no_token_padding,
             basic_training.stop_text_encoder_training,
             basic_training.min_bucket_reso,
diff --git a/library/class_advanced_training.py b/library/class_advanced_training.py
@@ -19,8 +19,12 @@ def noise_offset_type_change(noise_offset_type):
             self.no_token_padding = gr.Checkbox(
                 label='No token padding', value=False
             )
-            self.gradient_accumulation_steps = gr.Number(
-                label='Gradient accumulate steps', value='1'
+            self.gradient_accumulation_steps = gr.Slider(
+                label='Gradient accumulate steps',
+                info='Number of updates steps to accumulate before performing a backward/update pass',
+                value=1,  # numeric default, consistent with minimum/maximum/step
+                minimum=1, maximum=120,
+                step=1
             )
             self.weighted_captions = gr.Checkbox(
                 label='Weighted captions', value=False
diff --git a/library/class_dreambooth_gui.py b/library/class_dreambooth_gui.py
@@ -36,6 +36,9 @@ def __init__(
                 lr_scheduler_value='cosine',
                 lr_warmup_value='10',
             )
+            self.full_bf16 = gr.Checkbox(
+                label='Full bf16', value = False
+            )
             with gr.Accordion('Advanced Configuration', open=False):
                 self.advanced_training = AdvancedTraining(headless=headless)
                 self.advanced_training.color_aug.change(
diff --git a/library/tensorboard_gui.py b/library/tensorboard_gui.py
@@ -3,57 +3,58 @@
 from easygui import msgbox
 import subprocess
 import time
-
+import webbrowser
 from library.custom_logging import setup_logging
 
 # Set up logging
 log = setup_logging()
 
-tensorboard_proc = None   # I know... bad but heh
+tensorboard_proc = None
 TENSORBOARD = 'tensorboard' if os.name == 'posix' else 'tensorboard.exe'
 
-
-def start_tensorboard(logging_dir):
+def start_tensorboard(logging_dir, wait_time=5):
     global tensorboard_proc
 
     if not os.listdir(logging_dir):
         log.info('Error: log folder is empty')
         msgbox(msg='Error: log folder is empty')
         return
 
-    run_cmd = [f'{TENSORBOARD}', '--logdir', f'{logging_dir}', '--host', '0.0.0.0', '--port', '6006']
+    run_cmd = [TENSORBOARD, '--logdir', logging_dir, '--host', '0.0.0.0', '--port', '6006']
 
     log.info(run_cmd)
     if tensorboard_proc is not None:
-        log.info(
-            'Tensorboard is already running. Terminating existing process before starting new one...'
-        )
+        log.info('Tensorboard is already running. Terminating existing process before starting new one...')
         stop_tensorboard()
 
     # Start background process
     log.info('Starting tensorboard...')
-    tensorboard_proc = subprocess.Popen(run_cmd)
+    try:
+        tensorboard_proc = subprocess.Popen(run_cmd)
+    except Exception as e:
+        log.error('Failed to start Tensorboard: %s', e)  # %s placeholder so the exception text is actually rendered
+        return
 
     # Wait for some time to allow TensorBoard to start up
-    time.sleep(5)
+    time.sleep(wait_time)
 
     # Open the TensorBoard URL in the default browser
     log.info('Opening tensorboard url in browser...')
-    import webbrowser
-
     webbrowser.open('http://localhost:6006')
 
-
 def stop_tensorboard():
-    if not tensorboard_proc == None:
+    global tensorboard_proc
+    if tensorboard_proc is not None:
         log.info('Stopping tensorboard process...')
-        tensorboard_proc.kill()
-        tensorboard_proc = None
-        log.info('...process stopped')
+        try:
+            tensorboard_proc.terminate()
+            tensorboard_proc = None
+            log.info('...process stopped')
+        except Exception as e:
+            log.error('Failed to stop Tensorboard: %s', e)  # %s placeholder so the exception text is actually rendered
     else:
         log.info('Tensorboard is not running...')
 
-
 def gradio_tensorboard():
     with gr.Row():
         button_start_tensorboard = gr.Button('Start tensorboard')
diff --git a/presets/lora/SDXL-LoHa-training-config-by-AI_Characters.json b/presets/lora/SDXL-LoHa-training-config-by-AI_Characters.json
@@ -0,0 +1,95 @@
+{
+    "LoRA_type": "LyCORIS/LoHa",
+    "adaptive_noise_scale": 0,
+    "additional_parameters": "",
+    "block_alphas": "",
+    "block_dims": "",
+    "block_lr_zero_threshold": "",
+    "bucket_no_upscale": false,
+    "bucket_reso_steps": 32,
+    "cache_latents": true,
+    "cache_latents_to_disk": true,
+    "caption_dropout_every_n_epochs": 0.0,
+    "caption_dropout_rate": 0.05,
+    "caption_extension": ".txt",
+    "clip_skip": "1",
+    "color_aug": false,
+    "conv_alpha": 16,
+    "conv_block_alphas": "",
+    "conv_block_dims": "",
+    "conv_dim": 16,
+    "decompose_both": false,
+    "dim_from_weights": false,
+    "down_lr_weight": "",
+    "enable_bucket": true,
+    "epoch": 1,
+    "factor": -1,
+    "flip_aug": false,
+    "full_bf16": false,
+    "full_fp16": false,
+    "gradient_accumulation_steps": 1.0,
+    "gradient_checkpointing": true,
+    "keep_tokens": "0",
+    "learning_rate": 0.001,
+    "lora_network_weights": "",
+    "lr_scheduler": "constant",
+    "lr_scheduler_num_cycles": "1",
+    "lr_scheduler_power": "",
+    "lr_warmup": 0,
+    "max_bucket_reso": 2048,
+    "max_data_loader_n_workers": "0",
+    "max_resolution": "1024,1024",
+    "max_timestep": 1000,
+    "max_token_length": "75",
+    "max_train_epochs": "100",
+    "mem_eff_attn": false,
+    "mid_lr_weight": "",
+    "min_bucket_reso": 256,
+    "min_snr_gamma": 5,
+    "min_timestep": 0,
+    "mixed_precision": "fp16",
+    "module_dropout": 0,
+    "multires_noise_discount": 0,
+    "multires_noise_iterations": 0,
+    "network_alpha": 32,
+    "network_dim": 32,
+    "network_dropout": 0,
+    "no_token_padding": false,
+    "noise_offset": 0,
+    "noise_offset_type": "Original",
+    "num_cpu_threads_per_process": 2,
+    "optimizer": "AdamW8bit",
+    "optimizer_args": "",
+    "persistent_data_loader_workers": false,
+    "prior_loss_weight": 1.0,
+    "random_crop": false,
+    "rank_dropout": 0,
+    "save_every_n_epochs": 100,
+    "save_every_n_steps": 0,
+    "save_last_n_steps": 0,
+    "save_last_n_steps_state": 0,
+    "save_precision": "fp16",
+    "scale_v_pred_loss_like_noise_pred": false,
+    "scale_weight_norms": 2.5,
+    "sdxl": true,
+    "sdxl_cache_text_encoder_outputs": false,
+    "sdxl_no_half_vae": true,
+    "seed": "",
+    "shuffle_caption": false,
+    "stop_text_encoder_training_pct": 0,
+    "text_encoder_lr": 0.001,
+    "train_batch_size": 8,
+    "train_on_input": true,
+    "training_comment": "",
+    "unet_lr": 0.001,
+    "unit": 1,
+    "up_lr_weight": "",
+    "use_cp": false,
+    "use_wandb": false,
+    "v2": false,
+    "v_parameterization": false,
+    "vae_batch_size": 0,
+    "wandb_api_key": "",
+    "weighted_captions": false,
+    "xformers": true
+}
diff --git a/requirements_linux.txt b/requirements_linux.txt
@@ -1,4 +1,4 @@
 torch==2.0.1+cu118 torchvision==0.15.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 # no_verify leave this to specify not checking this a verification stage
-xformers==0.0.20 bitsandbytes==0.41.0
+xformers==0.0.20 bitsandbytes==0.41.1
 tensorboard==2.12.3 tensorflow==2.12.0
 -r requirements.txt
diff --git a/requirements_linux_docker.txt b/requirements_linux_docker.txt
@@ -1,5 +1,5 @@
 xformers==0.0.20
-bitsandbytes==0.35.0
+bitsandbytes==0.41.1
 accelerate==0.19.0
 tensorboard==2.12.1
 tensorflow==2.12.0
diff --git a/requirements_macos_amd64.txt b/requirements_macos_amd64.txt
@@ -1,4 +1,4 @@
 torch==2.0.0 torchvision==0.15.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
-xformers bitsandbytes==0.35.0
+xformers bitsandbytes==0.41.1
 tensorflow-macos tensorboard==2.12.1
 -r requirements.txt
diff --git a/requirements_macos_arm64.txt b/requirements_macos_arm64.txt
@@ -1,4 +1,4 @@
 torch==2.0.0 torchvision==0.15.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
-xformers bitsandbytes==0.35.0
+xformers bitsandbytes==0.41.1
 tensorflow-metal tensorboard==2.12.1
 -r requirements.txt
diff --git a/requirements_runpod.txt b/requirements_runpod.txt
@@ -1,5 +1,5 @@
 torch==2.0.1+cu118 torchvision==0.15.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 # no_verify leave this to specify not checking this a verification stage
-xformers==0.0.20 bitsandbytes==0.41.0
+xformers==0.0.20 bitsandbytes==0.41.1
 tensorboard==2.12.3 tensorflow==2.12.0 wheel
 tensorrt
 -r requirements.txt
diff --git a/requirements_windows_torch1.txt b/requirements_windows_torch1.txt
@@ -1,5 +1,5 @@
 torch==1.12.1+cu116 torchvision==0.13.1+cu116 --index-url https://download.pytorch.org/whl/cu116 # no_verify
 https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl -U -I --no-deps # no_verify
-bitsandbytes==0.35.0
+bitsandbytes==0.35.0 # no_verify
 tensorboard==2.10.1 tensorflow==2.10.1
 -r requirements.txt
diff --git a/requirements_windows_torch2.txt b/requirements_windows_torch2.txt
@@ -1,6 +1,6 @@
 torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 # no_verify
 xformers==0.0.20
-# bitsandbytes==0.35.0
-https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.0-py3-none-win_amd64.whl # no_verify
+bitsandbytes==0.35.0 # no_verify
+# https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl # no_verify
 tensorboard==2.12.3 tensorflow==2.12.0
 -r requirements.txt
diff --git a/setup/setup_windows.py b/setup/setup_windows.py
@@ -149,13 +149,22 @@ def install_kohya_ss_torch2():
     # run_cmd(f'accelerate config')
 
 
+def install_bitsandbytes_0_35_0():
+    log.info('Installing bitsandbytes 0.35.0...')
+    setup_common.install('--upgrade bitsandbytes==0.35.0', 'bitsandbytes 0.35.0', reinstall=True)
+    sync_bits_and_bytes_files()
+    
+def install_bitsandbytes_0_41_1():
+    log.info('Installing bitsandbytes 0.41.1...')
+    setup_common.install('--upgrade https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl', 'bitsandbytes 0.41.1', reinstall=True)
+
 def main_menu():
     setup_common.clear_screen()
     while True:
         print('\nKohya_ss GUI setup menu:\n')
         print('1. Install kohya_ss gui')
         print('2. (Optional) Install cudann files (avoid unless you really need it)')
-        print('3. (Danger) Install bitsandbytes-windows (this package has been reported to cause issues for most... avoid...)')
+        print('3. (Optional) Install specific bitsandbytes versions')
         print('4. (Optional) Manually configure accelerate')
         print('5. (Optional) Start Kohya_ss GUI in browser')
         print('6. Quit')
@@ -184,7 +193,27 @@ def main_menu():
         elif choice == '2':
             cudann_install()
         elif choice == '3':
-            setup_common.install('--upgrade bitsandbytes-windows', reinstall=True)
+            while True:
+                print('1. (Optional) Force installation of bitsandbytes 0.35.0')
+                print('2. (Optional) Force installation of bitsandbytes 0.41.1 for new optimizer options support')
+                print('3. (Danger) Install bitsandbytes-windows (this package has been reported to cause issues for most... avoid...)')
+                print('4. Cancel')
+                choice_torch = input('\nEnter your choice: ')
+                print('')
+
+                if choice_torch == '1':
+                    install_bitsandbytes_0_35_0()
+                    break
+                elif choice_torch == '2':
+                    install_bitsandbytes_0_41_1()
+                    break
+                elif choice_torch == '3':  # menu entry 3: the "(Danger)" bitsandbytes-windows install
+                    setup_common.install('--upgrade bitsandbytes-windows', reinstall=True)
+                    break
+                elif choice_torch == '4':  # menu entry 4: cancel without installing anything
+                    break
+                else:
+                    print('Invalid choice. Please enter a number between 1-4.')
         elif choice == '4':
             setup_common.run_cmd('accelerate config')
         elif choice == '5':