Skip to content

Commit 3e64386

Browse files
committed
Updates
1 parent 08eb3ad commit 3e64386

14 files changed

Lines changed: 181 additions & 31 deletions

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,3 +502,6 @@ If you come across a `FileNotFoundError`, it is likely due to an installation is
502502
- Fix finetuning latent caching issue when doing SDXL models in fp16
503503
- Add SDXL merge lora support. You can now merge LoRAs into an SDXL checkpoint.
504504
- Add SDPA CrossAttention option to trainers.
505+
- Merge latest kohya_ss sd-scripts code
506+
- Fix Dreambooth support for SDXL training
507+
- Update to latest bitsandbytes release

dreambooth_gui.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def save_configuration(
7979
enable_bucket,
8080
gradient_checkpointing,
8181
full_fp16,
82+
full_bf16,
8283
no_token_padding,
8384
stop_text_encoder_training,
8485
min_bucket_reso,
@@ -192,6 +193,7 @@ def open_configuration(
192193
enable_bucket,
193194
gradient_checkpointing,
194195
full_fp16,
196+
full_bf16,
195197
no_token_padding,
196198
stop_text_encoder_training,
197199
min_bucket_reso,
@@ -304,6 +306,7 @@ def train_model(
304306
enable_bucket,
305307
gradient_checkpointing,
306308
full_fp16,
309+
full_bf16,
307310
no_token_padding,
308311
stop_text_encoder_training_pct,
309312
min_bucket_reso,
@@ -520,7 +523,13 @@ def train_model(
520523
lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
521524
log.info(f'lr_warmup_steps = {lr_warmup_steps}')
522525

523-
run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_db.py"'
526+
# run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_db.py"'
527+
run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process}'
528+
if sdxl:
529+
run_cmd += f' "./sdxl_train.py"'
530+
else:
531+
run_cmd += f' "./train_db.py"'
532+
524533
if v2:
525534
run_cmd += ' --v2'
526535
if v_parameterization:
@@ -551,6 +560,8 @@ def train_model(
551560
# run_cmd += f' --resume={resume}'
552561
if not float(prior_loss_weight) == 1.0:
553562
run_cmd += f' --prior_loss_weight={prior_loss_weight}'
563+
if full_bf16:
564+
run_cmd += ' --full_bf16'
554565
if not vae == '':
555566
run_cmd += f' --vae="{vae}"'
556567
if not output_name == '':
@@ -696,6 +707,9 @@ def dreambooth_tab(
696707
lr_scheduler_value='cosine',
697708
lr_warmup_value='10',
698709
)
710+
full_bf16 = gr.Checkbox(
711+
label='Full bf16', value = False
712+
)
699713
with gr.Accordion('Advanced Configuration', open=False):
700714
advanced_training = AdvancedTraining(headless=headless)
701715
advanced_training.color_aug.change(
@@ -765,6 +779,7 @@ def dreambooth_tab(
765779
basic_training.enable_bucket,
766780
advanced_training.gradient_checkpointing,
767781
advanced_training.full_fp16,
782+
full_bf16,
768783
advanced_training.no_token_padding,
769784
basic_training.stop_text_encoder_training,
770785
basic_training.min_bucket_reso,

library/class_advanced_training.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@ def noise_offset_type_change(noise_offset_type):
1919
self.no_token_padding = gr.Checkbox(
2020
label='No token padding', value=False
2121
)
22-
self.gradient_accumulation_steps = gr.Number(
23-
label='Gradient accumulate steps', value='1'
22+
self.gradient_accumulation_steps = gr.Slider(
23+
label='Gradient accumulate steps',
24+
info='Number of updates steps to accumulate before performing a backward/update pass',
25+
value='1',
26+
minimum=1, maximum=120,
27+
step=1
2428
)
2529
self.weighted_captions = gr.Checkbox(
2630
label='Weighted captions', value=False

library/class_dreambooth_gui.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ def __init__(
3636
lr_scheduler_value='cosine',
3737
lr_warmup_value='10',
3838
)
39+
self.full_bf16 = gr.Checkbox(
40+
label='Full bf16', value = False
41+
)
3942
with gr.Accordion('Advanced Configuration', open=False):
4043
self.advanced_training = AdvancedTraining(headless=headless)
4144
self.advanced_training.color_aug.change(

library/tensorboard_gui.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,57 +3,58 @@
33
from easygui import msgbox
44
import subprocess
55
import time
6-
6+
import webbrowser
77
from library.custom_logging import setup_logging
88

99
# Set up logging
1010
log = setup_logging()
1111

12-
tensorboard_proc = None # I know... bad but heh
12+
tensorboard_proc = None
1313
TENSORBOARD = 'tensorboard' if os.name == 'posix' else 'tensorboard.exe'
1414

15-
16-
def start_tensorboard(logging_dir):
15+
def start_tensorboard(logging_dir, wait_time=5):
1716
global tensorboard_proc
1817

1918
if not os.listdir(logging_dir):
2019
log.info('Error: log folder is empty')
2120
msgbox(msg='Error: log folder is empty')
2221
return
2322

24-
run_cmd = [f'{TENSORBOARD}', '--logdir', f'{logging_dir}', '--host', '0.0.0.0', '--port', '6006']
23+
run_cmd = [TENSORBOARD, '--logdir', logging_dir, '--host', '0.0.0.0', '--port', '6006']
2524

2625
log.info(run_cmd)
2726
if tensorboard_proc is not None:
28-
log.info(
29-
'Tensorboard is already running. Terminating existing process before starting new one...'
30-
)
27+
log.info('Tensorboard is already running. Terminating existing process before starting new one...')
3128
stop_tensorboard()
3229

3330
# Start background process
3431
log.info('Starting tensorboard...')
35-
tensorboard_proc = subprocess.Popen(run_cmd)
32+
try:
33+
tensorboard_proc = subprocess.Popen(run_cmd)
34+
except Exception as e:
35+
log.error('Failed to start Tensorboard:', e)
36+
return
3637

3738
# Wait for some time to allow TensorBoard to start up
38-
time.sleep(5)
39+
time.sleep(wait_time)
3940

4041
# Open the TensorBoard URL in the default browser
4142
log.info('Opening tensorboard url in browser...')
42-
import webbrowser
43-
4443
webbrowser.open('http://localhost:6006')
4544

46-
4745
def stop_tensorboard():
48-
if not tensorboard_proc == None:
46+
global tensorboard_proc
47+
if tensorboard_proc is not None:
4948
log.info('Stopping tensorboard process...')
50-
tensorboard_proc.kill()
51-
tensorboard_proc = None
52-
log.info('...process stopped')
49+
try:
50+
tensorboard_proc.terminate()
51+
tensorboard_proc = None
52+
log.info('...process stopped')
53+
except Exception as e:
54+
log.error('Failed to stop Tensorboard:', e)
5355
else:
5456
log.info('Tensorboard is not running...')
5557

56-
5758
def gradio_tensorboard():
5859
with gr.Row():
5960
button_start_tensorboard = gr.Button('Start tensorboard')
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
{
2+
"LoRA_type": "LyCORIS/LoHa",
3+
"adaptive_noise_scale": 0,
4+
"additional_parameters": "",
5+
"block_alphas": "",
6+
"block_dims": "",
7+
"block_lr_zero_threshold": "",
8+
"bucket_no_upscale": false,
9+
"bucket_reso_steps": 32,
10+
"cache_latents": true,
11+
"cache_latents_to_disk": true,
12+
"caption_dropout_every_n_epochs": 0.0,
13+
"caption_dropout_rate": 0.05,
14+
"caption_extension": ".txt",
15+
"clip_skip": "1",
16+
"color_aug": false,
17+
"conv_alpha": 16,
18+
"conv_block_alphas": "",
19+
"conv_block_dims": "",
20+
"conv_dim": 16,
21+
"decompose_both": false,
22+
"dim_from_weights": false,
23+
"down_lr_weight": "",
24+
"enable_bucket": true,
25+
"epoch": 1,
26+
"factor": -1,
27+
"flip_aug": false,
28+
"full_bf16": false,
29+
"full_fp16": false,
30+
"gradient_accumulation_steps": 1.0,
31+
"gradient_checkpointing": true,
32+
"keep_tokens": "0",
33+
"learning_rate": 0.001,
34+
"lora_network_weights": "",
35+
"lr_scheduler": "constant",
36+
"lr_scheduler_num_cycles": "1",
37+
"lr_scheduler_power": "",
38+
"lr_warmup": 0,
39+
"max_bucket_reso": 2048,
40+
"max_data_loader_n_workers": "0",
41+
"max_resolution": "1024,1024",
42+
"max_timestep": 1000,
43+
"max_token_length": "75",
44+
"max_train_epochs": "100",
45+
"mem_eff_attn": false,
46+
"mid_lr_weight": "",
47+
"min_bucket_reso": 256,
48+
"min_snr_gamma": 5,
49+
"min_timestep": 0,
50+
"mixed_precision": "fp16",
51+
"module_dropout": 0,
52+
"multires_noise_discount": 0,
53+
"multires_noise_iterations": 0,
54+
"network_alpha": 32,
55+
"network_dim": 32,
56+
"network_dropout": 0,
57+
"no_token_padding": false,
58+
"noise_offset": 0,
59+
"noise_offset_type": "Original",
60+
"num_cpu_threads_per_process": 2,
61+
"optimizer": "AdamW8bit",
62+
"optimizer_args": "",
63+
"persistent_data_loader_workers": false,
64+
"prior_loss_weight": 1.0,
65+
"random_crop": false,
66+
"rank_dropout": 0,
67+
"save_every_n_epochs": 100,
68+
"save_every_n_steps": 0,
69+
"save_last_n_steps": 0,
70+
"save_last_n_steps_state": 0,
71+
"save_precision": "fp16",
72+
"scale_v_pred_loss_like_noise_pred": false,
73+
"scale_weight_norms": 2.5,
74+
"sdxl": true,
75+
"sdxl_cache_text_encoder_outputs": false,
76+
"sdxl_no_half_vae": true,
77+
"seed": "",
78+
"shuffle_caption": false,
79+
"stop_text_encoder_training_pct": 0,
80+
"text_encoder_lr": 0.001,
81+
"train_batch_size": 8,
82+
"train_on_input": true,
83+
"training_comment": "",
84+
"unet_lr": 0.001,
85+
"unit": 1,
86+
"up_lr_weight": "",
87+
"use_cp": false,
88+
"use_wandb": false,
89+
"v2": false,
90+
"v_parameterization": false,
91+
"vae_batch_size": 0,
92+
"wandb_api_key": "",
93+
"weighted_captions": false,
94+
"xformers": true
95+
}

requirements_linux.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
torch==2.0.1+cu118 torchvision==0.15.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 # no_verify leave this to specify not checking this at the verification stage
2-
xformers==0.0.20 bitsandbytes==0.41.0
2+
xformers==0.0.20 bitsandbytes==0.41.1
33
tensorboard==2.12.3 tensorflow==2.12.0
44
-r requirements.txt

requirements_linux_docker.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
xformers==0.0.20
2-
bitsandbytes==0.35.0
2+
bitsandbytes==0.41.1
33
accelerate==0.19.0
44
tensorboard==2.12.1
55
tensorflow==2.12.0

requirements_macos_amd64.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
torch==2.0.0 torchvision==0.15.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
2-
xformers bitsandbytes==0.35.0
2+
xformers bitsandbytes==0.41.1
33
tensorflow-macos tensorboard==2.12.1
44
-r requirements.txt

requirements_macos_arm64.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
torch==2.0.0 torchvision==0.15.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
2-
xformers bitsandbytes==0.35.0
2+
xformers bitsandbytes==0.41.1
33
tensorflow-metal tensorboard==2.12.1
44
-r requirements.txt

0 commit comments

Comments
 (0)