Merge pull request kohya-ss#79 from mgz-dev/tensorboard-improvements
expand details in tensorboard logs
kohya-ss authored Jan 21, 2023
2 parents 7dbcef7 + e5d9f48, commit 7ee808d
Showing 2 changed files with 18 additions and 5 deletions.
12 changes: 12 additions & 0 deletions library/train_util.py
@@ -1408,5 +1408,17 @@ def save_state_on_train_end(args: argparse.Namespace, accelerator):
    model_name = DEFAULT_LAST_OUTPUT_NAME if args.output_name is None else args.output_name
    accelerator.save_state(os.path.join(args.output_dir, LAST_STATE_NAME.format(model_name)))

def generate_step_logs(args: argparse.Namespace, current_loss, avr_loss, lr_scheduler):
    logs = {"loss/current": current_loss, "loss/average": avr_loss}

    if args.network_train_unet_only:
        logs["lr/unet"] = lr_scheduler.get_last_lr()[0]
    elif args.network_train_text_encoder_only:
        logs["lr/textencoder"] = lr_scheduler.get_last_lr()[0]
    else:
        logs["lr/textencoder"] = lr_scheduler.get_last_lr()[0]
        logs["lr/unet"] = lr_scheduler.get_last_lr()[-1]

    return logs

# endregion
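As a reading aid (not part of the commit), here is a minimal sketch of what the new helper produces. It assumes the repository root is on the import path and that lr_scheduler exposes get_last_lr() as PyTorch schedulers do; the DummyScheduler, the dummy argparse.Namespace, and the literal loss values are purely illustrative. When neither network_train_unet_only nor network_train_text_encoder_only is set, the first learning-rate group is logged as the text encoder rate and the last as the U-Net rate.

import argparse

from library.train_util import generate_step_logs  # the helper added above

class DummyScheduler:
    # stands in for a torch lr_scheduler; only get_last_lr() is used by the helper
    def __init__(self, lrs):
        self.lrs = lrs

    def get_last_lr(self):
        return self.lrs

args = argparse.Namespace(network_train_unet_only=False, network_train_text_encoder_only=False)
scheduler = DummyScheduler([1e-4, 1e-3])  # [text encoder lr, unet lr]

logs = generate_step_logs(args, current_loss=0.123, avr_loss=0.118, lr_scheduler=scheduler)
print(logs)
# {'loss/current': 0.123, 'loss/average': 0.118, 'lr/textencoder': 0.0001, 'lr/unet': 0.001}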
11 changes: 6 additions & 5 deletions train_network.py
@@ -346,20 +346,21 @@ def train(args):
                global_step += 1

            current_loss = loss.detach().item()
            if args.logging_dir is not None:
                logs = {"loss": current_loss, "lr": lr_scheduler.get_last_lr()[0]}
                accelerator.log(logs, step=global_step)

            loss_total += current_loss
            avr_loss = loss_total / (step + 1)
            logs = {"loss": avr_loss}  # , "lr": lr_scheduler.get_last_lr()[0]}
            progress_bar.set_postfix(**logs)

            if args.logging_dir is not None:
                logs = train_util.generate_step_logs(args, current_loss, avr_loss, lr_scheduler)

                accelerator.log(logs, step=global_step)

            if global_step >= args.max_train_steps:
                break

        if args.logging_dir is not None:
            logs = {"epoch_loss": loss_total / len(train_dataloader)}
            logs = {"loss/epoch": loss_total / len(train_dataloader)}
            accelerator.log(logs, step=epoch + 1)

        accelerator.wait_for_everyone()
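Each dict returned by generate_step_logs is passed straight to accelerator.log, so every key becomes its own scalar tag and TensorBoard groups the charts under the loss/ and lr/ prefixes. Below is a rough standalone sketch of that path with illustrative values and paths (not code from this commit); the logging_dir keyword matches the accelerate releases current at the time of this commit, and the project name here is just a placeholder.

from accelerate import Accelerator

accelerator = Accelerator(log_with="tensorboard", logging_dir="./logs")  # hypothetical log directory
accelerator.init_trackers("example_run")  # placeholder project name

for step in range(1, 4):
    # mirrors the per-step accelerator.log(...) call in train_network.py
    accelerator.log({"loss/current": 0.5 / step, "loss/average": 0.4 / step, "lr/unet": 1e-4}, step=step)

accelerator.end_training()
# inspect with: tensorboard --logdir ./logs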
