sd-scripts-HunYuanDiT/networks/lora.py at main · PNuwa/sd-scripts-HunYuanDiT

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

# LoRA network module

# reference:

# https://github.com/microsoft/LoRA/blob/main/loralib/layers.py

# https://github.com/cloneofsimo/lora/blob/master/lora_diffusion/lora.py

import math

import os

from typing import Dict, List, Optional, Tuple, Type, Union

from diffusers import AutoencoderKL

from transformers import CLIPTextModel

import numpy as np

import torch

import re

from library.utils import setup_logging

setup_logging()

import logging

logger = logging.getLogger(__name__)

# Matches the "<up|down>_blocks_<i>_<resnets|upsamplers|downsamplers|attentions>_<j>_" part of a LoRA name.
# Capture groups: (direction, block number, layer kind, layer number).
RE_UPDOWN = re.compile(r"(up|down)_blocks_(\d+)_(resnets|upsamplers|downsamplers|attentions)_(\d+)_")

class LoRAModule(torch.nn.Module):
    """
    Low-rank adapter attached to a single Linear or Conv2d module.

    The wrapped module is not replaced; `apply_to` swaps the wrapped module's
    `forward` for this module's `forward`, which adds the scaled low-rank update
    (`lora_up(lora_down(x))`) to the original output.
    """

    def __init__(
        self,
        lora_name,
        org_module: torch.nn.Module,
        multiplier=1.0,
        lora_dim=4,
        alpha=1,
        dropout=None,
        rank_dropout=None,
        module_dropout=None,
    ):
        """
        Build the down/up projections for `org_module`.

        Args:
            lora_name: name stored on the module as `self.lora_name`.
            org_module: module to wrap; treated as Conv2d when its class name is
                "Conv2d", otherwise it must expose `in_features` / `out_features`.
            multiplier: factor applied to the LoRA output in `forward`.
            lora_dim: rank of the low-rank decomposition.
            alpha: scaling numerator (`scale = alpha / lora_dim`); if alpha is 0 or
                None, alpha is set to the rank (no scaling). May be a tensor.
            dropout: dropout probability applied to the down-projected activations.
            rank_dropout: probability of dropping each rank channel per sample.
            module_dropout: probability of skipping the LoRA branch entirely.
        """
        super().__init__()
        self.lora_name = lora_name
        self.lora_dim = lora_dim

        if org_module.__class__.__name__ == "Conv2d":
            in_dim = org_module.in_channels
            out_dim = org_module.out_channels
            # the down projection mirrors the wrapped conv's geometry; the up projection is 1x1
            self.lora_down = torch.nn.Conv2d(
                in_dim, self.lora_dim, org_module.kernel_size, org_module.stride, org_module.padding, bias=False
            )
            self.lora_up = torch.nn.Conv2d(self.lora_dim, out_dim, (1, 1), (1, 1), bias=False)
        else:
            in_dim = org_module.in_features
            out_dim = org_module.out_features
            self.lora_down = torch.nn.Linear(in_dim, self.lora_dim, bias=False)
            self.lora_up = torch.nn.Linear(self.lora_dim, out_dim, bias=False)

        if isinstance(alpha, torch.Tensor):
            # Tensor.numpy() only works on CPU tensors, and bf16 must be cast to float first
            alpha = alpha.detach().cpu().float().numpy()
        alpha = self.lora_dim if alpha is None or alpha == 0 else alpha
        self.scale = alpha / self.lora_dim
        self.register_buffer("alpha", torch.tensor(alpha))  # registered as a buffer so it is treated as a constant

        # same initialisation as microsoft's implementation: the up weight starts at zero,
        # so the adapter initially contributes nothing
        torch.nn.init.kaiming_uniform_(self.lora_down.weight, a=math.sqrt(5))
        torch.nn.init.zeros_(self.lora_up.weight)

        self.multiplier = multiplier
        self.org_module = org_module  # removed in apply_to
        self.dropout = dropout
        self.rank_dropout = rank_dropout
        self.module_dropout = module_dropout

    def apply_to(self):
        """Redirect the wrapped module's `forward` to this module and drop the module reference."""
        self.org_forward = self.org_module.forward
        self.org_module.forward = self.forward
        del self.org_module

    def forward(self, x):
        """Return the original module output plus the scaled LoRA output for `x`."""
        org_forwarded = self.org_forward(x)

        # module dropout: occasionally skip the whole LoRA branch while training
        if self.module_dropout is not None and self.training:
            if torch.rand(1) < self.module_dropout:
                return org_forwarded

        lx = self.lora_down(x)

        # normal dropout
        if self.dropout is not None and self.training:
            lx = torch.nn.functional.dropout(lx, p=self.dropout)

        # rank dropout
        if self.rank_dropout is not None and self.training:
            mask = torch.rand((lx.size(0), self.lora_dim), device=lx.device) > self.rank_dropout
            if len(lx.size()) == 3:
                mask = mask.unsqueeze(1)  # for Text Encoder
            elif len(lx.size()) == 4:
                mask = mask.unsqueeze(-1).unsqueeze(-1)  # for Conv2d
            lx = lx * mask

            # scaling for rank dropout: treat as if the rank is changed.
            # The scale could be derived from the mask itself, but rank_dropout is used
            # in expectation of an augmentation-like effect.
            scale = self.scale * (1.0 / (1.0 - self.rank_dropout))  # redundant for readability
        else:
            scale = self.scale

        lx = self.lora_up(lx)

        return org_forwarded + lx * self.multiplier * scale

class LoRAInfModule(LoRAModule):
    """
    Inference-time LoRA module.

    On top of LoRAModule this adds an enable switch, merging of LoRA weights into
    the wrapped module, and regional / sub-prompt aware forwarding that is driven
    by state read from the owning network (see `set_network`).
    """

    def __init__(
        self,
        lora_name,
        org_module: torch.nn.Module,
        multiplier=1.0,
        lora_dim=4,
        alpha=1,
        **kwargs,
    ):
        """Create the module; extra keyword arguments (e.g. dropout settings) are accepted and ignored."""
        # no dropout for inference
        super().__init__(lora_name, org_module, multiplier, lora_dim, alpha)

        self.org_module_ref = [org_module]  # kept so the wrapped module can be referenced later
        self.enabled = True

        # decide from lora_name whether this module is regional and/or uses sub prompts
        self.text_encoder = False
        if lora_name.startswith("lora_te_"):
            self.regional = False
            self.use_sub_prompt = True
            self.text_encoder = True
        elif "attn2_to_k" in lora_name or "attn2_to_v" in lora_name:
            self.regional = False
            self.use_sub_prompt = True
        elif "time_emb" in lora_name:
            self.regional = False
            self.use_sub_prompt = False
        else:
            self.regional = True
            self.use_sub_prompt = False

        self.network: LoRANetwork = None

    def set_network(self, network):
        """Attach the owning network whose state the regional / sub-prompt paths read."""
        self.network = network

    def merge_to(self, sd, dtype, device):
        """
        Freeze and merge: add the LoRA weights found in `sd` to the wrapped module's weight.

        Args:
            sd: mapping holding "lora_up.weight" and "lora_down.weight".
            dtype: dtype the merged weight is cast to before it is loaded back.
            device: device the LoRA weights are moved to for the computation.
        """
        # get up/down weight
        up_weight = sd["lora_up.weight"].to(torch.float).to(device)
        down_weight = sd["lora_down.weight"].to(torch.float).to(device)

        # extract weight from org_module
        org_sd = self.org_module.state_dict()
        weight = org_sd["weight"].to(torch.float)

        # merge weight
        if len(weight.size()) == 2:
            # linear
            weight = weight + self.multiplier * (up_weight @ down_weight) * self.scale
        elif down_weight.size()[2:4] == (1, 1):
            # conv2d 1x1
            weight = (
                weight
                + self.multiplier
                * (up_weight.squeeze(3).squeeze(2) @ down_weight.squeeze(3).squeeze(2)).unsqueeze(2).unsqueeze(3)
                * self.scale
            )
        else:
            # conv2d 3x3
            conved = torch.nn.functional.conv2d(down_weight.permute(1, 0, 2, 3), up_weight).permute(1, 0, 2, 3)
            weight = weight + self.multiplier * conved * self.scale

        # set weight to org_module
        org_sd["weight"] = weight.to(dtype)
        self.org_module.load_state_dict(org_sd)

    def get_weight(self, multiplier=None):
        """
        Return this module's weight delta (for a merge that can be restored later).

        Args:
            multiplier: factor applied to the delta; defaults to `self.multiplier`.
        """
        if multiplier is None:
            multiplier = self.multiplier

        # get up/down weight from module
        up_weight = self.lora_up.weight.to(torch.float)
        down_weight = self.lora_down.weight.to(torch.float)

        # pre-calculated weight (uses the requested multiplier, not unconditionally self.multiplier)
        if len(down_weight.size()) == 2:
            # linear
            weight = multiplier * (up_weight @ down_weight) * self.scale
        elif down_weight.size()[2:4] == (1, 1):
            # conv2d 1x1
            weight = (
                multiplier
                * (up_weight.squeeze(3).squeeze(2) @ down_weight.squeeze(3).squeeze(2)).unsqueeze(2).unsqueeze(3)
                * self.scale
            )
        else:
            # conv2d 3x3
            conved = torch.nn.functional.conv2d(down_weight.permute(1, 0, 2, 3), up_weight).permute(1, 0, 2, 3)
            weight = multiplier * conved * self.scale

        return weight

    def set_region(self, region):
        """Store `region` and reset the cached region mask."""
        self.region = region
        self.region_mask = None

    def default_forward(self, x):
        """Plain LoRA forward: original output plus the scaled LoRA output."""
        return self.org_forward(x) + self.lora_up(self.lora_down(x)) * self.multiplier * self.scale

    def forward(self, x):
        """Dispatch to the disabled, default, regional or sub-prompt forward path."""
        if not self.enabled:
            return self.org_forward(x)

        if self.network is None or self.network.sub_prompt_index is None:
            return self.default_forward(x)
        if not self.regional and not self.use_sub_prompt:
            return self.default_forward(x)

        if self.regional:
            return self.regional_forward(x)
        else:
            return self.sub_prompt_forward(x)

    def get_mask_for_x(self, x):
        """Look up the network mask matching the size of `x`, or a uniform fallback mask."""
        # calculate size from shape of x
        if len(x.size()) == 4:
            h, w = x.size()[2:4]
            area = h * w
        else:
            area = x.size()[1]

        mask = self.network.mask_dic.get(area, None)
        if mask is None or len(x.size()) == 2:
            # emb_layers in SDXL doesn't have mask: fall back to an even split between sub prompts
            mask_size = (1, x.size()[1]) if len(x.size()) == 2 else (1, *x.size()[1:-1], 1)
            return torch.ones(mask_size, dtype=x.dtype, device=x.device) / self.network.num_sub_prompts
        if len(x.size()) == 3:
            mask = torch.reshape(mask, (1, -1, 1))
        return mask

    def regional_forward(self, x):
        """Forward for regional modules: the LoRA output is multiplied by the region mask."""
        if "attn2_to_out" in self.lora_name:
            return self.to_out_forward(x)

        if self.network.mask_dic is None:  # sub_prompt_index >= 3
            return self.default_forward(x)

        # apply mask for LoRA result
        lx = self.lora_up(self.lora_down(x)) * self.multiplier * self.scale
        mask = self.get_mask_for_x(lx)
        lx = lx * mask

        x = self.org_forward(x)
        x = x + lx

        if "attn2_to_q" in self.lora_name and self.network.is_last_network:
            x = self.postp_to_q(x)

        return x

    def postp_to_q(self, x):
        """Expand `x` so that every cond entry is repeated once per sub prompt."""
        # repeat x to num_sub_prompts
        has_real_uncond = x.size()[0] // self.network.batch_size == 3
        qc = self.network.batch_size  # uncond
        qc += self.network.batch_size * self.network.num_sub_prompts  # cond
        if has_real_uncond:
            qc += self.network.batch_size  # real_uncond

        query = torch.zeros((qc, x.size()[1], x.size()[2]), device=x.device, dtype=x.dtype)
        query[: self.network.batch_size] = x[: self.network.batch_size]

        for i in range(self.network.batch_size):
            qi = self.network.batch_size + i * self.network.num_sub_prompts
            query[qi : qi + self.network.num_sub_prompts] = x[self.network.batch_size + i]

        if has_real_uncond:
            query[-self.network.batch_size :] = x[-self.network.batch_size :]

        return query

    def sub_prompt_forward(self, x):
        """Apply LoRA only to the rows of `x` that belong to the current sub prompt."""
        if x.size()[0] == self.network.batch_size:  # if uncond in text_encoder, do not apply LoRA
            return self.org_forward(x)

        emb_idx = self.network.sub_prompt_index
        if not self.text_encoder:
            emb_idx += self.network.batch_size

        # apply sub prompt of X
        lx = x[emb_idx :: self.network.num_sub_prompts]
        lx = self.lora_up(self.lora_down(lx)) * self.multiplier * self.scale

        x = self.org_forward(x)
        x[emb_idx :: self.network.num_sub_prompts] += lx

        return x

    def to_out_forward(self, x):
        """
        Forward for "attn2_to_out" modules.

        Each network accumulates its LoRA output and mask in `network.shared`; the
        network flagged `is_last_network` creates the shared entry and, after the
        original forward, combines the per-sub-prompt outputs with a mask-weighted sum.
        """
        if self.network.is_last_network:
            masks = [None] * self.network.num_sub_prompts
            self.network.shared[self.lora_name] = (None, masks)
        else:
            lx, masks = self.network.shared[self.lora_name]

        # call own LoRA
        x1 = x[self.network.batch_size + self.network.sub_prompt_index :: self.network.num_sub_prompts]
        lx1 = self.lora_up(self.lora_down(x1)) * self.multiplier * self.scale

        if self.network.is_last_network:
            lx = torch.zeros(
                (self.network.num_sub_prompts * self.network.batch_size, *lx1.size()[1:]), device=lx1.device, dtype=lx1.dtype
            )
            self.network.shared[self.lora_name] = (lx, masks)

        lx[self.network.sub_prompt_index :: self.network.num_sub_prompts] += lx1
        masks[self.network.sub_prompt_index] = self.get_mask_for_x(lx1)

        # if not last network, return x and masks
        x = self.org_forward(x)
        if not self.network.is_last_network:
            return x

        lx, masks = self.network.shared.pop(self.lora_name)

        # if last network, combine separated x with mask weighted sum
        has_real_uncond = x.size()[0] // self.network.batch_size == self.network.num_sub_prompts + 2

        out = torch.zeros((self.network.batch_size * (3 if has_real_uncond else 2), *x.size()[1:]), device=x.device, dtype=x.dtype)
        out[: self.network.batch_size] = x[: self.network.batch_size]  # uncond
        if has_real_uncond:
            out[-self.network.batch_size :] = x[-self.network.batch_size :]  # real_uncond

        # if num_sub_prompts > num of LoRAs, fill with zero
        for i in range(len(masks)):
            if masks[i] is None:
                masks[i] = torch.zeros_like(masks[0])

        mask = torch.cat(masks)
        mask_sum = torch.sum(mask, dim=0) + 1e-4  # small constant added to the divisor
        for i in range(self.network.batch_size):
            # process one image at a time
            lx1 = lx[i * self.network.num_sub_prompts : (i + 1) * self.network.num_sub_prompts]
            lx1 = lx1 * mask
            lx1 = torch.sum(lx1, dim=0)

            xi = self.network.batch_size + i * self.network.num_sub_prompts
            x1 = x[xi : xi + self.network.num_sub_prompts]
            x1 = x1 * mask
            x1 = torch.sum(x1, dim=0)
            x1 = x1 / mask_sum

            x1 = x1 + lx1
            out[self.network.batch_size + i] = x1

        return out

def parse_block_lr_kwargs(nw_kwargs):
    """
    Read the per-block learning-rate weights from the network kwargs.

    Reads "down_lr_weight", "mid_lr_weight", "up_lr_weight" and
    "block_lr_zero_threshold" from `nw_kwargs`. Returns (None, None, None) when
    none of the three weights is present; otherwise returns the
    (down, mid, up) result of `get_block_lr_weight`.
    """

    def _split_if_list(raw):
        # comma-separated text becomes a list of floats (empty entries count as 0.0);
        # anything without a comma is handed on unchanged
        if raw is not None and "," in raw:
            return [float(token) if token else 0.0 for token in raw.split(",")]
        return raw

    down = nw_kwargs.get("down_lr_weight", None)
    mid = nw_kwargs.get("mid_lr_weight", None)
    up = nw_kwargs.get("up_lr_weight", None)

    # nothing configured at all: the feature is disabled
    if all(weight is None for weight in (down, mid, up)):
        return None, None, None

    # extract learning rate weight for each block
    down = _split_if_list(down)
    if mid is not None:
        mid = float(mid)
    up = _split_if_list(up)

    zero_threshold = float(nw_kwargs.get("block_lr_zero_threshold", 0.0))
    down, mid, up = get_block_lr_weight(down, mid, up, zero_threshold)
    return down, mid, up

def create_network(
    multiplier: float,
    network_dim: Optional[int],
    network_alpha: Optional[float],
    vae: AutoencoderKL,
    text_encoder: Union[CLIPTextModel, List[CLIPTextModel]],
    unet,
    neuron_dropout: Optional[float] = None,
    **kwargs,
):
    """
    Build a LoRANetwork from training arguments.

    `network_dim` defaults to 4 and `network_alpha` to 1.0. Optional settings are
    read from `kwargs`: "conv_dim", "conv_alpha", "block_dims", "block_alphas",
    "conv_block_dims", "conv_block_alphas", "rank_dropout", "module_dropout" and
    the block learning-rate keys handled by `parse_block_lr_kwargs`.
    `vae` is not used by this function.
    """

    def _optional_float(key):
        raw = kwargs.get(key, None)
        return None if raw is None else float(raw)

    network_dim = 4 if network_dim is None else network_dim  # default
    network_alpha = 1.0 if network_alpha is None else network_alpha

    # extract dim/alpha for conv2d, and block dim
    conv_dim = kwargs.get("conv_dim", None)
    conv_alpha = kwargs.get("conv_alpha", None)
    if conv_dim is not None:
        conv_dim = int(conv_dim)
        conv_alpha = 1.0 if conv_alpha is None else float(conv_alpha)

    # block dim/alpha/lr
    block_dims = kwargs.get("block_dims", None)
    down_lr_weight, mid_lr_weight, up_lr_weight = parse_block_lr_kwargs(kwargs)
    has_block_lr = any(weight is not None for weight in (down_lr_weight, mid_lr_weight, up_lr_weight))

    # per-block dims (ranks) are enabled as soon as any of the above is specified
    block_alphas = None
    conv_block_dims = None
    conv_block_alphas = None
    if block_dims is not None or has_block_lr:
        block_dims, block_alphas, conv_block_dims, conv_block_alphas = get_block_dims_and_alphas(
            block_dims,
            kwargs.get("block_alphas", None),
            network_dim,
            network_alpha,
            kwargs.get("conv_block_dims", None),
            kwargs.get("conv_block_alphas", None),
            conv_dim,
            conv_alpha,
        )

        # remove block dim/alpha without learning rate
        block_dims, block_alphas, conv_block_dims, conv_block_alphas = remove_block_dims_and_alphas(
            block_dims, block_alphas, conv_block_dims, conv_block_alphas, down_lr_weight, mid_lr_weight, up_lr_weight
        )

    # rank/module dropout
    rank_dropout = _optional_float("rank_dropout")
    module_dropout = _optional_float("module_dropout")

    # quite a lot of arguments
    network = LoRANetwork(
        text_encoder,
        unet,
        multiplier=multiplier,
        lora_dim=network_dim,
        alpha=network_alpha,
        dropout=neuron_dropout,
        rank_dropout=rank_dropout,
        module_dropout=module_dropout,
        conv_lora_dim=conv_dim,
        conv_alpha=conv_alpha,
        block_dims=block_dims,
        block_alphas=block_alphas,
        conv_block_dims=conv_block_dims,
        conv_block_alphas=conv_block_alphas,
        varbose=True,
    )

    if has_block_lr:
        network.set_block_lr_weight(up_lr_weight, mid_lr_weight, down_lr_weight)

    return network

# このメソッドは外部から呼び出される可能性を考慮しておく

# network_dim, network_alpha にはデフォルト値が入っている。

# block_dims, block_alphas は両方ともNoneまたは両方とも値が入っている

# conv_dim, conv_alpha は両方ともNoneまたは両方とも値が入っている

def get_block_dims_and_alphas(
    block_dims, block_alphas, network_dim, network_alpha, conv_block_dims, conv_block_alphas, conv_dim, conv_alpha
):
    """
    Expand the block dim/alpha settings into one value per block.

    `block_dims`, `block_alphas`, `conv_block_dims` and `conv_block_alphas` are
    comma-separated strings or None. Each given string must contain
    `LoRANetwork.NUM_OF_BLOCKS * 2 + 1` entries (checked with `assert`).
    Missing block dims/alphas fall back to `network_dim` / `network_alpha`;
    missing conv settings fall back to `conv_dim` / `conv_alpha`, or stay None
    when `conv_dim` is None as well.

    Returns:
        (block_dims, block_alphas, conv_block_dims, conv_block_alphas)
    """
    num_total_blocks = LoRANetwork.NUM_OF_BLOCKS * 2 + 1

    def _parse_csv(text, cast):
        return [cast(token) for token in text.split(",")]

    # block_dims and block_alphas always end up with a value
    if block_dims is None:
        logger.warning(
            f"block_dims is not specified. all dims are set to {network_dim} / block_dimsが指定されていません。すべてのdimは{network_dim}になります"
        )
        block_dims = [network_dim] * num_total_blocks
    else:
        block_dims = _parse_csv(block_dims, int)
        assert (
            len(block_dims) == num_total_blocks
        ), f"block_dims must have {num_total_blocks} elements / block_dimsは{num_total_blocks}個指定してください"

    if block_alphas is None:
        logger.warning(
            f"block_alphas is not specified. all alphas are set to {network_alpha} / block_alphasが指定されていません。すべてのalphaは{network_alpha}になります"
        )
        block_alphas = [network_alpha] * num_total_blocks
    else:
        block_alphas = _parse_csv(block_alphas, float)
        assert (
            len(block_alphas) == num_total_blocks
        ), f"block_alphas must have {num_total_blocks} elements / block_alphasは{num_total_blocks}個指定してください"

    # conv_block_dims / conv_block_alphas are parsed only when given; otherwise conv_dim / conv_alpha are used
    if conv_block_dims is None:
        if conv_dim is None:
            return block_dims, block_alphas, None, None

        logger.warning(
            f"conv_dim/alpha for all blocks are set to {conv_dim} and {conv_alpha} / すべてのブロックのconv_dimとalphaは{conv_dim}および{conv_alpha}になります"
        )
        conv_block_dims = [conv_dim] * num_total_blocks
        conv_block_alphas = [conv_alpha] * num_total_blocks
        return block_dims, block_alphas, conv_block_dims, conv_block_alphas

    conv_block_dims = _parse_csv(conv_block_dims, int)
    assert (
        len(conv_block_dims) == num_total_blocks
    ), f"conv_block_dims must have {num_total_blocks} elements / conv_block_dimsは{num_total_blocks}個指定してください"

    if conv_block_alphas is None:
        if conv_alpha is None:
            conv_alpha = 1.0
        logger.warning(
            f"conv_block_alphas is not specified. all alphas are set to {conv_alpha} / conv_block_alphasが指定されていません。すべてのalphaは{conv_alpha}になります"
        )
        conv_block_alphas = [conv_alpha] * num_total_blocks
    else:
        conv_block_alphas = _parse_csv(conv_block_alphas, float)
        assert (
            len(conv_block_alphas) == num_total_blocks
        ), f"conv_block_alphas must have {num_total_blocks} elements / conv_block_alphasは{num_total_blocks}個指定してください"

    return block_dims, block_alphas, conv_block_dims, conv_block_alphas

# 層別学習率用に層ごとの学習率に対する倍率を定義する、外部から呼び出される可能性を考慮しておく

def get_block_lr_weight(
    down_lr_weight, mid_lr_weight, up_lr_weight, zero_threshold
) -> Tuple[Optional[List[float]], Optional[float], Optional[List[float]]]:
    """
    Resolve the per-block learning-rate multipliers.

    Args:
        down_lr_weight: list of floats, a preset name ("cosine", "sine", "linear",
            "reverse_linear", "zeros", optionally followed by "+<base>"), or None.
        mid_lr_weight: float or None.
        up_lr_weight: same forms as `down_lr_weight`.
        zero_threshold: weights that are not greater than this value become 0.

    Returns:
        (down_lr_weight, mid_lr_weight, up_lr_weight). Lists are truncated or padded
        with 1.0 to `LoRANetwork.NUM_OF_BLOCKS` entries. All three are None when
        nothing was specified; an unknown preset name is logged and yields None
        for that entry.
    """
    # nothing specified: do nothing and keep the previous behaviour
    if up_lr_weight is None and mid_lr_weight is None and down_lr_weight is None:
        return None, None, None

    max_len = LoRANetwork.NUM_OF_BLOCKS  # number of up / down blocks in the full model

    def get_list(name_with_suffix) -> Optional[List[float]]:
        # "<name>+<base>" adds <base> to every generated weight
        tokens = name_with_suffix.split("+")
        name = tokens[0]
        base_lr = float(tokens[1]) if len(tokens) > 1 else 0.0

        if name == "cosine":
            return [math.sin(math.pi * (i / (max_len - 1)) / 2) + base_lr for i in reversed(range(max_len))]
        elif name == "sine":
            return [math.sin(math.pi * (i / (max_len - 1)) / 2) + base_lr for i in range(max_len)]
        elif name == "linear":
            return [i / (max_len - 1) + base_lr for i in range(max_len)]
        elif name == "reverse_linear":
            return [i / (max_len - 1) + base_lr for i in reversed(range(max_len))]
        elif name == "zeros":
            return [0.0 + base_lr] * max_len
        else:
            # the message has two placeholders, so the name must be supplied twice
            logger.error(
                "Unknown lr_weight argument %s is used. Valid arguments: / 不明なlr_weightの引数 %s が使われました。有効な引数:\n\tcosine, sine, linear, reverse_linear, zeros"
                % (name, name)
            )
            return None

    if isinstance(down_lr_weight, str):
        down_lr_weight = get_list(down_lr_weight)
    if isinstance(up_lr_weight, str):
        up_lr_weight = get_list(up_lr_weight)

    if (up_lr_weight is not None and len(up_lr_weight) > max_len) or (
        down_lr_weight is not None and len(down_lr_weight) > max_len
    ):
        logger.warning("down_weight or up_weight is too long. Parameters after %d-th are ignored." % max_len)
        logger.warning("down_weightもしくはup_weightが長すぎます。%d個目以降のパラメータは無視されます。" % max_len)
        # only slice the lists that exist; either one may be None here
        if up_lr_weight is not None:
            up_lr_weight = up_lr_weight[:max_len]
        if down_lr_weight is not None:
            down_lr_weight = down_lr_weight[:max_len]

    if (up_lr_weight is not None and len(up_lr_weight) < max_len) or (
        down_lr_weight is not None and len(down_lr_weight) < max_len
    ):
        logger.warning("down_weight or up_weight is too short. Parameters after %d-th are filled with 1." % max_len)
        logger.warning("down_weightもしくはup_weightが短すぎます。%d個目までの不足したパラメータは1で補われます。" % max_len)
        if down_lr_weight is not None and len(down_lr_weight) < max_len:
            down_lr_weight = down_lr_weight + [1.0] * (max_len - len(down_lr_weight))
        if up_lr_weight is not None and len(up_lr_weight) < max_len:
            up_lr_weight = up_lr_weight + [1.0] * (max_len - len(up_lr_weight))

    if (up_lr_weight is not None) or (mid_lr_weight is not None) or (down_lr_weight is not None):
        logger.info("apply block learning rate / 階層別学習率を適用します。")
        if down_lr_weight is not None:
            down_lr_weight = [w if w > zero_threshold else 0 for w in down_lr_weight]
            logger.info(f"down_lr_weight (shallower -> deeper, 浅い層->深い層): {down_lr_weight}")
        else:
            logger.info("down_lr_weight: all 1.0, すべて1.0")

        if mid_lr_weight is not None:
            mid_lr_weight = mid_lr_weight if mid_lr_weight > zero_threshold else 0
            logger.info(f"mid_lr_weight: {mid_lr_weight}")
        else:
            logger.info("mid_lr_weight: 1.0")

        if up_lr_weight is not None:
            up_lr_weight = [w if w > zero_threshold else 0 for w in up_lr_weight]
            logger.info(f"up_lr_weight (deeper -> shallower, 深い層->浅い層): {up_lr_weight}")
        else:
            logger.info("up_lr_weight: all 1.0, すべて1.0")

    return down_lr_weight, mid_lr_weight, up_lr_weight

# lr_weightが0のblockをblock_dimsから除外する、外部から呼び出す可能性を考慮しておく

def remove_block_dims_and_alphas(
    block_dims, block_alphas, conv_block_dims, conv_block_alphas, down_lr_weight, mid_lr_weight, up_lr_weight
):
    """
    Set the dim of every block whose learning-rate weight is 0 to 0.

    `block_dims` (and `conv_block_dims` when it is not None) are modified in
    place. Down blocks start at index 0, the mid block sits at index
    `LoRANetwork.NUM_OF_BLOCKS`, and up blocks follow it. The alpha lists are
    returned unchanged.

    Returns:
        (block_dims, block_alphas, conv_block_dims, conv_block_alphas)
    """

    def _zero_out(position):
        # a dim of 0 marks the block as removed
        block_dims[position] = 0
        if conv_block_dims is not None:
            conv_block_dims[position] = 0

    if down_lr_weight is not None:
        for position, weight in enumerate(down_lr_weight):
            if weight == 0:
                _zero_out(position)

    if mid_lr_weight is not None and mid_lr_weight == 0:
        _zero_out(LoRANetwork.NUM_OF_BLOCKS)

    if up_lr_weight is not None:
        for position, weight in enumerate(up_lr_weight):
            if weight == 0:
                _zero_out(LoRANetwork.NUM_OF_BLOCKS + 1 + position)

    return block_dims, block_alphas, conv_block_dims, conv_block_alphas

# 外部から呼び出す可能性を考慮しておく

def get_block_index(lora_name: str) -> int:
    """
    Map a LoRA module name to its block index.

    Names matched by `RE_UPDOWN` give `1 + idx` for down blocks and
    `LoRANetwork.NUM_OF_BLOCKS + 1 + idx` for up blocks, where `idx` is
    `3 * block + layer` for resnets / attentions and `3 * block + 2` for
    up/down samplers. Names containing "mid_block_" give
    `LoRANetwork.NUM_OF_BLOCKS`. Anything else gives -1 (invalid LoRA name).
    """
    matched = RE_UPDOWN.search(lora_name)
    if not matched:
        if "mid_block_" in lora_name:
            return LoRANetwork.NUM_OF_BLOCKS  # idx=12
        return -1  # invalid lora name

    direction, block_no, layer_kind, layer_no = matched.groups()
    block_no = int(block_no)
    layer_no = int(layer_no)

    if layer_kind in ("resnets", "attentions"):
        idx = 3 * block_no + layer_no
    else:
        # upsamplers / downsamplers always take the third slot of their block
        idx = 3 * block_no + 2

    if direction == "down":
        return 1 + idx  # no LoRA corresponds to index 0
    return LoRANetwork.NUM_OF_BLOCKS + 1 + idx

# Create network from weights for inference, weights are not loaded here (because can be merged)

def create_network_from_weights(multiplier, file, vae, text_encoder, unet, weights_sd=None, for_inference=False, **kwargs):
    """
    Create a LoRANetwork whose per-module dim/alpha are taken from saved weights.

    The weights themselves are not loaded into the network here (they may be
    merged instead). When `weights_sd` is None the state dict is read from
    `file` (safetensors when the extension is ".safetensors", `torch.load`
    otherwise). `vae` is not used by this function.

    Returns:
        (network, weights_sd)
    """
    if weights_sd is None:
        _, extension = os.path.splitext(file)
        if extension == ".safetensors":
            from safetensors.torch import load_file, safe_open

            weights_sd = load_file(file)
        else:
            # NOTE(review): torch.load unpickles the file; only use it on trusted checkpoints
            weights_sd = torch.load(file, map_location="cpu")

    # get dim/alpha mapping
    modules_dim = {}
    modules_alpha = {}
    for key, value in weights_sd.items():
        if "." not in key:
            continue

        lora_name, _, _ = key.partition(".")
        if "alpha" in key:
            modules_alpha[lora_name] = value
        elif "lora_down" in key:
            # the first dimension of the down weight is the rank
            modules_dim[lora_name] = value.size()[0]

    # support old LoRA without alpha
    for lora_name, dim in modules_dim.items():
        modules_alpha.setdefault(lora_name, dim)

    if for_inference:
        module_class = LoRAInfModule
    else:
        module_class = LoRAModule

    network = LoRANetwork(
        text_encoder, unet, multiplier=multiplier, modules_dim=modules_dim, modules_alpha=modules_alpha, module_class=module_class
    )

    # block lr
    down_lr_weight, mid_lr_weight, up_lr_weight = parse_block_lr_kwargs(kwargs)
    if any(weight is not None for weight in (up_lr_weight, mid_lr_weight, down_lr_weight)):
        network.set_block_lr_weight(up_lr_weight, mid_lr_weight, down_lr_weight)

    return network, weights_sd

class LoRANetwork(torch.nn.Module):

NUM_OF_BLOCKS = 12 # フルモデル相当でのup,downの層の数

UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"]

UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"]

TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]

LORA_PREFIX_UNET = "lora_unet"

LORA_PREFIX_TEXT_ENCODER = "lora_te"

# SDXL: must starts with LORA_PREFIX_TEXT_ENCODER

LORA_PREFIX_TEXT_ENCODER1 = "lora_te1"

LORA_PREFIX_TEXT_ENCODER2 = "lora_te2"

def __init__(

self,

text_encoder: Union[List[CLIPTextModel], CLIPTextModel],

unet,

multiplier: float = 1.0,

lora_dim: int = 4,

alpha: float = 1,

dropout: Optional[float] = None,

rank_dropout: Optional[float] = None,

module_dropout: Optional[float] = None,

conv_lora_dim: Optional[int] = None,

conv_alpha: Optional[float] = None,

block_dims: Optional[List[int]] = None,

block_alphas: Optional[List[float]] = None,

conv_block_dims: Optional[List[int]] = None,

conv_block_alphas: Optional[List[float]] = None,

modules_dim: Optional[Dict[str, int]] = None,

modules_alpha: Optional[Dict[str, int]] = None,

module_class: Type[object] = LoRAModule,

varbose: Optional[bool] = False,

) -> None:

"""

LoRA network: すごく引数が多いが、パターンは以下の通り

1. lora_dimとalphaを指定

2. lora_dim、alpha、conv_lora_dim、conv_alphaを指定

3. block_dimsとblock_alphasを指定 : Conv2d3x3には適用しない

4. block_dims、block_alphas、conv_block_dims、conv_block_alphasを指定 : Conv2d3x3にも適用する

5. modules_dimとmodules_alphaを指定 (推論用)

"""

super().__init__()

self.multiplier = multiplier

self.lora_dim = lora_dim

self.alpha = alpha

self.conv_lora_dim = conv_lora_dim

self.conv_alpha = conv_alpha

self.dropout = dropout

self.rank_dropout = rank_dropout

self.module_dropout = module_dropout

if modules_dim is not None:

logger.info(f"create LoRA network from weights")

elif block_dims is not None:

logger.info(f"create LoRA network from block_dims")

logger.info(

f"neuron dropout: p={self.dropout}, rank dropout: p={self.rank_dropout}, module dropout: p={self.module_dropout}"

)

logger.info(f"block_dims: {block_dims}")

logger.info(f"block_alphas: {block_alphas}")

if conv_block_dims is not None:

logger.info(f"conv_block_dims: {conv_block_dims}")

logger.info(f"conv_block_alphas: {conv_block_alphas}")

else:

logger.info(f"create LoRA network. base dim (rank): {lora_dim}, alpha: {alpha}")

logger.info(

f"neuron dropout: p={self.dropout}, rank dropout: p={self.rank_dropout}, module dropout: p={self.module_dropout}"

)

if self.conv_lora_dim is not None:

logger.info(

f"apply LoRA to Conv2d with kernel size (3,3). dim (rank): {self.conv_lora_dim}, alpha: {self.conv_alpha}"

)

# create module instances

def create_modules(
    is_unet: bool,
    text_encoder_idx: Optional[int],  # None, 1, 2
    root_module: torch.nn.Module,
    target_replace_modules: List[str],
) -> "Tuple[List[LoRAModule], List[str]]":
    """Create LoRA modules for the Linear/Conv2d layers found under ``root_module``.

    Every module of ``root_module`` whose class name is listed in
    ``target_replace_modules`` is searched for children whose class name is
    ``Linear`` or ``Conv2d``. For each such child a (dim, alpha) pair is
    resolved from the enclosing constructor's arguments and, when the dim is
    usable, a ``module_class`` instance is created for it.

    Args:
        is_unet: True to use the U-Net name prefix and allow ``block_dims``.
        text_encoder_idx: None for the single text-encoder prefix, 1 or 2 to
            select the numbered text-encoder prefixes. Ignored when
            ``is_unet`` is True.
        root_module: Module whose ``named_modules()`` are scanned.
        target_replace_modules: Class names (compared against
            ``module.__class__.__name__``) of the containers to descend into.

    Returns:
        A tuple ``(loras, skipped)``: the created LoRA modules, and the names
        of candidate layers that were skipped because their dim is None or 0.
    """
    # Pick the name prefix for the model being wrapped.
    if is_unet:
        prefix = self.LORA_PREFIX_UNET
    elif text_encoder_idx is None:
        prefix = self.LORA_PREFIX_TEXT_ENCODER
    elif text_encoder_idx == 1:
        prefix = self.LORA_PREFIX_TEXT_ENCODER1
    else:
        prefix = self.LORA_PREFIX_TEXT_ENCODER2

    loras = []
    skipped = []
    for name, module in root_module.named_modules():
        if module.__class__.__name__ not in target_replace_modules:
            continue

        for child_name, child_module in module.named_modules():
            is_linear = child_module.__class__.__name__ == "Linear"
            is_conv2d = child_module.__class__.__name__ == "Conv2d"
            if not (is_linear or is_conv2d):
                continue
            is_conv2d_1x1 = is_conv2d and child_module.kernel_size == (1, 1)

            # Dots are replaced with underscores to build the LoRA module name.
            lora_name = (prefix + "." + name + "." + child_name).replace(".", "_")

            dim = None
            alpha = None

            if modules_dim is not None:
                # Per-module dims were given explicitly: only listed modules get a LoRA.
                if lora_name in modules_dim:
                    dim = modules_dim[lora_name]
                    alpha = modules_alpha[lora_name]
            elif is_unet and block_dims is not None:
                # U-Net with per-block dims.
                block_idx = get_block_index(lora_name)
                if is_linear or is_conv2d_1x1:
                    dim = block_dims[block_idx]
                    alpha = block_alphas[block_idx]
                elif conv_block_dims is not None:
                    dim = conv_block_dims[block_idx]
                    alpha = conv_block_alphas[block_idx]
            else:
                # Default: every Linear / 1x1 Conv2d is targeted; other Conv2d
                # layers only when conv_lora_dim is set.
                if is_linear or is_conv2d_1x1:
                    dim = self.lora_dim
                    alpha = self.alpha
                elif self.conv_lora_dim is not None:
                    dim = self.conv_lora_dim
                    alpha = self.conv_alpha

            if dim is None or dim == 0:
                # Record the skip, except for non-1x1 Conv2d layers when no conv
                # dims were configured at all (those are never candidates).
                if is_linear or is_conv2d_1x1 or (self.conv_lora_dim is not None or conv_block_dims is not None):
                    skipped.append(lora_name)
                continue

            lora = module_class(
                lora_name,
                child_module,
                self.multiplier,
                dim,
                alpha,
                dropout=dropout,
                rank_dropout=rank_dropout,
                module_dropout=module_dropout,
            )
            loras.append(lora)

    return loras, skipped

text_encoders = text_encoder if type(text_encoder) == list else [text_encoder]

# create LoRA for text encoder

# 毎回すべてのモジュールを作るのは無駄なので要検討

self.text_encoder_loras = []

skipped_te = []

for i, text_encoder in enumerate(text_encoders):

if len(text_encoders) > 1:

index = i + 1

logger.info(f"create LoRA for Text Encoder {index}:")

else:

index = None

logger.info(f"create LoRA for Text Encoder:")

text_encoder_loras, skipped = create_modules(False, index, text_encoder, LoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE)

self.text_encoder_loras.extend(text_encoder_loras)

skipped_te += skipped

logger.info(f"create LoRA for Text Encoder: {len(self.text_encoder_loras)} modules.")

# extend U-Net target modules if conv2d 3x3 is enabled, or load from weights

target_modules = LoRANetwork.UNET_TARGET_REPLACE_MODULE

if modules_dim is not None or self.conv_lora_dim is not None or conv_block_dims is not None:

target_modules += LoRANetwork.UNET_TARGET_REPLACE_MODULE_CONV2D_3X3

self.unet_loras, skipped_un = create_modules(True, None, unet, target_modules)

logger.info(f"create LoRA for U-Net: {len(self.unet_loras)} modules.")

skipped = skipped_te + skipped_un

if varbose and len(skipped) > 0:

logger.warning(

f"because block_lr_weight is 0 or dim (rank) is 0, {len(skipped)} LoRA modules are skipped / block_lr_weightまたはdim (rank)が0の為、次の{len(skipped)}個のLoRAモジュールはスキップされます:"

)

for name in skipped:

logger.info(f"\t{name}")

self.up_lr_weight: List[float] = None

self.down_lr_weight: List[float] = None

self.mid_lr_weight: float = None

self.block_lr = False

# assertion

names = set()

for lora in self.text_encoder_loras + self.unet_loras:

assert lora.lora_name not in names, f"duplicated lora name: {lora.lora_name}"

names.add(lora.lora_name)

def set_multiplier(self, multiplier):
    """Store ``multiplier`` on the network and copy it onto every LoRA module.

    Both the text-encoder LoRA modules and the U-Net LoRA modules are updated.
    """
    self.multiplier = multiplier
    for lora_group in (self.text_encoder_loras, self.unet_loras):
        for lora_module in lora_group:
            lora_module.multiplier = self.multiplier

def set_enabled(self, is_enabled):
    """Set the ``enabled`` attribute of every text-encoder and U-Net LoRA module."""
    for lora_group in (self.text_encoder_loras, self.unet_loras):
        for lora_module in lora_group:
            lora_module.enabled = is_enabled

def load_weights(self, file):
    """Load LoRA weights from ``file`` into this network.

    Files whose extension is ``.safetensors`` (compared case-insensitively)
    are read with ``safetensors.torch.load_file``; any other file is read
    with ``torch.load`` onto the CPU. The resulting state dict is applied
    with ``load_state_dict(..., False)``, i.e. non-strictly.

    Args:
        file: Path of the weights file.

    Returns:
        Whatever ``self.load_state_dict`` returns for the loaded state dict.
    """
    # Compare the extension case-insensitively so that e.g. "model.SAFETENSORS"
    # is not sent down the pickle-based torch.load path.
    extension = os.path.splitext(file)[1].lower()
    if extension == ".safetensors":
        from safetensors.torch import load_file

        weights_sd = load_file(file)
    else:
        # NOTE(security): torch.load relies on pickle; only load files from
        # trusted sources through this branch.
        weights_sd = torch.load(file, map_location="cpu")

    return self.load_state_dict(weights_sd, False)

def apply_to(self, text_encoder, unet, apply_text_encoder=True, apply_unet=True):
    """Activate the LoRA modules and register them as submodules.

    When ``apply_text_encoder`` or ``apply_unet`` is False, the corresponding
    list of LoRA modules is replaced by an empty list. Each remaining LoRA
    module has its ``apply_to()`` called and is then registered on this
    network under its ``lora_name``. ``text_encoder`` and ``unet`` are not
    used by this method.
    """
    if not apply_text_encoder:
        self.text_encoder_loras = []
    else:
        logger.info("enable LoRA for text encoder")

    if not apply_unet:
        self.unet_loras = []
    else:
        logger.info("enable LoRA for U-Net")

    active_loras = self.text_encoder_loras + self.unet_loras
    for lora_module in active_loras:
        lora_module.apply_to()
        self.add_module(lora_module.lora_name, lora_module)

# Returns whether this network can be merged

def is_mergeable(self):
    """Report whether this network can be merged; always True here."""
    return True

# TODO refactor to common function with apply_to

def merge_to(self, text_encoder, unet, weights_sd, dtype, device):

apply_text_encoder = apply_unet = False

for key in weights_sd.keys():

if key.startswith(LoRANetwork.LORA_PREFIX_TEXT_ENCODER):

apply_text_encoder = True

elif key.startswith(LoRANetwork.LORA_PREFIX_UNET):

apply_unet = True

if apply_text_encoder:

logger.info("enable LoRA for text encoder")

else:

self.text_encoder_loras = []

if apply_unet:

logger.info("enable LoRA for U-Net")

else:

self.unet_loras = []

for lora in self.text_encoder_loras + self.unet_loras:

sd_for_lora = {}

for key in weights_sd.keys():

if key.startswith(lora.lora_name):

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

lora.py

lora.py

Files

lora.py

Latest commit

History

lora.py

File metadata and controls