Skip to content

Commit

Permalink
add functions to generate SPEAKER_INFORMATION_WEIGHTS list
Browse files Browse the repository at this point in the history
  • Loading branch information
KeiKinn committed Jul 31, 2023
1 parent a2ffd66 commit 91d7c77
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 9 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
sample
.idea
.vscode
main.py
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
24 changes: 15 additions & 9 deletions matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@
from torch import Tensor
from torchaudio.sox_effects import apply_effects_tensor
from wavlm.WavLM import WavLM
from utils import generate_matrix_from_index


SPEAKER_INFORMATION_WEIGHTS = [
0, 0, 0, 0, 0, 0, # layer 0-5
1.0, 0, 0, 0,
0, 0, 0, 0, 0, 0, # layer 15
0, 0, 0, 0, 0, 0, # layer 16-21
0, # layer 22
0, 0 # layer 23-24
]
# SPEAKER_INFORMATION_WEIGHTS = [
# 0, 0, 0, 0, 0, 0, # layer 0-5
# 1.0, 0, 0, 0,
# 0, 0, 0, 0, 0, 0, # layer 15
# 0, 0, 0, 0, 0, 0, # layer 16-21
# 0, # layer 22
# 0, 0 # layer 23-24
# ]
SPEAKER_INFORMATION_LAYER = 6
SPEAKER_INFORMATION_WEIGHTS = generate_matrix_from_index(SPEAKER_INFORMATION_LAYER)


def fast_cosine_dist(source_feats: Tensor, matching_pool: Tensor, device: str = 'cpu') -> Tensor:
Expand Down Expand Up @@ -97,7 +99,11 @@ def get_features(self, path, weights=None, vad_trigger_level=0):
sr = self.sr
if x.dim() == 1: x = x[None]

# replace assert sr == self.sr, f"input audio sample rate must be 16kHz. Got {sr}"

# replace if sr !=self.sr:
resampler = torchaudio.transforms.Resample(sr, self.sr)
x = resampler(x)
print(f"audio (sr = {sr}) has been resampled to {self.sr}")
if not sr == self.sr :
print(f"resample {sr} to {self.sr} in {path}")
x = torchaudio.functional.resample(x, orig_freq=sr, new_freq=self.sr)
Expand Down

0 comments on commit 91d7c77

Please sign in to comment.