Skip to content
Snippets Groups Projects
Commit 8d730956 authored by Julian Linke's avatar Julian Linke
Browse files

initial commit

parent 482f0286
No related merge requests found
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os, sys
import fairseq
import torch, torchaudio
import numpy as np
import json
def main(exp_path, json_path, n_codes=320 ** 2):
    """Stack all per-speaker ``freqN_*.npy`` vectors into one matrix.

    For every corpus key of the JSON file, each file in
    ``<exp_path>/<corpus>`` whose name contains ``freqN`` is loaded and
    becomes one row of the combined matrix.  The row label is the speaker ID
    (file name without the ``freqN_`` prefix and the extension) followed by
    the speaking style (second ``_``-separated field of the corpus key).

    Writes ``splits_freqs.npy``/``.tsv`` (matrix) and
    ``splits_labels.npy``/``.tsv`` (labels) into ``exp_path``.

    Args:
        exp_path: directory holding one sub-directory per corpus.
        json_path: JSON file of the form
            ``{"corpusA_speakingstyle": [spk1, spk2, ...], ...}``.
        n_codes: length of each frequency vector (defaults to ``320 ** 2``).
    """
    with open(json_path, 'r') as f:
        corpora = json.load(f)

    # Scan each corpus directory exactly once, so the number of allocated
    # rows and the number of loaded files can never disagree.
    entries = []  # (path of the .npy file, row label)
    for corpus in corpora:
        style = corpus.split("_")[1]  # second entry is always style
        freqN_path = os.path.join(exp_path, corpus)
        for freqN_vec in os.listdir(freqN_path):
            if 'freqN' not in freqN_vec:
                continue
            # Split only at the first '_' so that speaker IDs which contain
            # underscores themselves are kept intact.
            split = os.path.splitext(freqN_vec.split('_', 1)[1])[0]
            entries.append((os.path.join(freqN_path, freqN_vec), f'{split}{style}'))

    cnt = len(entries)
    print('counted {} freqN-files'.format(cnt))
    freqN_convs = np.zeros((cnt, n_codes), dtype=np.float32)
    spks_vec = np.zeros((cnt,), dtype=object)

    print('... start combining freqN_*.npy-files coming from path {}'.format(exp_path))
    for idx, (vec_path, label) in enumerate(entries):
        print('read and append split {} ...'.format(vec_path))
        freqN_convs[idx, :] = np.load(vec_path)
        spks_vec[idx] = label

    freq_path = os.path.join(exp_path, 'splits_freqs')
    splits_path = os.path.join(exp_path, 'splits_labels')
    print('\nwrite {}.npy and {}.npy'.format(freq_path, splits_path))
    np.save(freq_path, freqN_convs)
    np.save(splits_path, spks_vec)
    print('\nwrite {}.tsv and {}.tsv'.format(freq_path, splits_path))
    np.savetxt(freq_path + '.tsv', freqN_convs, delimiter='\t')
    np.savetxt(splits_path + '.tsv', spks_vec, fmt='%s', delimiter='\t')
if __name__ == "__main__":
    # Positional arguments: <exp_path> <json_path>
    # Abort with a non-zero exit code as soon as an argument is missing;
    # continuing would only fail later with a NameError in the main() call.
    ############################# EXP PATH ###########################
    if len(sys.argv) < 2:
        sys.exit("ERROR: exp_path not specified")
    exp_path = sys.argv[1]
    print("exp path is: " + exp_path)
    ############################# JSON PATH ###########################
    if len(sys.argv) < 3:
        sys.exit("ERROR: json_path not specified")
    json_path = sys.argv[2]
    print("json path is: " + json_path)
    main(exp_path, json_path)
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os, sys
import fairseq
import torch, torchaudio
import numpy as np
import json
def print_list(l):
    """Print every element of ``l`` on a line of its own."""
    for line in map(str, l):
        print(line)
def count_freq(codebook_all_indexes, freq, verbose=None):
    """Add the codebook indices of one utterance to a running histogram.

    Args:
        codebook_all_indexes: sequence with one codebook index per frame;
            every element must provide ``.item()`` (e.g. a 1-D tensor).
        freq: dict mapping codebook index -> count; updated in place.  An
            index that is not a key of ``freq`` raises ``KeyError``.
        verbose: print the index of every frame when truthy.  ``None``
            (default) falls back to the module-level ``VERBOSE`` flag, or to
            0 when that flag was never defined (e.g. when this module is
            imported instead of run as a script).

    Returns:
        ``(freq, seq)`` where ``seq`` is the list of indices in frame order.
    """
    if verbose is None:
        verbose = globals().get('VERBOSE', 0)
    seq = []
    for t, index in enumerate(codebook_all_indexes):
        codebook_idx_at_time_t = index.item()  # tensor(integer).item()
        if verbose:
            print('(Verbose) frame={}: used codebook entry: {}'.format(t + 1, codebook_idx_at_time_t))
        seq.append(codebook_idx_at_time_t)
        freq[codebook_idx_at_time_t] += 1
    return freq, seq
def calc_codebook_indexes(audio_path, freq, N):
    """Quantise one audio file and add its codebook indices to ``freq``.

    Relies on the module-level ``model`` and ``device`` that ``main`` sets.

    Args:
        audio_path: path of the audio file to load with torchaudio.
        freq: running histogram (codebook index -> count), updated in place.
        N: number of feature vectors counted so far; only used for the
            progress message, the caller accumulates the total itself.

    Returns:
        ``(freq, Nwav)`` where ``Nwav`` is the number of feature vectors
        found in this file; 0 when the file is too short and was skipped.
    """
    x, fs = torchaudio.load(audio_path)
    x = x.to(device)  # torch.Size([1, 57120]) [1 x Samples]
    n_samples = x.shape[1]
    if n_samples <= 512:
        # Report the number of samples (len(x) would be the channel count)
        # and return a well-defined count instead of an unbound variable.
        print('WARNING: Input size of file is {} (smaller than Kernel size), skip ...'.format(n_samples))
        return freq, 0

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        C = model.quantize(x)
        codebook_G2_indices = C[1]  # torch.Size([1, 178, 2]) [1 x T x G]; G=2
        codebook_all_indexes = model.quantizer.to_codebook_index(codebook_G2_indices)[0]  # torch.Size([178]) [T]

    Nwav = len(codebook_all_indexes)
    freq, seq = count_freq(codebook_all_indexes, freq)
    print('feature vectors: {}/{} (file/all)'.format(Nwav, N + Nwav))
    return freq, Nwav
def _read_utterance_list(lst_path):
    """Parse the list file once into ``(corpus, speaker, audio_path)`` tuples."""
    utterances = []
    with open(lst_path, 'r') as tsv:
        for row in tsv:
            if not row.strip():
                continue  # tolerate blank lines
            uttID, audio_path = row.split()
            # .../data_corpus_speakingstyle/spk/*wav -- index from the end so
            # that the depth of the leading data directory does not matter.
            parts = audio_path.split('/')
            corpus_match = '_'.join(parts[-3].split('_')[1:])
            spk_match = parts[-2]
            utterances.append((corpus_match, spk_match, audio_path))
    return utterances


def _extract_corpus_freqs(exp_path, corpus, spks, utterances, n_codes):
    """Count, normalise and store the codebook usage of every speaker of one corpus."""
    style = corpus.split("_")[1]  # second entry is always style
    txt_dir = os.path.join(exp_path, 'txt', corpus)
    npy_dir = os.path.join(exp_path, 'numpy', corpus)
    os.makedirs(txt_dir, exist_ok=True)
    os.makedirs(npy_dir, exist_ok=True)

    # prepare numpy arrays
    freqN_spks = np.zeros((len(spks), n_codes), dtype=np.float32)  # [SPKS x n_codes]
    spks_vec = np.zeros((len(spks),), dtype=object)  # [SPKS x 1]

    for idx, spk in enumerate(spks):
        print(f"\n--- speaker {spk} in corpus {corpus} ---")
        # Joint codebook indices are 0-based (0 .. n_codes-1), so the
        # histogram keys have to start at 0 as well.
        freq, N = dict.fromkeys(range(n_codes), 0), 0
        # extract frequencies per speaker
        for corpus_match, spk_match, audio_path in utterances:
            if corpus == corpus_match and spk == spk_match:
                print('\nread wav-file {}'.format(audio_path))
                freq, Nwav = calc_codebook_indexes(audio_path, freq, N)
                N = N + Nwav
        print(f'(DONE) Found {N} observations for speaker {spk} ...')
        if N == 0:
            print(f'WARNING: no observations for speaker {spk}, relative frequencies are set to 0')

        # combine frequencies
        freqN_vec = np.zeros((1, n_codes), dtype=np.float32)
        with open(os.path.join(txt_dir, 'freq_{}.txt'.format(spk)), 'w') as ffreq, \
                open(os.path.join(txt_dir, 'freqN_{}.txt'.format(spk)), 'w') as ffreqN:
            for i, code_entry in enumerate(freq.keys()):
                # Avoid a ZeroDivisionError for speakers without usable audio.
                rel_freq = freq[code_entry] / N if N else 0.0
                ffreq.write('{}\t{}\n'.format(code_entry, freq[code_entry]))
                ffreqN.write('{}\t{}\n'.format(code_entry, rel_freq))
                freqN_vec[0, i] = rel_freq
        np.save(os.path.join(npy_dir, 'freqN_{}'.format(spk)), freqN_vec)

        # combine frequencies per corpus
        print(f"... speaker {spk} is column {idx} of array freq_{corpus}.npy!")
        freqN_spks[idx, :] = freqN_vec
        spks_vec[idx] = f'{spk}{style}'

    # write combined frequencies
    np.save(os.path.join(npy_dir, 'freq_{}'.format(corpus)), freqN_spks)
    np.save(os.path.join(npy_dir, 'spkIDs_{}'.format(corpus)), spks_vec)


def main(exp_path, lst_path, json_path, model_path, VERBOSE):
    """Count codebook usage per speaker and write the histograms to disk.

    For each corpus of the JSON file and each of its speakers, every matching
    audio file of the list file is quantised with the loaded fairseq model
    and its codebook indices are counted.  Per speaker this writes
    ``txt/<corpus>/freq_<spk>.txt`` (counts), ``txt/<corpus>/freqN_<spk>.txt``
    (relative frequencies) and ``numpy/<corpus>/freqN_<spk>.npy``; per corpus
    it writes ``numpy/<corpus>/freq_<corpus>.npy`` and
    ``numpy/<corpus>/spkIDs_<corpus>.npy`` (all below ``exp_path``).

    Args:
        exp_path: experiment directory receiving ``txt``, ``numpy`` and ``logs``.
        lst_path: list file with one ``<uttID> <audio_path>`` pair per line,
            paths shaped like ``.../data_<corpus>_<style>/<spk>/<file>``.
        json_path: JSON file ``{"corpus_speakingstyle": [spk1, spk2, ...], ...}``.
        model_path: fairseq checkpoint to load.
        VERBOSE: when truthy, everything printed while a corpus is processed
            goes to ``logs/codebook_freqs_<corpus>.log`` instead of stdout.
    """
    import contextlib

    # set model and device global
    global model, device
    # Fall back to the CPU when no GPU is available instead of crashing.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # load existing model
    model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([model_path])
    model = model[0]
    model = model.to(device)
    # Inference mode: disables dropout and (presumably, per the fairseq
    # quantizer implementation) the stochastic codebook sampling that is only
    # meant for training -- TODO confirm against the installed fairseq version.
    model.eval()

    # load json ...
    # dict with {"corpusA_speakingstyle": [spk1 spk2,...],
    #            "corpusB_speakingstyle": [spk1 spk2,...], ...}
    with open(json_path, 'r') as f:
        corpora = json.load(f)

    # The list file does not change, so parse it once instead of per speaker.
    utterances = _read_utterance_list(lst_path)
    n_codes = 320 ** 2

    # corpus loop
    for corpus in corpora.keys():
        with contextlib.ExitStack() as stack:
            # LOGFILE: redirect stdout only while this corpus is processed;
            # the file is closed and stdout restored when the block exits.
            if VERBOSE:
                log_dir = os.path.join(exp_path, 'logs')
                os.makedirs(log_dir, exist_ok=True)
                log_file = stack.enter_context(
                    open(os.path.join(log_dir, "codebook_freqs_{}.log".format(corpus)), "w"))
                stack.enter_context(contextlib.redirect_stdout(log_file))
            _extract_corpus_freqs(exp_path, corpus, corpora[corpus], utterances, n_codes)
if __name__ == "__main__":
    # Positional arguments:
    #   <exp_path> <lst_path> <json_path> <model_path> [VERBOSE]
    # Abort with a non-zero exit code when a mandatory argument is missing;
    # continuing would only fail later with a NameError in the main() call.
    required = ["exp_path", "lst_path", "json_path", "model_path"]
    if len(sys.argv) - 1 < len(required):
        sys.exit("ERROR: {} not specified".format(required[len(sys.argv) - 1]))
    exp_path, lst_path, json_path, model_path = sys.argv[1:5]
    print("\nexp path is: " + exp_path)
    print("list path is: " + lst_path)
    print("json path is: " + json_path)
    print("model path is: " + model_path)
    ############################# VERBOSE #############################
    # VERBOSE stays a module-level name because count_freq() reads it.
    try:
        VERBOSE = int(sys.argv[5])
        print("VERBOSE is " + str(VERBOSE) + "\n")
    except (IndexError, ValueError):
        print("VERBOSE is not specified, default is 0!")
        VERBOSE = 0
    main(exp_path, lst_path, json_path, model_path, VERBOSE)
# https://github.com/facebookresearch/fairseq/issues/3741
from omegaconf import DictConfig, OmegaConf, open_dict
import torch
# Strip task options from the checkpoint config and store the result as a
# new checkpoint file next to the original one.
cp_path = 'model/xlsr_53_56k.pt'
# map_location='cpu' lets the checkpoint be patched on machines without a GPU.
cp = torch.load(cp_path, map_location='cpu')
cfg = DictConfig(cp['cfg'])

# Report every "eval_wer*" option found in any config section.
dd = OmegaConf.to_container(cfg, resolve=True)
for k, v in dd.items():
    if not isinstance(v, dict):
        continue
    for key in v:
        if key.split("_")[:2] == ["eval", "wer"]:
            print(k, key)

# Remove the options; a default is passed to pop() so that a checkpoint
# which lacks one of them (e.g. an already patched one) does not abort.
with open_dict(cfg):
    for stale_key in ('eval_wer',
                      'eval_wer_config',
                      'eval_wer_tokenizer',
                      'eval_wer_post_process',
                      'autoregressive'):
        cfg.task.pop(stale_key, None)

cp['cfg'] = cfg
torch.save(cp, 'model/xlsr_53_56k_new.pt')
\ No newline at end of file
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os, sys
import json
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import pairwise_distances
import faiss
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.patches as patches
# Color palette: the n-th corpus is drawn in the n-th color
# (append further entries if more corpora have to be plotted).
colors = [
    'tab:red',
    'tab:blue',
    'tab:green',
    'tab:orange',
    'tab:purple',
    'tab:brown',
    'tab:pink',
    'tab:gray',
    'tab:olive',
    'tab:cyan',
]
# Marker size used by the scatter plots
MS = 30
def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
    """Plot the input matrix as a heat map and its PCA projection as scatter plots.

    Figures written to ``out_path`` (``<name>`` is the file name of
    ``X_path`` without ``.npy``):
      * ``<name>.png``                      -- heat map of the input matrix
      * ``scatter3D_<name>.png/.eps``       -- 3-D scatter of the projection
      * ``scatter_proj<ij>_<name>.png/.eps`` -- 2-D scatter for the component
        pairs (0, 1), (0, 2) and (1, 2)

    Args:
        X_path: ``.npy`` input matrix; the labels are used for both axes of
            the heat map, so it is presumably square -- TODO confirm.
        y_path: ``.npy`` vector with one label per row of the input matrix.
        pcaA_path: ``.npy`` projection matrix ``A``.
        pcab_path: ``.npy`` offset vector ``b`` (projection is ``X @ A + b``).
        out_path: directory receiving the figures.
        json_path: JSON file ``{"corpus_speakingstyle": [spk1, spk2, ...], ...}``;
            rows are assumed to be ordered like the speakers in this file.

    Raises:
        ValueError: if there are more corpora than entries in ``colors`` or
            the number of speakers in the JSON file differs from the number
            of rows to plot.
    """
    # load json ...
    # dict with {"corpusA_speakingstyle": [spk1 spk2,...],
    #            "corpusB_speakingstyle": [spk1 spk2,...], ...}
    with open(json_path, 'r') as f:
        corpora = json.load(f)
    # Explicit check instead of assert (asserts vanish under ``python -O``).
    if len(corpora) > len(colors):
        raise ValueError('Not enough colors for corpora')

    X = np.load(X_path)
    spks = np.load(y_path, allow_pickle=True)
    A = np.load(pcaA_path)
    b = np.load(pcab_path)
    print('\nspkIDs:\n{}'.format(spks))
    print('\ninput matrix:\n{}'.format(X))

    stem = os.path.basename(X_path).replace('.npy', '')

    # --- heat map of the input matrix ---
    labels = list(spks)
    fig, ax = plt.subplots(figsize=(15, 15))
    cax = ax.matshow(X, interpolation='nearest')
    ax.grid()
    plt.xticks(range(len(labels)), labels, rotation=90)
    plt.yticks(range(len(labels)), labels)
    fig.colorbar(cax, ticks=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, .8, .9, 1])
    fig_path = os.path.join(out_path, '{}.png'.format(stem))
    print('\nsave fig {}'.format(fig_path))
    fig.savefig(fig_path)
    plt.close(fig)  # release the figure, several more are created below

    # --- one color per corpus, one entry per speaker ---
    corpus_color = {corpus: colors[idx] for idx, corpus in enumerate(corpora)}
    colordict = {corpus: [corpus_color[corpus]] * len(corpora[corpus]) for corpus in corpora}
    print(colordict)
    # Corpus of every plotted point, in the order of the JSON file.
    point_corpus = [corpus for corpus in corpora for _ in corpora[corpus]]
    col = [corpus_color[corpus] for corpus in point_corpus]

    print("\nProject with PCA (3 dimensions)...")
    X_proj = np.dot(X, A) + b
    print(f"\nshape(X_proj) = {np.shape(X_proj)}")
    print('mean(X_proj,0): {}'.format(np.mean(X_proj, 0)))
    if len(col) != X_proj.shape[0]:
        raise ValueError('JSON file lists {} speakers but the input matrix has {} rows'.format(
            len(col), X_proj.shape[0]))

    # --- 3D plot ---
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X_proj[:, 0], X_proj[:, 1], X_proj[:, 2], c=col)
    # Set the labels and title
    ax.set_xlabel('PCA1')
    ax.set_ylabel('PCA2')
    ax.set_zlabel('PCA3')
    # Create the legend from empty scatters; taking the color from
    # corpus_color also works for a corpus that lists no speakers.
    for corpus, color in corpus_color.items():
        ax.scatter([], [], [], c=color, label=corpus, alpha=1, s=MS)
    # Shrink current axis by 20%
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.85, box.height])
    # Put a legend to the right of the current axis
    ax.legend(loc='center left', bbox_to_anchor=(1.175, .5))
    # Save the plot
    fig_path = os.path.join(out_path, 'scatter3D_{}.png'.format(stem))
    print('\nsave fig {}'.format(fig_path))
    fig.savefig(fig_path)
    fig_path = os.path.join(out_path, 'scatter3D_{}.eps'.format(stem))
    print('save fig {}'.format(fig_path))
    fig.savefig(fig_path, format='eps')
    plt.close(fig)

    # --- 2D projections ---
    plt.rcParams.update({'font.size': 50})
    for proj in [(0, 1), (0, 2), (1, 2)]:
        x, y = X_proj[:, proj[0]], X_proj[:, proj[1]]
        fig, ax = plt.subplots(figsize=(18, 16))
        for i, txt in enumerate(spks):
            print(f"speaker {txt}: x = {x[i]}, y = {y[i]}")
            # Label each point with its corpus directly, so the legend never
            # depends on matching colors back to corpus names afterwards.
            ax.plot(x[i], y[i], 'o', color=col[i], markersize=MS, alpha=.5, label=point_corpus[i])
        handles, point_labels = ax.get_legend_handles_labels()
        by_label = dict(zip(point_labels, handles))  # one legend entry per corpus
        ax.legend(by_label.values(), by_label.keys(), fontsize=30)
        plt.xlabel(f'PCA{str(proj[0]+1)}')
        plt.ylabel(f'PCA{str(proj[1]+1)}')
        plt.grid()
        plt.tight_layout()
        proj_tag = ''.join([str(proj[0]), str(proj[1])])
        fig_path = os.path.join(out_path, 'scatter_proj{}_{}.png'.format(proj_tag, stem))
        print('\nsave fig {}'.format(fig_path))
        fig.savefig(fig_path)
        fig_path = os.path.join(out_path, 'scatter_proj{}_{}.eps'.format(proj_tag, stem))
        print('save fig {}'.format(fig_path))
        fig.savefig(fig_path, format='eps')
        plt.close(fig)
if __name__ == "__main__":
    # Positional arguments, in order, with the message printed for each:
    #   <X_path> <y_path> <pcaA_path> <pcab_path> <out_path> <json_path>
    arg_specs = [
        ("X_path", "input matrix path is: "),
        ("y_path", "label vector path is: "),
        ("pcaA_path", "pca matrix A path is: "),
        ("pcab_path", "pca vector b path is: "),
        ("out_path", "output path is: "),
        ("json_path", "json path is: "),
    ]
    # Abort with a non-zero exit code when an argument is missing;
    # continuing would only fail later with a NameError in the main() call.
    if len(sys.argv) - 1 < len(arg_specs):
        sys.exit("ERROR: {} not specified".format(arg_specs[len(sys.argv) - 1][0]))
    X_path, y_path, pcaA_path, pcab_path, out_path, json_path = sys.argv[1:7]
    for (arg_name, message), value in zip(arg_specs, sys.argv[1:7]):
        print(message + value)
    main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path)
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os
import json
import argparse
def get_speaker_ids_and_lst_lines(corpus_dir):
    """Collect the speakers of one corpus and a list line per audio file.

    Every sub-directory of ``corpus_dir`` is a speaker; every ``.wav`` or
    ``.flac`` file inside it yields one line ``"<uttID> <audio_path>\\n"``,
    where ``uttID`` is the file name without its extension.  Directory
    entries are processed in sorted order so that the result does not depend
    on the order in which the file system returns them.

    Args:
        corpus_dir: directory that contains one sub-directory per speaker.

    Returns:
        ``(spk_ids, lst_lines)``: list of speaker IDs and list of lines.
    """
    spk_ids = []
    lst_lines = []
    for spk in sorted(os.listdir(corpus_dir)):
        spk_dir = os.path.join(corpus_dir, spk)
        if not os.path.isdir(spk_dir):
            continue
        spk_ids.append(spk)
        for audio_file in sorted(os.listdir(spk_dir)):
            if audio_file.endswith(('.wav', '.flac')):
                # Strip only the trailing extension; str.replace() would also
                # remove ".wav"/".flac" from the middle of a file name.
                uttID = os.path.splitext(audio_file)[0]
                audio_path = os.path.join(spk_dir, audio_file)
                lst_lines.append(f"{uttID} {audio_path}\n")
    return spk_ids, lst_lines
def process_DATA_directory(DATA_dir):
    """Walk all corpus directories below ``DATA_dir``.

    The corpus name is the directory name with everything up to and
    including the first ``_`` removed.

    Returns:
        ``(spk_dict, lst_lines)``: dict corpus name -> speaker IDs, and the
        list lines of all corpora concatenated.
    """
    spk_dict, lst_lines = {}, []
    for entry in os.listdir(DATA_dir):
        entry_path = os.path.join(DATA_dir, entry)
        if not os.path.isdir(entry_path):
            continue
        # drop the leading "<prefix>_" of the directory name
        corpus_name = entry.partition('_')[2]
        speakers, corpus_lines = get_speaker_ids_and_lst_lines(entry_path)
        spk_dict[corpus_name] = speakers
        lst_lines += corpus_lines
    return spk_dict, lst_lines
def write_output_files(output_lst_path, output_json_path, lst_lines, spk_dict):
    """Write the list lines and the speaker dictionary to disk.

    ``lst_lines`` is written verbatim to ``output_lst_path``; ``spk_dict`` is
    written as JSON (indented by 4 spaces) to ``output_json_path``.
    """
    lst_text = ''.join(lst_lines)
    with open(output_lst_path, 'w') as lst_file:
        lst_file.write(lst_text)
    json_text = json.dumps(spk_dict, indent=4)
    with open(output_json_path, 'w') as json_file:
        json_file.write(json_text)
def main(output_lst_path, output_json_path, DATA_dir):
    """Scan ``DATA_dir`` and write the resulting list and JSON files."""
    scan_result = process_DATA_directory(DATA_dir)
    write_output_files(
        output_lst_path,
        output_json_path,
        lst_lines=scan_result[1],
        spk_dict=scan_result[0],
    )
if __name__ == "__main__":
    # Command line: three mandatory options, handed to main() unchanged.
    parser = argparse.ArgumentParser(description='Generate DATA.lst and DATA.json files.')
    for flag, help_text in (
        ('--output_lst_path', 'Path to the output .lst file.'),
        ('--output_json_path', 'Path to the output .json file.'),
        ('--DATA_dir', 'Path to the DATA directory.'),
    ):
        parser.add_argument(flag, required=True, help=help_text)
    args = parser.parse_args()
    main(
        output_lst_path=args.output_lst_path,
        output_json_path=args.output_json_path,
        DATA_dir=args.DATA_dir,
    )
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os, sys
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import entropy
from numpy.linalg import norm
def JSD(P, Q):
    """Return a Jensen-Shannon based similarity of ``P`` and ``Q``.

    Both vectors are scaled to unit L1 norm; the result is one minus the
    Jensen-Shannon divergence (mean of the two relative entropies towards
    the mixture, as computed by ``scipy.stats.entropy``), so identical
    distributions give 1.
    """
    p_unit = P / norm(P, ord=1)
    q_unit = Q / norm(Q, ord=1)
    mixture = (p_unit + q_unit) * 0.5
    divergence = (entropy(p_unit, mixture) + entropy(q_unit, mixture)) * 0.5
    return 1 - divergence
def JSD_similarity(X):
    """Return the square matrix of pairwise ``JSD`` values of the rows of ``X``.

    Entry ``[r, c]`` is ``JSD(X[r], X[c])``; the result is float32.
    """
    n_rows = np.shape(X)[0]
    sim = np.zeros((n_rows, n_rows), dtype=np.float32)
    print('\ncalculate JSD similarity matrix of X: {} ...'.format(X.shape))
    for r in range(n_rows):
        first = X[r]
        for c in range(n_rows):
            sim[r, c] = JSD(first, X[c, :])
    return sim
def main(X_path, y_path, out_path, json_path):
    """Sort the feature rows like the JSON file and compute their similarity matrix.

    The rows of the matrix in ``X_path`` and their labels in ``y_path`` are
    reordered to follow the corpus/speaker order of the JSON file, and both
    files are overwritten with the sorted data (``.npy`` and ``.tsv``).  The
    pairwise JSD similarity matrix of the sorted rows is written to
    ``<out_path>/similarity_matrix.npy`` and ``.tsv``.

    JSON speakers without a matching label are reported and left out, and
    rows whose label does not occur in the JSON file are dropped, so the
    sorted data never contains empty placeholder rows.

    Args:
        X_path: ``.npy`` matrix with one feature vector per row.
        y_path: ``.npy`` vector with one ``<speaker><style>`` label per row.
        out_path: directory receiving the similarity matrix.
        json_path: JSON file ``{"corpus_speakingstyle": [spk1, spk2, ...], ...}``.
    """
    # load json ...
    # dict with {"corpusA_speakingstyle": [spk1 spk2,...],
    #            "corpusB_speakingstyle": [spk1 spk2,...], ...}
    with open(json_path, 'r') as f:
        corpora = json.load(f)

    # LOAD MATRIX AND SPLITS
    X = np.load(X_path)
    splits = np.load(y_path, allow_pickle=True)
    print('\ninput splits: {}\n... len: {}'.format(splits, len(splits)))

    # SORT: look up the row of every JSON speaker, in JSON order
    row_of = {split: idx for idx, split in enumerate(splits)}
    order, sorted_labels = [], []
    for corpus in corpora:
        style = corpus.split("_")[1]  # second entry is always style
        for spk in corpora[corpus]:
            split = f'{spk}{style}'
            if split in row_of:
                order.append(row_of[split])
                sorted_labels.append(split)
            else:
                print('Wrong corpora entry: {}?'.format(split))
    if len(order) != len(splits):
        print('WARNING: {} input rows, but {} rows after sorting'.format(len(splits), len(order)))

    X = X[np.asarray(order, dtype=np.intp)].astype(np.float32)
    splits = np.array(sorted_labels, dtype=object)
    print('\nsorted splits: {}\n... len: {}'.format(splits, len(splits)))

    print('\nwrite sorted {} and {}'.format(X_path, y_path))
    np.save(X_path, X)
    np.save(y_path, splits)
    X_tsv = os.path.splitext(X_path)[0] + '.tsv'
    y_tsv = os.path.splitext(y_path)[0] + '.tsv'
    print('write sorted {} and {}'.format(X_tsv, y_tsv))
    np.savetxt(X_tsv, X, delimiter='\t')
    np.savetxt(y_tsv, splits, fmt='%s', delimiter='\t')

    # Calculate similarity matrix and save:
    X_sim = JSD_similarity(X)
    np.save(os.path.join(out_path, 'similarity_matrix'), X_sim)
    np.savetxt(os.path.join(out_path, 'similarity_matrix.tsv'), X_sim, delimiter='\t')
if __name__ == "__main__":
    # Positional arguments, in order, with the message printed for each:
    #   <X_path> <y_path> <out_path> <json_path>
    arg_specs = [
        ("X_path", "input matrix path is: "),
        ("y_path", "label vector path is: "),
        ("out_path", "output path is: "),
        ("json_path", "json path is: "),
    ]
    # Abort with a non-zero exit code when an argument is missing;
    # continuing would only fail later with a NameError in the main() call.
    if len(sys.argv) - 1 < len(arg_specs):
        sys.exit("ERROR: {} not specified".format(arg_specs[len(sys.argv) - 1][0]))
    X_path, y_path, out_path, json_path = sys.argv[1:5]
    for (arg_name, message), value in zip(arg_specs, sys.argv[1:5]):
        print(message + value)
    main(X_path, y_path, out_path, json_path)
# Environment for the pipeline scripts.
# CWD: directory this file is sourced from (the current working directory).
export CWD="$(pwd)"
# FAIRSEQ: relative path to the fairseq checkout.
export FAIRSEQ="../fairseq"
\ No newline at end of file
run.sh 0 → 100755
#!/bin/bash
#set -x
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
#
# Usage: ./run.sh <runDATA> <stage>
#   runDATA  data directory; the experiment is written to exp_<runDATA>
#   stage    0 = delete the old experiment and run all stages
#            1 = prepare data (*lst and *json)
#            2 = count codebook frequencies and combine the arrays
#            3 = similarity matrix
#            4 = PCA of the similarity matrix and plots
set -e -o pipefail
. path.sh
. conda.sh

# Both positional arguments are mandatory: without <stage> the comparisons
# below would fail with "unary operator expected".
if [[ $# -lt 2 ]] ; then
  echo 'ERROR: this run-script requires two arguments: <runDATA> <stage>'
  exit 1
fi

## runDATA/STAGE
runDATA=$1
stage=$2
if ! [[ $stage =~ ^[0-9]+$ ]]; then
  echo "ERROR: stage must be a non-negative integer, got '$stage'"
  exit 1
fi
VERBOSE=1 # write logs for codebook frequency extraction?

# Succeeds when the given stage was requested, or when all stages run (stage 0).
run_stage() {
  [ "$stage" -eq "$1" ] || [ "$stage" -eq 0 ]
}

printf "\n### STAGE ###\n"
printf "stage: %d\n" "$stage"
printf "### STAGE ###\n"

## DIRS/PATHS
model_path=model/xlsr_53_56k_new.pt
exp_dir="exp_$runDATA"

## STAGE 0: DELETE AND RUN ALL STAGES
if [ "$stage" -eq 0 ]; then
  printf "\n... Delete old experiment and run all ...\n"
  rm -rf -- "${exp_dir}"
fi

## print:
printf "\nCWD: %s" "$CWD"
printf "\nFAIRSEQ: %s" "$FAIRSEQ"
printf "\nrunDATA: %s" "$runDATA"
printf "\nmodel_path: %s" "$model_path"
printf "\nexp_dir: %s\n\n" "$exp_dir"

## CREATE EXPERIMENT FOLDER
mkdir -p "$exp_dir"
mkdir -p "$exp_dir/logs"
mkdir -p "$exp_dir/data"
mkdir -p "$exp_dir/plots"
mkdir -p "$exp_dir/txt"
mkdir -p "$exp_dir/numpy"
mkdir -p "$exp_dir/numpy/pca"

## PREPARE DATA
if run_stage 1; then
  printf "\n... Prepare data (*lst and *json) ...\n"
  python3 local/prepare_data.py --output_lst_path "$exp_dir/data/${runDATA}.lst" \
                                --output_json_path "$exp_dir/data/${runDATA}.json" \
                                --DATA_dir "${runDATA}"
fi

## CODEBOOK FREQUENCIES
if run_stage 2; then
  printf "\n... Count frequencies of codebooks ...\n"
  python3 local/codebook_freqs.py "$exp_dir" \
                                  "$exp_dir/data/${runDATA}.lst" \
                                  "$exp_dir/data/${runDATA}.json" \
                                  "$model_path" \
                                  "$VERBOSE"
  printf "\n... Combine Arrays ... \n"
  python3 local/codebook_combine_arrays.py "$exp_dir/numpy" \
                                           "$exp_dir/data/${runDATA}.json"
fi

## SIMILARITY MATRIX
if run_stage 3; then
  printf "\n... Similarity Matrix ...\n"
  python3 local/similarity_matrix.py \
    "$exp_dir/numpy/splits_freqs.npy" \
    "$exp_dir/numpy/splits_labels.npy" \
    "$exp_dir/numpy" \
    "$exp_dir/data/${runDATA}.json"
fi

## PCA AND PLOTS
if run_stage 4; then
  printf "\n... PCA of similarity matrix ...\n"
  python3 "$FAIRSEQ/examples/wav2vec/unsupervised/scripts/pca.py" \
    "$exp_dir/numpy/similarity_matrix.npy" \
    --output "$exp_dir/numpy/pca" \
    --dim 3
  printf "\n... PLOT ...\n"
  python3 local/plot_pca_similarities.py \
    "$exp_dir/numpy/similarity_matrix.npy" \
    "$exp_dir/numpy/splits_labels.npy" \
    "$exp_dir/numpy/pca/3_pca_A.npy" \
    "$exp_dir/numpy/pca/3_pca_b.npy" \
    "$exp_dir/plots" \
    "$exp_dir/data/${runDATA}.json"
fi
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment