Commit 8d730956 authored by Julian Linke

initial commit

parent 482f0286

local/codebook_combine_arrays.py
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os, sys
import json
import numpy as np
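
# Usage (cf. run.sh, stage 2):
#   python3 local/codebook_combine_arrays.py <exp_dir>/numpy <exp_dir>/data/<DATA>.json
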
def main(exp_path, json_path):
    # load json ...
    # dict with {"corpusA_speakingstyle": [spk1, spk2, ...],
    #            "corpusB_speakingstyle": [spk1, spk2, ...], ...}
    with open(json_path, 'r') as f:
        corpora = json.load(f)
    cnt = 0
    for corpus in corpora:
        freqN_path = os.path.join(exp_path, corpus)
        for freqN_vec in os.listdir(freqN_path):
            if 'freqN' in freqN_vec:
                cnt = cnt + 1
    print('counted {} freqN-files'.format(cnt))
    freqN_convs = np.zeros((cnt, 320**2), dtype=np.float32)
    spks_vec = np.zeros((cnt,), dtype=object)
    idx = 0
    print('... start combining freqN_*.npy-files coming from path {}'.format(exp_path))
    for corpus in corpora:
        style = corpus.split("_")[1]  # second entry is always style
        freqN_path = os.path.join(exp_path, corpus)
        for freqN_vec in os.listdir(freqN_path):
            if 'freqN' in freqN_vec:
                split = freqN_vec.split('_')[1].replace('.npy', '')
                print('read and append split {} ...'.format(os.path.join(freqN_path, freqN_vec)))
                # combine
                freqN_convs[idx, :] = np.load(os.path.join(freqN_path, freqN_vec))
                spks_vec[idx] = f'{split}{style}'
                idx = idx + 1
    freq_path, splits_path = os.path.join(exp_path, 'splits_freqs'), os.path.join(exp_path, 'splits_labels')
    print('\nwrite {}.npy and {}.npy'.format(freq_path, splits_path))
    np.save(freq_path, freqN_convs)
    np.save(splits_path, spks_vec)
    print('\nwrite {}.tsv and {}.tsv'.format(freq_path, splits_path))
    np.savetxt(freq_path + '.tsv', freqN_convs, delimiter='\t')
    np.savetxt(splits_path + '.tsv', spks_vec, fmt='%s', delimiter='\t')

if __name__ == "__main__":
    ############################# EXP PATH ############################
    try:
        exp_path = sys.argv[1]
        print("exp path is: " + exp_path)
    except IndexError:
        sys.exit("ERROR: exp_path not specified")
    ############################# JSON PATH ###########################
    try:
        json_path = sys.argv[2]
        print("json path is: " + json_path)
    except IndexError:
        sys.exit("ERROR: json_path not specified")
    main(exp_path, json_path)

local/codebook_freqs.py
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os, sys
import fairseq
import torch, torchaudio
import numpy as np
import json
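
# Usage (cf. run.sh, stage 2):
#   python3 local/codebook_freqs.py <exp_dir> <exp_dir>/data/<DATA>.lst \
#       <exp_dir>/data/<DATA>.json <model_path> <VERBOSE>
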
def print_list(l):
    for s in l:
        print(s)

def count_freq(codebook_all_indexes, freq):
    seq = []
    for t in range(0, len(codebook_all_indexes)):
        codebook_idx_at_time_t = codebook_all_indexes[t].item()  # tensor(integer).item()
        if VERBOSE:
            print('(Verbose) frame={}: used codebook entry: {}'.format(t + 1, codebook_idx_at_time_t))
        seq.append(codebook_idx_at_time_t)
        freq[codebook_idx_at_time_t] = freq[codebook_idx_at_time_t] + 1
    return freq, seq
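
# Example: for codebook_all_indexes = tensor([5, 5, 7]) the call bumps
# freq[5] by 2 and freq[7] by 1 and returns seq = [5, 5, 7].
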
def calc_codebook_indexes(audio_path, freq, N):
    x, fs = torchaudio.load(audio_path)
    x = x.to(device)  # torch.Size([1, 57120]) [1 x Samples]
    Nwav = 0  # stays 0 if the file is too short to be quantized
    if np.shape(x)[1] > 512:
        C = model.quantize(x)
        quantized_features = C[0][0]  # torch.Size([178, 768]) [T x d]
        codebook_G2_indices = C[1]  # torch.Size([1, 178, 2]) [1 x T x G]; G=2
        codebook_all_indexes = model.quantizer.to_codebook_index(codebook_G2_indices)[0]  # torch.Size([178]) [T]
        Nwav = len(codebook_all_indexes)
        freq, seq = count_freq(codebook_all_indexes, freq)
        print('feature vectors: {}/{} (file/all)'.format(Nwav, N + Nwav))
    else:
        print('WARNING: Input size of file is {} (smaller than kernel size), skip ...'.format(np.shape(x)[1]))
    return freq, Nwav

def main(exp_path, lst_path, json_path, model_path, VERBOSE):
    # set model and device global
    global model, device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # load existing model
    model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([model_path])
    model = model[0]
    model = model.to(device)
    # load json ...
    # dict with {"corpusA_speakingstyle": [spk1, spk2, ...],
    #            "corpusB_speakingstyle": [spk1, spk2, ...], ...}
    with open(json_path, 'r') as f:
        corpora = json.load(f)
    # corpus loop
    for corpus in corpora.keys():
        # LOGFILE:
        if VERBOSE:
            sys.stdout = open(os.path.join(exp_path, 'logs', "codebook_freqs_{}.log".format(corpus)), "w")
        # speaker list and style
        spks = corpora[corpus]
        style = corpus.split("_")[1]  # second entry is always style
        # prepare numpy arrays
        freqN_spks = np.zeros((len(spks), 320**2), dtype=np.float32)  # [SPKS x 102400]
        spks_vec = np.zeros((len(spks),), dtype=object)  # [SPKS x 1]
        # speaker loop
        for idx, spk in enumerate(spks):
            print(f"\n--- speaker {spk} in corpus {corpus} ---")
            # to_codebook_index returns 0-based combined indices
            # (two groups of 320 entries -> 320**2 codewords)
            freq, N = dict.fromkeys(range(320**2), 0), 0
            processed_files = []
            # extract frequencies per speaker
            with open(lst_path, 'r') as tsv:
                rows = tsv.readlines()
                for row in rows:
                    uttID, audio_path = row.split()
                    corpus_match = '_'.join(audio_path.split('/')[1].split('_')[1:])  # DATA/data_corpus_speakingstyle/spk/*wav
                    spk_match = audio_path.split('/')[2]  # DATA/data_corpus_speakingstyle/spk/*wav
                    if corpus == corpus_match and spk == spk_match:
                        print('\nread wav-file {}'.format(audio_path))
                        freq, Nwav = calc_codebook_indexes(audio_path, freq, N)
                        N = N + Nwav
                        processed_files.append(audio_path)
            print(f'(DONE) Found {N} observations for speaker {spk} ...')
            # combine frequencies
            freqN_vec = np.zeros((1, 320**2), dtype=np.float32)
            os.makedirs(os.path.join(exp_path, 'txt', corpus), exist_ok=True)
            os.makedirs(os.path.join(exp_path, 'numpy', corpus), exist_ok=True)
            with open(os.path.join(exp_path, 'txt', corpus, 'freq_{}.txt'.format(spk)), 'w') as ffreq, \
                 open(os.path.join(exp_path, 'txt', corpus, 'freqN_{}.txt'.format(spk)), 'w') as ffreqN:
                for i, code_entry in enumerate(freq.keys()):
                    ffreq.write('{}\t{}\n'.format(code_entry, freq[code_entry]))
                    ffreqN.write('{}\t{}\n'.format(code_entry, freq[code_entry] / N))
                    freqN_vec[0, i] = freq[code_entry] / N
            np.save(os.path.join(exp_path, 'numpy', corpus, 'freqN_{}'.format(spk)), freqN_vec)
            # combine frequencies per corpus
            print(f"... speaker {spk} is row {idx} of array freq_{corpus}.npy!")
            freqN_spks[idx, :] = freqN_vec
            spks_vec[idx] = f'{spk}{style}'
        # write combined frequencies
        np.save(os.path.join(exp_path, 'numpy', corpus, 'freq_{}'.format(corpus)), freqN_spks)
        np.save(os.path.join(exp_path, 'numpy', corpus, 'spkIDs_{}'.format(corpus)), spks_vec)

if __name__ == "__main__":
    ############################# EXP PATH ############################
    try:
        exp_path = sys.argv[1]
        print("\nexp path is: " + exp_path)
    except IndexError:
        sys.exit("ERROR: exp_path not specified")
    ############################# LIST PATH ###########################
    try:
        lst_path = sys.argv[2]
        print("list path is: " + lst_path)
    except IndexError:
        sys.exit("ERROR: lst_path not specified")
    ############################# JSON PATH ###########################
    try:
        json_path = sys.argv[3]
        print("json path is: " + json_path)
    except IndexError:
        sys.exit("ERROR: json_path not specified")
    ############################# MODEL PATH ##########################
    try:
        model_path = sys.argv[4]
        print("model path is: " + model_path)
    except IndexError:
        sys.exit("ERROR: model_path not specified")
    ############################# VERBOSE #############################
    try:
        VERBOSE = int(sys.argv[5])
        print("VERBOSE is " + str(VERBOSE) + "\n")
    except IndexError:
        print("VERBOSE is not specified, default is 0!")
        VERBOSE = 0
    main(exp_path, lst_path, json_path, model_path, VERBOSE)

# https://github.com/facebookresearch/fairseq/issues/3741
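# Strips the eval_wer* / autoregressive task fields from the stored config so
# the checkpoint can later be loaded without fairseq's fine-tuning (ASR) task.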
from omegaconf import DictConfig, OmegaConf, open_dict
import torch

cp_path = 'model/xlsr_53_56k.pt'
cp = torch.load(cp_path)
cfg = DictConfig(cp['cfg'])
dd = OmegaConf.to_container(cfg, resolve=True)
for k, v in dd.items():
    if not isinstance(v, dict):
        continue
    for key, _ in v.items():
        if key.split("_")[:2] == ["eval", "wer"]:
            print(k, key)
with open_dict(cfg):
    cfg.task.pop('eval_wer')
    cfg.task.pop('eval_wer_config')
    cfg.task.pop('eval_wer_tokenizer')
    cfg.task.pop('eval_wer_post_process')
    cfg.task.pop('autoregressive')
cp['cfg'] = cfg
torch.save(cp, 'model/xlsr_53_56k_new.pt')
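# run.sh loads the patched checkpoint via model_path=model/xlsr_53_56k_new.pt.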

local/plot_pca_similarities.py
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os, sys
import json
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # registers the 3d projection (needed for older matplotlib)

# List of predefined colors for each corpus (you can add more if you like)
colors = ['tab:red', 'tab:blue',
          'tab:green', 'tab:orange',
          'tab:purple', 'tab:brown',
          'tab:pink', 'tab:gray',
          'tab:olive', 'tab:cyan']
# markersize
MS = 30
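
# Usage (cf. run.sh, stage 4):
#   python3 local/plot_pca_similarities.py <exp_dir>/numpy/similarity_matrix.npy \
#       <exp_dir>/numpy/splits_labels.npy <exp_dir>/numpy/pca/3_pca_A.npy \
#       <exp_dir>/numpy/pca/3_pca_b.npy <exp_dir>/plots <exp_dir>/data/<DATA>.json
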
def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
    # load json ...
    # dict with {"corpusA_speakingstyle": [spk1, spk2, ...],
    #            "corpusB_speakingstyle": [spk1, spk2, ...], ...}
    with open(json_path, 'r') as f:
        corpora = json.load(f)
    X = np.load(X_path)
    spks = np.load(y_path, allow_pickle=True)
    A = np.load(pcaA_path)
    b = np.load(pcab_path)
    print('\nspkIDs:\n{}'.format(spks))
    print('\ninput matrix:\n{}'.format(X))
    # plot the similarity matrix itself
    labels = list(spks)
    fig, ax = plt.subplots(figsize=(15, 15))
    cax = ax.matshow(X, interpolation='nearest')
    ax.grid()
    plt.xticks(range(len(labels)), labels, rotation=90)
    plt.yticks(range(len(labels)), labels)
    fig.colorbar(cax, ticks=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, .8, .9, 1])
    fig_path = os.path.join(out_path, '{}.png'.format(X_path.split('/')[-1].replace('.npy', '')))
    print('\nsave fig {}'.format(fig_path))
    plt.savefig(fig_path)
    # one color per corpus, repeated once per speaker
    colordict = {}
    for corpus in corpora:
        colordict[corpus] = []
    assert len(corpora.keys()) <= len(colors), 'Not enough colors for corpora'
    for idx, corpus in enumerate(corpora):
        spks_tmp = corpora[corpus]
        for spk in spks_tmp:
            colordict[corpus].append(colors[idx])
    print(colordict)
    col = sum([lst for lst in colordict.values()], [])
    print("\nProject with PCA (3 dimensions)...")
    X_proj = np.dot(X, A) + b  # [splits x splits] * [splits x 3] + [1 x 3]
    print(f"\nshape(X_proj) = {np.shape(X_proj)}")
    print('mean(X_proj,0): {}'.format(np.mean(X_proj, 0)))
    # 3D plot:
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X_proj[:, 0], X_proj[:, 1], X_proj[:, 2], c=col)
    # Set the labels and title
    ax.set_xlabel('PCA1')
    ax.set_ylabel('PCA2')
    ax.set_zlabel('PCA3')
    # Create the legend (one empty scatter per corpus)
    for k, v in colordict.items():
        ax.scatter([], [], [], c=v[0], label=k, alpha=1, s=MS)
    # Shrink current axis by 15%
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.85, box.height])
    # Put a legend to the right of the current axis
    ax.legend(loc='center left', bbox_to_anchor=(1.175, .5))
    # Save the plot
    fig_path = os.path.join(out_path, 'scatter3D_{}.png'.format(X_path.split('/')[-1].replace('.npy', '')))
    print('\nsave fig {}'.format(fig_path))
    plt.savefig(fig_path)
    fig_path = os.path.join(out_path, 'scatter3D_{}.eps'.format(X_path.split('/')[-1].replace('.npy', '')))
    print('save fig {}'.format(fig_path))
    plt.savefig(fig_path, format='eps')
    # 2D scatter plots for each pair of PCA dimensions
    plt.rcParams.update({'font.size': 50})
    for proj in [(0, 1), (0, 2), (1, 2)]:
        x, y = X_proj[:, proj[0]], X_proj[:, proj[1]]
        fig, ax = plt.subplots(figsize=(18, 16))
        for i, txt in enumerate(spks):
            print(f"speaker {txt}: x = {x[i]}, y = {y[i]}")
            ax.plot(x[i], y[i], 'o', color=col[i], markersize=MS, alpha=.5, label=col[i])
        # deduplicate the per-point color labels and rename them to the corpus names
        handles, labels = plt.gca().get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        cols = list(by_label.keys())
        for old, new in zip(cols, corpora.keys()):
            by_label[new] = by_label.pop(old)
        ax.legend(by_label.values(), by_label.keys(), fontsize=30)
        plt.xlabel(f'PCA{proj[0] + 1}')
        plt.ylabel(f'PCA{proj[1] + 1}')
        plt.grid()
        plt.tight_layout()
        fig_path = os.path.join(out_path, 'scatter_proj{}{}_{}.png'.format(proj[0], proj[1], X_path.split('/')[-1].replace('.npy', '')))
        print('\nsave fig {}'.format(fig_path))
        plt.savefig(fig_path)
        fig_path = os.path.join(out_path, 'scatter_proj{}{}_{}.eps'.format(proj[0], proj[1], X_path.split('/')[-1].replace('.npy', '')))
        print('save fig {}'.format(fig_path))
        plt.savefig(fig_path, format='eps')

if __name__ == "__main__":
    ############################# FEATURES PATH ########################
    try:
        X_path = sys.argv[1]
        print("input matrix path is: " + X_path)
    except IndexError:
        sys.exit("ERROR: X_path not specified")
    ############################# LABELS PATH ##########################
    try:
        y_path = sys.argv[2]
        print("label vector path is: " + y_path)
    except IndexError:
        sys.exit("ERROR: y_path not specified")
    ############################# PCA A PATH ###########################
    try:
        pcaA_path = sys.argv[3]
        print("pca matrix A path is: " + pcaA_path)
    except IndexError:
        sys.exit("ERROR: pcaA_path not specified")
    ############################# PCA b PATH ###########################
    try:
        pcab_path = sys.argv[4]
        print("pca vector b path is: " + pcab_path)
    except IndexError:
        sys.exit("ERROR: pcab_path not specified")
    ############################# OUT PATH #############################
    try:
        out_path = sys.argv[5]
        print("output path is: " + out_path)
    except IndexError:
        sys.exit("ERROR: out_path not specified")
    ############################# JSON PATH ############################
    try:
        json_path = sys.argv[6]
        print("json path is: " + json_path)
    except IndexError:
        sys.exit("ERROR: json_path not specified")
    main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path)

local/prepare_data.py
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os
import json
import argparse
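
# Usage (cf. run.sh, stage 1):
#   python3 local/prepare_data.py --output_lst_path <exp_dir>/data/<DATA>.lst \
#       --output_json_path <exp_dir>/data/<DATA>.json --DATA_dir <DATA>
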
def get_speaker_ids_and_lst_lines(corpus_dir):
    spk_ids = []
    lst_lines = []
    for spk in os.listdir(corpus_dir):
        spk_dir = os.path.join(corpus_dir, spk)
        if os.path.isdir(spk_dir):
            spk_ids.append(spk)
            for audio_file in os.listdir(spk_dir):
                if audio_file.endswith('.wav') or audio_file.endswith('.flac'):
                    uttID = audio_file.replace('.wav', '').replace('.flac', '')
                    audio_path = os.path.join(spk_dir, audio_file)
                    lst_line = f"{uttID} {audio_path}\n"
                    lst_lines.append(lst_line)
    return spk_ids, lst_lines

def process_DATA_directory(DATA_dir):
    spk_dict = {}
    lst_lines = []
    for corpus in os.listdir(DATA_dir):
        corpus_dir = os.path.join(DATA_dir, corpus)
        if os.path.isdir(corpus_dir):
            corpus_name = '_'.join(corpus.split('_')[1:])
            spk_ids, new_lst_lines = get_speaker_ids_and_lst_lines(corpus_dir)
            spk_dict[corpus_name] = spk_ids
            lst_lines.extend(new_lst_lines)
    return spk_dict, lst_lines

def write_output_files(output_lst_path, output_json_path, lst_lines, spk_dict):
    with open(output_lst_path, 'w') as f:
        f.writelines(lst_lines)
    with open(output_json_path, 'w') as f:
        json.dump(spk_dict, f, indent=4)

def main(output_lst_path, output_json_path, DATA_dir):
    spk_dict, lst_lines = process_DATA_directory(DATA_dir)
    write_output_files(output_lst_path, output_json_path, lst_lines, spk_dict)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Generate DATA.lst and DATA.json files.')
    parser.add_argument('--output_lst_path', required=True, help='Path to the output .lst file.')
    parser.add_argument('--output_json_path', required=True, help='Path to the output .json file.')
    parser.add_argument('--DATA_dir', required=True, help='Path to the DATA directory.')
    args = parser.parse_args()
    main(args.output_lst_path, args.output_json_path, args.DATA_dir)

local/similarity_matrix.py
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
import os, sys
import json
import numpy as np
from scipy.stats import entropy
from numpy.linalg import norm
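
# Usage (cf. run.sh, stage 3):
#   python3 local/similarity_matrix.py <exp_dir>/numpy/splits_freqs.npy \
#       <exp_dir>/numpy/splits_labels.npy <exp_dir>/numpy <exp_dir>/data/<DATA>.json
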
def JSD(P, Q):
    # Jensen-Shannon divergence turned into a similarity: identical
    # distributions give 1.0 (scipy's entropy uses the natural logarithm here)
    _P = P / norm(P, ord=1)
    _Q = Q / norm(Q, ord=1)
    _M = 0.5 * (_P + _Q)
    return 1 - (0.5 * (entropy(_P, _M) + entropy(_Q, _M)))
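
# Example (hypothetical values): for P = Q = np.array([0.5, 0.5]) both KL terms
# vanish and JSD(P, Q) == 1.0; for P = [1, 0] vs. Q = [0, 1] the divergence is
# maximal (ln 2), so the similarity drops to 1 - ln(2) ≈ 0.307.
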
def JSD_similarity(X):
    sim = np.zeros((np.shape(X)[0], np.shape(X)[0]), dtype=np.float32)
    print('\ncalculate JSD similarity matrix of X: {} ...'.format(X.shape))
    for row, x in enumerate(X):
        for col in range(np.shape(X)[0]):
            #print('calculate JSD similarity of features ({}, {})'.format(row, col))
            sim[row, col] = JSD(x, X[col, :])
    return sim
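
# The resulting matrix is symmetric with ones on the diagonal; the full
# [splits x splits] loop is kept for simplicity rather than exploiting that.
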
def main(X_path, y_path, out_path, json_path):
    # load json ...
    # dict with {"corpusA_speakingstyle": [spk1, spk2, ...],
    #            "corpusB_speakingstyle": [spk1, spk2, ...], ...}
    with open(json_path, 'r') as f:
        corpora = json.load(f)
    # LOAD MATRIX AND SPLITS
    X = np.load(X_path)
    splits = np.load(y_path, allow_pickle=True)
    print('\ninput splits: {}\n... len: {}'.format(splits, len(splits)))
    # SORT: bring rows into the corpus/speaker order given by the json
    d_splits = {}
    for idx, split in enumerate(splits):
        d_splits[split] = X[idx, :]
    X = np.zeros((np.shape(X)), dtype=np.float32)
    splits = np.zeros((np.shape(splits)), dtype=object)
    i = 0
    for corpus in corpora:
        for split in corpora[corpus]:
            style = corpus.split("_")[1]  # second entry is always style
            split = f'{split}{style}'
            if split in d_splits:
                X[i, :] = d_splits[split]
                splits[i] = split
                i = i + 1
            else:
                print('Wrong corpora entry: {}?'.format(split))
    print('\nsorted splits: {}\n... len: {}'.format(splits, len(splits)))
    print('\nwrite sorted {} and {}'.format(X_path, y_path))
    np.save(X_path, X)
    np.save(y_path, splits)
    print('write sorted {} and {}'.format(X_path.replace('.npy', '.tsv'), y_path.replace('.npy', '.tsv')))
    np.savetxt(X_path.replace('.npy', '.tsv'), X, delimiter='\t')
    np.savetxt(y_path.replace('.npy', '.tsv'), splits, fmt='%s', delimiter='\t')
    # Calculate similarity matrix and save:
    X_sim = JSD_similarity(X)
    np.save(os.path.join(out_path, 'similarity_matrix'), X_sim)
    np.savetxt(os.path.join(out_path, 'similarity_matrix.tsv'), X_sim, delimiter='\t')

if __name__ == "__main__":
    ############################# FEATURES PATH ########################
    try:
        X_path = sys.argv[1]
        print("input matrix path is: " + X_path)
    except IndexError:
        sys.exit("ERROR: X_path not specified")
    ############################# LABELS PATH ##########################
    try:
        y_path = sys.argv[2]
        print("label vector path is: " + y_path)
    except IndexError:
        sys.exit("ERROR: y_path not specified")
    ############################# OUT PATH #############################
    try:
        out_path = sys.argv[3]
        print("output path is: " + out_path)
    except IndexError:
        sys.exit("ERROR: out_path not specified")
    ############################# JSON PATH ############################
    try:
        json_path = sys.argv[4]
        print("json path is: " + json_path)
    except IndexError:
        sys.exit("ERROR: json_path not specified")
    main(X_path, y_path, out_path, json_path)

path.sh
export CWD=$(pwd)
export FAIRSEQ=../fairseq
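# run.sh sources this file; FAIRSEQ must point at a fairseq checkout, since
# stage 4 calls $FAIRSEQ/examples/wav2vec/unsupervised/scripts/pca.py.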
run.sh 0 → 100755
#!/bin/bash
#set -x
# Author: Julian Linke (linke@tugraz.at)
# SPSC TU Graz (July 2023)
set -e -o pipefail
. path.sh
. conda.sh
if [[ $# -lt 2 ]] ; then
    echo 'ERROR: this run-script requires two arguments: runDATA and stage'
    exit 1
fi
## runDATA/STAGE
runDATA=$1
stage=$2
VERBOSE=1 # write logs for codebook frequency extraction?
printf "\n### STAGE ###\n"
printf "stage: %d\n" $stage
printf "### STAGE ###\n"
## DIRS/PATHS
model_path=model/xlsr_53_56k_new.pt
exp_dir=exp_$runDATA
## STAGE 0: DELETE AND RUN ALL STAGES
if [ $stage == 0 ]; then
    printf "\n... Delete old experiment and run all ...\n"
    rm -rf ${exp_dir}
fi
## print:
printf "\nCWD: %s" "$CWD"
printf "\nFAIRSEQ: %s" "$FAIRSEQ"
printf "\nrunDATA: %s" "$runDATA"
printf "\nmodel_path: %s" "$model_path"
printf "\nexp_dir: %s\n\n" "$exp_dir"
## CREATE EXPERIMENT FOLDER
mkdir -p $exp_dir
mkdir -p $exp_dir/logs
mkdir -p $exp_dir/data
mkdir -p $exp_dir/plots
mkdir -p $exp_dir/txt
mkdir -p $exp_dir/numpy
mkdir -p $exp_dir/numpy/pca
## PREPARE DATA
if [ $stage == 1 ] || [ $stage == 0 ]; then
    printf "\n... Prepare data (*lst and *json) ...\n"
    python3 local/prepare_data.py --output_lst_path $exp_dir/data/${runDATA}.lst \
                                  --output_json_path $exp_dir/data/${runDATA}.json \
                                  --DATA_dir ${runDATA}
fi
if [ $stage == 2 ] || [ $stage == 0 ]; then
    printf "\n... Count frequencies of codebooks ...\n"
    python3 local/codebook_freqs.py $exp_dir \
        $exp_dir/data/${runDATA}.lst \
        $exp_dir/data/${runDATA}.json \
        $model_path \
        $VERBOSE
    printf "\n... Combine Arrays ... \n"
    python3 local/codebook_combine_arrays.py $exp_dir/numpy \
        $exp_dir/data/${runDATA}.json
fi
if [ $stage == 3 ] || [ $stage == 0 ]; then
    printf "\n... Similarity Matrix ...\n"
    python3 local/similarity_matrix.py \
        $exp_dir/numpy/splits_freqs.npy \
        $exp_dir/numpy/splits_labels.npy \
        $exp_dir/numpy \
        $exp_dir/data/${runDATA}.json
fi
if [ $stage == 4 ] || [ $stage == 0 ]; then
    printf "\n... PCA of similarity matrix ...\n"
    python3 $FAIRSEQ/examples/wav2vec/unsupervised/scripts/pca.py \
        $exp_dir/numpy/similarity_matrix.npy \
        --output $exp_dir/numpy/pca \
        --dim 3
    printf "\n... PLOT ...\n"
    python3 local/plot_pca_similarities.py \
        $exp_dir/numpy/similarity_matrix.npy \
        $exp_dir/numpy/splits_labels.npy \
        $exp_dir/numpy/pca/3_pca_A.npy \
        $exp_dir/numpy/pca/3_pca_b.npy \
        $exp_dir/plots \
        $exp_dir/data/${runDATA}.json
fi