Skip to content
Snippets Groups Projects
Commit d0f8c15f authored by Julian Linke
Browse files

some changes: plot folders, data folder structure, ...

parent 8639edd3
No related branches found
No related tags found
No related merge requests found
...@@ -115,7 +115,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path, NC): ...@@ -115,7 +115,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path, NC):
kmeans = faiss.Kmeans( kmeans = faiss.Kmeans(
d, d,
int(NC), int(NC),
niter=10, niter=100,
verbose=True, verbose=True,
gpu=False, gpu=False,
) )
......
...@@ -35,12 +35,14 @@ def calc_codebook_indexes(audio_path, freq, N): ...@@ -35,12 +35,14 @@ def calc_codebook_indexes(audio_path, freq, N):
N = N + Nwav N = N + Nwav
print('feature vectors: {}/{} (file/all)'.format(Nwav, N)) print('feature vectors: {}/{} (file/all)'.format(Nwav, N))
else: else:
Nwav = 0
print('WARNING: Input size of file is {} (smaller than Kernel size), skip ...'.format(len(x))) print('WARNING: Input size of file is {} (smaller than Kernel size), skip ...'.format(len(x)))
return freq, Nwav return freq, Nwav
def main(exp_path, lst_path, json_path, model_path, VERBOSE): def main(exp_path, lst_path, json_path, model_path, VERBOSE):
# set model and device global # set model and device global
global model, device global model, device
#device = torch.device('cpu')
device = torch.device('cuda') device = torch.device('cuda')
# load existing model # load existing model
model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([model_path]) model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([model_path])
...@@ -72,13 +74,14 @@ def main(exp_path, lst_path, json_path, model_path, VERBOSE): ...@@ -72,13 +74,14 @@ def main(exp_path, lst_path, json_path, model_path, VERBOSE):
rows = tsv.readlines() rows = tsv.readlines()
for row in rows: for row in rows:
uttID, audio_path = row.split() uttID, audio_path = row.split()
corpus_match = ('_').join(audio_path.split('/')[1].split('_')[1:]) # DATA/data_corpus_speakingstyle/spk/*wav corpus_match = ('_').join(audio_path.split('/')[2].split('_')[1:]) # DATA/expname/data_corpus_speakingstyle/spk/*wav
spk_match = audio_path.split('/')[2] # DATA/data_corpus_speakingstyle/spk/*wav spk_match = audio_path.split('/')[3] # DATA/expname/data_corpus_speakingstyle/spk/*wav
if corpus == corpus_match and spk == spk_match: if corpus == corpus_match and spk == spk_match:
print('\nread wav-file {}'.format(audio_path)) print('\nread wav-file {}'.format(audio_path))
freq, Nwav = calc_codebook_indexes(audio_path, freq, N) freq, Nwav = calc_codebook_indexes(audio_path, freq, N)
N = N + Nwav if Nwav != 0:
processed_files.append(audio_path) N = N + Nwav
processed_files.append(audio_path)
print(f'(DONE) Found {N} observations for speaker {spk} ...') print(f'(DONE) Found {N} observations for speaker {spk} ...')
# combine frequencies # combine frequencies
freqN_vec = np.zeros((1, 320**2), dtype=np.float32) freqN_vec = np.zeros((1, 320**2), dtype=np.float32)
......
...@@ -30,6 +30,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path): ...@@ -30,6 +30,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
with open(json_path, 'r') as f: with open(json_path, 'r') as f:
corpora = json.load(f) corpora = json.load(f)
smID = X_path.split('/')[-1].replace('.npy','')
X = np.load(X_path) X = np.load(X_path)
spks = np.load(y_path, allow_pickle=True) spks = np.load(y_path, allow_pickle=True)
...@@ -52,7 +53,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path): ...@@ -52,7 +53,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
plt.yticks(range(len(labels)), labels); plt.yticks(range(len(labels)), labels);
cbar = fig.colorbar(cax, ticks=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, .8, .9, 1]) cbar = fig.colorbar(cax, ticks=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, .8, .9, 1])
cbar.ax.tick_params(labelsize=25) cbar.ax.tick_params(labelsize=25)
fig_path = os.path.join(out_path,'{}.png'.format(X_path.split('/')[-1].replace('.npy',''))) fig_path = os.path.join(out_path,f'{smID}.png')
print('\nsave similarity matrix to {}'.format(fig_path)) print('\nsave similarity matrix to {}'.format(fig_path))
plt.savefig(fig_path) plt.savefig(fig_path)
...@@ -88,7 +89,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path): ...@@ -88,7 +89,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
# Put a legend to the right of the current axis # Put a legend to the right of the current axis
ax.legend(loc='center left', bbox_to_anchor=(1.175, .5)) ax.legend(loc='center left', bbox_to_anchor=(1.175, .5))
# Save the plot # Save the plot
fig_path = os.path.join(out_path,'scatter3D_{}.png'.format(X_path.split('/')[-1].replace('.npy',''))) fig_path = os.path.join(out_path,f'scatter_{smID}_PCA1_PCA2_PCA3.png')
print('\nsave 3D scatter plot {}'.format(fig_path)) print('\nsave 3D scatter plot {}'.format(fig_path))
plt.savefig(fig_path) plt.savefig(fig_path)
...@@ -113,7 +114,8 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path): ...@@ -113,7 +114,8 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
plt.ylabel(f'PCA{str(proj[1]+1)}') plt.ylabel(f'PCA{str(proj[1]+1)}')
plt.grid() plt.grid()
plt.tight_layout() plt.tight_layout()
fig_path = os.path.join(out_path,'scatter_proj{}_{}.png'.format(''.join([str(proj[0]),str(proj[1])]), X_path.split('/')[-1].replace('.npy',''))) pcaID1, pcaID2 = str(proj[0]+1), str(proj[1]+1)
fig_path = os.path.join(out_path,f'scatter_{smID}_PCA{pcaID1}_PCA{pcaID2}.png')
print('save 2D scatter plot {}'.format(fig_path)) print('save 2D scatter plot {}'.format(fig_path))
plt.savefig(fig_path) plt.savefig(fig_path)
......
...@@ -8,14 +8,15 @@ set -e -o pipefail ...@@ -8,14 +8,15 @@ set -e -o pipefail
. path.sh . path.sh
. conda.sh . conda.sh
if [[ $# -eq 1 ]] ; then if [[ $# -eq 2 ]] ; then
echo 'ERROR: this run-script requires two arguments: DATA=? stage=?' echo 'ERROR: this run-script requires two arguments: expdata=? expname=? stage=?'
exit 1 exit 1
fi fi
## runDATA/STAGE ## expname/STAGE
runDATA=$1 expdata=$1
stage=$2 expname=$2
stage=$3
VERBOSE=1 # write logs for codebook frequency extraction? VERBOSE=1 # write logs for codebook frequency extraction?
printf "\n### STAGE ###\n" printf "\n### STAGE ###\n"
printf "stage: %d\n" $stage printf "stage: %d\n" $stage
...@@ -23,7 +24,7 @@ printf "### STAGE ###\n" ...@@ -23,7 +24,7 @@ printf "### STAGE ###\n"
## DIRS/PATHS ## DIRS/PATHS
model_path=model/xlsr_53_56k_new.pt model_path=model/xlsr_53_56k_new.pt
exp_dir=exp_$runDATA exp_dir=exp_$expname
## STAGE 0: DELETE AND RUN ALL STAGES ## STAGE 0: DELETE AND RUN ALL STAGES
if [ $stage == 0 ]; then if [ $stage == 0 ]; then
...@@ -34,7 +35,8 @@ fi ...@@ -34,7 +35,8 @@ fi
## print: ## print:
printf "\nCWD: %s" "$CWD" printf "\nCWD: %s" "$CWD"
printf "\nFAIRSEQ: %s" "$FAIRSEQ" printf "\nFAIRSEQ: %s" "$FAIRSEQ"
printf "\nrunDATA: %s" "$runDATA" printf "\nexpname: %s" "$expname"
printf "\nexpdata: %s" "$expdata"
printf "\nmodel_path: %s" "$model_path" printf "\nmodel_path: %s" "$model_path"
printf "\nexp_dir: %s\n\n" "$exp_dir" printf "\nexp_dir: %s\n\n" "$exp_dir"
...@@ -50,22 +52,22 @@ mkdir -p $exp_dir/numpy/pca ...@@ -50,22 +52,22 @@ mkdir -p $exp_dir/numpy/pca
## PREPARE DATA ## PREPARE DATA
if [ $stage == 1 ] || [ $stage == 0 ]; then if [ $stage == 1 ] || [ $stage == 0 ]; then
printf "\n... Prepare data (*lst and *json) ...\n" printf "\n... Prepare data (*lst and *json) ...\n"
python3 local/prepare_data.py --output_lst_path $exp_dir/data/${runDATA}.lst \ python3 local/prepare_data.py --output_lst_path $exp_dir/data/${expname}.lst \
--output_json_path $exp_dir/data/${runDATA}.json \ --output_json_path $exp_dir/data/${expname}.json \
--DATA_dir ${runDATA} --DATA_dir ${expdata}
fi fi
## COUNT CODEBOOK USAGE ## COUNT CODEBOOK USAGE
if [ $stage == 2 ] || [ $stage == 0 ]; then if [ $stage == 2 ] || [ $stage == 0 ]; then
printf "\n... Count frequencies of codebooks ...\n" printf "\n... Count frequencies of codebooks ...\n"
python3 local/codebook_freqs.py $exp_dir \ python3 local/codebook_freqs.py $exp_dir \
$exp_dir/data/${runDATA}.lst \ $exp_dir/data/${expname}.lst \
$exp_dir/data/${runDATA}.json \ $exp_dir/data/${expname}.json \
$model_path \ $model_path \
$VERBOSE $VERBOSE
printf "\n... Combine Arrays ... \n" printf "\n... Combine Arrays ... \n"
python3 local/codebook_combine_arrays.py $exp_dir/numpy \ python3 local/codebook_combine_arrays.py $exp_dir/numpy \
$exp_dir/data/${runDATA}.json $exp_dir/data/${expname}.json
fi fi
## CALCULATE SIMILARITY MATRIX ## CALCULATE SIMILARITY MATRIX
...@@ -75,7 +77,7 @@ if [ $stage == 3 ] || [ $stage == 0 ]; then ...@@ -75,7 +77,7 @@ if [ $stage == 3 ] || [ $stage == 0 ]; then
$exp_dir/numpy/splits_freqs.npy \ $exp_dir/numpy/splits_freqs.npy \
$exp_dir/numpy/splits_labels.npy \ $exp_dir/numpy/splits_labels.npy \
$exp_dir/numpy \ $exp_dir/numpy \
$exp_dir/data/${runDATA}.json $exp_dir/data/${expname}.json
fi fi
## PCA SPACE AND PLOTS ## PCA SPACE AND PLOTS
...@@ -86,23 +88,26 @@ if [ $stage == 4 ] || [ $stage == 0 ]; then ...@@ -86,23 +88,26 @@ if [ $stage == 4 ] || [ $stage == 0 ]; then
--output $exp_dir/numpy/pca \ --output $exp_dir/numpy/pca \
--dim 3 --dim 3
printf "\n... PLOT similarity in PCA space (Analysis) ...\n" printf "\n... PLOT similarity in PCA space (Analysis) ...\n"
mkdir -p $exp_dir/plots/analysis
python3 local/plot_pca_similarities.py \ python3 local/plot_pca_similarities.py \
$exp_dir/numpy/similarity_matrix.npy \ $exp_dir/numpy/similarity_matrix.npy \
$exp_dir/numpy/splits_labels.npy \ $exp_dir/numpy/splits_labels.npy \
$exp_dir/numpy/pca/3_pca_A.npy \ $exp_dir/numpy/pca/3_pca_A.npy \
$exp_dir/numpy/pca/3_pca_b.npy \ $exp_dir/numpy/pca/3_pca_b.npy \
$exp_dir/plots \ $exp_dir/plots/analysis \
$exp_dir/data/${runDATA}.json $exp_dir/data/${expname}.json
fi fi
if [ $stage == 5 ] || [ $stage == 0 ]; then if [ $stage == 5 ] || [ $stage == 0 ]; then
printf "\n... Classification with k-means in PCA space ...\n" printf "\n... Classification with k-means in PCA space ...\n"
mkdir -p $exp_dir/plots/kmeans
nclust=3 # number of clusters?
python3 local/classification_kmeans.py \ python3 local/classification_kmeans.py \
$exp_dir/numpy/similarity_matrix.npy \ $exp_dir/numpy/similarity_matrix.npy \
$exp_dir/numpy/splits_labels.npy \ $exp_dir/numpy/splits_labels.npy \
$exp_dir/numpy/pca/3_pca_A.npy \ $exp_dir/numpy/pca/3_pca_A.npy \
$exp_dir/numpy/pca/3_pca_b.npy \ $exp_dir/numpy/pca/3_pca_b.npy \
$exp_dir/plots \ $exp_dir/plots/kmeans \
$exp_dir/data/${runDATA}.json \ $exp_dir/data/${expname}.json \
2 $nclust
fi fi
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment