Skip to content
Snippets Groups Projects
Commit d0f8c15f authored by Julian Linke
Browse files

some changes: plot folders, data folder structure, ...

parent 8639edd3
No related branches found
No related tags found
No related merge requests found
......@@ -115,7 +115,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path, NC):
kmeans = faiss.Kmeans(
d,
int(NC),
niter=10,
niter=100,
verbose=True,
gpu=False,
)
......
......@@ -35,12 +35,14 @@ def calc_codebook_indexes(audio_path, freq, N):
N = N + Nwav
print('feature vectors: {}/{} (file/all)'.format(Nwav, N))
else:
Nwav = 0
print('WARNING: Input size of file is {} (smaller than Kernel size), skip ...'.format(len(x)))
return freq, Nwav
def main(exp_path, lst_path, json_path, model_path, VERBOSE):
# set model and device global
global model, device
#device = torch.device('cpu')
device = torch.device('cuda')
# load existing model
model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([model_path])
......@@ -72,13 +74,14 @@ def main(exp_path, lst_path, json_path, model_path, VERBOSE):
rows = tsv.readlines()
for row in rows:
uttID, audio_path = row.split()
corpus_match = ('_').join(audio_path.split('/')[1].split('_')[1:]) # DATA/data_corpus_speakingstyle/spk/*wav
spk_match = audio_path.split('/')[2] # DATA/data_corpus_speakingstyle/spk/*wav
corpus_match = ('_').join(audio_path.split('/')[2].split('_')[1:]) # DATA/expname/data_corpus_speakingstyle/spk/*wav
spk_match = audio_path.split('/')[3] # DATA/expname/data_corpus_speakingstyle/spk/*wav
if corpus == corpus_match and spk == spk_match:
print('\nread wav-file {}'.format(audio_path))
freq, Nwav = calc_codebook_indexes(audio_path, freq, N)
N = N + Nwav
processed_files.append(audio_path)
if Nwav != 0:
N = N + Nwav
processed_files.append(audio_path)
print(f'(DONE) Found {N} observations for speaker {spk} ...')
# combine frequencies
freqN_vec = np.zeros((1, 320**2), dtype=np.float32)
......
......@@ -30,6 +30,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
with open(json_path, 'r') as f:
corpora = json.load(f)
smID = X_path.split('/')[-1].replace('.npy','')
X = np.load(X_path)
spks = np.load(y_path, allow_pickle=True)
......@@ -52,7 +53,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
plt.yticks(range(len(labels)), labels);
cbar = fig.colorbar(cax, ticks=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, .8, .9, 1])
cbar.ax.tick_params(labelsize=25)
fig_path = os.path.join(out_path,'{}.png'.format(X_path.split('/')[-1].replace('.npy','')))
fig_path = os.path.join(out_path,f'{smID}.png')
print('\nsave similarity matrix to {}'.format(fig_path))
plt.savefig(fig_path)
......@@ -88,7 +89,7 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
# Put a legend to the right of the current axis
ax.legend(loc='center left', bbox_to_anchor=(1.175, .5))
# Save the plot
fig_path = os.path.join(out_path,'scatter3D_{}.png'.format(X_path.split('/')[-1].replace('.npy','')))
fig_path = os.path.join(out_path,f'scatter_{smID}_PCA1_PCA2_PCA3.png')
print('\nsave 3D scatter plot {}'.format(fig_path))
plt.savefig(fig_path)
......@@ -113,7 +114,8 @@ def main(X_path, y_path, pcaA_path, pcab_path, out_path, json_path):
plt.ylabel(f'PCA{str(proj[1]+1)}')
plt.grid()
plt.tight_layout()
fig_path = os.path.join(out_path,'scatter_proj{}_{}.png'.format(''.join([str(proj[0]),str(proj[1])]), X_path.split('/')[-1].replace('.npy','')))
pcaID1, pcaID2 = str(proj[0]+1), str(proj[1]+1)
fig_path = os.path.join(out_path,f'scatter_{smID}_PCA{pcaID1}_PCA{pcaID2}.png')
print('save 2D scatter plot {}'.format(fig_path))
plt.savefig(fig_path)
......
......@@ -8,14 +8,15 @@ set -e -o pipefail
. path.sh
. conda.sh
if [[ $# -eq 1 ]] ; then
echo 'ERROR: this run-script requires two arguments: DATA=? stage=?'
if [[ $# -eq 2 ]] ; then
echo 'ERROR: this run-script requires two arguments: expdata=? expname=? stage=?'
exit 1
fi
## runDATA/STAGE
runDATA=$1
stage=$2
## expname/STAGE
expdata=$1
expname=$2
stage=$3
VERBOSE=1 # write logs for codebook frequency extraction?
printf "\n### STAGE ###\n"
printf "stage: %d\n" $stage
......@@ -23,7 +24,7 @@ printf "### STAGE ###\n"
## DIRS/PATHS
model_path=model/xlsr_53_56k_new.pt
exp_dir=exp_$runDATA
exp_dir=exp_$expname
## STAGE 0: DELETE AND RUN ALL STAGES
if [ $stage == 0 ]; then
......@@ -34,7 +35,8 @@ fi
## print:
printf "\nCWD: %s" "$CWD"
printf "\nFAIRSEQ: %s" "$FAIRSEQ"
printf "\nrunDATA: %s" "$runDATA"
printf "\nexpname: %s" "$expname"
printf "\nexpdata: %s" "$expdata"
printf "\nmodel_path: %s" "$model_path"
printf "\nexp_dir: %s\n\n" "$exp_dir"
......@@ -50,22 +52,22 @@ mkdir -p $exp_dir/numpy/pca
## PREPARE DATA
if [ $stage == 1 ] || [ $stage == 0 ]; then
printf "\n... Prepare data (*lst and *json) ...\n"
python3 local/prepare_data.py --output_lst_path $exp_dir/data/${runDATA}.lst \
--output_json_path $exp_dir/data/${runDATA}.json \
--DATA_dir ${runDATA}
python3 local/prepare_data.py --output_lst_path $exp_dir/data/${expname}.lst \
--output_json_path $exp_dir/data/${expname}.json \
--DATA_dir ${expdata}
fi
## COUNT CODEBOOK USAGE
if [ $stage == 2 ] || [ $stage == 0 ]; then
printf "\n... Count frequencies of codebooks ...\n"
python3 local/codebook_freqs.py $exp_dir \
$exp_dir/data/${runDATA}.lst \
$exp_dir/data/${runDATA}.json \
$exp_dir/data/${expname}.lst \
$exp_dir/data/${expname}.json \
$model_path \
$VERBOSE
printf "\n... Combine Arrays ... \n"
python3 local/codebook_combine_arrays.py $exp_dir/numpy \
$exp_dir/data/${runDATA}.json
$exp_dir/data/${expname}.json
fi
## CALCULATE SIMILARITY MATRIX
......@@ -75,7 +77,7 @@ if [ $stage == 3 ] || [ $stage == 0 ]; then
$exp_dir/numpy/splits_freqs.npy \
$exp_dir/numpy/splits_labels.npy \
$exp_dir/numpy \
$exp_dir/data/${runDATA}.json
$exp_dir/data/${expname}.json
fi
## PCA SPACE AND PLOTS
......@@ -86,23 +88,26 @@ if [ $stage == 4 ] || [ $stage == 0 ]; then
--output $exp_dir/numpy/pca \
--dim 3
printf "\n... PLOT similarity in PCA space (Analysis) ...\n"
mkdir -p $exp_dir/plots/analysis
python3 local/plot_pca_similarities.py \
$exp_dir/numpy/similarity_matrix.npy \
$exp_dir/numpy/splits_labels.npy \
$exp_dir/numpy/pca/3_pca_A.npy \
$exp_dir/numpy/pca/3_pca_b.npy \
$exp_dir/plots \
$exp_dir/data/${runDATA}.json
$exp_dir/plots/analysis \
$exp_dir/data/${expname}.json
fi
if [ $stage == 5 ] || [ $stage == 0 ]; then
printf "\n... Classification with k-means in PCA space ...\n"
mkdir -p $exp_dir/plots/kmeans
nclust=3 # number of clusters?
python3 local/classification_kmeans.py \
$exp_dir/numpy/similarity_matrix.npy \
$exp_dir/numpy/splits_labels.npy \
$exp_dir/numpy/pca/3_pca_A.npy \
$exp_dir/numpy/pca/3_pca_b.npy \
$exp_dir/plots \
$exp_dir/data/${runDATA}.json \
2
$exp_dir/plots/kmeans \
$exp_dir/data/${expname}.json \
$nclust
fi
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment