Skip to content
Snippets Groups Projects
Commit 134269fb authored by Thomas Röck's avatar Thomas Röck
Browse files

initial commit

parents
Branches dev
No related tags found
No related merge requests found
from __future__ import annotations
from pathlib import Path
from typing import Optional, Union
import numpy as np
import tgt
from ..utils import get_config
from threading import Thread
def _merge_creak_frames(creak_bin: np.ndarray, n_min: int, n_gap: int) -> np.ndarray:
    """Bridge gaps of up to ``n_gap`` frames and drop runs shorter than ``n_min``."""
    n_frames = creak_bin.shape[0]
    merged = np.zeros(creak_bin.shape, dtype=int)
    # Zero padding keeps the look-ahead below in bounds near the end of the series.
    padded = np.append(creak_bin, np.zeros(n_gap, dtype=int))
    lookahead = np.arange(1, n_gap + 1)  # offsets of the frames inspected after a 0
    run_start = 0
    run_length = 0
    for idx in range(n_frames):
        if padded[idx] == 1:
            if not run_length:
                run_start = idx
            run_length += 1
        elif run_length:
            if padded[idx + lookahead].any():
                # Creak resumes within the allowed gap: the 0-frame joins the run.
                run_length += 1
                continue
            if run_length >= n_min:
                merged[run_start:run_start + run_length] = 1
            run_length = 0
    # A run that is still open when the series ends must be flushed as well,
    # otherwise creak reaching the last frame is silently lost.
    if run_length and run_length >= n_min:
        merged[run_start:run_start + run_length] = 1
    return merged


def _mask_to_intervals(mask: np.ndarray, dt: np.ndarray) -> list:
    """Return ``(dt[first], dt[last])`` for every run of ones in ``mask``."""
    # Padding with zeros on both sides turns runs touching either end of the
    # mask into ordinary rising/falling edges.
    edges = np.diff(np.concatenate(([0], mask, [0])))
    first_frames = np.flatnonzero(edges == 1)
    last_frames = np.flatnonzero(edges == -1) - 1
    return [(dt[first], dt[last]) for first, last in zip(first_frames, last_frames)]


def get_creak_intervals(series: np.ndarray, dt: np.ndarray, threshold: Optional[float] = None,
                        tgt_intervals=False):
    """Turn a per-frame creak series into a list of creak intervals.

    Frames with ``series >= threshold`` count as creak. Creak frames are
    merged across short gaps and runs that are too short are discarded; both
    limits come from the ``min_creak_length`` and ``max_gap`` config entries,
    converted to frame counts with the configured ``hop_size``.

    Unlike earlier revisions, a run that extends to the last frame of
    ``series`` is reported too, and a gap limit that rounds to zero frames no
    longer disables detection.

    Args:
        series: one creak score per frame (expected to be one-dimensional).
        dt: time stamps, indexed with the same frame indices as ``series``.
        threshold: decision threshold; the ``creak_threshold`` entry of the
            ``USER`` config section is used when ``None``.
        tgt_intervals: when true, return ``tgt.core.Interval`` objects whose
            text is the ``interval_text`` entry of the ``PRAAT`` config section.

    Returns:
        A list of ``(start, end)`` tuples holding the ``dt`` values of the
        first and last frame of each run, or the equivalent list of
        ``tgt.core.Interval`` objects.
    """
    _config = get_config()
    if threshold is None:
        threshold = _config['USER']["creak_threshold"]
    t_hop = _config['USER']["hop_size"]
    interval_config = _config["MODEL"]["POSTPROCESSING"]["INTERVALS"]
    # minimum creak length / maximum bridged gap, in the same unit as hop_size
    T_MIN = interval_config["min_creak_length"]
    T_GAP = interval_config["max_gap"]
    # the same limits expressed in frames
    N_MIN = round(T_MIN / t_hop) + 1
    # Clamp at zero: a negative count cannot be used for padding.
    N_GAP = max(round(T_GAP / t_hop) - 1, 0)

    creak_bin = np.where(series >= threshold, 1, 0).astype(int)
    res = _merge_creak_frames(creak_bin, N_MIN, N_GAP)
    creak_intervals = _mask_to_intervals(res, dt)

    if tgt_intervals:
        interval_text = _config["PRAAT"]["interval_text"]
        return [tgt.core.Interval(start_time=iv[0], end_time=iv[1], text=interval_text)
                for iv in creak_intervals]
    return creak_intervals
def get_time_vector(series: np.ndarray, sr: int, t0: float = 0):
    """Build the time axis belonging to a block-wise series.

    One time stamp is produced per entry along the first axis of ``series``.
    Stamps start at ``t0``, are spaced by the configured ``hop_size`` and are
    shifted by half the configured ``block_size``.

    Args:
        series: the block-wise series; only its length is used.
        sr: accepted for interface compatibility; not used in the computation.
        t0: offset added to every time stamp.

    Returns:
        ndarray: the time stamps, one per entry of ``series``.
    """
    user_config = get_config()['USER']
    block_size = user_config["block_size"]
    hop_size = user_config["hop_size"]

    n_blocks = series.shape[0]
    last_start = (n_blocks - 1) * hop_size + t0
    block_starts = np.linspace(t0, last_start, n_blocks, endpoint=True)
    return block_size / 2 + block_starts
def intervals_to_textgrid(intervals: list[tgt.core.Interval],
                          textgrid_path: str,
                          result_path: Optional[str],
                          tier_name: str,
                          verbose: bool = False):
    """Add ``intervals`` as a new interval tier to a TextGrid and write it out.

    The TextGrid is read as UTF-8 first and as UTF-16 if that fails to decode.
    If tiers whose name contains ``tier_name`` already exist, a running number
    is appended to the new tier's name.

    Args:
        intervals: the intervals placed on the new tier.
        textgrid_path: path of the TextGrid to read.
        result_path: path to write to; ``None`` overwrites ``textgrid_path``.
        tier_name: name of the new tier.
        verbose: when true, print the resolved output path after writing.

    Raises:
        UnicodeDecodeError: if the file cannot be decoded with either encoding.
    """
    if result_path is None:
        result_path = textgrid_path

    for encoding in ("utf-8", "utf-16"):
        try:
            textgrid = tgt.io.read_textgrid(textgrid_path, encoding=encoding)
        except UnicodeDecodeError as e:
            print(f"Error occured reading textfile:\n\n{e}")
            # Keep a reference; the name bound by ``except`` is cleared afterwards.
            decode_error = e
        else:
            break
    else:
        # No encoding worked: surface the decoding problem itself instead of
        # failing later on an unbound ``textgrid``.
        raise decode_error

    num_tiers_including_tier_name = sum(tier_name in tier.name for tier in textgrid.tiers)
    if num_tiers_including_tier_name:
        tier_name += f' {num_tiers_including_tier_name + 1}'

    interval_tier = tgt.core.IntervalTier(start_time=textgrid.start_time,
                                          end_time=textgrid.end_time,
                                          name=tier_name,
                                          objects=intervals)
    textgrid.add_tier(interval_tier)
    tgt.io.write_to_file(textgrid, result_path)
    if verbose:
        print(f"Wrote textgrid at {Path(result_path).resolve()}")
def intervals_to_csv(intervals: list[tgt.core.Interval],
                     csv_dst: str):
    """Write intervals to a CSV file with the columns ``start,end,text``.

    Fields are quoted where necessary, so interval texts containing commas,
    quotes or line breaks still produce a valid CSV file.

    Args:
        intervals: objects exposing ``start_time``, ``end_time`` and ``text``.
        csv_dst: destination path; its suffix is replaced by ``.csv``.
    """
    import csv  # local import keeps this block independent of the file header

    HEADER = ("start", "end", "text")
    csv_path = Path(csv_dst).with_suffix(".csv")
    with open(csv_path, "w", encoding="utf-8") as dst:
        # "\n" as terminator keeps the line endings of the previous hand-written output.
        writer = csv.writer(dst, lineterminator="\n")
        writer.writerow(HEADER)
        writer.writerows(
            (f"{iv.start_time}", f"{iv.end_time}", f"{iv.text}") for iv in intervals
        )
def get_root() -> Path:
    """Return the directory that contains the folder this module lives in."""
    module_dir = Path(__file__).parent
    return module_dir.parent
class ThreadWithReturnValue(Thread):
    """Thread whose ``join`` hands back the return value of the target.

    The value is ``None`` until the target has finished, and also when no
    target was given.
    """

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, Verbose=None):
        """Set up the thread.

        Args:
            group, target, name, args, kwargs: forwarded to ``Thread.__init__``.
            Verbose: accepted for backward compatibility; not used.
        """
        # ``kwargs`` defaults to None rather than ``{}`` so that no dict object
        # is shared between instances; Thread substitutes an empty dict itself.
        Thread.__init__(self, group, target, name, args, kwargs)
        self._return = None

    def run(self):
        """Call the target and remember what it returned."""
        if self._target is not None:
            self._return = self._target(*self._args,
                                        **self._kwargs)

    def join(self, timeout=None):
        """Wait for the thread like ``Thread.join`` and return the stored value.

        Args:
            timeout: optional timeout, positional or by keyword; passed on to
                ``Thread.join``.

        Returns:
            Whatever the target returned, or ``None`` if it has not finished.
        """
        Thread.join(self, timeout)
        return self._return
\ No newline at end of file
from __future__ import annotations
from .helpers import get_time_vector
from .config import get_config
# from ..feature_extraction import get_feature_list
import numpy as np
import pandas as pd
def plot(X_test: pd.DataFrame,
         y_pred: np.ndarray,
         sr: int,
         title: str | None = None):
    """Show the creak probability and the features over time in a plotly figure.

    Feature columns are drawn scaled by their maximum absolute value; two
    buttons switch between the scaled and the original values. The configured
    creak threshold is drawn as an additional line.

    Args:
        X_test: feature table, one row per frame.
        y_pred: creak probability per frame.
        sr: sample rate, passed on to ``get_time_vector``.
        title: optional figure title.

    Returns:
        The plotly figure, after it has been shown.
    """
    import plotly.express as px

    user_config = get_config()['USER']
    audio_start = user_config['audio_start']
    feature_names = X_test.columns.to_list()

    probability = pd.Series(y_pred, name='creak_probability')
    raw = pd.concat((probability, X_test), axis=1)
    raw['creak_threshold'] = user_config['creak_threshold']

    # Only the feature columns are rescaled; probability and threshold stay as they are.
    scaled = raw.copy()
    scaled[feature_names] = raw[feature_names].apply(
        lambda column: column / column.abs().max(), axis=0
    )

    time_axis = get_time_vector(y_pred, sr, audio_start)
    fig = px.line(scaled, x=time_axis, y=scaled.columns, title=title)

    fig.update_layout(
        margin={"t": 0, "b": 0, "l": 0, "r": 0},
        template="plotly_white",
    )
    fig.update_layout(
        xaxis={
            "rangeslider": {"visible": True},
            "title": 'Time [s]',
        }
    )

    normalized_button = {
        "args": [{"y": [scaled[name] for name in scaled.columns]}],
        "label": "Normalized",
        "method": "update",
    }
    original_button = {
        "args": [{"y": [raw[name] for name in raw.columns]}],
        "label": "Original",
        "method": "update",
    }
    button_bar = {
        "type": "buttons",
        "direction": "left",
        "buttons": [normalized_button, original_button],
        "pad": {"r": 10},
        "showactive": True,
        "x": 0.0,
        "xanchor": "left",
        "y": 1.1,
        "yanchor": "top",
    }
    fig.update_layout(updatemenus=[button_bar])

    if title:
        # Centre the title at the top edge of the figure.
        fig.update_layout(
            title={
                'text': title,
                'y': 0.99,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top',
            }
        )

    fig.show()
    return fig
\ No newline at end of file
from __future__ import annotations
import numpy as np
from soundfile import read
def read_wav(path: str, normalize: bool = True, start: float = 0.0, end: float | int = -1, mono=True,
             **kwargs) -> tuple[np.ndarray, int]:
    """Read a .wav file and optionally down-mix, crop and peak-normalize it.

    Args:
        path (str): the path to the wav file
        normalize (bool): when true, divide the samples by the peak absolute
            amplitude of the whole (uncropped) signal; silent or empty audio
            is returned unscaled
        start (float): start of the returned excerpt in seconds
        end (float | int): end of the returned excerpt in seconds; ``-1``
            keeps everything up to the end of the file
        mono (bool): when true, average the channels of multi-channel audio
        **kwargs: passed on to ``soundfile.read``

    Returns:
        ndarray: the audio data of the sound file in a numpy array
        int: the sample rate of the sound file
    """
    data, sr = read(path, **kwargs)
    if mono and data.ndim > 1:
        # convert to mono
        data = data.sum(axis=1) / data.shape[1]

    # The peak is taken before cropping, over all channels. np.max also works
    # on multi-channel data, where the builtin max() cannot compare whole rows.
    peak = np.max(np.abs(data)) if data.size else 0

    first_sample = int(start * sr)
    if end == -1:
        data = data[first_sample:]
    else:
        data = data[first_sample:int(end * sr)]

    if normalize and peak > 0:
        # Out-of-place division also works for integer sample formats.
        data = data / peak
    return data, sr
This diff is collapsed.
examples/creapy_creak_example_praat.PNG

243 KiB

examples/creapy_creak_probability_example.png

787 KiB

This diff is collapsed.
# %% [markdown]
# # creapy demo
#
# This notebook is a short demonstration of the classification process with creapy.
#
# First, define the paths to the audio file and to the corresponding TextGrid.
# %%
import creapy
from pathlib import Path
# %%
example_folder_path = Path(__file__).parent
audio_path = example_folder_path / "../audio/example.wav"
textgrid_path = example_folder_path / "../textgrids/example.TextGrid"
# %% [markdown]
# Normally, `creapy` will use the model that is trained on both genders. However, you may change this and choose the model trained on `male` / `female` speakers only by setting the `gender_model` argument to `male` or `female` respectively, or keep it unchanged (`None`).
# %%
X_test, y_pred, sr = creapy.process_file(audio_path, textgrid_path=textgrid_path, gender_model='female')
# %% [markdown]
# To change parameters you can either edit them manually in the config file (see the README) or use the `set_config` function. The README contains a list of the configurable parameters.
# %%
creapy.set_config()
creapy.set_config(gender_model = 'female', tier_name ='creapy', zcr_threshold = 0.09)
# %% [markdown]
# creapy can also process more than one file at a time. This is done with the `process_folder` function. In contrast to `process_file`, this function does not return the computed features and the creak probability.
# %%
creapy.process_folder(example_folder_path / "../audio", example_folder_path / "../textgrids")
# %% [markdown]
# The plot function renders the features from `X_test` and the creak probability `y_pred` over time. Use the range slider to navigate to the desired time and inspect the computed features. Most of the time, only `creak_probability`, `zcr` and `ste` are of interest for the classification process (to toggle a feature, click on its label on the right).
# %%
creapy.plot(X_test, y_pred, sr)
[build-system]
requires = ['setuptools', 'wheel']
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment