initial commit

134269fb · Thomas Röck · 134269fb · 134269fb · 134269fb · 134269fb
Commit 134269fb authored 2 years ago by Thomas Röck
--- a/creapy/utils/helpers.py
+++ b/creapy/utils/helpers.py
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Optional, Union
+
+import numpy as np
+import tgt
+
+from ..utils import get_config
+from threading import Thread
+
+
+def get_creak_intervals(series: np.ndarray, dt: np.ndarray, threshold: Optional[float] = None,
+                        tgt_intervals=False):
+    """Turn a per-frame creak series into a list of creak intervals.
+
+    Frames whose value reaches ``threshold`` are marked as creak. Runs of
+    creak frames are merged across gaps of at most ``N_GAP`` frames, runs
+    shorter than ``N_MIN`` frames are discarded, and each remaining run is
+    reported through the entries of ``dt`` at its first and last frame.
+
+    Args:
+        series: per-frame values compared against ``threshold``
+            (expected to be one-dimensional).
+        dt: array indexed with the same frame positions as ``series``;
+            supplies the interval boundaries.
+        threshold: decision threshold. When ``None`` the value
+            ``USER/creak_threshold`` from the configuration is used.
+        tgt_intervals: when exactly ``True``, return ``tgt.core.Interval``
+            objects labelled with ``PRAAT/interval_text`` from the
+            configuration instead of plain tuples.
+
+    Returns:
+        A list of ``(dt[first], dt[last])`` tuples, or a list of
+        ``tgt.core.Interval`` objects when ``tgt_intervals`` is ``True``.
+    """
+    _config = get_config()
+    if threshold is None:
+        threshold = _config['USER']["creak_threshold"]
+
+    t_hop = _config['USER']["hop_size"]
+    interval_config = _config["MODEL"]["POSTPROCESSING"]["INTERVALS"]
+    # minimum creak length, converted from a duration into a frame count
+    N_MIN = round(interval_config["min_creak_length"] / t_hop) + 1
+    # largest gap (in frames) that is still bridged; clamped at zero because
+    # a negative count cannot be used for the padding below
+    N_GAP = max(round(interval_config["max_gap"] / t_hop) - 1, 0)
+
+    creak_bin = np.where(series >= threshold, 1, 0).astype(int)
+    n_frames = creak_bin.shape[0]
+    res = np.zeros(creak_bin.shape).astype(int)
+    # look-ahead offsets 1..N_GAP; the zero padding keeps them in range
+    # for the last frames of the series
+    n_gap = np.arange(1, N_GAP + 1)
+    padded = np.append(creak_bin, np.zeros(N_GAP, dtype=int))
+
+    n_segment = 0  # length of the run currently being collected
+    idx0 = 0       # first frame of that run
+    for idx in range(n_frames):
+        if padded[idx] == 1:
+            if not n_segment:
+                idx0 = idx
+            n_segment += 1
+        elif n_segment:
+            if padded[idx + n_gap].any():
+                # creak resumes within the allowed gap: keep the run open
+                n_segment += 1
+                continue
+            if n_segment >= N_MIN:
+                res[idx0:idx0 + n_segment] = 1
+            n_segment = 0
+    # a run that is still open here reached the last frame of the series
+    if n_segment >= N_MIN:
+        res[idx0:idx0 + n_segment] = 1
+
+    # rising / falling steps of the zero-framed mask mark where runs
+    # begin and where they end
+    edges = np.diff(np.concatenate(([0], res, [0])))
+    starts = np.flatnonzero(edges == 1)
+    ends = np.flatnonzero(edges == -1) - 1
+    creak_intervals = [(dt[first], dt[last]) for first, last in zip(starts, ends)]
+
+    if tgt_intervals is True:
+        interval_text = _config["PRAAT"]["interval_text"]
+        return [tgt.core.Interval(start_time=iv[0], end_time=iv[1], text=interval_text) for iv in creak_intervals]
+
+    return creak_intervals
+
+
+def get_time_vector(series: np.ndarray, sr: int, t0: float = 0):
+    """Build one time stamp per entry of ``series``.
+
+    The first stamp is ``t0 + block_size / 2`` and consecutive stamps are
+    ``hop_size`` apart; both values are read from the ``USER`` section of
+    the configuration.
+
+    Args:
+        series: array whose first dimension sets the number of stamps.
+        sr: not used by this function.
+        t0: offset added to every stamp.
+
+    Returns:
+        Array with ``series.shape[0]`` time stamps.
+    """
+    user_config = get_config()['USER']
+    block_size = user_config["block_size"]
+    hop_size = user_config["hop_size"]
+
+    n_frames = series.shape[0]
+    last_start = (n_frames - 1) * hop_size + t0
+    block_starts = np.linspace(t0, last_start, n_frames, endpoint=True)
+    return block_size / 2 + block_starts
+
+
+def intervals_to_textgrid(intervals: list[tgt.core.Interval],
+                          textgrid_path: str,
+                          result_path: Optional[str],
+                          tier_name: str,
+                          verbose: bool = False):
+    """Add ``intervals`` as a new interval tier to an existing TextGrid.
+
+    The TextGrid at ``textgrid_path`` is read (UTF-8 first, then UTF-16),
+    a tier holding ``intervals`` is appended and the result is written to
+    ``result_path``.
+
+    Args:
+        intervals: intervals placed on the new tier.
+        textgrid_path: TextGrid file to read.
+        result_path: destination file; ``None`` overwrites ``textgrid_path``.
+        tier_name: name of the new tier. If existing tier names already
+            contain it, a running number is appended to keep it distinct.
+        verbose: print the resolved destination path after writing.
+
+    Raises:
+        ValueError: if the file cannot be decoded with any tried encoding.
+    """
+    if result_path is None:
+        result_path = textgrid_path
+
+    last_error = None
+    for encoding in ("utf-8", "utf-16"):
+        try:
+            textgrid = tgt.io.read_textgrid(textgrid_path, encoding=encoding)
+        except UnicodeDecodeError as e:
+            print(f"Error occured reading textfile:\n\n{e}")
+            last_error = e
+        else:
+            break
+    else:
+        # no encoding worked: fail with a clear message instead of an
+        # unbound ``textgrid`` further down
+        raise ValueError(
+            f"Could not decode {textgrid_path} as utf-8 or utf-16"
+        ) from last_error
+
+    num_tiers_including_tier_name = sum(tier_name in tier.name for tier in textgrid.tiers)
+    if num_tiers_including_tier_name:
+        tier_name += f' {num_tiers_including_tier_name + 1}'
+
+    interval_tier = tgt.core.IntervalTier(start_time=textgrid.start_time,
+                                          end_time=textgrid.end_time,
+                                          name=tier_name,
+                                          objects=intervals)
+    textgrid.add_tier(interval_tier)
+
+    tgt.io.write_to_file(textgrid, result_path)
+    if verbose:
+        print(f"Wrote textgrid at {Path(result_path).resolve()}")
+
+
+def intervals_to_csv(intervals: list[tgt.core.Interval],
+                     csv_dst: str):
+    """Write ``intervals`` to a CSV file with the columns start, end, text.
+
+    Args:
+        intervals: objects exposing ``start_time``, ``end_time`` and ``text``.
+        csv_dst: destination path; its suffix is replaced by ``.csv``.
+    """
+    import csv
+
+    csv_dst = Path(csv_dst).with_suffix(".csv")
+    with open(str(csv_dst), "w", encoding="utf-8") as dst:
+        # the csv writer quotes fields containing commas, quotes or line
+        # breaks, which plain string joining would leave corrupt
+        writer = csv.writer(dst, lineterminator="\n")
+        writer.writerow(("start", "end", "text"))
+        writer.writerows(
+            (f"{iv.start_time}", f"{iv.end_time}", f"{iv.text}")
+            for iv in intervals
+        )
+
+
+def get_root() -> Path:
+    """Return the directory two levels above this source file."""
+    module_dir = Path(__file__).parent
+    return module_dir.parent
+
+
+class ThreadWithReturnValue(Thread):
+    """Thread whose ``join`` returns the value produced by ``target``."""
+
+    def __init__(self, group=None, target=None, name=None,
+                 args=(), kwargs=None, Verbose=None, *, daemon=None):
+        """Set up the thread.
+
+        ``group``, ``target``, ``name``, ``args``, ``kwargs`` and ``daemon``
+        are forwarded to ``threading.Thread``. ``Verbose`` is accepted for
+        backward compatibility and ignored.
+        """
+        # kwargs defaults to None (not a shared ``{}``); Thread turns
+        # None into a fresh dict itself
+        super().__init__(group=group, target=target, name=name,
+                         args=args, kwargs=kwargs, daemon=daemon)
+        self._return = None
+
+    def run(self):
+        """Call ``target`` and remember its return value."""
+        if self._target is not None:
+            self._return = self._target(*self._args,
+                                        **self._kwargs)
+
+    def join(self, timeout=None):
+        """Wait for the thread and return the value ``target`` returned.
+
+        Returns ``None`` when no value has been stored, e.g. because the
+        wait timed out before ``target`` finished or there was no target.
+        """
+        super().join(timeout)
+        return self._return
+    
\ No newline at end of file
--- a/creapy/utils/plot.py
+++ b/creapy/utils/plot.py
+from __future__ import annotations
+
+from .helpers import get_time_vector
+from .config import get_config
+# from ..feature_extraction import get_feature_list
+
+import numpy as np
+import pandas as pd
+
+def plot(X_test: pd.DataFrame, 
+         y_pred: np.ndarray,
+         sr: int,
+         title: str | None = None):
+    """Show the features and the creak probability as line plots over time.
+
+    The figure starts with every feature column scaled by its largest
+    absolute value; two buttons switch between the scaled and the original
+    values. The configured creak threshold is drawn as an extra line.
+
+    Args:
+        X_test: one column per feature, one row per frame.
+        y_pred: creak probability per frame, same length as ``X_test``.
+        sr: forwarded to ``get_time_vector``.
+        title: optional figure title.
+
+    Returns:
+        The plotly figure, after it has been shown.
+    """
+    import plotly.express as px
+    _config = get_config()['USER']
+    t0 = _config['audio_start']
+    features = X_test.columns.to_list()
+    # concat aligns on the index, so give the prediction the index of
+    # X_test; otherwise a non-default index would pair the wrong rows
+    creak_probability = pd.Series(np.asarray(y_pred), index=X_test.index,
+                                  name='creak_probability')
+    df = pd.concat((creak_probability, X_test), axis=1)
+    df['creak_threshold'] = _config['creak_threshold']
+
+    df_norm = df.copy()
+    # ``or 1`` leaves an all-zero column at zero instead of turning it
+    # into NaN through 0/0
+    df_norm[features] = df[features].apply(
+        lambda x: x / (x.abs().max() or 1), axis=0
+    )
+    dt = get_time_vector(y_pred, sr, t0)
+
+    fig = px.line(df_norm,
+                  x=dt,
+                  y=df_norm.columns,
+                  title=title)
+
+    # buttons that swap the plotted y-values between both data frames
+    scale_buttons = [
+        dict(
+            args=[{"y": [df_norm[column] for column in df_norm.columns]}],
+            label="Normalized",
+            method="update"
+        ),
+        dict(
+            args=[{"y": [df[column] for column in df.columns]}],
+            label="Original",
+            method="update"
+        ),
+    ]
+    fig.update_layout(
+        margin=dict(t=0, b=0, l=0, r=0),
+        template="plotly_white",
+        xaxis=dict(
+            rangeslider=dict(visible=True),
+            title='Time [s]'
+        ),
+        updatemenus=[
+            dict(
+                type="buttons",
+                direction="left",
+                buttons=scale_buttons,
+                pad={"r": 10},
+                showactive=True,
+                x=0.0,
+                xanchor="left",
+                y=1.1,
+                yanchor="top"
+            ),
+        ],
+    )
+
+    if title:
+        # centre the title at the top of the figure
+        fig.update_layout(
+            title={
+                'text': title,
+                'y': 0.99,
+                'x': 0.5,
+                'xanchor': 'center',
+                'yanchor': 'top'}
+        )
+
+    fig.show()
+    return fig
+    
+    
\ No newline at end of file
--- a/creapy/utils/read_wav.py
+++ b/creapy/utils/read_wav.py
+from __future__ import annotations
+import numpy as np
+from soundfile import read
+
+
+def read_wav(path: str, normalize: bool = True, start: float = 0.0, end: float | int = -1, mono=True,
+             **kwargs) -> tuple[np.ndarray, int]:
+    """Read a sound file and optionally downmix, trim and normalise it.
+
+    Args:
+        path (str): the path to the wav file
+        normalize (bool): divide the samples by the largest absolute sample
+            of the whole file (taken before trimming). Skipped when that
+            peak is zero or the file holds no samples.
+        start (float): start of the returned excerpt; multiplied by the
+            sample rate to get the first sample index.
+        end (float | int): end of the returned excerpt, converted like
+            ``start``; ``-1`` keeps everything up to the end of the file.
+        mono (bool): average all channels into one when the data has more
+            than one dimension.
+        **kwargs: forwarded to ``soundfile.read``.
+
+    Returns:
+        ndarray: the audio data of the sound file in a numpy array
+        int: the sample rate of the sound file
+    """
+    data, sr = read(path, **kwargs)
+    if mono is True and data.ndim > 1:
+        # convert to mono by averaging the channels
+        data = data.sum(axis=1) / data.shape[1]
+
+    # peak over all samples; works for any number of dimensions and does
+    # not fail on an empty file
+    max_ = np.abs(data).max() if data.size else 0
+    if end == -1:
+        data = data[int(start*sr):]
+    else:
+        data = data[int(start*sr):int(end*sr)]
+
+    if normalize is True and max_ > 0:
+        # out-of-place division also works for integer sample types
+        data = data / max_
+
+    return data, sr
--- a/creapy/utils/text_grid_to_intervals.py
+++ b/creapy/utils/text_grid_to_intervals.py
--- a/examples/creapy_creak_example_praat.PNG
+++ b/examples/creapy_creak_example_praat.PNG
--- a/examples/creapy_creak_probability_example.png
+++ b/examples/creapy_creak_probability_example.png
--- a/examples/creapy_demo.ipynb
+++ b/examples/creapy_demo.ipynb
--- a/examples/creapy_demo.py
+++ b/examples/creapy_demo.py
+# %% [markdown]
+# # creapy demo
+# 
+# This is a simple demonstration notebook to show the classification process using creapy.
+# 
+# First, define the paths to the audio file and its corresponding TextGrid.
+
+# %%
+import creapy
+from pathlib import Path
+# %%
+example_folder_path = Path(__file__).parent
+audio_path = example_folder_path / "../audio/example.wav"
+textgrid_path = example_folder_path / "../textgrids/example.TextGrid"
+
+# %% [markdown]
+# Normally, `creapy` will use the model that is trained on both genders. However, you may change this and choose the model trained on `male` / `female` speakers only by setting the `gender_model` argument to `male` or `female` respectively, or keep it unchanged (`None`).
+
+# %%
+X_test, y_pred, sr = creapy.process_file(audio_path, textgrid_path=textgrid_path, gender_model='female')
+
+# %% [markdown]
+# To change parameters you can either edit them manually in the config file (see the README) or use the `set_config` function. The README contains a list of the configurable parameters.
+
+# %%
+creapy.set_config()
+creapy.set_config(gender_model = 'female', tier_name ='creapy', zcr_threshold = 0.09)
+
+# %% [markdown]
+# Creapy can also process more than one file at a time. This can be done with the `process_folder` function. In contrast to `process_file`, this function will not return the computed features and the creak probability.
+
+# %%
+creapy.process_folder(example_folder_path / "../audio", example_folder_path / "../textgrids")
+
+# %% [markdown]
+# The plot function renders the features from `X_test` and the creak probability `y_pred` over time. One can use the scroll bar to search for the desired time and look at the computed features. Most of the time, only the features `creak_probability`, `zcr` and `ste` are interesting for the classification process (to toggle features, just click on the label on the right).
+
+# %%
+creapy.plot(X_test, y_pred, sr)
+
+
--- a/pyproject.toml
+++ b/pyproject.toml
+[build-system]
+requires = ['setuptools', 'wheel']
--- a/readme4dummies_draft.md
+++ b/readme4dummies_draft.md
--- a/setup.cfg
+++ b/setup.cfg
--- a/textgrids/example.TextGrid
+++ b/textgrids/example.TextGrid