From 0e1eeb4fb6f59ee95d2583d6ca24b049c210ef2a Mon Sep 17 00:00:00 2001 From: vmonakhov Date: Mon, 22 Apr 2024 21:47:27 +0300 Subject: [PATCH] Jitter implementation -- https://github.com/ispras/lingvodoc-react/issues/1116 (#1504) * init * jitter.py * pitch_to_point * find_maximum_correlation * sound_find_extremum * sound_find_extremum * fix * CubicSpline * fix * refactoring * refactoring * joined pitch and jitter * fix * next step * point['t'] * fixes and debug * fixed minimization * next fixes * next fixes * next fixes * cleanup * xlsx * xlsx * also xlsx * fixes * next fixes * minor --- lingvodoc/schema/gql_cognate.py | 2 +- lingvodoc/schema/query.py | 2 +- lingvodoc/views/v2/jitter.py | 385 ++++++++++++++++++++++++++++++++ lingvodoc/views/v2/phonology.py | 159 ++++++++----- server-requirements-final.txt | 2 +- 5 files changed, 487 insertions(+), 63 deletions(-) create mode 100644 lingvodoc/views/v2/jitter.py diff --git a/lingvodoc/schema/gql_cognate.py b/lingvodoc/schema/gql_cognate.py index 8945a45b..0680ac16 100644 --- a/lingvodoc/schema/gql_cognate.py +++ b/lingvodoc/schema/gql_cognate.py @@ -1964,7 +1964,7 @@ def f(): for tier_result in tier_result_list: - for (interval_str, interval_r_length, p_mean, i_list, f_list, + for (interval_str, interval_r_length, j_local, p_mean, i_list, f_list, sign_longest, sign_highest, source_index) in tier_result.interval_data_list: interval_str_list = interval_str.split() diff --git a/lingvodoc/schema/query.py b/lingvodoc/schema/query.py index 9f9859ef..f5eaddb3 100644 --- a/lingvodoc/schema/query.py +++ b/lingvodoc/schema/query.py @@ -5181,7 +5181,7 @@ def perform_phonological_statistical_distance( # ...for all intervals. - for (interval_str, interval_r_length, p_mean, i_list, f_list, + for (interval_str, interval_r_length, j_local, p_mean, i_list, f_list, sign_longest, sign_highest, source_index) in tier_result.interval_data_list: formant_list.append(tuple(map(float, f_list[:2]))) diff --git a/lingvodoc/views/v2/jitter.py b/lingvodoc/views/v2/jitter.py new file mode 100644 index 00000000..012af57e --- /dev/null +++ b/lingvodoc/views/v2/jitter.py @@ -0,0 +1,385 @@ +import bisect +import math +import numpy as np +from scipy.interpolate import CubicSpline, interp1d +from pdb import set_trace as A + +voiced_floor = 50 +voiced_ceiling = 800 +pmin = 0.8 / voiced_ceiling +pmax = 1.25 / voiced_floor +maximumPeriodFactor = 1.3 + + +def is_period(pulse, ileft): + """ + This function answers the question: is the interval from point 'ileft' to point 'ileft+1' a period? + """ + iright = ileft + 1 + + # Period condition 1: both 'ileft' and 'iright' have to be within the point process. + if ileft < 0 or iright >= pulse['nt']: + return False + + # Period condition 2: the interval has to be within the boundaries, if specified. + if pmin == pmax: # special input setting (typically both zero) + return True # all intervals count as periods, irrespective of absolute size and relative size + + interval = pulse['t'][iright] - pulse['t'][ileft] + if interval <= 0.0 or interval < pmin or interval > pmax: + return False + + if maximumPeriodFactor is None or maximumPeriodFactor < 1.0: + return True + + # Period condition 3: the interval cannot be too different from both of its neighbours, if any. + if ileft <= 0: + previousInterval = None + else: + previousInterval = pulse['t'][ileft] - pulse['t'][ileft - 1] + + if iright >= pulse['nt'] - 1: + nextInterval = None + else: + nextInterval = pulse['t'][iright + 1] - pulse['t'][iright] + + if previousInterval is None or previousInterval <= 0.0: + previousIntervalFactor = None + else: + previousIntervalFactor = interval / previousInterval + + if nextInterval is None or nextInterval <= 0.0: + nextIntervalFactor = None + else: + nextIntervalFactor = interval / nextInterval + + if previousIntervalFactor is None and nextIntervalFactor is None: + return True # no neighbours: this is a period + + if previousIntervalFactor is not None and 0.0 < previousIntervalFactor < 1.0: + previousIntervalFactor = 1.0 / previousIntervalFactor + + if nextIntervalFactor is not None and 0.0 < nextIntervalFactor < 1.0: + nextIntervalFactor = 1.0 / nextIntervalFactor + + if (previousIntervalFactor is not None and previousIntervalFactor > maximumPeriodFactor and + nextIntervalFactor is not None and nextIntervalFactor > maximumPeriodFactor): + return False + + return True + + +def unidirectional_autowindow(pulse, tmin, tmax): + if tmin >= tmax: + tmin = pulse['xmin'] + tmax = pulse['xmax'] + return tmin, tmax + + +def get_mean_period(pulse, tmin, tmax): + tmin, tmax = unidirectional_autowindow(pulse, tmin, tmax) + left, right = bisect.bisect(pulse['t'], tmin), bisect.bisect(pulse['t'], tmax) + first, last = left, right - bool(right) # decrease right if it is not zero + numberOfPeriods = 0 + dsum = 0.0 + for ipoint in range(first, last): + if is_period(pulse, ipoint): + numberOfPeriods += 1 + dsum += pulse['t'][ipoint + 1] - pulse['t'][ipoint] + return dsum / numberOfPeriods if numberOfPeriods > 0 else None + + +def get_jitter_local(pulse, tmin, tmax): + tmin, tmax = unidirectional_autowindow(pulse, tmin, tmax) + left, right = bisect.bisect(pulse['t'], tmin), bisect.bisect(pulse['t'], tmax) + first, last = left, right - bool(right) # decrease right if it is not zero + numberOfPeriods = max(0, last - first) + if numberOfPeriods < 2: + return None + dsum = 0.0 + for i in range(first + 1, last): + p1 = pulse['t'][i] - pulse['t'][i - 1] + p2 = pulse['t'][i + 1] - pulse['t'][i] + intervalFactor = p1 / p2 if p1 > p2 else p2 / p1 + if pmin == pmax or (pmin <= p1 <= pmax and pmin <= p2 <= pmax and intervalFactor <= maximumPeriodFactor): + dsum += abs(p1 - p2) + else: + numberOfPeriods -= 1 + if numberOfPeriods < 2: + return None + return (dsum / (numberOfPeriods - 1) / + get_mean_period(pulse, tmin, tmax)) + + +def sampled_index_to_x(me, index): + # Index starts from zero + return me['x1'] + index * me['dx'] + + +def x_to_sampled_index(me, x, to_int=None): + # Index starts from zero + index = (x - me['x1']) / me['dx'] + if to_int == 'nearest': + return round(index) + elif to_int == 'low': + return math.floor(index) + elif to_int == 'high': + return math.ceil(index) + else: + return index + + +def find_extremum_3(channel1_base, channel2_base, d, n, include_maxima, include_minima): + channel1 = channel1_base[d:] + channel2 = channel2_base[d:] if channel2_base is not None else None + include_all = (include_maxima == include_minima) + imin = imax = 0 + + if n < 2: + if n <= 0: + return None + else: + x1 = (channel1[0] + channel2[0]) / 2 if channel2 is not None else channel1[0] + x2 = (channel1[1] + channel2[1]) / 2 if channel2 is not None else channel1[1] + xleft = abs(x1) if include_all else x1 if include_maxima else -x1 + xright = abs(x2) if include_all else x2 if include_maxima else -x2 + return 0.0 if xleft > xright else 1.0 if xleft < xright else 0.5 + + minimum = maximum = (channel1[0] + channel2[0]) / 2 if channel2 is not None else channel1[0] + for i in range(1, n): + value = (channel1[i] + channel2[i]) / 2 if channel2 is not None else channel1[i] + if value < minimum: + minimum = value + imin = i + if value > maximum: + maximum = value + imax = i + + if minimum == maximum: + return 0.5 * n # +1? + + if include_all: + if abs(minimum) > abs(maximum): + iextr = imin + else: + iextr = imax + else: + if include_maxima: + iextr = imax + else: + iextr = imin + + if iextr == 0 or iextr == n - 1: + return iextr + + value_mid = (channel1[iextr] + channel2[iextr]) / 2 if channel2 is not None else channel1[iextr] + value_left = (channel1[iextr - 1] + channel2[iextr - 1]) / 2 if channel2 is not None else channel1[iextr - 1] + value_right = (channel1[iextr + 1] + channel2[iextr + 1]) / 2 if channel2 is not None else channel1[iextr + 1] + return iextr + 0.5 * (value_right - value_left) / (2 * value_mid - value_left - value_right) + + +def sound_find_extremum(sound, tmin, tmax, include_maxima, include_minima): + assert tmin is not None + assert tmax is not None + imin = max(0, x_to_sampled_index(sound, tmin, 'low')) + imax = min(x_to_sampled_index(sound, tmax, 'high'), sound['nx'] - 1) + iextremum = find_extremum_3(sound['z'][0], sound['z'][1] if sound['ny'] > 1 else None, imin, imax - imin, + include_maxima, include_minima) + if iextremum is not None: + # Indexes 'imin' and 'iextremum' start from zero + return sound['x1'] + (imin + iextremum) * sound['dx'] + else: + return 0.5 * (tmin + tmax) + + +def find_maximum_correlation(sound, t1, windowLength, tmin2, tmax2): + maximumCorrelation = -1.0 # smart 'impossible' starting value + r1_best = r3_best = ir = None # assignments not necessary, but extra safe + r1 = r2 = r3 = 0.0 + halfWindowLength = 0.5 * windowLength + ileft1 = x_to_sampled_index(sound, t1 - halfWindowLength, 'nearest') + iright1 = x_to_sampled_index(sound, t1 + halfWindowLength, 'nearest') + ileft2min = x_to_sampled_index(sound, tmin2 - halfWindowLength, 'low') + ileft2max = x_to_sampled_index(sound, tmax2 - halfWindowLength, 'high') + peak = 0.0 # default + tout = t1 # default + assert ileft2max >= ileft2min # if the loop is never executed, the result will be garbage + for ileft2 in range(ileft2min, ileft2max + 1): + norm1 = norm2 = product = 0.0 + localPeak = 0.0 + for ichan in range(sound['ny']): + i2 = ileft2 + for i1 in range(ileft1, iright1 + 1): + if i1 < 0 or i1 >= sound['nx'] or i2 < 0 or i2 >= sound['nx']: + continue + amp1, amp2 = sound['z'][ichan][i1], sound['z'][ichan][i2] + norm1 += amp1 ** 2 + norm2 += amp2 ** 2 + product += amp1 * amp2 + localPeak = max(localPeak, abs(amp2)) + i2 += 1 + + r1, r2, r3 = r2, r3, 0.0 if product == 0.0 else product / np.sqrt(norm1 * norm2) + if r2 > maximumCorrelation and r2 >= r1 and r2 >= r3: + r1_best, maximumCorrelation, r3_best, ir = r1, r2, r3, ileft2 - 1 + peak = localPeak + + if maximumCorrelation > -1.0: # was maximumCorrelation ever assigned to?... + # ...then r1_best and r3_best and ir must also have been assigned to: + assert r1_best is not None and r3_best is not None and ir is not None + d2r = 2 * maximumCorrelation - r1_best - r3_best + if d2r != 0.0: + dr = 0.5 * (r3_best - r1_best) + maximumCorrelation += 0.5 * dr * dr / d2r + ir += dr / d2r + tout = t1 + (ir - ileft1) * sound['dx'] + return maximumCorrelation, peak, tout + + +def pitch_to_point(sound, pitch): + try: + ''' + # Debug + num_to_erase = int((1.06 - pitch['x1']) // pitch['dx']) + pitch['frames'][num_to_erase]['candidates'][0]['frequency'] = 0.0 + pitch['frames'][num_to_erase]['candidates'][0]['strength'] = 0.0 + ''' + + point = { + 'nt': 0, + 't': [] + } + t = pitch['xmin'] + added_right = -1e308 + global_peak = np.max(np.abs(sound['z'])) # with interpolation? + + # get_value_at_time = CubicSpline( + get_value_at_time = interp1d( + [pitch['x1'] + pitch['dx'] * n for n in range(pitch['nx'])], + [frame['candidates'][0]['frequency'] for frame in pitch['frames']], + fill_value="extrapolate") + + # Cycle over all voiced intervals + edges = [0, 0] + while get_voiced_interval_after(pitch, t, edges): + t_left, t_right = edges + assert t_right > t + + # Go to the middle of the voice stretch + t_middle = (t_left + t_right) / 2 + f0_middle = get_value_at_time(t_middle) + + # Our first point is near this middle + if f0_middle is None: + raise ValueError( + f"Sound_Pitch_to_PointProcess_cc: tleft {t_left}, tright {t_right}, f0middle {f0_middle}") + + t_max = sound_find_extremum( + sound, + t_middle - 0.5 / f0_middle, + t_middle + 0.5 / f0_middle, + True, True) + + assert t_max is not None + point['t'].append(t_max) + + t_save = t_max + while True: + f0 = get_value_at_time(t_max) + if f0 is None: + break + correlation, peak, t_max = find_maximum_correlation( + sound, + t_max, + 1.0 / f0, + t_max - 1.25 / f0, + t_max - 0.8 / f0) + if correlation == -1.0: + t_max -= 1.0 / f0 + if t_max < t_left: + if correlation > 0.7 and peak > 0.023333 * global_peak and t_max - added_right > 0.8 / f0: + point['t'].append(t_max) + break + if correlation > 0.3 and (peak == 0.0 or peak > 0.01 * global_peak): + if t_max - added_right > 0.8 / f0: + point['t'].append(t_max) + + t_max = t_save + while True: + f0 = get_value_at_time(t_max) + if f0 is None: + break + correlation, peak, t_max = find_maximum_correlation( + sound, + t_max, + 1.0 / f0, + t_max + 0.8 / f0, + t_max + 1.25 / f0) + if correlation == -1.0: + t_max += 1.0 / f0 + if t_max > t_right: + if correlation > 0.7 and peak > 0.023333 * global_peak: + point['t'].append(t_max) + added_right = t_max + break + if correlation > 0.3 and (peak == 0.0 or peak > 0.01 * global_peak): + point['t'].append(t_max) + added_right = t_max + + t = t_right + + point['t'].sort() + point['nt'] = len(point['t']) + return point + except Exception as e: + print(e) + raise ValueError(f"{sound} & {pitch}: not converted to PointProcess (cc).") from e + + +def get_voiced_interval_after(pitch, after, edges): + # Index starts from zero + ileft = x_to_sampled_index(pitch, after, to_int='high') + if ileft >= pitch['nx']: + return False # offright + if ileft < 0: + ileft = 0 # offleft + + # Search for first voiced frame + while ileft < pitch['nx']: + if pitch['frames'][ileft]['candidates'][0]['frequency'] > 0.0: + break + ileft += 1 + if ileft >= pitch['nx']: + return False # offright + + # Search for last voiced frame + iright = ileft + while iright < pitch['nx']: + if pitch['frames'][iright]['candidates'][0]['frequency'] == 0.0: + break + iright += 1 + iright -= 1 + + ''' + # Debug + if 50 < ileft < 70: + for n in range(ileft, iright + 1): + print(f"{n + 1 :03}'th at {pitch['x1'] + pitch['dx'] * n :.4f} sec | " + f"{pitch['frames'][n]['candidates'][0]['frequency'] :08.4f} Hz | " + f"x{pitch['frames'][n]['candidates'][0]['strength'] :06.4f}") + print("-----") + ''' + + edges[0] = sampled_index_to_x(pitch, ileft) - 0.5 * pitch['dx'] # the whole frame is considered voiced + edges[1] = sampled_index_to_x(pitch, iright) + 0.5 * pitch['dx'] + + if edges[0] >= pitch['xmax'] - 0.5 * pitch['dx']: + return False + + edges[0] = max(edges[0], pitch['xmin']) + edges[1] = min(edges[1], pitch['xmax']) + + if edges[1] <= after: + return False + + return True diff --git a/lingvodoc/views/v2/phonology.py b/lingvodoc/views/v2/phonology.py index 35794aaf..e583f410 100644 --- a/lingvodoc/views/v2/phonology.py +++ b/lingvodoc/views/v2/phonology.py @@ -66,7 +66,7 @@ import scipy.linalg from scipy.interpolate import CubicSpline -from scipy.optimize import fmin +from scipy.optimize import minimize_scalar from sqlalchemy import and_, create_engine, func, tuple_ from sqlalchemy.orm import aliased @@ -96,6 +96,7 @@ from lingvodoc.queue.celery import celery from lingvodoc.utils import sanitize_worksheet_name from lingvodoc.views.v2.utils import anonymous_userid, as_storage_file, message, storage_file, unimplemented +from lingvodoc.views.v2.jitter import pitch_to_point, get_jitter_local from pdb import set_trace as A @@ -441,7 +442,7 @@ def sound_into_pitch_frame( ac, r, imax, localMean, x1, dx, nx, ny, z, **rest): - leftSample = (t - x1) // dx # +1? + leftSample = (t - x1) // dx rightSample = leftSample + 1 for channel in range(ny): @@ -480,11 +481,18 @@ def sound_into_pitch_frame( endSample = int(min(nsamp_window, halfnsamp_window + halfnsamp_period)) for channel in range(ny): - for j in range(startSample, endSample + 1): + for j in range(startSample, endSample): value = math.fabs(frame[channel][j]) if value > localPeak: localPeak = value + ''' + Shortcut: absolute silence is always voiceless. + We are done for this frame. + ''' + if localPeak == 0.0: + return + pitchFrame['intensity'] = 1.0 if localPeak > globalPeak else localPeak / globalPeak ''' @@ -514,22 +522,24 @@ def sound_into_pitch_frame( r[-i] = r[i] = ac[i] / (ac[0] * windowR[i]) ''' - import matplotlib.pyplot as plt - plt.plot(numpy.arange(len(r)), r) - plt.savefig('r.png') + Find the strongest maxima of the correlation of this frame, + and register them as candidates. ''' - ''' - Shortcut: absolute silence is always voiceless. - We are done for this frame. - ''' - if localPeak == 0.0: - return + offset = - brent_ixmax - 1 + + # Use cubic spline to interpolete discrete values and get function for exact argument + r_offset_spline_func = CubicSpline(numpy.arange(brent_ixmax - offset), + list(r[offset + 1:]) + list(r[:- offset])) + def inverted_spline(x): + return (-r_offset_spline_func(x)) ''' - Find the strongest maxima of the correlation of this frame, - and register them as candidates. + x = numpy.arange(0, brent_ixmax - offset, 0.5) + pyplot.plot(x, r_offset_spline_func(x)) + pyplot.savefig('spline.png') ''' + imax[0] = 0 for i in range(2, min(maximumLag, brent_ixmax)): if r[i] > 0.5 * voicingThreshold and r[i] > r[i-1] and r[i] >= r[i+1]: # maximum? @@ -541,11 +551,6 @@ def sound_into_pitch_frame( dr = 0.5 * (r[i+1] - r[i-1]) d2r = 2.0 * r[i] - r[i-1] - r[i+1] frequencyOfMaximum = 1.0 / dx / (i + dr / d2r) - offset = - brent_ixmax - 1 - - # Use cubic spline to interpolete discrete values and get function for exact argument - r_offset_spline_func = CubicSpline(numpy.arange(brent_ixmax - offset), - list(r[ offset + 1: ]) + list(r[ :- offset ])) strengthOfMaximum = float(r_offset_spline_func(1.0 / dx / frequencyOfMaximum - offset)) ''' @@ -592,12 +597,12 @@ def sound_into_pitch_frame( Second pass: for extra precision, maximize cubic spline interpolation. ''' for i in range(1, pitchFrame['nCandidates']): - offset = -brent_ixmax - 1 # Get improved x and y of function maximum after cubic spline interpolation - xmid = fmin(lambda x: (- r_offset_spline_func(x)), imax[i] - offset, disp=False)[0] + x = imax[i] - offset + xmid = minimize_scalar(inverted_spline, bounds=(x - 1, x + 1), method='bounded').x ymid = float(r_offset_spline_func(xmid)) xmid += offset - pitchFrame['candidates'][i]['frequency'] = 1.0 / dx / xmid + pitchFrame['candidates'][i]['frequency'] = 1.0 / dx / xmid - 1.0 # -1.0 is an empirique delta due to used methods if ymid > 1.0: ymid = 1.0 / ymid pitchFrame['candidates'][i]['strength'] = ymid @@ -1570,19 +1575,19 @@ def get_pitch(self, begin=0, end=None): for frame in range(nx): z[channel].append(plain_z[ny * frame + channel]) # signal - minimumPitch = 75 - maximumPitch = 600 + minimumPitch = 50 # 75? + maximumPitch = 800 # 600? periodsPerWindow = 3.0 oversampling = 4 - dt = 0.015 #periodsPerWindow / minimumPitch / oversampling + dt = periodsPerWindow / minimumPitch / oversampling # 0.015 assert minimumPitch >= periodsPerWindow / duration, \ f"To analyse this Sound, 'pitch floor' must not be less than {periodsPerWindow / duration} Hz." maximumPitch = min(0.5 / dx, maximumPitch) maxnCandidates = 15 - silenceThreshold = 0.03 - voicingThreshold = 0.45 - octaveCost = 0.01 + silenceThreshold = 0.09 # 0.03? + voicingThreshold = 0.5 # 0.45? + octaveCost = 0.055 # 0.01? octaveJumpCost = 0.35 voicedUnvoicedCost = 0.14 @@ -1623,8 +1628,8 @@ def get_pitch(self, begin=0, end=None): # Create the resulting pitch contour. thee = { - #'xmin': x1, - #'xmax': x1 + duration, + 'xmin': x1, + 'xmax': x1 + duration, 'nx': numberOfFrames, 'dx': dt, 'x1': t1, @@ -1767,7 +1772,8 @@ def get_pitch(self, begin=0, end=None): [frame['candidates'][1]['frequency'] for frame in thee['frames']]) pyplot.savefig('freq.png') ''' - return thee + return sound, thee + def find_max_interval_praat(sound, interval_list): """ @@ -2172,12 +2178,14 @@ def __init__(self, mean_interval_length, max_length_str, max_length_r_length, + max_length_jt_local, max_length_p_mean, max_length_i_list, max_length_f_list, max_length_source_index, max_intensity_str, max_intensity_r_length, + max_intensity_jt_local, max_intensity_p_mean, max_intensity_i_list, max_intensity_f_list, @@ -2193,6 +2201,7 @@ def __init__(self, self.max_length_str = max_length_str self.max_length_r_length = max_length_r_length + self.max_length_jt_local = max_length_jt_local self.max_length_p_mean = max_length_p_mean self.max_length_i_list = max_length_i_list self.max_length_f_list = max_length_f_list @@ -2200,6 +2209,7 @@ def __init__(self, self.max_intensity_str = max_intensity_str self.max_intensity_r_length = max_intensity_r_length + self.max_intensity_jt_local = max_intensity_jt_local self.max_intensity_p_mean = max_intensity_p_mean self.max_intensity_i_list = max_intensity_i_list self.max_intensity_f_list = max_intensity_f_list @@ -2220,11 +2230,12 @@ def format(self): ([interval_str, '{0:.2f}%'.format(r_length * 100), is_max_length, is_max_intensity, source_index], + j_local, p_mean, i_list, f_list) - for interval_str, r_length, p_mean, i_list, f_list, is_max_length, is_max_intensity, source_index in + for interval_str, r_length, j_local, p_mean, i_list, f_list, is_max_length, is_max_intensity, source_index in self.interval_data_list] return pprint.pformat( @@ -2321,9 +2332,10 @@ def process_sound(tier_data_list, sound, vowel_selection = None): max_length_interval = interval_list[max_length_index] # Pitches are calculated for the whole sound content at once. - pitch_list = sound.get_pitch() + sound_dict, pitch_dict = sound.get_pitch() + pulse = pitch_to_point(sound_dict, pitch_dict) - # Getting desired intervals and mean values within them from the obtained 'pitch_list'. + # Getting desired intervals and mean values within them from the obtained 'pitch_dict'. with open('pitch.log', 'a') as f: cur_frame = 0 @@ -2331,12 +2343,12 @@ def process_sound(tier_data_list, sound, vowel_selection = None): xl_p_mean = xi_p_mean = 0 for begin_sec, end_sec, text in interval_list: sum_fq = num_fq = 0 - for iframe in range(cur_frame, pitch_list['nx']): - point = pitch_list['x1'] + pitch_list['dx'] * iframe + for iframe in range(cur_frame, pitch_dict['nx']): + point = pitch_dict['x1'] + pitch_dict['dx'] * iframe if point <= begin_sec: continue elif begin_sec < point < end_sec: - freq = pitch_list['frames'][iframe]['candidates'][0]['frequency'] + freq = pitch_dict['frames'][iframe]['candidates'][0]['frequency'] print(f"'{text}' | {point:.3f} sec | {freq:.3f} Hz", file=f) sum_fq += freq num_fq += (freq > 0) @@ -2360,10 +2372,12 @@ def process_sound(tier_data_list, sound, vowel_selection = None): max_length_i_list = [] max_intensity_f_list = [] max_length_f_list = [] + max_intensity_jt_local = 0 + max_length_jt_local = 0 interval_data_list = [] if vowel_selection is None or vowel_selection == True: - #print('Calculating max_intensity_ and max_length_ lists..') + print('Calculating max_intensity_ and max_length_ lists..') max_intensity_i_list = ( sound.get_interval_intensity(*max_intensity_interval[:2])) @@ -2377,8 +2391,14 @@ def process_sound(tier_data_list, sound, vowel_selection = None): max_length_f_list = ( sound.get_interval_formants(*max_length_interval[:2])) + max_intensity_jt_local = ( + get_jitter_local(pulse, *max_intensity_interval[:2])[0]) + + max_length_jt_local = ( + get_jitter_local(pulse, *max_length_interval[:2])[0]) + if vowel_selection is None or vowel_selection == False: - #print('Calculating lists for all intervals...') + print('Calculating lists for all intervals...') intensity_list = [ sound.get_interval_intensity(begin_sec, end_sec) @@ -2388,6 +2408,12 @@ def process_sound(tier_data_list, sound, vowel_selection = None): sound.get_interval_formants(begin_sec, end_sec) for begin_sec, end_sec, text in interval_list] + jitter_list = [ + get_jitter_local(pulse, begin_sec, end_sec) + for begin_sec, end_sec, text in interval_list] + + log.debug(f"jitter_list: {jitter_list}") + # Preparing data of all other intervals. str_list = [ @@ -2411,6 +2437,7 @@ def process_sound(tier_data_list, sound, vowel_selection = None): (interval_str, (end - begin) / mean_interval_length, + f'{j_local:.3f}', f'{p_mean:.3f}', [f'{i_min:.3f}', f'{i_max:.3f}', f'{i_max - i_min:.3f}'], list(map('{0:.3f}'.format, f_list)), @@ -2421,6 +2448,7 @@ def process_sound(tier_data_list, sound, vowel_selection = None): for ( index, (interval_str, + j_local, p_mean, (_, i_min, i_max), f_list, @@ -2430,6 +2458,7 @@ def process_sound(tier_data_list, sound, vowel_selection = None): enumerate( zip( str_list, + jitter_list, pitch_means, intensity_list, formant_list, @@ -2463,12 +2492,14 @@ def process_sound(tier_data_list, sound, vowel_selection = None): mean_interval_length, max_length_str, max_length / mean_interval_length, + f'{max_length_jt_local:.3f}', f'{xl_p_mean:.3f}', list(map('{0:.3f}'.format, max_length_i_list)), list(map('{0:.3f}'.format, max_length_f_list)), max_length_source_index, max_intensity_str, (max_intensity_interval[1] - max_intensity_interval[0]) / mean_interval_length, + f'{max_intensity_jt_local:.3f}', f'{xi_p_mean:.3f}', list(map('{0:.3f}'.format, max_intensity_i_list)), list(map('{0:.3f}'.format, max_intensity_f_list)), @@ -2848,7 +2879,7 @@ def chart_data(f_2d_tt_list, f_3d_tt_list): if len(filtered_3d_list) < (len(distance_3d_list) + 1) // 2: sorted_list = [ - + (f_3d, tt) for distance_squared, f_3d, tt in distance_3d_list] @@ -2926,7 +2957,7 @@ def chart_definition_list( chart_data_2d_list[i] = list(chart_data_2d_list[i]) chart_data_2d_list[i][4] = ( - + list( filter( lambda f_2d_tt: @@ -3153,6 +3184,7 @@ def compile_workbook( 'Longest (seconds) interval', 'Relative length', + 'Jitter local', 'Pitch mean (Hz)', 'Intensity minimum (dB)', 'Intensity maximum (dB)', 'Intensity range (dB)', 'F1 mean (Hz)', 'F2 mean (Hz)', 'F3 mean (Hz)', @@ -3160,6 +3192,7 @@ def compile_workbook( 'Highest intensity (dB) interval', 'Relative length', + 'Jitter local', 'Pitch mean (Hz)', 'Intensity minimum (dB)', 'Intensity maximum (dB)', 'Intensity range (dB)', 'F1 mean (Hz)', 'F2 mean (Hz)', 'F3 mean (Hz)', @@ -3173,6 +3206,7 @@ def compile_workbook( 'Interval', 'Relative length', + 'Jitter local', 'Pitch mean (Hz)', 'Intensity minimum (dB)', 'Intensity maximum (dB)', 'Intensity range (dB)', 'F1 mean (Hz)', 'F2 mean (Hz)', 'F3 mean (Hz)', @@ -3197,23 +3231,25 @@ def compile_workbook( if args.vowel_selection: worksheet_results.set_column(0, 2, 20) - worksheet_results.set_column(3, 3, 8, format_percent) - worksheet_results.set_column(4, 7, 8) - worksheet_results.set_column(8, 10, 10) - worksheet_results.set_column(11, 11, 4) - worksheet_results.set_column(12, 12, 20) - worksheet_results.set_column(13, 13, 8, format_percent) - worksheet_results.set_column(14, 17, 8) - worksheet_results.set_column(18, 20, 10) - worksheet_results.set_column(21, 22, 4) + worksheet_results.set_column(3, 4, 8, format_percent) + worksheet_results.set_column(5, 8, 8) + worksheet_results.set_column(9, 11, 10) + worksheet_results.set_column(12, 12, 4) + worksheet_results.set_column(13, 13, 20) + worksheet_results.set_column(14, 14, 8, format_percent) + worksheet_results.set_column(15, 18, 8) + worksheet_results.set_column(19, 21, 10) + worksheet_results.set_column(22, 23, 4) + worksheet_results.set_column(24, 24, 8, format_percent) else: worksheet_results.set_column(0, 2, 20) - worksheet_results.set_column(3, 3, 8, format_percent) - worksheet_results.set_column(4, 7, 8) - worksheet_results.set_column(8, 10, 10) - worksheet_results.set_column(11, 13, 4) + worksheet_results.set_column(3, 4, 8, format_percent) + worksheet_results.set_column(5, 8, 8) + worksheet_results.set_column(9, 11, 10) + worksheet_results.set_column(12, 14, 4) + worksheet_results.set_column(15, 15, 8, format_percent) worksheet_dict[group] = ( @@ -3341,6 +3377,7 @@ def next_text(): ' '.join([vowel_a] + text_a_list[1:]), round(tier_result.max_length_r_length, 4)] + + [ float(tier_result.max_length_jt_local)] + [ float(tier_result.max_length_p_mean) ] + i_list_a + f_list_a + @@ -3349,6 +3386,7 @@ def next_text(): ' '.join([vowel_b] + text_b_list[1:]), round(tier_result.max_intensity_r_length, 4)] + + [ float(tier_result.max_intensity_jt_local)] + [ float(tier_result.max_intensity_p_mean) ] + i_list_b + f_list_b + @@ -3399,7 +3437,7 @@ def next_text(): else: for index, (interval_str, interval_r_length, - p_mean, i_list, f_list, sign_longest, sign_highest, source_index) in ( + j_local, p_mean, i_list, f_list, sign_longest, sign_highest, source_index) in ( enumerate(tier_result.interval_data_list)): @@ -3421,6 +3459,7 @@ def next_text(): ' '.join([vowel] + interval_str.split()[1:]), round(interval_r_length, 4)] + + [float(j_local)] + [float(p_mean)] + i_list + f_list + @@ -3606,7 +3645,7 @@ def next_text(): reverse = True) chart_dict_list, table_2d_row_index = ( - + chart_definition_list( chart_data_2d_list, worksheet_table_2d, min_2d_f1, max_2d_f1, min_2d_f2, max_2d_f2, @@ -3704,7 +3743,7 @@ def next_text(): list(chart_data_3d_list[i])) chart_data_3d_list[i][4] = ( - + list( filter( lambda f_3d_tt: @@ -3749,7 +3788,7 @@ def next_text(): xc_outlier_list, xl_outlier_list = ( zip(*tt_list)) - + f1_column = column_list[index * 5] f2_column = column_list[index * 5 + 1] f3_column = column_list[index * 5 + 2] @@ -4210,7 +4249,7 @@ def get_from_request(self, request): perspective_id_list = [ tuple(map(int, perspective_str.split(','))) for perspective_str in perspective_list_str.split('|')] - + self.link_field_list.append((field_id, perspective_id_list)) self.link_perspective_list = [] @@ -5236,7 +5275,7 @@ def result_filter(textgrid_result_list): else: # Ok, we don't yet have a text field, but maybe we have an identifier to find the field by? - + if perspective_id in args.link_perspective_dict: text_field_id = args.link_perspective_dict[perspective_id] diff --git a/server-requirements-final.txt b/server-requirements-final.txt index c0a3c5f6..6d9f8d7f 100644 --- a/server-requirements-final.txt +++ b/server-requirements-final.txt @@ -1,3 +1,3 @@ matplotlib==1.5.3 -# for win10 with python-8.10 +# for win10 with python-3.10 #matplotlib==2.2.5 \ No newline at end of file