From f9c1f8a164d783a15bef20353fcdb2538c7670c3 Mon Sep 17 00:00:00 2001
From: vmonakhov <vmonakhov@ispras.ru>
Date: Mon, 1 Apr 2024 15:02:43 +0300
Subject: [PATCH] Pitch phonology --
 https://github.com/ispras/lingvodoc-react/issues/1102 (#1501)

* next

* fix

* next

* next1

* next

* refactoring

* next steps

* next steps

* fixes

* next steps

* some final

* cleanup

* fixes

* hotfix

* hotfix

* minor

* cubic spline interpolation

* fix

* debug

* debug

* debug and fixes

* fixes

* Fixes

* Debug

* Normalized signal

* Plot windowR

* fixes

* more fixes

* much more

* fft autocorrelation maybe like this

* debug

* most correct

* get means for xlsx

* move field to the left

* no vowel_selection

* cleanup

* xi and xl

* xi and xl in xlsx

* fix

* cleanup

* minor

---------

Co-authored-by: Ivan Beloborodov <ivbel@ispras.ru>
---
 lingvodoc/views/v2/phonology.py | 614 +++++++++++++++++++++++++++++++-
 server-requirements-final.txt   |   2 +
 2 files changed, 599 insertions(+), 17 deletions(-)

diff --git a/lingvodoc/views/v2/phonology.py b/lingvodoc/views/v2/phonology.py
index 6ac3dcb6..faac179c 100644
--- a/lingvodoc/views/v2/phonology.py
+++ b/lingvodoc/views/v2/phonology.py
@@ -16,6 +16,7 @@
 
 import os
 from os import makedirs, path
+import multiprocessing
 
 import pprint
 import re
@@ -64,6 +65,8 @@
 from pyramid.view import view_config
 
 import scipy.linalg
+from scipy.interpolate import CubicSpline
+from scipy.optimize import fmin
 
 from sqlalchemy import and_, create_engine, func, tuple_
 from sqlalchemy.orm import aliased
@@ -94,6 +97,7 @@
 from lingvodoc.utils import sanitize_worksheet_name
 from lingvodoc.views.v2.utils import anonymous_userid, as_storage_file, message, storage_file, unimplemented
 
+from pdb import set_trace as A
 
 # Setting up logging.
 log = logging.getLogger(__name__)
@@ -325,6 +329,289 @@ def evaluate(c_list, value):
     return formant_list
 
 
+def pitch_path_finder(
+        silenceThreshold,
+        voicingThreshold,
+        octaveCost,
+        octaveJumpCost,
+        voicedUnvoicedCost,
+        floor,
+        ceiling,
+        frames,
+        dx, nx,
+        **rest):
+    try:
+
+        maxnCandidates = max(frame['nCandidates'] for frame in frames)
+
+        timeStepCorrection = 0.01 / dx
+        octaveJumpCost *= timeStepCorrection
+        voicedUnvoicedCost *= timeStepCorrection
+
+        delta = numpy.zeros((nx, maxnCandidates), dtype=float)
+        psi = numpy.zeros((nx, maxnCandidates), dtype=int)
+
+        for iframe in range(nx):
+            frame = frames[iframe]
+            unvoicedStrength = (
+                0.0 if silenceThreshold <= 0 else 2.0 - frame['intensity'] /
+                (silenceThreshold / (1.0 + voicingThreshold))
+            )
+            unvoicedStrength = voicingThreshold + max(0.0, unvoicedStrength)
+            for icand in range(frame['nCandidates']):
+                candidate = frame['candidates'][icand]
+                voiceless = not (floor < candidate['frequency'] < ceiling)
+                delta[iframe][icand] = (
+                    unvoicedStrength if voiceless else candidate['strength'] -
+                    octaveCost * math.log2( ceiling / candidate['frequency'] )
+                )
+
+        '''
+        Look for the most probable path through the maxima.
+        There is a cost for the voiced/unvoiced transition,
+        and a cost for a frequency jump.
+        '''
+        for iframe in range(1, nx):
+            prevFrame = frames[iframe - 1]
+            curFrame = frames[iframe]
+            prevDelta = delta[iframe - 1]
+            curDelta = delta[iframe]
+            curPsi = psi[iframe]
+            for icand2 in range(curFrame['nCandidates']):
+                f2 = curFrame['candidates'][icand2]['frequency']
+                maximum = -1e30
+                place = None
+                for icand1 in range(prevFrame['nCandidates']):
+                    f1 = prevFrame['candidates'][icand1]['frequency']
+                    previousVoiceless = not (floor < f1 < ceiling)
+                    currentVoiceless = not (floor < f2 < ceiling)
+                    if currentVoiceless:
+                        if previousVoiceless:
+                            transitionCost = 0.0   # both voiceless
+                        else:
+                            transitionCost = voicedUnvoicedCost  # voiced-to-unvoiced transition
+                    else:
+                        if previousVoiceless:
+                            transitionCost = voicedUnvoicedCost  # unvoiced-to-voiced transition
+                        else:
+                            transitionCost = octaveJumpCost * math.fabs( math.log2(f1 / f2) )  # both voiced
+
+                    value = prevDelta[icand1] - transitionCost + curDelta[icand2]
+
+                    if value > maximum:
+                        maximum = value
+                        place = icand1
+
+                curDelta[icand2] = maximum
+                curPsi[icand2] = place
+
+        '''
+        Find the end of the most probable path.
+        '''
+        place = 0
+        maximum = delta[nx - 1][place]
+        for icand in range(1, frames[nx - 1]['nCandidates']):
+            if delta[nx - 1][icand] > maximum:
+                place = icand
+                maximum = delta[nx - 1][place]
+
+        '''
+        Backtracking: follow the path backwards.
+        '''
+        for iframe in range(nx - 1, -1, -1):
+            frame = frames[iframe]
+            frame['candidates'][0], frame['candidates'][place] = frame['candidates'][place], frame['candidates'][0]
+            place = psi[iframe][place]
+
+    except Exception as e:
+        print(e)
+        raise RuntimeError(frames, ": path not found.")
+
+
+# Compute pitch frames
+def sound_into_pitch_frame(
+        pitchFrame, t, pitchFloor,
+        maxnCandidates, voicingThreshold,
+        octaveCost, dt_window,
+        nsamp_window, halfnsamp_window,
+        maximumLag, nsampFFT,
+        nsamp_period, halfnsamp_period,
+        brent_ixmax, globalPeak,
+        window, windowR, frame,
+        ac, r, imax, localMean,
+        x1, dx, nx, ny, z, **rest):
+
+    leftSample = (t - x1) // dx  # +1?
+    rightSample = leftSample + 1
+
+    for channel in range(ny):
+        '''
+        Compute the local mean; look one longest period to both sides.
+        '''
+        startSample = int(rightSample - nsamp_period)
+        endSample = int(leftSample + nsamp_period)
+        assert startSample >= 0
+        assert endSample < nx
+
+        localMean[channel] = 0.0
+        for i in range(startSample, endSample + 1):
+            localMean[channel] += z[channel][i]
+        localMean[channel] /= 2 * nsamp_period
+
+        '''
+		Copy a window to a frame and subtract the local mean.
+		We are going to kill the DC component before windowing.
+        '''
+        startSample = int(rightSample - halfnsamp_window)
+        endSample = int(leftSample + halfnsamp_window)
+        assert startSample >= 0
+        assert endSample < nx
+
+        for j in range(nsamp_window):
+            frame[channel][j] = (z[channel][j + startSample] - localMean[channel]) * window[j]
+        for j in range(nsamp_window, nsampFFT):
+            frame[channel][j] = 0.0
+
+    '''
+    Compute the local peak; look half a longest period to both sides.
+    '''
+    localPeak = 0.0
+    startSample = int(max(0, halfnsamp_window - halfnsamp_period))
+    endSample = int(min(nsamp_window, halfnsamp_window + halfnsamp_period))
+
+    for channel in range(ny):
+        for j in range(startSample, endSample + 1):
+            value = math.fabs(frame[channel][j])
+            if value > localPeak:
+                localPeak = value
+
+    pitchFrame['intensity'] = 1.0 if localPeak > globalPeak else localPeak / globalPeak
+
+    '''
+    Compute the correlation into the array 'r'.
+    The FFT of the autocorrelation is the power spectrum.
+    '''
+
+    nsampRFFT = nsampFFT // 2 + 1
+
+    # Initially 'ac' consists of zeroes
+    for channel in range(ny):
+        frame_view = numpy.fft.rfft(frame[channel]).view('float')  # complex spectrum
+
+        ac[0] += frame_view[0] ** 2  # DC component
+        for i in range(2, nsampFFT, 2):
+            ac[i // 2] += frame_view[i] ** 2 + frame_view[i + 1] ** 2  # power spectrum
+        ac[nsampRFFT - 1] += frame_view[nsampFFT] ** 2  # Nyquist frequency
+
+    ac[:] = numpy.fft.irfft(ac[:nsampRFFT])  # autocorrelation
+
+    '''
+    Normalize the autocorrelation to the value with zero lag,
+	and divide it by the normalized autocorrelation of the window.
+    '''
+    r[0] = 1.0
+    for i in range(1, brent_ixmax + 1):
+        r[-i] = r[i] = ac[i] / (ac[0] * windowR[i])
+
+    '''
+    import matplotlib.pyplot as plt
+    plt.plot(numpy.arange(len(r)), r)
+    plt.savefig('r.png')
+    '''
+
+    '''
+    Shortcut: absolute silence is always voiceless.
+	We are done for this frame.
+    '''
+    if localPeak == 0.0:
+        return
+
+    '''
+	Find the strongest maxima of the correlation of this frame,
+	and register them as candidates.
+    '''
+    imax[0] = 0
+    for i in range(2, min(maximumLag, brent_ixmax)):
+        if r[i] > 0.5 * voicingThreshold and r[i] > r[i-1] and r[i] >= r[i+1]:  # maximum?
+            place = None
+            '''
+            Use parabolic interpolation for first estimate of frequency,
+			and cubic spline interpolation to compute the strength of this frequency.
+            '''
+            dr = 0.5 * (r[i+1] - r[i-1])
+            d2r = 2.0 * r[i] - r[i-1] - r[i+1]
+            frequencyOfMaximum = 1.0 / dx / (i + dr / d2r)
+            offset = - brent_ixmax - 1
+
+            # Use cubic spline to interpolete discrete values and get function for exact argument
+            r_offset_spline_func =  CubicSpline(numpy.arange(brent_ixmax - offset),
+                                                list(r[ offset + 1: ]) + list(r[ :- offset ]))
+            strengthOfMaximum = float(r_offset_spline_func(1.0 / dx / frequencyOfMaximum - offset))
+
+            '''
+            High values due to short windows are to be reflected around 1.
+            '''
+            if strengthOfMaximum > 1.0:
+                strengthOfMaximum = 1.0 / strengthOfMaximum
+
+            '''
+            Find a place for this maximum.
+            '''
+            if pitchFrame['nCandidates'] < maxnCandidates:
+                pitchFrame['nCandidates'] += 1
+                while len(pitchFrame['candidates']) < pitchFrame['nCandidates']:
+                    pitchFrame['candidates'].append(pitchFrame['candidates'][-1].copy())
+                place = pitchFrame['nCandidates'] - 1
+            else:
+                '''
+                Try the place of the weakest candidate so far.
+                '''
+                weakest = 2.0
+                for iweak in range(1, maxnCandidates):
+                    '''
+                    High frequencies are to be favoured
+					if we want to analyze a perfectly periodic signal correctly.
+                    '''
+                    localStrength = (pitchFrame['candidates'][iweak]['strength'] -
+                                     octaveCost * math.log2(pitchFloor / pitchFrame['candidates'][iweak]['frequency']))
+                    if localStrength < weakest:
+                        weakest = localStrength
+                        place = iweak
+                '''
+                If this maximum is weaker than the weakest candidate so far, give it no place.
+                '''
+                if strengthOfMaximum - octaveCost * math.log2(pitchFloor / frequencyOfMaximum) <= weakest:
+                    place = None
+
+            if place is not None:
+                pitchFrame['candidates'][place]['frequency'] = frequencyOfMaximum
+                pitchFrame['candidates'][place]['strength'] = strengthOfMaximum
+                imax[place] = i
+
+    '''
+    Second pass: for extra precision, maximize cubic spline interpolation.
+    '''
+    for i in range(1, pitchFrame['nCandidates']):
+        offset = -brent_ixmax - 1
+        # Get improved x and y of function maximum after cubic spline interpolation
+        xmid = fmin(lambda x: (- r_offset_spline_func(x)), imax[i] - offset, disp=False)[0]
+        ymid = float(r_offset_spline_func(xmid))
+        xmid += offset
+        pitchFrame['candidates'][i]['frequency'] = 1.0 / dx / xmid
+        if ymid > 1.0:
+            ymid = 1.0 / ymid
+        pitchFrame['candidates'][i]['strength'] = ymid
+
+
+def sound_into_pitch(arg):
+    def f(arg, firstFrame, lastFrame, x1, dx, frames, **rest):
+        for iframe in range(firstFrame, lastFrame):
+            sound_into_pitch_frame(pitch_frame := frames[iframe],
+                                   t := x1 + iframe * dx,
+                                   **arg, **(arg.get('sound')))
+    f(arg, **arg, **(arg.get('pitch')))
+
+
 class AudioPraatLike(object):
     """
     Allows computations of sound intensity and formants using algorithms mimicking as close as possible
@@ -1255,6 +1542,234 @@ def get_interval_formants(self, begin, end):
             sum(f1_list) / len(f1_list), sum(f2_list) / len(f2_list), sum(f3_list) / len(f3_list)]
 
 
+    def get_pitch(self, begin=0, end=None):
+        """
+        Computes pitches of an audio sample.
+        """
+
+        x1 = max(0, begin)
+
+        if end is None:
+            end = int(self.intensity_sound.frame_count())
+
+        assert end >= x1
+
+        fq = self.intensity_sound.frame_rate  # frequency of samples (sampling)
+        nx = min(math.floor((end - x1) * fq), int(self.intensity_sound.frame_count()))  # number of samples to compute
+        dx = 1 / fq  # single sample duration in sec
+        duration = nx / fq  # duration of samples to compute in sec
+
+        ny = self.intensity_sound.channels  # number of channels
+        plain_z = [ smp / self.intensity_sound.max_possible_amplitude
+                    for smp in self.intensity_sound.get_array_of_samples() ]  # samples of all the channels
+        assert nx * ny <= len(plain_z)
+
+        # lay out samples by channels
+        z = [[] for _ in range(ny)]
+        for channel in range(ny):
+            for frame in range(nx):
+                z[channel].append(plain_z[ny * frame + channel])  # signal
+
+        minimumPitch = 75
+        maximumPitch = 600
+        periodsPerWindow = 3.0
+        oversampling = 4
+        dt = 0.015 #periodsPerWindow / minimumPitch / oversampling
+        assert minimumPitch >= periodsPerWindow / duration, \
+            f"To analyse this Sound, 'pitch floor' must not be less than {periodsPerWindow / duration} Hz."
+        maximumPitch = min(0.5 / dx, maximumPitch)
+
+        maxnCandidates = 15
+        silenceThreshold = 0.03
+        voicingThreshold = 0.45
+        octaveCost = 0.01
+        octaveJumpCost = 0.35
+        voicedUnvoicedCost = 0.14
+
+        # for Gaussian
+        periodsPerWindow *= 2  # because Gaussian window is twice as long
+        #brent_depth = 4
+        interpolation_depth = 0.25  # because Gaussian window is twice as long
+
+        '''
+    	Determine the number of samples in the longest period.
+		We need this to compute the local mean of the sound (looking one period in both directions),
+		and to compute the local peak of the sound (looking half a period in both directions).
+        '''
+        nsamp_period = math.floor(1.0 / dx / minimumPitch)
+        halfnsamp_period = nsamp_period / 2 + 1
+
+        # Determine window duration in seconds and in samples.
+        dt_window = periodsPerWindow / minimumPitch
+        nsamp_window = math.floor(dt_window / dx)
+        halfnsamp_window = nsamp_window / 2 - 1
+        assert halfnsamp_window >= 2, "Analysis window too short."
+        nsamp_window = int(halfnsamp_window * 2)
+
+        #minimumLag = max(2, math.floor(1.0 / dx / maximumPitch))
+        maximumLag = min(math.floor(nsamp_window / periodsPerWindow) + 2, nsamp_window)
+
+        '''
+		Determine the number of frames.
+		Fit as many frames as possible symmetrically in the total duration.
+		We do this even for the forward cross-correlation method,
+		because that allows us to compare the two methods.
+        '''
+        assert dt_window <= duration, f"{duration} is shorter than window length"
+        numberOfFrames = math.floor((duration - dt_window) / dt) + 1
+        ourMidTime = x1 + 0.5 * (duration - dx)
+        thyDuration = numberOfFrames * dt
+        t1 = ourMidTime + 0.5 * (dt - thyDuration)
+
+        # Create the resulting pitch contour.
+        thee = {
+            #'xmin': x1,
+            #'xmax': x1 + duration,
+            'nx': numberOfFrames,
+            'dx': dt,
+            'x1': t1,
+            'ceiling': maximumPitch,
+            'frames': [{
+                'intensity': 0.0,
+                'nCandidates': 1,
+                'candidates': [{
+                    'frequency': 0.0,
+                    'strength': 0.0
+                }]
+            } for _ in range(numberOfFrames)]
+        }
+
+        # Compute the global absolute peak for determination of silence threshold.
+        globalPeak = 0.0
+        for ichan in range(ny):
+            mean = sum(z[ichan])/len(z[ichan])
+            for i in range(nx):
+                value = math.fabs(z[ichan][i] - mean)
+                if value > globalPeak:
+                    globalPeak = value
+
+        if globalPeak == 0.0:
+            return thee
+
+        '''
+        Compute the number of samples needed for doing FFT.
+        To avoid edge effects, we have to append zeroes to the window.
+        The maximum lag considered for maxima is maximumLag.
+        The maximum lag used in interpolation is nsamp_window * interpolation_depth.
+        '''
+        nsampFFT = 1
+        while nsampFFT < nsamp_window * (1 + interpolation_depth):
+            nsampFFT *= 2
+
+        window = get_gaussian_window(nsamp_window)
+        windowR = window + numpy.zeros(nsampFFT - nsamp_window).tolist()
+
+        windowR = numpy.fft.rfft(windowR)
+
+        windowR_view = windowR.view('float')
+
+        # Change input windowR according to praat algorithms
+        windowR_view[0] *= windowR_view[0]  # DC component
+        for i in range(2, nsampFFT, 2):
+            windowR_view[i] = windowR_view[i] ** 2 + windowR_view[i + 1] ** 2
+            windowR_view[i + 1] = 0.0  # power spectrum: square and zero
+        windowR_view[nsampFFT] *= windowR_view[nsampFFT]  # Nyquist frequency
+
+        windowR = numpy.fft.irfft(windowR)  # autocorrelation
+
+        for i in range(1, nsamp_window):
+            windowR[i] /= windowR[0]  # normalize
+        windowR[0] = 1.0  # normalize
+
+        '''
+        import matplotlib.pyplot as plt
+        plt.plot(numpy.arange(nsamp_window), window[:nsamp_window])
+        plt.savefig('window.png')
+        plt.clf()
+        plt.plot(numpy.arange(nsamp_window), windowR[:nsamp_window])
+        plt.savefig('windowR.png')
+        '''
+
+        brent_ixmax = math.floor(nsamp_window * interpolation_depth)
+
+        # Calculating threads number
+        numberOfFramesPerThread = 20
+        numberOfThreads = (numberOfFrames - 1) // numberOfFramesPerThread + 1
+        numberOfProcessors = multiprocessing.cpu_count()
+        log.debug(f"{numberOfProcessors} processors")
+        numberOfThreads = min(numberOfThreads, numberOfProcessors)
+        numberOfThreads = max(1, min(numberOfThreads, 16))
+        numberOfFramesPerThread = (numberOfFrames - 1) // numberOfThreads + 1
+
+        firstFrame = 0
+        lastFrame = numberOfFramesPerThread
+        #cancelled = [False]
+
+        # Gather parameters into 'sound'
+        sound = {}
+        for key in 'x1', 'dx', 'nx', 'ny', 'z':
+            sound[key] = locals()[key]
+
+        args = []
+        for ithread in range(numberOfThreads):
+            # Get the rest
+            if ithread == numberOfThreads - 1:
+                lastFrame = numberOfFrames
+
+            arg = {
+                'sound': sound,
+                'pitch': thee,
+                'firstFrame': firstFrame,
+                'lastFrame': lastFrame,
+                'pitchFloor': minimumPitch,
+                'maxnCandidates': maxnCandidates,
+                'voicingThreshold': voicingThreshold,
+                'octaveCost': octaveCost,
+                'dt_window': dt_window,
+                'nsamp_window': nsamp_window,
+                'halfnsamp_window': halfnsamp_window,
+                'maximumLag': maximumLag,
+                'nsampFFT': nsampFFT,
+                'nsamp_period': nsamp_period,
+                'halfnsamp_period': halfnsamp_period,
+                'brent_ixmax': brent_ixmax,
+                'globalPeak': globalPeak,
+                'window': window,
+                'windowR': windowR,
+                'frame': numpy.zeros((ny, nsampFFT)),
+                'ac': numpy.zeros(nsampFFT, dtype=float),
+                'imax': numpy.zeros(maxnCandidates, dtype=int),
+                'localMean': numpy.zeros(ny, dtype=float),
+                'r': numpy.zeros(brent_ixmax + 1)
+            }
+
+            # single-thread
+            sound_into_pitch(arg)
+
+            args.append(arg)
+
+            firstFrame = lastFrame
+            lastFrame += numberOfFramesPerThread
+
+        '''
+        # multi-thread
+        pool = multiprocessing.Pool(numberOfThreads)
+        pool.map(sound_into_pitch, args)
+        pool.close()
+        pool.join()
+        '''
+
+        log.debug("Sound to Pitch: path finder - 95% complete")
+        pitch_path_finder(silenceThreshold, voicingThreshold, octaveCost,
+                          octaveJumpCost, voicedUnvoicedCost, minimumPitch, **thee)
+
+        '''
+        pyplot.plot(numpy.arange(len(thee['frames'])),
+                    [frame['candidates'][1]['frequency'] for frame in thee['frames']])
+        pyplot.savefig('freq.png')
+        '''
+        return thee
+
 def find_max_interval_praat(sound, interval_list):
     """
     Given a sound recording and a list of intervals specified by boundaries in seconds, returns index of the
@@ -1658,11 +2173,13 @@ def __init__(self,
         mean_interval_length,
         max_length_str,
         max_length_r_length,
+        max_length_p_mean,
         max_length_i_list,
         max_length_f_list,
         max_length_source_index,
         max_intensity_str,
         max_intensity_r_length,
+        max_intensity_p_mean,
         max_intensity_i_list,
         max_intensity_f_list,
         max_intensity_source_index,
@@ -1677,12 +2194,14 @@ def __init__(self,
 
         self.max_length_str = max_length_str
         self.max_length_r_length = max_length_r_length
+        self.max_length_p_mean = max_length_p_mean
         self.max_length_i_list = max_length_i_list
         self.max_length_f_list = max_length_f_list
         self.max_length_source_index = max_length_source_index
 
         self.max_intensity_str = max_intensity_str
         self.max_intensity_r_length = max_intensity_r_length
+        self.max_intensity_p_mean = max_intensity_p_mean
         self.max_intensity_i_list = max_intensity_i_list
         self.max_intensity_f_list = max_intensity_f_list
         self.max_intensity_source_index = max_intensity_source_index
@@ -1702,10 +2221,11 @@ def format(self):
             ([interval_str,
                 '{0:.2f}%'.format(r_length * 100),
                 is_max_length, is_max_intensity, source_index],
+                p_mean,
                 i_list,
                 f_list)
 
-                for interval_str, r_length, i_list, f_list, is_max_length, is_max_intensity, source_index in
+                for interval_str, r_length, p_mean, i_list, f_list, is_max_length, is_max_intensity, source_index in
                     self.interval_data_list]
 
         return pprint.pformat(
@@ -1810,6 +2330,41 @@ def process_sound(tier_data_list, sound):
                 sound.get_interval_formants(begin_sec, end_sec)
                     for begin_sec, end_sec, text in interval_list]
 
+            pitch_list = sound.get_pitch()
+
+            # Getting desired intervals and mean values within them from the obtained 'pitch_list'.
+            with open('pitch.log', 'a') as f:
+
+                cur_frame = 0
+                pitch_means = list()
+                xl_p_mean = xi_p_mean = 0
+                for begin_sec, end_sec, text in interval_list:
+                    sum_fq = num_fq = 0
+                    for iframe in range(cur_frame, pitch_list['nx']):
+                        point = pitch_list['x1'] + pitch_list['dx'] * iframe
+                        if point <= begin_sec:
+                            continue
+                        elif begin_sec < point < end_sec:
+                            freq = pitch_list['frames'][iframe]['candidates'][0]['frequency']
+                            print(f"'{text}' | {point:.3f} sec | {freq:.3f} Hz", file=f)
+                            sum_fq += freq
+                            num_fq += (freq > 0)
+                            continue
+                        else:
+                            break
+
+                    cur_frame = iframe
+                    mean = sum_fq / num_fq if num_fq > 0 else 0
+                    pitch_means.append(mean)
+                    if begin_sec == max_length_interval[0]:
+                        xl_p_mean = mean
+                    if begin_sec == max_intensity_interval[0]:
+                        xi_p_mean = mean
+
+                    print(f"Mean: {mean:.3f} Hz\n", file=f)
+                print(f"XL Mean: {xl_p_mean:.3f} Hz", file=f)
+                print(f"XI Mean: {xi_p_mean:.3f} Hz\n", file=f)
+
             # Computing average sound interval length.
 
             total_interval_length = (
@@ -1862,6 +2417,7 @@ def process_sound(tier_data_list, sound):
 
                 (interval_str,
                     (end - begin) / mean_interval_length,
+                    f'{p_mean:.3f}',
                     [f'{i_min:.3f}', f'{i_max:.3f}', f'{i_max - i_min:.3f}'],
                     list(map('{0:.3f}'.format, f_list)),
                     '+' if index == max_length_index else '-',
@@ -1870,11 +2426,21 @@ def process_sound(tier_data_list, sound):
 
                     for (
                         index,
-                            (interval_str, (_, i_min, i_max), f_list, (begin, end, text), source_index)) in
+                            (interval_str,
+                             p_mean,
+                             (_, i_min, i_max),
+                             f_list,
+                             (begin, end, text),
+                             source_index)) in
 
                         enumerate(
                             zip(
-                                str_list, intensity_list, formant_list, interval_list, source_index_list))]
+                                str_list,
+                                pitch_means,
+                                intensity_list,
+                                formant_list,
+                                interval_list,
+                                source_index_list))]
 
             textgrid_result_list[-1][2].append(
 
@@ -1884,11 +2450,13 @@ def process_sound(tier_data_list, sound):
                     mean_interval_length,
                     max_length_str,
                     max_length / mean_interval_length,
+                    f'{xl_p_mean:.3f}',
                     list(map('{0:.3f}'.format, max_length_i_list)),
                     list(map('{0:.3f}'.format, max_length_f_list)),
                     max_length_source_index,
                     max_intensity_str,
                     (max_intensity_interval[1] - max_intensity_interval[0]) / mean_interval_length,
+                    f'{xi_p_mean:.3f}',
                     list(map('{0:.3f}'.format, max_intensity_i_list)),
                     list(map('{0:.3f}'.format, max_intensity_f_list)),
                     max_intensity_source_index,
@@ -2023,7 +2591,7 @@ def process_sound_markup(
                 urllib.parse.urlparse(sound_url).path)[1]
 
             sound = None
-            with tempfile.NamedTemporaryFile(suffix = extension) as temp_file:
+            with tempfile.NamedTemporaryFile(suffix = extension, delete = (not __debug_flag__)) as temp_file:
 
                 if sound_bytes is None:
 
@@ -2571,12 +3139,14 @@ def compile_workbook(
 
             'Longest (seconds) interval',
             'Relative length',
+            'Pitch mean (Hz)',
             'Intensity minimum (dB)', 'Intensity maximum (dB)', 'Intensity range (dB)',
             'F1 mean (Hz)', 'F2 mean (Hz)', 'F3 mean (Hz)',
             'Table reference',
 
             'Highest intensity (dB) interval',
             'Relative length',
+            'Pitch mean (Hz)',
             'Intensity minimum (dB)', 'Intensity maximum (dB)', 'Intensity range (dB)',
             'F1 mean (Hz)', 'F2 mean (Hz)', 'F3 mean (Hz)',
             'Table reference',
@@ -2589,6 +3159,7 @@ def compile_workbook(
 
             'Interval',
             'Relative length',
+            'Pitch mean (Hz)',
             'Intensity minimum (dB)', 'Intensity maximum (dB)', 'Intensity range (dB)',
             'F1 mean (Hz)', 'F2 mean (Hz)', 'F3 mean (Hz)',
             'Table reference',
@@ -2613,22 +3184,22 @@ def compile_workbook(
 
             worksheet_results.set_column(0, 2, 20)
             worksheet_results.set_column(3, 3, 8, format_percent)
-            worksheet_results.set_column(4, 6, 8)
-            worksheet_results.set_column(7, 9, 10)
-            worksheet_results.set_column(10, 10, 4)
-            worksheet_results.set_column(11, 11, 20)
-            worksheet_results.set_column(12, 12, 8, format_percent)
-            worksheet_results.set_column(13, 15, 8)
-            worksheet_results.set_column(16, 18, 10)
-            worksheet_results.set_column(19, 20, 4)
+            worksheet_results.set_column(4, 7, 8)
+            worksheet_results.set_column(8, 10, 10)
+            worksheet_results.set_column(11, 11, 4)
+            worksheet_results.set_column(12, 12, 20)
+            worksheet_results.set_column(13, 13, 8, format_percent)
+            worksheet_results.set_column(14, 17, 8)
+            worksheet_results.set_column(18, 20, 10)
+            worksheet_results.set_column(21, 22, 4)
 
         else:
 
             worksheet_results.set_column(0, 2, 20)
             worksheet_results.set_column(3, 3, 8, format_percent)
-            worksheet_results.set_column(4, 6, 8)
-            worksheet_results.set_column(7, 9, 10)
-            worksheet_results.set_column(10, 12, 4)
+            worksheet_results.set_column(4, 7, 8)
+            worksheet_results.set_column(8, 10, 10)
+            worksheet_results.set_column(11, 13, 4)
 
         worksheet_dict[group] = (
 
@@ -2756,6 +3327,7 @@ def next_text():
                                 ' '.join([vowel_a] + text_a_list[1:]),
                                 round(tier_result.max_length_r_length, 4)] +
 
+                            [ float(tier_result.max_length_p_mean) ] +
                             i_list_a +
                             f_list_a +
 
@@ -2763,6 +3335,7 @@ def next_text():
                                 ' '.join([vowel_b] + text_b_list[1:]),
                                 round(tier_result.max_intensity_r_length, 4)] +
 
+                            [ float(tier_result.max_intensity_p_mean) ] +
                             i_list_b +
                             f_list_b +
 
@@ -2812,7 +3385,7 @@ def next_text():
                     else:
 
                         for index, (interval_str, interval_r_length,
-                            i_list, f_list, sign_longest, sign_highest, source_index) in (
+                            p_mean, i_list, f_list, sign_longest, sign_highest, source_index) in (
 
                             enumerate(tier_result.interval_data_list)):
 
@@ -2834,6 +3407,7 @@ def next_text():
                                     ' '.join([vowel] + interval_str.split()[1:]),
                                     round(interval_r_length, 4)] +
 
+                                [float(p_mean)] +
                                 i_list +
                                 f_list +
 
@@ -4191,7 +4765,7 @@ def no_vowel_selected_f(tier_number, tier_name, transcription_list, selected_lis
             urllib.parse.urlparse(sound_url).path)[1]
 
         sound = None
-        with tempfile.NamedTemporaryFile(suffix = extension) as temp_file:
+        with tempfile.NamedTemporaryFile(suffix = extension, delete = (not args.__debug_flag__)) as temp_file:
 
             with storage_f(storage, sound_url) as sound_stream:
                 temp_file.write(sound_stream.read())
@@ -4317,6 +4891,9 @@ def perform_phonology(args, task_status, storage):
         args.limit, args.limit_exception,
         args.limit_no_vowel, args.limit_result))
 
+    with open('pitch.log', 'w') as f:
+        print (datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'), "\n", file=f)
+
     time_begin = time.time()
 
     task_status.set(1, 0, 'Preparing')
@@ -4832,6 +5409,9 @@ def result_filter(textgrid_result_list):
                     perspective_result_count, '' if perspective_result_count == 1 else 's',
                     state.no_vowel_counter, state.exception_counter))
 
+    #for row in fails_dict.values(): print(row.get('errs'))
+    #sys.stdout.flush()
+
     # If we have no results, we indicate the situation and also show number of failures and number of
     # markups with no vowels.
 
diff --git a/server-requirements-final.txt b/server-requirements-final.txt
index 460af8a7..c0a3c5f6 100644
--- a/server-requirements-final.txt
+++ b/server-requirements-final.txt
@@ -1 +1,3 @@
 matplotlib==1.5.3
+# for win10 with python-8.10
+#matplotlib==2.2.5
\ No newline at end of file