From faafdd097a5e3a96308a5b004f582b4f2e135f7f Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sat, 20 May 2017 16:44:32 +0200 Subject: [PATCH 01/13] Fixed queue issue (2) that results in 3 seconds delay when reading from file and debug output. --- sopare/analyze.py | 3 +-- sopare/recorder.py | 9 ++++++--- sopare/version.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sopare/analyze.py b/sopare/analyze.py index 3abb31b..963d805 100644 --- a/sopare/analyze.py +++ b/sopare/analyze.py @@ -91,8 +91,7 @@ def deep_search(self, framing, data): if (x < len(sorted_framing_match)): best_match.append(sorted_framing_match[x]) sorted_best_match = sorted(best_match, key=lambda x: (x[1] + x[2], -x[0])) - if (self.debug): - self.debug_info += str(sorted_best_match).join(['sorted_best_match: ', '\n\n']) + self.debug_info += str(sorted_best_match).join(['sorted_best_match: ', '\n\n']) for i, best in enumerate(sorted_best_match): if (best[0] >= config.MIN_CROSS_SIMILARITY and best[1] <= config.MIN_LEFT_DISTANCE and best[2] <= config.MIN_RIGHT_DISTANCE): for x in range(best[3], best[3] + best[4]): diff --git a/sopare/recorder.py b/sopare/recorder.py index 36dbb38..3885b26 100644 --- a/sopare/recorder.py +++ b/sopare/recorder.py @@ -68,10 +68,13 @@ def readfromfile(self, infile): self.queue.close() break file.close() + once = False while (self.queue.qsize() > 0): - if (self.debug): - print ('waiting for queue...') - time.sleep(3) # wait for all threads to finish their work + if (self.debug and once == False): + print ('waiting for queue to finish...') + once = True + time.sleep(.1) # wait for all threads to finish their work + self.queue.close() self.buffering.flush('end of file') print("* done ") self.stop() diff --git a/sopare/version.py b/sopare/version.py index c1afe52..d8c9955 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.0" +__version__ = "1.2.1" From 7e5c0f8c71ca503da5f578888569a646805e70f0 Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sun, 18 Jun 2017 19:07:15 +0200 Subject: [PATCH 02/13] Made characteristics length configurable --- sopare/config.py | 6 ++++-- sopare/prepare.py | 2 +- sopare/version.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sopare/config.py b/sopare/config.py index 92c5159..38fcf7d 100644 --- a/sopare/config.py +++ b/sopare/config.py @@ -24,6 +24,8 @@ # Start the analysis after reaching LONG_SILENCE LONG_SILENCE = 40 +# Characteristic length +CHUNKS = 2048 ######################################################### @@ -69,8 +71,8 @@ MIN_CROSS_SIMILARITY = 0.7 # Calculation basis or token/word comparison -SIMILARITY_NORM = 0.6 -SIMILARITY_HEIGHT = 0.4 +SIMILARITY_NORM = 0.8 +SIMILARITY_HEIGHT = 0.2 SIMILARITY_DOMINANT_FREQUENCY = 0 # Number of best matches to consider. diff --git a/sopare/prepare.py b/sopare/prepare.py index bdb1a32..aa1da09 100644 --- a/sopare/prepare.py +++ b/sopare/prepare.py @@ -120,7 +120,7 @@ def prepare(self, buf, volume): self.entered_silence = False self.silence = 0 - if (len(self.buffer) == 4096): # TODO: Make configurable + if (len(self.buffer) == config.CHUNKS): self.new_token = True meta.append({ 'token': 'token', 'silence': self.silence, 'pos': self.counter, 'adapting': adaptive, 'volume': volume, 'token_peaks': self.token_peaks }) diff --git a/sopare/version.py b/sopare/version.py index d8c9955..5935ca3 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.1" +__version__ = "1.2.2" From fc2fb186c90f616e507c90326982c13b303c28e2 Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Tue, 20 Jun 2017 21:28:35 +0200 Subject: [PATCH 03/13] Config option added. Fixed plot. Introduced key/value object --- sopare.py | 13 +++++++++++-- sopare/buffering.py | 22 ++++++++------------- sopare/characteristics.py | 17 +++-------------- sopare/config.py | 2 +- sopare/filter.py | 10 +++++----- sopare/prepare.py | 21 +++++++++----------- sopare/processing.py | 25 +++++++++++------------- sopare/recorder.py | 36 ++++++++++++++++++++--------------- sopare/version.py | 2 +- sopare/worker.py | 40 +++++++++++++++++++-------------------- 10 files changed, 89 insertions(+), 99 deletions(-) diff --git a/sopare.py b/sopare.py index c6aa923..0b1b095 100755 --- a/sopare.py +++ b/sopare.py @@ -22,6 +22,7 @@ import sopare.util as util import sopare.recorder as recorder import sopare.err_logger as err_logger +import sopare.hatch as hatch from sopare.version import __version__ def main(argv): @@ -83,8 +84,16 @@ def main(argv): if opt in ("-d", "--delete"): delete_word(arg, debug) sys.exit(0) - recorder.recorder(endless_loop, debug, plot, wave, outfile, - infile, dict) + + hatched = hatch.hatch() + hatched.add("endless_loop", endless_loop) + hatched.add("debug", debug) + hatched.add("plot", plot) + hatched.add("wave", wave) + hatched.add("outfile", outfile) + hatched.add("infile",infile ) + hatched.add("dict", dict) + recorder.recorder(hatched) def recreate_dict(debug): print ("recreating dictionary from raw input files...") diff --git a/sopare/buffering.py b/sopare/buffering.py index 14ff304..b3f05e1 100644 --- a/sopare/buffering.py +++ b/sopare/buffering.py @@ -19,41 +19,35 @@ import multiprocessing import processing +import hatch class buffering(multiprocessing.Process): - def __init__(self, queue, endless_loop, debug, plot, wave, outfile, dict): + def __init__(self, hatch, queue): multiprocessing.Process.__init__(self, name="buffering queue") + self.hatch = hatch self.queue = queue - self.endless_loop = endless_loop - self.debug = debug - self.plot = plot - self.outfile = outfile - self.proc = processing.processor(endless_loop, debug, plot, wave, outfile, dict, self) + self.proc = processing.processor(hatch, self) self.PROCESS_ROUND_DONE = False self.test_counter = 0 self.start() def run(self): - if (self.debug): + if (self.hatch.get('debug') == True): print ("buffering queue runner") while True: buf = self.queue.get() - if ((self.endless_loop == False or self.outfile != None) and self.PROCESS_ROUND_DONE): + if ((self.hatch.get('endless_loop') == False or self.hatch.get('outfile') != None) and self.PROCESS_ROUND_DONE): break self.proc.check_silence(buf) - if (self.debug): + if (self.hatch.get('debug') == True): print ("terminating queue runner") def flush(self, message): self.proc.stop(message) def stop(self): - if (self.debug): + if (self.hatch.get('debug') == True): print ("stop buffering") self.PROCESS_ROUND_DONE = True - - - - diff --git a/sopare/characteristics.py b/sopare/characteristics.py index 23912c7..8121b1e 100644 --- a/sopare/characteristics.py +++ b/sopare/characteristics.py @@ -19,25 +19,14 @@ import numpy import config +import hatch class characteristic: - def __init__(self, debug): - self.debug = debug + def __init__(self, hatch): + self.hatch = hatch def getcharacteristic(self, fft, chunked_norm, meta): - - #chunked_norm = [ ] - #progessive = 1 - #i = config.MIN_PROGRESSIVE_STEP - #for x in range(0, len(norm), i): - # if (hasattr(config, 'START_PROGRESSIVE_FACTOR') and x >= config.START_PROGRESSIVE_FACTOR): - # progessive += progessive * config.PROGRESSIVE_FACTOR - # i += int(progessive) - # if (i > config.MAX_PROGRESSIVE_STEP): - # i = config.MAX_PROGRESSIVE_STEP - # chunked_norm.append(round(sum(norm[x:x+i]), 2)) - fft = numpy.abs(fft) df = numpy.argmax(fft) dfm = int(numpy.amax(fft)) diff --git a/sopare/config.py b/sopare/config.py index 38fcf7d..b60060f 100644 --- a/sopare/config.py +++ b/sopare/config.py @@ -82,7 +82,7 @@ # Min. distance to keep a word MIN_LEFT_DISTANCE = 0.3 -MIN_RIGHT_DISTANCE = 0.3 +MIN_RIGHT_DISTANCE = 0.25 # Use given number as results to assembly result diff --git a/sopare/filter.py b/sopare/filter.py index 3ac7c9f..618a78c 100644 --- a/sopare/filter.py +++ b/sopare/filter.py @@ -22,16 +22,16 @@ import worker import config import characteristics +import hatch class filtering(): - def __init__(self, debug, plot, dict, wave): - self.debug = debug - self.plot = plot + def __init__(self, hatch): + self.hatch = hatch self.first = True self.queue = multiprocessing.Queue() - self.characteristic = characteristics.characteristic(debug) - self.worker = worker.worker(self.queue, debug, plot, dict, wave) + self.characteristic = characteristics.characteristic(self.hatch) + self.worker = worker.worker(self.hatch, self.queue) def stop(self): self.queue.put({ 'action': 'stop' }) diff --git a/sopare/prepare.py b/sopare/prepare.py index aa1da09..31fef1c 100644 --- a/sopare/prepare.py +++ b/sopare/prepare.py @@ -22,17 +22,15 @@ import visual import util import config +import hatch class preparing(): - def __init__(self, debug, plot, wave, dict): - self.debug = debug - self.plot = plot - self.wave = wave - self.dict = dict + def __init__(self, hatch): + self.hatch = hatch self.visual = visual.visual() - self.util = util.util(debug) - self.filter = filter.filtering(debug, plot, dict, wave) + self.util = util.util(self.hatch.get('debug')) + self.filter = filter.filtering(self.hatch) self.silence = 0 self.force = False self.counter = 0 @@ -45,7 +43,6 @@ def __init__(self, debug, plot, wave, dict): self.token_peaks = [ ] self.last_low_pos = 0 self.force = False - self.plot_buffer = [ ] self.entered_silence = False def tokenize(self, meta): @@ -68,10 +65,10 @@ def valid_token(self, meta): return True def stop(self): + if (self.hatch.get('plot') == True): + self.visual.create_sample(self.hatch.get_plot_cache(), 'sample.png') self.tokenize([{ 'token': 'stop' }]) self.filter.stop() - if (self.plot): - self.visual.create_sample(self.plot_buffer, 'sample.png') self.filter_reset() self.reset() @@ -97,8 +94,8 @@ def force_tokenizer(self): def prepare(self, buf, volume): data = numpy.fromstring(buf, dtype=numpy.int16) - if (self.plot): - self.plot_buffer.extend(data) + if (self.hatch.get('plot') == True and self.hatch.get('endless_loop') == False): + self.hatch.extend_plot_cache(data) self.buffer.extend(data) self.counter += 1 abs_data = abs(data) diff --git a/sopare/processing.py b/sopare/processing.py index dddf5e7..26ee3bd 100644 --- a/sopare/processing.py +++ b/sopare/processing.py @@ -22,35 +22,32 @@ import prepare import io import config +import hatch -class processor: +class processor(): - def __init__(self, endless_loop, debug, plot, wave, outfile, dict, buffering, live = True): + def __init__(self, hatch, buffering, live = True): self.append = False - self.endless_loop = endless_loop - self.debug = debug - self.plot = plot - self.wave = wave + self.hatch = hatch self.out = None - if (outfile != None): + if (self.hatch.get('outfile') != None): self.out = io.open(outfile, 'wb') self.buffering = buffering - self.dict = dict self.live = live self.timer = 0 self.silence_timer = 0 self.silence_counter = 0 self.silence_buffer = [ ] - self.prepare = prepare.preparing(debug, plot, wave, dict) + self.prepare = prepare.preparing(self.hatch) def stop(self, message): - if (self.debug): + if (self.hatch.get('debug') == True): print (message) if (self.out != None): self.out.close() self.append = False self.silence_timer = 0 - if (self.endless_loop == False): + if (self.hatch.get('endless_loop') == False): self.prepare.stop() else: self.prepare.force_tokenizer() @@ -61,7 +58,7 @@ def check_silence(self, buf): volume = audioop.rms(buf, 2) if (volume >= config.THRESHOLD): if (self.append == False): - if (self.debug): + if (self.hatch.get('debug') == True): print ('starting append mode') self.silence_timer = time.time() self.timer = time.time() @@ -81,12 +78,12 @@ def check_silence(self, buf): self.prepare.prepare(buf, volume) if (self.append == True and self.silence_timer > 0 and self.silence_timer + config.MAX_SILENCE_AFTER_START < time.time() - and self.live == True and self.endless_loop == False): + and self.live == True and self.hatch.get('endless_loop') == False): self.stop("stop append mode because of silence") if (self.append == True and self.timer + config.MAX_TIME < time.time() and self.live == True): self.stop("stop append mode because time is up") - if (self.append == True and self.live == True and self.endless_loop == True + if (self.append == True and self.live == True and self.hatch.get('endless_loop') == True and self.silence_counter > config.SILENCE_COUNTER): self.append = False self.stop("endless loop silence detected") diff --git a/sopare/recorder.py b/sopare/recorder.py index 3885b26..a3bbaee 100644 --- a/sopare/recorder.py +++ b/sopare/recorder.py @@ -24,23 +24,23 @@ import sys import io import config +import hatch +import numpy +import visual -class recorder: +class recorder(): - def __init__(self, endless_loop, debug, plot, wave, outfile, infile, dict): - self.debug = debug - self.plot = plot - self.wave = wave - self.outfile = outfile - self.dict = dict + def __init__(self, hatch): + self.hatch = hatch self.FORMAT = pyaudio.paInt16 # mono self.CHANNELS = 1 self.pa = pyaudio.PyAudio() self.queue = multiprocessing.JoinableQueue() self.running = True + self.visual = visual.visual() - if (debug): + if (self.hatch.get('debug') == True): defaultCapability = self.pa.get_default_host_api_info() print defaultCapability @@ -51,26 +51,31 @@ def __init__(self, endless_loop, debug, plot, wave, outfile, infile, dict): output=False, frames_per_buffer=config.CHUNK) - self.buffering = buffering.buffering(self.queue, endless_loop, self.debug, self.plot, self.wave, self.outfile, self.dict) - if (infile == None): + self.buffering = buffering.buffering(self.hatch, self.queue) + if (hatch.get('infile') == None): self.recording() else: - self.readfromfile(infile) + self.readfromfile() - def readfromfile(self, infile): - print("* reading file "+infile) - file = io.open(infile, 'rb', buffering=config.CHUNK) + def readfromfile(self): + print("* reading file " + self.hatch.get('infile')) + file = io.open(self.hatch.get('infile'), 'rb', buffering=config.CHUNK) while True: buf = file.read(config.CHUNK * 2) if buf: self.queue.put(buf) + if (self.hatch.get('plot') == True): + data = numpy.fromstring(buf, dtype=numpy.int16) + self.hatch.extend_plot_cache(data) else: self.queue.close() break file.close() once = False + if (self.hatch.get('plot') == True): + self.visual.create_sample(self.hatch.get_plot_cache(), 'sample.png') while (self.queue.qsize() > 0): - if (self.debug and once == False): + if (self.hatch.get('debug') == True and once == False): print ('waiting for queue to finish...') once = True time.sleep(.1) # wait for all threads to finish their work @@ -93,6 +98,7 @@ def recording(self): break except IOError as e: print ("stream read error "+str(e)) + self.stop() sys.exit() diff --git a/sopare/version.py b/sopare/version.py index 5935ca3..8b7f2a3 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.2" +__version__ = "1.2.3" diff --git a/sopare/worker.py b/sopare/worker.py index dc453d5..0581e3a 100644 --- a/sopare/worker.py +++ b/sopare/worker.py @@ -25,20 +25,18 @@ import uuid import comparator import config +import hatch class worker(multiprocessing.Process): - def __init__(self, queue, debug, plot, dict, wave): + def __init__(self, hatch, queue): multiprocessing.Process.__init__(self, name="worker for filtered data") + self.hatch = hatch self.queue = queue - self.debug = debug - self.plot = plot - self.dict = dict - self.wave = wave self.visual = visual.visual() - self.util = util.util(debug) - self.analyze = analyze.analyze(debug) - self.compare = comparator.compare(debug, self.util) + self.util = util.util(self.hatch.get('debug')) + self.analyze = analyze.analyze(self.hatch.get('debug')) + self.compare = comparator.compare(self.hatch.get('debug'), self.util) self.running = True self.counter = 0 self.plot_counter = 0 @@ -55,10 +53,10 @@ def __init__(self, queue, debug, plot, dict, wave): def reset(self): self.counter = 0 - if (self.wave and len(self.rawbuf) > 0): + if (self.hatch.get('wave') == True and len(self.rawbuf) > 0): self.save_wave_buf() self.rawbuf = [ ] - self.rawfft = [ ] + #self.rawfft = [ ] self.raw = [ ] self.fft = [ ] self.word_tendency = None @@ -85,36 +83,36 @@ def remove_silence(self, m): def run(self): - if (self.debug): + if (self.hatch.get('debug') == True): print ("worker queue runner started") while self.running: obj = self.queue.get() if (obj['action'] == 'data'): raw_token = obj['token'] - if (self.wave or True): # TODO: "or True" is just temporary for testing. Must be removed later on! + if (self.hatch.get('wave') == True or True): # TODO: "or True" is just temporary for testing. Must be removed later on! self.rawbuf.extend(raw_token) fft = obj['fft'] - if (self.plot): + if (self.hatch.get('plot') == True): self.rawfft.extend(fft) meta = obj['meta'] norm = obj['norm'] characteristic = obj['characteristic'] self.character.append((characteristic, meta)) self.compare.word(self.character) - if (self.dict != None): + if (self.hatch.get('dict') != None): self.raw_character.append({ 'fft': fft, 'norm': norm, 'meta': meta }) if (characteristic != None): - if (self.debug): + if (self.hatch.get('debug') == True): print ('characteristic = ' + str(self.counter) + ' ' + str(characteristic)) print ('meta = '+str(meta)) - if (self.wave): + if (self.hatch.get('wave') == True): self.util.savefilteredwave('token'+str(self.counter)+self.uid, raw_token) - if (self.plot): + if (self.hatch.get('plot') == True and self.plot_counter < 6): self.visual.create_sample(characteristic['norm'], 'norm'+str(self.plot_counter)+'.png') self.visual.create_sample(fft, 'fft'+str(self.plot_counter)+'.png') self.plot_counter += 1 self.counter += 1 - elif (obj['action'] == 'reset' and self.dict == None): + elif (obj['action'] == 'reset' and self.hatch.get('dict') == None): self.reset() elif (obj['action'] == 'stop'): self.running = False @@ -123,17 +121,17 @@ def run(self): for m in meta: if (m['token'] == 'start analysis'): self.remove_silence(m) - if (self.dict == None): + if (self.hatch.get('dict') == None): self.analyze.do_analysis(self.compare.get_results(), self.character, self.rawbuf) else: self.util.store_raw_dict_entry(self.dict, self.raw_character) self.reset() - if (self.wave and len(self.rawbuf) > 0): + if (self.hatch.get('wave') == True and len(self.rawbuf) > 0): self.save_wave_buf() self.queue.close() - if (self.plot): + if (self.hatch.get('plot') == True): self.visual.create_sample(self.rawfft, 'fft.png') From dff1a052313f19ae7b5d628c0bb5eab8b004ec52 Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Tue, 20 Jun 2017 21:31:04 +0200 Subject: [PATCH 04/13] Introducing a simple key/value object --- sopare/hatch.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 sopare/hatch.py diff --git a/sopare/hatch.py b/sopare/hatch.py new file mode 100644 index 0000000..8deb110 --- /dev/null +++ b/sopare/hatch.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) + +Licensed under the Apache License, Version 2.0 (the "License"); you may +not use this file except in compliance with the License. You may obtain +a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. +""" + +import config + +class hatch(): + + def __init__(self): + self.plot_cache = [ ] + self.key_value_store = { } + + def add(self, key, value): + self.key_value_store[key] = value + + def get(self, key): + if (key in self.key_value_store): + return self.key_value_store[key] + return None + + def extend_plot_cache(self, data): + self.plot_cache.extend(data) + + def get_plot_cache(self): + return self.plot_cache From 44f17759e5640bd1abb198745653e3d911f44beb Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sat, 24 Jun 2017 17:56:23 +0200 Subject: [PATCH 05/13] Function call fix --- sopare/worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sopare/worker.py b/sopare/worker.py index 0581e3a..a02844d 100644 --- a/sopare/worker.py +++ b/sopare/worker.py @@ -124,7 +124,7 @@ def run(self): if (self.hatch.get('dict') == None): self.analyze.do_analysis(self.compare.get_results(), self.character, self.rawbuf) else: - self.util.store_raw_dict_entry(self.dict, self.raw_character) + self.util.store_raw_dict_entry(self.hatch.get('dict'), self.raw_character) self.reset() if (self.hatch.get('wave') == True and len(self.rawbuf) > 0): From c3516d58768697b86b435342ab1774796446a0cf Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sun, 25 Jun 2017 14:44:25 +0200 Subject: [PATCH 06/13] Function call fix --- sopare/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sopare/processing.py b/sopare/processing.py index 26ee3bd..553a061 100644 --- a/sopare/processing.py +++ b/sopare/processing.py @@ -31,7 +31,7 @@ def __init__(self, hatch, buffering, live = True): self.hatch = hatch self.out = None if (self.hatch.get('outfile') != None): - self.out = io.open(outfile, 'wb') + self.out = io.open(self.hatch.get('outfile'), 'wb') self.buffering = buffering self.live = live self.timer = 0 From bbd385d76ff8a8919340f6b4ba7052a5ed81e263 Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sat, 29 Jul 2017 16:55:04 +0200 Subject: [PATCH 07/13] New config option for not so strict comparison. --- sopare/analyze.py | 4 ++-- sopare/config.py | 20 +++++++++++--------- sopare/version.py | 2 +- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/sopare/analyze.py b/sopare/analyze.py index 963d805..b549c8f 100644 --- a/sopare/analyze.py +++ b/sopare/analyze.py @@ -127,7 +127,7 @@ def deep_inspection(self, id, startpos, data): token_sim[1] = token_sim[1] / c token_sim[2] = token_sim[2] / c token_sim[4] = int(c) - if ((config.STRICT_LENGTH_CHECK == False and c > 1 ) or c >= self.dict_analysis[id]['min_tokens']): + if ((config.STRICT_LENGTH_CHECK == False and c > 1 ) or c >= self.dict_analysis[id]['min_tokens'] - config.STRICT_LENGTH_UNDERMINING): word_sim.append(token_sim) return word_sim @@ -149,7 +149,7 @@ def get_match(self, framing): def validate_match_result(self, result, start, end, match_results): if (len(result) == 0 or result[0] == '' or end-start < 2): return match_results - if (config.STRICT_LENGTH_CHECK == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] or len(result) > self.dict_analysis[result[0]]['max_tokens'])): + if (config.STRICT_LENGTH_CHECK == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] - config.STRICT_LENGTH_UNDERMINING or len(result) > self.dict_analysis[result[0]]['max_tokens'])): if (self.debug): self.debug_info += 'STRICT_LENGTH_CHECK failed for '+result[0] + ': ' + str(self.dict_analysis[result[0]]['min_tokens']) + ' > ' + str(len(result)) + ' < ' + str(self.dict_analysis[result[0]]['max_tokens']) + '\n' return match_results diff --git a/sopare/config.py b/sopare/config.py index b60060f..0748e74 100644 --- a/sopare/config.py +++ b/sopare/config.py @@ -25,7 +25,7 @@ LONG_SILENCE = 40 # Characteristic length -CHUNKS = 2048 +CHUNKS = 4096 ######################################################### @@ -38,7 +38,7 @@ # Progressive value is used if you want to pack not # so relevant frequencies PROGRESSIVE_FACTOR = 0 -START_PROGRESSIVE_FACTOR = 600 +START_PROGRESSIVE_FACTOR = 1000 MIN_PROGRESSIVE_STEP = 25 MAX_PROGRESSIVE_STEP = 25 @@ -46,7 +46,7 @@ # analysis. Freq outside of the ranges are set to zero. # Human language can be found between 20 and 5000. LOW_FREQ = 20 -HIGH_FREQ = 600 +HIGH_FREQ = 1000 # Make use of Hann window function HANNING = True @@ -61,14 +61,14 @@ ######################################################### # Min. number of tokens to identify the beginning of a word -MIN_START_TOKENS = 3 +MIN_START_TOKENS = 4 # Min. value for potential beginning of a word MARGINAL_VALUE = 0.8 # Minimal similarity across all comparison to # identify a complete word across all tokens -MIN_CROSS_SIMILARITY = 0.7 +MIN_CROSS_SIMILARITY = 0.6 # Calculation basis or token/word comparison SIMILARITY_NORM = 0.8 @@ -81,9 +81,8 @@ NUMBER_OF_BEST_MATCHES = 1 # Min. distance to keep a word -MIN_LEFT_DISTANCE = 0.3 -MIN_RIGHT_DISTANCE = 0.25 - +MIN_LEFT_DISTANCE = 0.8 +MIN_RIGHT_DISTANCE = 0.8 # Use given number as results to assembly result # 0 for all predictions @@ -91,4 +90,7 @@ MAX_TOP_RESULTS = 3 # Enable or disable strict length check for words -STRICT_LENGTH_CHECK = False +STRICT_LENGTH_CHECK = True +# Value to soften the strict length check a bit to still +# get quite precise results but to be less strict +STRICT_LENGTH_UNDERMINING = 1 diff --git a/sopare/version.py b/sopare/version.py index 8b7f2a3..0122460 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.3" +__version__ = "1.2.4" From 6f5f807f1d99d1044a1ab545229a1a0ea3e3a160 Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Tue, 1 Aug 2017 08:13:25 +0200 Subject: [PATCH 08/13] Adding Short term memory for better results. --- sopare/analyze.py | 3 +++ sopare/config.py | 27 +++++++++++++++------------ sopare/stm.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ sopare/version.py | 2 +- 4 files changed, 65 insertions(+), 13 deletions(-) create mode 100644 sopare/stm.py diff --git a/sopare/analyze.py b/sopare/analyze.py index b549c8f..b34bc72 100644 --- a/sopare/analyze.py +++ b/sopare/analyze.py @@ -20,6 +20,7 @@ from operator import itemgetter import characteristics import config +import stm import path import util import imp @@ -33,6 +34,7 @@ def __init__(self, debug): self.util = util.util(debug) self.learned_dict = self.util.getDICT() self.dict_analysis = self.util.compile_analysis(self.learned_dict) + self.stm = stm.short_term_memory(debug) self.plugins = [ ] self.load_plugins() self.last_results = None @@ -45,6 +47,7 @@ def do_analysis(self, results, data, rawbuf): self.debug_info += ''.join([str(results), '\n\n']) matches = self.deep_search(framing, data) readable_results = self.get_match(matches) + readable_results = self.stm.get_results(readable_results) if (self.debug): print (self.debug_info) if (readable_results != None): diff --git a/sopare/config.py b/sopare/config.py index 0748e74..6ecba44 100644 --- a/sopare/config.py +++ b/sopare/config.py @@ -12,17 +12,17 @@ THRESHOLD = 400 # Silence time in seconds when analysis is called -MAX_SILENCE_AFTER_START = 3 +MAX_SILENCE_AFTER_START = 1 # Time in seconds after the analysis is forced MAX_TIME = 3.2 # Counter to stop processing and prepare more data # Should be > LONG_SILENCE -SILENCE_COUNTER = 42 +SILENCE_COUNTER = 256 # Start the analysis after reaching LONG_SILENCE -LONG_SILENCE = 40 +LONG_SILENCE = 30 # Characteristic length CHUNKS = 4096 @@ -61,28 +61,28 @@ ######################################################### # Min. number of tokens to identify the beginning of a word -MIN_START_TOKENS = 4 +MIN_START_TOKENS = 2 # Min. value for potential beginning of a word -MARGINAL_VALUE = 0.8 +MARGINAL_VALUE = 0.7 # Minimal similarity across all comparison to # identify a complete word across all tokens -MIN_CROSS_SIMILARITY = 0.6 +MIN_CROSS_SIMILARITY = 0.7 # Calculation basis or token/word comparison -SIMILARITY_NORM = 0.8 -SIMILARITY_HEIGHT = 0.2 +SIMILARITY_NORM = 0.6 +SIMILARITY_HEIGHT = 0.4 SIMILARITY_DOMINANT_FREQUENCY = 0 # Number of best matches to consider. # Value must be > 0 # If not specified or value < 1 value is set to 1 -NUMBER_OF_BEST_MATCHES = 1 +NUMBER_OF_BEST_MATCHES = 2 # Min. distance to keep a word -MIN_LEFT_DISTANCE = 0.8 -MIN_RIGHT_DISTANCE = 0.8 +MIN_LEFT_DISTANCE = 0.5 +MIN_RIGHT_DISTANCE = 0.35 # Use given number as results to assembly result # 0 for all predictions @@ -93,4 +93,7 @@ STRICT_LENGTH_CHECK = True # Value to soften the strict length check a bit to still # get quite precise results but to be less strict -STRICT_LENGTH_UNDERMINING = 1 +STRICT_LENGTH_UNDERMINING = 0 + +# Short term memory retention time in seconds. Zero to disable STM +STM_RETENTION = 1 diff --git a/sopare/stm.py b/sopare/stm.py new file mode 100644 index 0000000..fcae0fa --- /dev/null +++ b/sopare/stm.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) + +Licensed under the Apache License, Version 2.0 (the "License"); you may +not use this file except in compliance with the License. You may obtain +a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. +""" + +import time +import config + +class short_term_memory(): + + def __init__(self, debug): + self.debug = debug + self.last_results = [ ] + self.last_time = 0 + + def get_stm_results(self, results): + stm_results = self.last_results[:] + stm_results.extend(results) + return stm_results + + def get_results(self, results): + if (results == None or len(results) == 0): + return results + if (time.time() < self.last_time): + if (self.debug): + print ('stm input: ' + str(results) + ' ' + str(self.last_results)) + results = self.get_stm_results(results) + if (self.debug): + print ('stm mnodification: ' + str(results)) + self.last_results = results + self.last_time = time.time() + config.STM_RETENTION + return results diff --git a/sopare/version.py b/sopare/version.py index 0122460..fafd78f 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.4" +__version__ = "1.2.5" From cc14f670d097fccfba49d482cfd559c4222ca28a Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Tue, 1 Aug 2017 09:58:33 +0200 Subject: [PATCH 09/13] Fixing silence timeout issue for endless loop --- sopare/processing.py | 4 ++-- sopare/version.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sopare/processing.py b/sopare/processing.py index 553a061..50710d7 100644 --- a/sopare/processing.py +++ b/sopare/processing.py @@ -57,10 +57,10 @@ def stop(self, message): def check_silence(self, buf): volume = audioop.rms(buf, 2) if (volume >= config.THRESHOLD): + self.silence_timer = time.time() if (self.append == False): if (self.hatch.get('debug') == True): print ('starting append mode') - self.silence_timer = time.time() self.timer = time.time() for sbuf in self.silence_buffer: self.prepare.prepare(sbuf, volume) @@ -78,7 +78,7 @@ def check_silence(self, buf): self.prepare.prepare(buf, volume) if (self.append == True and self.silence_timer > 0 and self.silence_timer + config.MAX_SILENCE_AFTER_START < time.time() - and self.live == True and self.hatch.get('endless_loop') == False): + and self.live == True): self.stop("stop append mode because of silence") if (self.append == True and self.timer + config.MAX_TIME < time.time() and self.live == True): diff --git a/sopare/version.py b/sopare/version.py index fafd78f..0f5fcdf 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.5" +__version__ = "1.2.6" From ace145b3af8be9babb77a9b1fd34409f0aaee0f3 Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sat, 5 Aug 2017 13:11:32 +0200 Subject: [PATCH 10/13] Fixed empty token check --- sopare/analyze.py | 4 +++- sopare/version.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sopare/analyze.py b/sopare/analyze.py index b34bc72..fc0a686 100644 --- a/sopare/analyze.py +++ b/sopare/analyze.py @@ -145,7 +145,9 @@ def get_match(self, framing): match_results = self.validate_match_result(framing[s:], s, x, match_results) elif (x == len(framing)-1): match_results = self.validate_match_result(framing[s:], s, x, match_results) - if (match_results.count('') > len(match_results) / 2): + if (framing.count('') > len(framing) / 2): + if (self.debug): + self.debug_info += 'Results contain too many empty tokens. ' + str(framing.count('')) + ' / ' + len(framing) + ' Eliminating results' return [ 0 ] * len(match_results) return match_results diff --git a/sopare/version.py b/sopare/version.py index 0f5fcdf..07de3ae 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.6" +__version__ = "1.2.7" From 7347c72a82a6d22f0d4f0de929deddf963cb88c3 Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sat, 5 Aug 2017 18:32:19 +0200 Subject: [PATCH 11/13] Fixed debug output --- sopare/analyze.py | 2 +- sopare/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sopare/analyze.py b/sopare/analyze.py index fc0a686..c510d70 100644 --- a/sopare/analyze.py +++ b/sopare/analyze.py @@ -147,7 +147,7 @@ def get_match(self, framing): match_results = self.validate_match_result(framing[s:], s, x, match_results) if (framing.count('') > len(framing) / 2): if (self.debug): - self.debug_info += 'Results contain too many empty tokens. ' + str(framing.count('')) + ' / ' + len(framing) + ' Eliminating results' + self.debug_info += 'Results contain too many empty tokens. ' + str(framing.count('')) + ' / ' + str(len(framing)) + ' Eliminating results' return [ 0 ] * len(match_results) return match_results diff --git a/sopare/config.py b/sopare/config.py index 6ecba44..6a272b0 100644 --- a/sopare/config.py +++ b/sopare/config.py @@ -96,4 +96,4 @@ STRICT_LENGTH_UNDERMINING = 0 # Short term memory retention time in seconds. Zero to disable STM -STM_RETENTION = 1 +STM_RETENTION = 0.8 From 4a1d30d8278d62b2f6ad0fd7e3e7dd52ceebe30b Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sun, 13 Aug 2017 19:22:57 +0200 Subject: [PATCH 12/13] New option to further increase precision --- sopare/analyze.py | 2 +- sopare/config.py | 5 +++++ sopare/version.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sopare/analyze.py b/sopare/analyze.py index c510d70..2ccbb52 100644 --- a/sopare/analyze.py +++ b/sopare/analyze.py @@ -145,7 +145,7 @@ def get_match(self, framing): match_results = self.validate_match_result(framing[s:], s, x, match_results) elif (x == len(framing)-1): match_results = self.validate_match_result(framing[s:], s, x, match_results) - if (framing.count('') > len(framing) / 2): + if (framing.count('') > len(framing) * config.FILL_RESULT_PERCENTAGE): if (self.debug): self.debug_info += 'Results contain too many empty tokens. ' + str(framing.count('')) + ' / ' + str(len(framing)) + ' Eliminating results' return [ 0 ] * len(match_results) diff --git a/sopare/config.py b/sopare/config.py index 6a272b0..76af055 100644 --- a/sopare/config.py +++ b/sopare/config.py @@ -97,3 +97,8 @@ # Short term memory retention time in seconds. Zero to disable STM STM_RETENTION = 0.8 + +# Fill result percentage +# 0.5 means that half of the values can by empty to still get valid results +# A lower value should theoretically avoid false positives +FILL_RESULT_PERCENTAGE = 0.2 diff --git a/sopare/version.py b/sopare/version.py index 07de3ae..b18b900 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.7" +__version__ = "1.2.8" From 2d1fe29377ba4d85515aab109b85271bfdd83ce1 Mon Sep 17 00:00:00 2001 From: Martin Kauss Date: Sat, 26 Aug 2017 12:03:44 +0200 Subject: [PATCH 13/13] Cleanup and adjustments for a very precise and reliable version --- sopare/analyze.py | 2 +- sopare/config.py | 18 +++++++----------- sopare/processing.py | 4 ---- sopare/version.py | 2 +- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/sopare/analyze.py b/sopare/analyze.py index 2ccbb52..96b100c 100644 --- a/sopare/analyze.py +++ b/sopare/analyze.py @@ -148,7 +148,7 @@ def get_match(self, framing): if (framing.count('') > len(framing) * config.FILL_RESULT_PERCENTAGE): if (self.debug): self.debug_info += 'Results contain too many empty tokens. ' + str(framing.count('')) + ' / ' + str(len(framing)) + ' Eliminating results' - return [ 0 ] * len(match_results) + return [ ] * len(match_results) return match_results def validate_match_result(self, result, start, end, match_results): diff --git a/sopare/config.py b/sopare/config.py index 76af055..b5e77bf 100644 --- a/sopare/config.py +++ b/sopare/config.py @@ -17,15 +17,11 @@ # Time in seconds after the analysis is forced MAX_TIME = 3.2 -# Counter to stop processing and prepare more data -# Should be > LONG_SILENCE -SILENCE_COUNTER = 256 - # Start the analysis after reaching LONG_SILENCE LONG_SILENCE = 30 # Characteristic length -CHUNKS = 4096 +CHUNKS = 2048 ######################################################### @@ -61,18 +57,18 @@ ######################################################### # Min. number of tokens to identify the beginning of a word -MIN_START_TOKENS = 2 +MIN_START_TOKENS = 3 # Min. value for potential beginning of a word MARGINAL_VALUE = 0.7 # Minimal similarity across all comparison to # identify a complete word across all tokens -MIN_CROSS_SIMILARITY = 0.7 +MIN_CROSS_SIMILARITY = 0.85 # Calculation basis or token/word comparison -SIMILARITY_NORM = 0.6 -SIMILARITY_HEIGHT = 0.4 +SIMILARITY_NORM = 1 +SIMILARITY_HEIGHT = 0 SIMILARITY_DOMINANT_FREQUENCY = 0 # Number of best matches to consider. @@ -81,8 +77,8 @@ NUMBER_OF_BEST_MATCHES = 2 # Min. distance to keep a word -MIN_LEFT_DISTANCE = 0.5 -MIN_RIGHT_DISTANCE = 0.35 +MIN_LEFT_DISTANCE = 0.4 +MIN_RIGHT_DISTANCE = 0.3 # Use given number as results to assembly result # 0 for all predictions diff --git a/sopare/processing.py b/sopare/processing.py index 50710d7..c00834a 100644 --- a/sopare/processing.py +++ b/sopare/processing.py @@ -83,7 +83,3 @@ def check_silence(self, buf): if (self.append == True and self.timer + config.MAX_TIME < time.time() and self.live == True): self.stop("stop append mode because time is up") - if (self.append == True and self.live == True and self.hatch.get('endless_loop') == True - and self.silence_counter > config.SILENCE_COUNTER): - self.append = False - self.stop("endless loop silence detected") diff --git a/sopare/version.py b/sopare/version.py index b18b900..1794957 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.2.8" +__version__ = "1.3.0"