Skip to content

Commit

Permalink
Merge branch 'testing'. Many changes regarding precision and reliability. After updating you need to re-train!
Browse files Browse the repository at this point in the history
  • Loading branch information
bishoph committed Aug 26, 2017
2 parents eb3ea4b + 2d1fe29 commit 9103ba0
Show file tree
Hide file tree
Showing 13 changed files with 216 additions and 129 deletions.
13 changes: 11 additions & 2 deletions sopare.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import sopare.util as util
import sopare.recorder as recorder
import sopare.err_logger as err_logger
import sopare.hatch as hatch
from sopare.version import __version__

def main(argv):
Expand Down Expand Up @@ -83,8 +84,16 @@ def main(argv):
if opt in ("-d", "--delete"):
delete_word(arg, debug)
sys.exit(0)
recorder.recorder(endless_loop, debug, plot, wave, outfile,
infile, dict)

hatched = hatch.hatch()
hatched.add("endless_loop", endless_loop)
hatched.add("debug", debug)
hatched.add("plot", plot)
hatched.add("wave", wave)
hatched.add("outfile", outfile)
hatched.add("infile",infile )
hatched.add("dict", dict)
recorder.recorder(hatched)

def recreate_dict(debug):
print ("recreating dictionary from raw input files...")
Expand Down
16 changes: 10 additions & 6 deletions sopare/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from operator import itemgetter
import characteristics
import config
import stm
import path
import util
import imp
Expand All @@ -33,6 +34,7 @@ def __init__(self, debug):
self.util = util.util(debug)
self.learned_dict = self.util.getDICT()
self.dict_analysis = self.util.compile_analysis(self.learned_dict)
self.stm = stm.short_term_memory(debug)
self.plugins = [ ]
self.load_plugins()
self.last_results = None
Expand All @@ -45,6 +47,7 @@ def do_analysis(self, results, data, rawbuf):
self.debug_info += ''.join([str(results), '\n\n'])
matches = self.deep_search(framing, data)
readable_results = self.get_match(matches)
readable_results = self.stm.get_results(readable_results)
if (self.debug):
print (self.debug_info)
if (readable_results != None):
Expand Down Expand Up @@ -91,8 +94,7 @@ def deep_search(self, framing, data):
if (x < len(sorted_framing_match)):
best_match.append(sorted_framing_match[x])
sorted_best_match = sorted(best_match, key=lambda x: (x[1] + x[2], -x[0]))
if (self.debug):
self.debug_info += str(sorted_best_match).join(['sorted_best_match: ', '\n\n'])
self.debug_info += str(sorted_best_match).join(['sorted_best_match: ', '\n\n'])
for i, best in enumerate(sorted_best_match):
if (best[0] >= config.MIN_CROSS_SIMILARITY and best[1] <= config.MIN_LEFT_DISTANCE and best[2] <= config.MIN_RIGHT_DISTANCE):
for x in range(best[3], best[3] + best[4]):
Expand Down Expand Up @@ -128,7 +130,7 @@ def deep_inspection(self, id, startpos, data):
token_sim[1] = token_sim[1] / c
token_sim[2] = token_sim[2] / c
token_sim[4] = int(c)
if ((config.STRICT_LENGTH_CHECK == False and c > 1 ) or c >= self.dict_analysis[id]['min_tokens']):
if ((config.STRICT_LENGTH_CHECK == False and c > 1 ) or c >= self.dict_analysis[id]['min_tokens'] - config.STRICT_LENGTH_UNDERMINING):
word_sim.append(token_sim)
return word_sim

Expand All @@ -143,14 +145,16 @@ def get_match(self, framing):
match_results = self.validate_match_result(framing[s:], s, x, match_results)
elif (x == len(framing)-1):
match_results = self.validate_match_result(framing[s:], s, x, match_results)
if (match_results.count('') > len(match_results) / 2):
return [ 0 ] * len(match_results)
if (framing.count('') > len(framing) * config.FILL_RESULT_PERCENTAGE):
if (self.debug):
self.debug_info += 'Results contain too many empty tokens. ' + str(framing.count('')) + ' / ' + str(len(framing)) + ' Eliminating results'
return [ ] * len(match_results)
return match_results

def validate_match_result(self, result, start, end, match_results):
if (len(result) == 0 or result[0] == '' or end-start < 2):
return match_results
if (config.STRICT_LENGTH_CHECK == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] or len(result) > self.dict_analysis[result[0]]['max_tokens'])):
if (config.STRICT_LENGTH_CHECK == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] - config.STRICT_LENGTH_UNDERMINING or len(result) > self.dict_analysis[result[0]]['max_tokens'])):
if (self.debug):
self.debug_info += 'STRICT_LENGTH_CHECK failed for '+result[0] + ': ' + str(self.dict_analysis[result[0]]['min_tokens']) + ' > ' + str(len(result)) + ' < ' + str(self.dict_analysis[result[0]]['max_tokens']) + '\n'
return match_results
Expand Down
22 changes: 8 additions & 14 deletions sopare/buffering.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,41 +19,35 @@

import multiprocessing
import processing
import hatch

class buffering(multiprocessing.Process):

def __init__(self, queue, endless_loop, debug, plot, wave, outfile, dict):
def __init__(self, hatch, queue):
multiprocessing.Process.__init__(self, name="buffering queue")
self.hatch = hatch
self.queue = queue
self.endless_loop = endless_loop
self.debug = debug
self.plot = plot
self.outfile = outfile
self.proc = processing.processor(endless_loop, debug, plot, wave, outfile, dict, self)
self.proc = processing.processor(hatch, self)
self.PROCESS_ROUND_DONE = False
self.test_counter = 0
self.start()

def run(self):
if (self.debug):
if (self.hatch.get('debug') == True):
print ("buffering queue runner")
while True:
buf = self.queue.get()
if ((self.endless_loop == False or self.outfile != None) and self.PROCESS_ROUND_DONE):
if ((self.hatch.get('endless_loop') == False or self.hatch.get('outfile') != None) and self.PROCESS_ROUND_DONE):
break
self.proc.check_silence(buf)
if (self.debug):
if (self.hatch.get('debug') == True):
print ("terminating queue runner")

def flush(self, message):
self.proc.stop(message)

def stop(self):
if (self.debug):
if (self.hatch.get('debug') == True):
print ("stop buffering")
self.PROCESS_ROUND_DONE = True





17 changes: 3 additions & 14 deletions sopare/characteristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,14 @@

import numpy
import config
import hatch

class characteristic:

def __init__(self, debug):
self.debug = debug
def __init__(self, hatch):
self.hatch = hatch

def getcharacteristic(self, fft, chunked_norm, meta):

#chunked_norm = [ ]
#progessive = 1
#i = config.MIN_PROGRESSIVE_STEP
#for x in range(0, len(norm), i):
# if (hasattr(config, 'START_PROGRESSIVE_FACTOR') and x >= config.START_PROGRESSIVE_FACTOR):
# progessive += progessive * config.PROGRESSIVE_FACTOR
# i += int(progessive)
# if (i > config.MAX_PROGRESSIVE_STEP):
# i = config.MAX_PROGRESSIVE_STEP
# chunked_norm.append(round(sum(norm[x:x+i]), 2))

fft = numpy.abs(fft)
df = numpy.argmax(fft)
dfm = int(numpy.amax(fft))
Expand Down
40 changes: 24 additions & 16 deletions sopare/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,16 @@
THRESHOLD = 400

# Silence time in seconds when analysis is called
MAX_SILENCE_AFTER_START = 3
MAX_SILENCE_AFTER_START = 1

# Time in seconds after the analysis is forced
MAX_TIME = 3.2

# Counter to stop processing and prepare more data
# Should be > LONG_SILENCE
SILENCE_COUNTER = 42

# Start the analysis after reaching LONG_SILENCE
LONG_SILENCE = 40
LONG_SILENCE = 30

# Characteristic length
CHUNKS = 2048


#########################################################
Expand All @@ -36,15 +34,15 @@
# Progressive value is used if you want to pack not
# so relevant frequencies
PROGRESSIVE_FACTOR = 0
START_PROGRESSIVE_FACTOR = 600
START_PROGRESSIVE_FACTOR = 1000
MIN_PROGRESSIVE_STEP = 25
MAX_PROGRESSIVE_STEP = 25

# Specifies freq ranges that are kept for further
# analysis. Freq outside of the ranges are set to zero.
# Human language can be found between 20 and 5000.
LOW_FREQ = 20
HIGH_FREQ = 600
HIGH_FREQ = 1000

# Make use of Hann window function
HANNING = True
Expand All @@ -62,31 +60,41 @@
MIN_START_TOKENS = 3

# Min. value for potential beginning of a word
MARGINAL_VALUE = 0.8
MARGINAL_VALUE = 0.7

# Minimal similarity across all comparison to
# identify a complete word across all tokens
MIN_CROSS_SIMILARITY = 0.7
MIN_CROSS_SIMILARITY = 0.85

# Calculation basis or token/word comparison
SIMILARITY_NORM = 0.6
SIMILARITY_HEIGHT = 0.4
SIMILARITY_NORM = 1
SIMILARITY_HEIGHT = 0
SIMILARITY_DOMINANT_FREQUENCY = 0

# Number of best matches to consider.
# Value must be > 0
# If not specified or value < 1 value is set to 1
NUMBER_OF_BEST_MATCHES = 1
NUMBER_OF_BEST_MATCHES = 2

# Min. distance to keep a word
MIN_LEFT_DISTANCE = 0.3
MIN_LEFT_DISTANCE = 0.4
MIN_RIGHT_DISTANCE = 0.3


# Use given number as results to assembly result
# 0 for all predictions
MAX_WORD_START_RESULTS = 2
MAX_TOP_RESULTS = 3

# Enable or disable strict length check for words
STRICT_LENGTH_CHECK = False
STRICT_LENGTH_CHECK = True
# Value to soften the strict length check a bit to still
# get quite precise results but to be less strict
STRICT_LENGTH_UNDERMINING = 0

# Short term memory retention time in seconds. Zero to disable STM
STM_RETENTION = 0.8

# Fill result percentage
# 0.5 means that half of the values can be empty and still yield valid results
# A lower value should theoretically avoid false positives
FILL_RESULT_PERCENTAGE = 0.2
10 changes: 5 additions & 5 deletions sopare/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@
import worker
import config
import characteristics
import hatch

class filtering():

def __init__(self, debug, plot, dict, wave):
self.debug = debug
self.plot = plot
def __init__(self, hatch):
self.hatch = hatch
self.first = True
self.queue = multiprocessing.Queue()
self.characteristic = characteristics.characteristic(debug)
self.worker = worker.worker(self.queue, debug, plot, dict, wave)
self.characteristic = characteristics.characteristic(self.hatch)
self.worker = worker.worker(self.hatch, self.queue)

def stop(self):
self.queue.put({ 'action': 'stop' })
Expand Down
40 changes: 40 additions & 0 deletions sopare/hatch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Copyright (C) 2015 - 2017 Martin Kauss ([email protected])
Licensed under the Apache License, Version 2.0 (the "License"); you may
not use this file except in compliance with the License. You may obtain
a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations
under the License.
"""

import config

class hatch():
    """Simple shared key/value container plus a plot-data cache.

    Instances are filled via add() and passed to other sopare
    components, which read settings back with get() and accumulate
    raw sample data for plotting via extend_plot_cache().
    """

    def __init__(self):
        # List of raw data points accumulated for plotting.
        self.plot_cache = [ ]
        # Backing store for the add()/get() key/value API.
        self.key_value_store = { }

    def add(self, key, value):
        """Store value under key, overwriting any previous entry."""
        self.key_value_store[key] = value

    def get(self, key):
        """Return the value stored under key, or None if absent."""
        # dict.get performs a single lookup and returns None for
        # missing keys, replacing the membership test + subscript.
        return self.key_value_store.get(key)

    def extend_plot_cache(self, data):
        """Append the elements of data to the plot cache."""
        self.plot_cache.extend(data)

    def get_plot_cache(self):
        """Return the accumulated plot cache list (not a copy)."""
        return self.plot_cache
23 changes: 10 additions & 13 deletions sopare/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,15 @@
import visual
import util
import config
import hatch

class preparing():

def __init__(self, debug, plot, wave, dict):
self.debug = debug
self.plot = plot
self.wave = wave
self.dict = dict
def __init__(self, hatch):
self.hatch = hatch
self.visual = visual.visual()
self.util = util.util(debug)
self.filter = filter.filtering(debug, plot, dict, wave)
self.util = util.util(self.hatch.get('debug'))
self.filter = filter.filtering(self.hatch)
self.silence = 0
self.force = False
self.counter = 0
Expand All @@ -45,7 +43,6 @@ def __init__(self, debug, plot, wave, dict):
self.token_peaks = [ ]
self.last_low_pos = 0
self.force = False
self.plot_buffer = [ ]
self.entered_silence = False

def tokenize(self, meta):
Expand All @@ -68,10 +65,10 @@ def valid_token(self, meta):
return True

def stop(self):
if (self.hatch.get('plot') == True):
self.visual.create_sample(self.hatch.get_plot_cache(), 'sample.png')
self.tokenize([{ 'token': 'stop' }])
self.filter.stop()
if (self.plot):
self.visual.create_sample(self.plot_buffer, 'sample.png')
self.filter_reset()
self.reset()

Expand All @@ -97,8 +94,8 @@ def force_tokenizer(self):

def prepare(self, buf, volume):
data = numpy.fromstring(buf, dtype=numpy.int16)
if (self.plot):
self.plot_buffer.extend(data)
if (self.hatch.get('plot') == True and self.hatch.get('endless_loop') == False):
self.hatch.extend_plot_cache(data)
self.buffer.extend(data)
self.counter += 1
abs_data = abs(data)
Expand All @@ -120,7 +117,7 @@ def prepare(self, buf, volume):
self.entered_silence = False
self.silence = 0

if (len(self.buffer) == 4096): # TODO: Make configurable
if (len(self.buffer) == config.CHUNKS):
self.new_token = True
meta.append({ 'token': 'token', 'silence': self.silence, 'pos': self.counter, 'adapting': adaptive, 'volume': volume, 'token_peaks': self.token_peaks })

Expand Down
Loading

0 comments on commit 9103ba0

Please sign in to comment.