diff --git a/.gitignore b/.gitignore index 07aa852..b113400 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ *.png error.log plugins/* +config/* diff --git a/NOTICE.txt b/NOTICE.txt index 387805a..2fac4ee 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ SoPaRe -Copyright 2015-2017 +Copyright 2015-2018 This is the SOund PAttern REcognition project developed by Martin Kauss (yo@bishoph.org). diff --git a/config/default.ini b/config/default.ini new file mode 100644 index 0000000..9c2c26c --- /dev/null +++ b/config/default.ini @@ -0,0 +1,127 @@ +######################################################### +# Stream prep and silence configuration options ######### +######################################################### + +[stream] + +# Read chunk size +CHUNK = 512 + +# Sample rate +SAMPLE_RATE = 48000 + +# Volume threshold when audio processing starts / silence +THRESHOLD = 380 + +# Silence time in seconds when analysis is called +MAX_SILENCE_AFTER_START = 1.4 + +# Time in seconds after the analysis is forced +MAX_TIME = 2.4 + +# Start the analysis after reaching LONG_SILENCE +LONG_SILENCE = 20 + +# Characteristic length +CHUNKS = 3072 + + +######################################################### +# Characteristic configuration options ################## +######################################################### + +[characteristic] + +# Steps boil down the data into smaller chunks of data. +# Smaller steps mean more precision but require +# normally more learned entries in the dictionary. +# Progressive value is used if you want to pack not +# so relevant frequencies +PROGRESSIVE_FACTOR = 0 +START_PROGRESSIVE_FACTOR = 600 +MIN_PROGRESSIVE_STEP = 5 +MAX_PROGRESSIVE_STEP = 5 + +# Specifies freq ranges that are kept for further +# analysis. Freq outside of the ranges are set to zero. +# Human language can be found between 20 and 5000. 
+LOW_FREQ = 20 +HIGH_FREQ = 600 + +# Make use of Hann window function +HANNING = true + +# Range factor for peaks +PEAK_FACTOR = 0.7 + + + +######################################################### +# Compare configuration options ######################### +######################################################### + +[compare] + +# Min. number of tokens to identify the beginning of a word +MIN_START_TOKENS = 3 + +# Min. value for potential beginning of a word +MARGINAL_VALUE = 0.7 + +# Minimal similarity across all comparisons to +# identify a complete word across all tokens +MIN_CROSS_SIMILARITY = 0.8 + +# Calculation basis for token/word comparison +SIMILARITY_NORM = 0.6 +SIMILARITY_HEIGHT = 0.4 +SIMILARITY_DOMINANT_FREQUENCY = 0 + +# Number of best matches to consider. +# Value must be > 0 +# If not specified or value < 1 value is set to 1 +NUMBER_OF_BEST_MATCHES = 2 + +# Max. allowed distance to keep a word (despite the MIN_ prefix) +MIN_LEFT_DISTANCE = 0.9 +MIN_RIGHT_DISTANCE = 0.7 + +# Use given number of results to assemble the result +# 0 for all predictions +MAX_WORD_START_RESULTS = 2 +MAX_TOP_RESULTS = 3 + +# Enable or disable strict length check for words +STRICT_LENGTH_CHECK = true +# Value to soften the strict length check a bit to still +# get quite precise results but to be less strict +STRICT_LENGTH_UNDERMINING = 2 + +# Short term memory retention time in seconds. 
Zero to disable STM +STM_RETENTION = 1.2 + +# Fill result percentage +# 0.5 means that half of the values can be empty to still get valid results +# A lower value should theoretically avoid false positives +FILL_RESULT_PERCENTAGE = 0.1 + + + +######################################################### +# Misc configuration options ############################ +######################################################### + +[misc] + +# Loglevel (CRITICAL, ERROR, WARNING, INFO, DEBUG) +LOGLEVEL = ERROR + + +######################################################### +# Experimental configuration options #################### +######################################################### + +[experimental] + +# Additional FFT analysis and comparison for CHUNKS/2 length +FFT_SHIFT = false diff --git a/readme.md b/readme.md index 0cb7082..9426ff2 100644 --- a/readme.md +++ b/readme.md @@ -71,6 +71,7 @@ Next steps/TODOs: * Optimizations (e.g. word separation, performance) * Python3 compatibility and testing, install guides + * Individual loglevels per class Project status: @@ -89,7 +90,8 @@ Usage: -l --loop : loop forever - -e --error : redirect sdterr to error.log + -e --error : redirect output to error.log + loglevel is forced to error! -p --plot : plot results (only without loop option) @@ -114,6 +116,8 @@ Usage: -d --delete [word] : delete [word] from dictionary and exits. '*' deletes everything! + -i --ini [file] : use alternative configuration file + -a --analysis : show dictionary analysis and exits. -u --unit : run unit tests @@ -140,7 +144,7 @@ python2 sopare.py -c ./sopare.py -v -l -Changing config options and new SOPARE versions requires re-training. +Changing config options and new SOPARE versions require re-training. 
Delete your training files and the dictionary entries before continue: ./sopare.py -d "*" diff --git a/sopare.py b/sopare.py index d4bde24..d639110 100755 --- a/sopare.py +++ b/sopare.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain @@ -19,9 +19,9 @@ import sys import getopt +import sopare.config as config import sopare.util as util import sopare.recorder as recorder -import sopare.hatch as hatch import sopare.log as log import test.unit_tests as tests from sopare.version import __version__ @@ -35,14 +35,18 @@ def main(argv): plot = False wave = False error = False + cfg_ini = None + + recreate = False + unit = False print ("sopare "+__version__) if (len(argv) > 0): - try: - opts, args = getopt.getopt(argv, "ahelpv~cous:w:r:t:d:", + try: + opts, args = getopt.getopt(argv, "ahelpv~cous:w:r:t:d:i:", ["analysis", "help", "error", "loop", "plot", "verbose", "wave", "create", "overview", "unit", - "show=", "write=", "read=", "train=", "delete=" + "show=", "write=", "read=", "train=", "delete=", "ini=" ]) except getopt.GetoptError: usage() @@ -66,8 +70,7 @@ def main(argv): if (opt in ("-~", "--wave")): wave = True if opt in ("-c", "--create"): - recreate_dict(debug) - sys.exit(0) + recreate = True if opt in ("-o", "--overview"): show_dict_ids(debug) sys.exit(0) @@ -86,26 +89,44 @@ def main(argv): if opt in ("-d", "--delete"): delete_word(arg, debug) sys.exit(0) + if opt in ("-i", "--ini"): + cfg_ini = arg if opt in ("-u", "--unit"): - unit_tests(debug) - sys.exit(0) + unit = True + + cfg = create_config(cfg_ini, endless_loop, debug, plot, wave, outfile, infile, dict, error) + if (recreate == True): + recreate_dict(debug, cfg) + sys.exit(0) - hatched = hatch.hatch() - hatched.add("endless_loop", endless_loop) - 
hatched.add("debug", debug) - hatched.add("plot", plot) - hatched.add("wave", wave) - hatched.add("outfile", outfile) - hatched.add("infile",infile ) - hatched.add("dict", dict) - logger = log.log(debug, error) - hatched.add("logger", logger) - recorder.recorder(hatched) + if (unit == True): + unit_tests(debug, cfg) + sys.exit(0) -def recreate_dict(debug): + + recorder.recorder(cfg) + +def create_config(cfg_ini, endless_loop, debug, plot, wave, outfile, infile, dict, error): + if (cfg_ini == None): + cfg = config.config() + else: + cfg = config.config(cfg_ini) + logger = log.log(debug, error, cfg) + cfg.addsection('cmdlopt') + cfg.setoption('cmdlopt', 'endless_loop', str(endless_loop)) + cfg.setoption('cmdlopt', 'debug', str(debug)) + cfg.setoption('cmdlopt', 'plot', str(plot)) + cfg.setoption('cmdlopt', 'wave', str(wave)) + cfg.setoption('cmdlopt', 'outfile', outfile) + cfg.setoption('cmdlopt', 'infile', infile) + cfg.setoption('cmdlopt', 'dict', dict) + cfg.addlogger(logger) + return cfg + +def recreate_dict(debug, cfg): print ("recreating dictionary from raw input files...") - utilities = util.util(debug) + utilities = util.util(debug, cfg.getfloatoption('characteristic', 'PEAK_FACTOR')) utilities.recreate_dict_from_raw_files() def delete_word(dict, debug): @@ -113,29 +134,32 @@ def delete_word(dict, debug): print ("deleting "+dict+" from dictionary") else: print ("deleting all enttries from dictionary") - utilities = util.util(debug) + utilities = util.util(debug, None) utilities.deletefromdict(dict) def show_word_entries(dict, debug): print (dict+" entries in dictionary:") print - utilities = util.util(debug) + utilities = util.util(debug, None) utilities.showdictentry(dict) def show_dict_ids(debug): print ("current entries in dictionary:") - utilities = util.util(debug) + utilities = util.util(debug, None) utilities.showdictentriesbyid() def show_dict_analysis(debug): print ("dictionary analysis:") - utilities = util.util(debug) - print 
(utilities.compile_analysis(utilities.getDICT())) - -def unit_tests(debug): + utilities = util.util(debug, None) + analysis = utilities.compile_analysis(utilities.getDICT()) + for id in analysis: + print (id) + for k, v in analysis[id].iteritems(): + print (' ' + str(k) + ' ' + str(v)) + +def unit_tests(debug, cfg): print ("starting unit tests...") - utilities = util.util(debug) - tests.unit_tests(debug) + tests.unit_tests(debug, cfg) print ("done.") def usage(): @@ -156,6 +180,7 @@ def usage(): print (" -t --train [word] : add raw data to raw dictionary file\n") print (" -d --delete [word] : delete [word] from dictionary and exits.") print (" '*' deletes everything!\n") + print (" -i --ini [file] : use alternative configuration file\n") print (" -a --analysis : show dictionary analysis and exits.\n") print (" -u --unit : run unit tests\n") diff --git a/sopare/analyze.py b/sopare/analyze.py index 495b8d8..7651652 100644 --- a/sopare/analyze.py +++ b/sopare/analyze.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain @@ -19,7 +19,6 @@ from operator import itemgetter import sopare.characteristics -import sopare.config import sopare.stm import sopare.path import sopare.util @@ -29,13 +28,15 @@ class analyze(): - def __init__(self, debug): - self.debug = debug - self.characteristic = sopare.characteristics.characteristic(debug) - self.util = sopare.util.util(debug) + def __init__(self, cfg): + self.cfg = cfg + self.debug = self.cfg.getbool('cmdlopt', 'debug') + self.util = sopare.util.util(self.debug, self.cfg.getfloatoption('characteristic', 'PEAK_FACTOR')) self.learned_dict = self.util.getDICT() self.dict_analysis = self.util.compile_analysis(self.learned_dict) - self.stm = sopare.stm.short_term_memory(debug) + self.stm = sopare.stm.short_term_memory(self.cfg) + self.logger = self.cfg.getlogger().getlog() + self.logger = logging.getLogger(__name__) self.plugins = [ ] self.load_plugins() self.last_results = None @@ -53,8 +54,8 @@ def do_analysis(self, results, data, rawbuf): self.debug_info += ''.join([str(results), '\n\n']) matches = self.deep_search(framing, data) readable_results = self.get_match(matches) - readable_results = self.stm.get_results(readable_results) - logging.debug(self.debug_info) + readable_results, self.debug_info = self.stm.get_results(readable_results, self.debug_info) + self.logger.debug(self.debug_info) if (readable_results != None): for p in self.plugins: p.run(readable_results, self.debug_info, rawbuf) @@ -63,22 +64,22 @@ def framing(self, results, data_length): framing = { } arr = [ ] for id in results: - framing[id] = [ ] + framing[id] = [ ] for i, row in enumerate(results[id]): row = self.row_validation(row, id) row_result = sum(row[0:len(row)]) / self.dict_analysis[id]['min_tokens'] - if (row_result >= sopare.config.MARGINAL_VALUE): + if (row_result >= self.cfg.getfloatoption('compare', 'MARGINAL_VALUE')): arr.append([row_result, i, id]) else: - logging.debug('removing '+id + ' from potential start position '+str(i) + ' bc 
MARGINAL_VALUE > ' +str(row_result)) + self.logger.debug('removing '+id + ' from potential start position '+str(i) + ' bc MARGINAL_VALUE > ' +str(row_result)) sorted_arr = sorted(arr, key=itemgetter(0), reverse = True) for el in sorted_arr: - if (el[1] not in framing[el[2]] and (sopare.config.MAX_WORD_START_RESULTS == 0 or len(framing[el[2]]) < sopare.config.MAX_WORD_START_RESULTS)): + if (el[1] not in framing[el[2]] and (self.cfg.getintoption('compare', 'MAX_WORD_START_RESULTS') == 0 or len(framing[el[2]]) < self.cfg.getintoption('compare', 'MAX_WORD_START_RESULTS'))): framing[el[2]].append(el[1]) return framing def row_validation(self, row, id): - if (row[0] == 0 or len(row) <= sopare.config.MIN_START_TOKENS): + if (row[0] == 0 or len(row) <= self.cfg.getintoption('compare', 'MIN_START_TOKENS')): return [ 0 ] * len(row) return row @@ -95,27 +96,27 @@ def deep_search(self, framing, data): for match in framing_match: sorted_framing_match = sorted(match, key=lambda x: (x[1] + x[2], -x[0])) nobm = 1 - if (hasattr(sopare.config, 'NUMBER_OF_BEST_MATCHES') and sopare.config.NUMBER_OF_BEST_MATCHES > 0): - nobm = sopare.config.NUMBER_OF_BEST_MATCHES + if (self.cfg.hasoption('compare', 'NUMBER_OF_BEST_MATCHES') and self.cfg.getintoption('compare', 'NUMBER_OF_BEST_MATCHES') > 0): + nobm = self.cfg.getintoption('compare', 'NUMBER_OF_BEST_MATCHES') for x in range(0, nobm): if (x < len(sorted_framing_match)): best_match.append(sorted_framing_match[x]) sorted_best_match = sorted(best_match, key=lambda x: (x[1] + x[2], -x[0])) self.debug_info += str(sorted_best_match).join(['sorted_best_match: ', '\n\n']) for i, best in enumerate(sorted_best_match): - if (best[0] >= sopare.config.MIN_CROSS_SIMILARITY and best[1] <= sopare.config.MIN_LEFT_DISTANCE and best[2] <= sopare.config.MIN_RIGHT_DISTANCE): + if (best[0] >= self.cfg.getfloatoption('compare', 'MIN_CROSS_SIMILARITY') and best[1] <= self.cfg.getfloatoption('compare', 'MIN_LEFT_DISTANCE') and best[2] <= 
self.cfg.getfloatoption('compare', 'MIN_RIGHT_DISTANCE')): for x in range(best[3], best[3] + best[4]): if (match_results[x] == ''): match_results[x] = best[5] - if (sopare.config.MAX_TOP_RESULTS > 0 and i > sopare.config.MAX_TOP_RESULTS): + if (self.cfg.getintoption('compare', 'MAX_TOP_RESULTS') > 0 and i > self.cfg.getintoption('compare', 'MAX_TOP_RESULTS')): break self.debug_info += str(match_results).join(['match_results: ', '\n\n']) return match_results def token_sim(self, characteristic, dcharacteristic): - sim_norm = self.util.similarity(characteristic['norm'], dcharacteristic['norm']) * sopare.config.SIMILARITY_NORM - sim_token_peaks = self.util.similarity(characteristic['token_peaks'], dcharacteristic['token_peaks']) * sopare.config.SIMILARITY_HEIGHT - sim_df = self.util.single_similarity(characteristic['df'], dcharacteristic['df']) * sopare.config.SIMILARITY_DOMINANT_FREQUENCY + sim_norm = self.util.similarity(characteristic['norm'], dcharacteristic['norm']) * self.cfg.getfloatoption('compare', 'SIMILARITY_NORM') + sim_token_peaks = self.util.similarity(characteristic['token_peaks'], dcharacteristic['token_peaks']) * self.cfg.getfloatoption('compare', 'SIMILARITY_HEIGHT') + sim_df = self.util.single_similarity(characteristic['df'], dcharacteristic['df']) * self.cfg.getfloatoption('compare', 'SIMILARITY_DOMINANT_FREQUENCY') sim = sim_norm + sim_token_peaks + sim_df sl, sr = self.util.manhatten_distance(characteristic['norm'], dcharacteristic['norm']) return sim, sl, sr @@ -139,17 +140,19 @@ def deep_inspection(self, id, startpos, data): if (ssr < sr): sr = ssr if (ssl < sl): - sl = ssl + sl = ssl token_sim[0] += sim token_sim[1] += sl token_sim[2] += sr c += 1.0 if (c > 0): token_sim[0] = token_sim[0] / c + if (token_sim[0] > 1.0 and c >= self.cfg.getintoption('compare', 'MIN_START_TOKENS') and c >= self.dict_analysis[id]['min_tokens']): + self.logger.warning('Your calculation basis seems to be wrong as we get results > 1.0!') token_sim[1] = token_sim[1] / 
c token_sim[2] = token_sim[2] / c token_sim[4] = int(c) - if ((sopare.config.STRICT_LENGTH_CHECK == False and c >= sopare.config.MIN_START_TOKENS ) or c >= self.dict_analysis[id]['min_tokens'] - sopare.config.STRICT_LENGTH_UNDERMINING): + if ((self.cfg.getbool('compare', 'STRICT_LENGTH_CHECK') == False and c >= self.cfg.getintoption('compare', 'MIN_START_TOKENS')) or c >= self.dict_analysis[id]['min_tokens'] - self.cfg.getintoption('compare', 'STRICT_LENGTH_UNDERMINING')): word_sim.append(token_sim) return word_sim @@ -164,7 +167,7 @@ def get_match(self, framing): match_results = self.validate_match_result(framing[s:], s, x, match_results) elif (x == len(framing)-1): match_results = self.validate_match_result(framing[s:], s, x, match_results) - if (framing.count('') > len(framing) * sopare.config.FILL_RESULT_PERCENTAGE): + if (framing.count('') > len(framing) * self.cfg.getfloatoption('compare', 'FILL_RESULT_PERCENTAGE')): if (self.debug): self.debug_info += 'Results contain too many empty tokens. 
' + str(framing.count('')) + ' / ' + str(len(framing)) + ' Eliminating results' return [ ] * len(match_results) @@ -173,7 +176,7 @@ def get_match(self, framing): def validate_match_result(self, result, start, end, match_results): if (len(result) == 0 or result[0] == ''): return match_results - if (sopare.config.STRICT_LENGTH_CHECK == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] - sopare.config.STRICT_LENGTH_UNDERMINING or len(result) > self.dict_analysis[result[0]]['max_tokens'])): + if (self.cfg.getbool('compare', 'STRICT_LENGTH_CHECK') == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] - self.cfg.getintoption('compare', 'STRICT_LENGTH_UNDERMINING') or len(result) > self.dict_analysis[result[0]]['max_tokens'])): if (self.debug): self.debug_info += 'STRICT_LENGTH_CHECK failed for '+result[0] + ': ' + str(self.dict_analysis[result[0]]['min_tokens']) + ' > ' + str(len(result)) + ' < ' + str(self.dict_analysis[result[0]]['max_tokens']) + '\n' match_results.append('') @@ -182,16 +185,16 @@ def validate_match_result(self, result, start, end, match_results): return match_results def load_plugins(self): - logging.info('checking for plugins...') + self.logger.info('checking for plugins...') pluginsfound = os.listdir(sopare.path.__plugindestination__) for plugin in pluginsfound: try: pluginpath = os.path.join(sopare.path.__plugindestination__, plugin) - logging.debug('loading and initialzing '+pluginpath) + self.logger.debug('loading and initialzing '+pluginpath) f, filename, description = imp.find_module('__init__', [pluginpath]) self.plugins.append(imp.load_module(plugin, f, filename, description)) except ImportError, err: - logging.error('ImportError: %s', err) + self.logger.error('ImportError: %s', err) def reset(self): self.last_results = None diff --git a/sopare/audio_factory.py b/sopare/audio_factory.py new file mode 100644 index 0000000..4b74c53 --- /dev/null +++ b/sopare/audio_factory.py @@ -0,0 +1,60 @@ +#!/usr/bin/env 
python +# -*- coding: utf-8 -*- + +""" +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) + +Licensed under the Apache License, Version 2.0 (the "License"); you may +not use this file except in compliance with the License. You may obtain +a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. +""" + +import pyaudio +import logging + +class audio_factory(): + + def __init__(self, cfg): + self.cfg = cfg + self.logger = self.cfg.getlogger().getlog() + self.logger = logging.getLogger(__name__) + self.stream = None + self.pa = pyaudio.PyAudio() + self.debug_once = False + + def open(self, sample_rate, input_format=pyaudio.paInt16): + if (self.debug_once == False): + self.logger.debug('#### Default input device info #####') + for k, v in self.pa.get_default_input_device_info().iteritems(): + self.logger.debug(str(k) + ': ' + str(v)) + self.debug_once = True + try: + self.stream = self.pa.open(format = input_format, + channels = 1, # mono + rate=sample_rate, + input=True, + output=False, + frames_per_buffer = self.cfg.getintoption('stream', 'CHUNK')) + except IOError as e: + self.logger.error("Error: " + str(e)) + return None + return self.stream + + def close(self): + if (self.stream != None): + try: + self.stream.stop_stream() + self.stream.close() + except IOError as e: + self.logger.error("Error: " + str(e)) + + def terminate(self): + self.pa.terminate() diff --git a/sopare/buffering.py b/sopare/buffering.py index cc882c8..241dd74 100644 --- a/sopare/buffering.py +++ b/sopare/buffering.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss 
(yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain @@ -20,34 +20,32 @@ import multiprocessing import logging import sopare.processing -import sopare.hatch class buffering(multiprocessing.Process): - def __init__(self, hatch, queue): + def __init__(self, cfg, queue): multiprocessing.Process.__init__(self, name="buffering queue") - self.hatch = hatch + self.cfg = cfg self.queue = queue - self.proc = sopare.processing.processor(hatch, self) + self.proc = sopare.processing.processor(self.cfg, self) self.PROCESS_ROUND_DONE = False self.test_counter = 0 - self.logger = self.hatch.get('logger').getlog() + self.logger = self.cfg.getlogger().getlog() self.logger = logging.getLogger(__name__) self.start() - + def run(self): self.logger.info("buffering queue runner") while True: buf = self.queue.get() - if ((self.hatch.get('endless_loop') == False or self.hatch.get('outfile') != None) and self.PROCESS_ROUND_DONE): + if ((self.cfg.getbool('cmdlopt', 'endless_loop') == False or self.cfg.getoption('cmdlopt', 'outfile') != None) and self.PROCESS_ROUND_DONE): break self.proc.check_silence(buf) self.logger.info("terminating queue runner") def flush(self, message): self.proc.stop(message) - + def stop(self): self.logger.info("stop buffering") self.PROCESS_ROUND_DONE = True - diff --git a/sopare/characteristics.py b/sopare/characteristics.py index b353d6a..bc2f083 100644 --- a/sopare/characteristics.py +++ b/sopare/characteristics.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain @@ -18,13 +18,11 @@ """ import numpy -import sopare.config -import sopare.hatch class characteristic: - def __init__(self, hatch): - self.hatch = hatch + def __init__(self, peak_factor): + self.peak_factor = peak_factor def getcharacteristic(self, fft, chunked_norm, meta): fft = numpy.abs(fft) @@ -33,7 +31,7 @@ def getcharacteristic(self, fft, chunked_norm, meta): fc = 0 peaks = [ ] if (len(chunked_norm) > 0): - where_range = numpy.mean(chunked_norm) / sopare.config.PEAK_FACTOR + where_range = numpy.mean(chunked_norm) / self.peak_factor peaks = list(numpy.array(numpy.where(chunked_norm > where_range))[0]) where_range = numpy.mean(chunked_norm) npeaks = numpy.array(numpy.where(chunked_norm > where_range)) diff --git a/sopare/comparator.py b/sopare/comparator.py index f17c7b3..20876ac 100644 --- a/sopare/comparator.py +++ b/sopare/comparator.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain @@ -40,7 +40,7 @@ def word(self, characteristics): self.results[id] = [ ] self.create_structure() self.fill_structure(characteristics[ll]) - + def create_structure(self): for id in self.dict_analysis: self.results[id].append([ ]) diff --git a/sopare/config.py b/sopare/config.py index a00b166..4df7d60 100644 --- a/sopare/config.py +++ b/sopare/config.py @@ -1,118 +1,61 @@ -######################################################### -# Stream prep and silence configuration options ######### -######################################################### +#!/usr/bin/env python +# -*- coding: utf-8 -*- -# Read chunk size -CHUNK = 512 +""" +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) -# Sample rate -SAMPLE_RATE = 48000 +Licensed under the Apache License, Version 2.0 (the "License"); you may +not use this file except in compliance with the License. You may obtain +a copy of the License at -# Volume threshold when audio processing starts / silence -THRESHOLD = 400 + http://www.apache.org/licenses/LICENSE-2.0 -# Silence time in seconds when analysis is called -MAX_SILENCE_AFTER_START = 1.4 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. 
+""" -# Time in seconds after the analysis is forced -MAX_TIME = 2.4 +import ConfigParser -# Start the analysis after reaching LONG_SILENCE -LONG_SILENCE = 30 +class config(): -# Characteristic length -CHUNKS = 1024*3 + def __init__(self, config_file = 'config/default.ini'): + self.config = ConfigParser.ConfigParser(allow_no_value=True) + self.config.read(config_file) + self.logger = None + def getoption(self, section, option): + return self.config.get(section, option) -######################################################### -# Characteristic configuration options ################## -######################################################### + def getfloatoption(self, section, option): + return self.config.getfloat(section, option) -# Steps boil down the data into smaller chunks of data. -# Smaller steps mean more precision but require -# normally more learned entries in the dictionary. -# Progressive value is used if you want to pack not -# so relevant frequencies -PROGRESSIVE_FACTOR = 0 -START_PROGRESSIVE_FACTOR = 600 -MIN_PROGRESSIVE_STEP = 10 -MAX_PROGRESSIVE_STEP = 10 + def getintoption(self, section, option): + return self.config.getint(section, option) -# Specifies freq ranges that are kept for further -# analysis. Freq outside of the ranges are set to zero. -# Human language can be found between 20 and 5000. 
-LOW_FREQ = 20 -HIGH_FREQ = 600 + def getbool(self, section, option): + return self.config.getboolean(section, option) -# Make use of Hann window function -HANNING = True + def addsection(self, section): + self.config.add_section(section) -# Range factor for peaks -PEAK_FACTOR = 0.8 + def setoption(self, section, id, option): + self.config.set(section, id, option) + def hasoption(self, section, option): + return self.config.has_option(section, option) + def addlogger(self, logger): + self.logger = logger -######################################################### -# Compare configuration options ######################### -######################################################### + def getlogger(self): + return self.logger -# Min. number of tokens to identify the beginning of a word -MIN_START_TOKENS = 4 - -# Min. value for potential beginning of a word -MARGINAL_VALUE = 0.7 - -# Minimal similarity across all comparison to -# identify a complete word across all tokens -MIN_CROSS_SIMILARITY = 0.8 - -# Calculation basis or token/word comparison -SIMILARITY_NORM = 0.6 -SIMILARITY_HEIGHT = 0.4 -SIMILARITY_DOMINANT_FREQUENCY = 0 - -# Number of best matches to consider. -# Value must be > 0 -# If not specified or value < 1 value is set to 1 -NUMBER_OF_BEST_MATCHES = 2 - -# Min. distance to keep a word -MIN_LEFT_DISTANCE = 0.5 -MIN_RIGHT_DISTANCE = 0.4 - -# Use given number as results to assembly result -# 0 for all predictions -MAX_WORD_START_RESULTS = 2 -MAX_TOP_RESULTS = 3 - -# Enable or disable strict length check for words -STRICT_LENGTH_CHECK = True -# Value to soften the strict length check a bit to still -# get quite precise results but to be less strict -STRICT_LENGTH_UNDERMINING = 2 - -# Short term memory retention time in seconds. 
Zero to disable STM -STM_RETENTION = 0.8 - -# Fill result percentage -# 0.5 means that half of the values can by empty to still get valid results -# A lower value should theoretically avoid false positives -FILL_RESULT_PERCENTAGE = 0.1 - - - -######################################################### -# Misc configuration options ############################ -######################################################### - -# Loglevel -import logging -LOGLEVEL = logging.ERROR - - -######################################################### -# Experimental configuration options #################### -######################################################### - -# Additional FFT analysis and comparison for CHUNKS/2 length -FFT_SHIFT = False + def showconfig(self): + print ('current config:') + for section in self.config.sections(): + print (str(section)) + for option in self.config.options(section): + print (' ' + str(option) + ' = ' + str(self.getoption(section, option))) diff --git a/sopare/filter.py b/sopare/filter.py index d80adb9..dcacc2e 100644 --- a/sopare/filter.py +++ b/sopare/filter.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain @@ -21,22 +21,20 @@ import logging import numpy import sopare.worker -import sopare.config import sopare.characteristics -import sopare.hatch class filtering(): - def __init__(self, hatch): - self.hatch = hatch + def __init__(self, cfg): + self.cfg = cfg self.first = True self.queue = multiprocessing.Queue() - self.characteristic = sopare.characteristics.characteristic(self.hatch) - self.worker = sopare.worker.worker(self.hatch, self.queue) + self.characteristic = sopare.characteristics.characteristic(self.cfg.getfloatoption('characteristic', 'PEAK_FACTOR')) + self.worker = sopare.worker.worker(self.cfg, self.queue) self.data_shift = [ ] self.last_data = None self.data_shift_counter = 0 - self.logger = self.hatch.get('logger').getlog() + self.logger = self.cfg.getlogger().getlog() self.logger = logging.getLogger(__name__) def stop(self): @@ -54,17 +52,16 @@ def check_for_windowing(meta): return True return False - @staticmethod - def get_chunked_norm(nfft): + def get_chunked_norm(self, nfft): chunked_norm = [ ] progessive = 1 - i = sopare.config.MIN_PROGRESSIVE_STEP + i = self.cfg.getintoption('characteristic', 'MIN_PROGRESSIVE_STEP') for x in range(0, nfft.size, i): - if (hasattr(sopare.config, 'START_PROGRESSIVE_FACTOR') and x >= sopare.config.START_PROGRESSIVE_FACTOR): - progessive += progessive * sopare.config.PROGRESSIVE_FACTOR + if (self.cfg.hasoption('characteristic', 'START_PROGRESSIVE_FACTOR') and x >= self.cfg.getfloatoption('characteristic', 'START_PROGRESSIVE_FACTOR')): + progessive += progessive * self.cfg.getfloatoption('characteristic', 'PROGRESSIVE_FACTOR') i += int(progessive) - if (i > sopare.config.MAX_PROGRESSIVE_STEP): - i = sopare.config.MAX_PROGRESSIVE_STEP + if (i > self.cfg.getintoption('characteristic', 'MAX_PROGRESSIVE_STEP')): + i = self.cfg.getintoption('characteristic', 'MAX_PROGRESSIVE_STEP') chunked_norm.append( nfft[x:x+i].sum() ) return numpy.array(chunked_norm) @@ -80,7 +77,7 @@ def n_shift(self, data): self.data_shift = [ ] self.data_shift_counter = 0 if 
(self.data_shift_counter == 0): - self.data_shift = [ v for v in range(0, sopare.config.CHUNKS/2) ] + self.data_shift = [ v for v in range(0, self.cfg.getintoption('stream', 'CHUNKS')/2) ] self.data_shift.extend(data[len(data)/2:]) elif (self.data_shift_counter == 1): self.data_shift = self.data_shift[len(self.data_shift)/2:] @@ -91,13 +88,13 @@ def n_shift(self, data): self.last_data = data self.data_shift_counter += 1 - + def filter(self, data, meta): self.n_shift(data) shift_fft = None - if (self.first == False or sopare.config.HANNING == False or len(data) < sopare.config.CHUNK): + if (self.first == False or self.cfg.getbool('characteristic', 'HANNING') == False or len(data) < self.cfg.getintoption('stream', 'CHUNKS')): fft = numpy.fft.rfft(data) - if (len(self.data_shift) >= sopare.config.CHUNKS): + if (len(self.data_shift) >= self.cfg.getintoption('stream', 'CHUNKS')): shift_fft = numpy.fft.rfft(self.data_shift) self.first = self.check_for_windowing(meta) elif (self.first == True): @@ -107,17 +104,17 @@ def filter(self, data, meta): hl += 1 hw = numpy.hanning(hl) fft = numpy.fft.rfft(data * hw) - if (len(self.data_shift) >= sopare.config.CHUNKS): + if (len(self.data_shift) >= self.cfg.getintoption('stream', 'CHUNKS')): hl = len(self.data_shift) if (hl % 2 != 0): hl += 1 hw = numpy.hanning(hl) shift_fft = numpy.fft.rfft(self.data_shift * hw) self.first = False - fft[sopare.config.HIGH_FREQ:] = 0 - fft[:sopare.config.LOW_FREQ] = 0 + fft[self.cfg.getintoption('characteristic', 'HIGH_FREQ'):] = 0 + fft[:self.cfg.getintoption('characteristic', 'LOW_FREQ')] = 0 data = numpy.fft.irfft(fft) - nfft = fft[sopare.config.LOW_FREQ:sopare.config.HIGH_FREQ] + nfft = fft[self.cfg.getintoption('characteristic', 'LOW_FREQ'):self.cfg.getintoption('characteristic', 'HIGH_FREQ')] nfft = numpy.abs(nfft) nfft[nfft == 0] = numpy.NaN nfft = numpy.log10(nfft)**2 @@ -130,11 +127,11 @@ def filter(self, data, meta): normalized = self.normalize(chunked_norm) characteristic = 
self.characteristic.getcharacteristic(fft, normalized, meta) - if (shift_fft != None and (hasattr(sopare.config, 'FFT_SHIFT') and sopare.config.FFT_SHIFT == True)): - shift_fft[sopare.config.HIGH_FREQ:] = 0 - shift_fft[:sopare.config.LOW_FREQ] = 0 + if ((shift_fft is not None) and self.cfg.hasoption('experimental', 'FFT_SHIFT') and self.cfg.getbool('experimental', 'FFT_SHIFT') == True): + shift_fft[self.cfg.getintoption('characteristic', 'HIGH_FREQ'):] = 0 + shift_fft[:self.cfg.getintoption('characteristic', 'LOW_FREQ')] = 0 shift_data = numpy.fft.irfft(shift_fft) - shift_nfft = fft[sopare.config.LOW_FREQ:sopare.config.HIGH_FREQ] + shift_nfft = fft[self.cfg.getintoption('characteristic', 'LOW_FREQ'):self.cfg.getintoption('characteristic', 'HIGH_FREQ')] shift_nfft = numpy.abs(nfft) shift_nfft[nfft == 0] = numpy.NaN shift_nfft = numpy.log10(nfft)**2 @@ -147,7 +144,7 @@ def filter(self, data, meta): shift_normalized = self.normalize(shift_chunked_norm) # TODO: Do some shift meta magic! shift_characteristic = self.characteristic.getcharacteristic(shift_fft, shift_normalized, meta) - characteristic['shift'] = shift_characteristic + characteristic['shift'] = shift_characteristic obj = { 'action': 'data', 'token': data, 'fft': fft, 'norm': normalized, 'meta': meta, 'characteristic': characteristic } self.queue.put(obj) diff --git a/sopare/log.py b/sopare/log.py index 40b7cd0..f83522e 100644 --- a/sopare/log.py +++ b/sopare/log.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain @@ -18,25 +18,26 @@ """ import logging -import sopare.config class log(): - def __init__(self, debug, error): + def __init__(self, debug, error, cfg = None): + if (error == True): + logging.basicConfig(filename='error.log', filemode='a', loglevel='ERROR') + else: + logging.basicConfig() self.logger = logging.getLogger() self.logformat = '%(levelname)s: %(message)s' - self.loglevel = logging.ERROR - if (hasattr(sopare.config, 'LOGFORMAT')): - self.logformat = sopare.config.LOGFORMAT - if (debug == True): - self.loglevel = logging.DEBUG - elif (hasattr(sopare.config, 'LOGLEVEL')): - self.loglevel = sopare.config.LOGLEVEL + self.loglevel = 'ERROR' + if (error == False and cfg != None and cfg.hasoption('misc', 'LOGLEVEL')): + check = cfg.getoption('misc', 'LOGLEVEL') + if (check != ''): + self.loglevel = check + if (error == False and debug == True): + self.loglevel = 'DEBUG' self.logger.setLevel(self.loglevel) ch = logging.StreamHandler() ch.setFormatter(self.logformat) - if (error == True): - logging.basicConfig(filename='error.log', filemode='a', loglevel=self.loglevel) def getlog(self): return self.logger diff --git a/sopare/numpyjsonencoder.py b/sopare/numpyjsonencoder.py index c6befba..2b4f6c9 100644 --- a/sopare/numpyjsonencoder.py +++ b/sopare/numpyjsonencoder.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain diff --git a/sopare/prepare.py b/sopare/prepare.py index 6887d5c..303df97 100644 --- a/sopare/prepare.py +++ b/sopare/prepare.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain @@ -21,16 +21,14 @@ import filter import sopare.visual import sopare.util -import sopare.config -import sopare.hatch class preparing(): - def __init__(self, hatch): - self.hatch = hatch + def __init__(self, cfg): + self.cfg = cfg self.visual = sopare.visual.visual() - self.util = sopare.util.util(self.hatch.get('debug')) - self.filter = sopare.filter.filtering(self.hatch) + self.util = sopare.util.util(self.cfg.getbool('cmdlopt', 'debug'), self.cfg.getfloatoption('characteristic', 'PEAK_FACTOR')) + self.filter = sopare.filter.filtering(self.cfg) self.silence = 0 self.force = False self.counter = 0 @@ -65,11 +63,11 @@ def valid_token(self, meta): return True def stop(self): - if (self.hatch.get('plot') == True): - self.visual.create_sample(self.hatch.get_plot_cache(), 'sample.png') + if (self.cfg.getbool('cmdlopt', 'plot') == True): + self.visual.create_sample(self.visual.get_plot_cache(), 'sample.png') self.tokenize([{ 'token': 'stop' }]) - self.filter.stop() self.filter_reset() + self.filter.stop() self.reset() def reset(self): @@ -91,11 +89,11 @@ def filter_reset(self): def force_tokenizer(self): self.force = True self.tokenize([ { 'token': 'start analysis', 'silence': self.silence, 'pos': self.counter, 'adapting': 0, 'volume': 0, 'peaks': self.peaks } ]) - + def prepare(self, buf, volume): data = numpy.fromstring(buf, dtype=numpy.int16) - if (self.hatch.get('plot') == True and self.hatch.get('endless_loop') == False): - self.hatch.extend_plot_cache(data) + if (self.cfg.getbool('cmdlopt', 'plot') == True and 
self.cfg.getbool('cmdlopt', 'endless_loop') == False): + self.visual.extend_plot_cache(data) self.buffer.extend(data) self.counter += 1 abs_data = abs(data) @@ -103,21 +101,21 @@ def prepare(self, buf, volume): self.token_peaks.append(adaptive) meta = [ ] - if (volume < sopare.config.THRESHOLD): + if (volume < self.cfg.getintoption('stream', 'THRESHOLD')): self.silence += 1 - if (self.silence == sopare.config.LONG_SILENCE): + if (self.silence == self.cfg.getintoption('stream', 'LONG_SILENCE')): self.new_word = True self.entered_silence = True self.peaks.extend(self.token_peaks) meta.append({ 'token': 'start analysis', 'silence': self.silence, 'pos': self.counter, 'adapting': adaptive, 'volume': volume, 'token_peaks': self.token_peaks, 'peaks': self.peaks }) self.peaks = [ ] - elif (self.silence > sopare.config.LONG_SILENCE): + elif (self.silence > self.cfg.getintoption('stream', 'LONG_SILENCE')): meta.append({ 'token': 'noop', 'silence': self.silence, 'pos': self.counter, 'adapting': adaptive, 'volume': volume }) else: self.entered_silence = False self.silence = 0 - if (len(self.buffer) == sopare.config.CHUNKS): + if (len(self.buffer) == self.cfg.getintoption('stream', 'CHUNKS')): self.new_token = True meta.append({ 'token': 'token', 'silence': self.silence, 'pos': self.counter, 'adapting': adaptive, 'volume': volume, 'token_peaks': self.token_peaks }) diff --git a/sopare/processing.py b/sopare/processing.py index 08ca48d..6bff051 100644 --- a/sopare/processing.py +++ b/sopare/processing.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain @@ -22,24 +22,22 @@ import prepare import time import io -import sopare.config -import sopare.hatch class processor(): - def __init__(self, hatch, buffering, live = True): + def __init__(self, cfg, buffering, live = True): self.append = False - self.hatch = hatch + self.cfg = cfg self.out = None - if (self.hatch.get('outfile') != None): - self.out = io.open(self.hatch.get('outfile'), 'wb') + if (self.cfg.getoption('cmdlopt', 'outfile') != None): + self.out = io.open(self.cfg.getoption('cmdlopt', 'outfile'), 'wb') self.buffering = buffering self.live = live self.timer = 0 self.silence_timer = 0 self.silence_buffer = [ ] - self.prepare = prepare.preparing(self.hatch) - self.logger = self.hatch.get('logger').getlog() + self.prepare = prepare.preparing(self.cfg) + self.logger = self.cfg.getlogger().getlog() self.logger = logging.getLogger(__name__) def stop(self, message): @@ -48,7 +46,7 @@ def stop(self, message): self.out.close() self.append = False self.silence_timer = 0 - if (self.hatch.get('endless_loop') == False): + if (self.cfg.getbool('cmdlopt', 'endless_loop') == False): self.prepare.stop() else: self.prepare.force_tokenizer() @@ -57,7 +55,7 @@ def stop(self, message): def check_silence(self, buf): volume = audioop.rms(buf, 2) - if (volume >= sopare.config.THRESHOLD): + if (volume >= self.cfg.getintoption('stream', 'THRESHOLD')): self.silence_timer = time.time() if (self.append == False): self.logger.info('starting append mode') @@ -75,9 +73,9 @@ def check_silence(self, buf): if (self.append == True): self.prepare.prepare(buf, volume) if (self.append == True and self.silence_timer > 0 - and self.silence_timer + sopare.config.MAX_SILENCE_AFTER_START < time.time() + and self.silence_timer + self.cfg.getfloatoption('stream', 'MAX_SILENCE_AFTER_START') < time.time() and self.live == True): self.stop("stop append mode because of silence") - if (self.append == True and self.timer + sopare.config.MAX_TIME < time.time() + if (self.append == True and 
self.timer + self.cfg.getfloatoption('stream', 'MAX_TIME') < time.time() and self.live == True): self.stop("stop append mode because time is up") diff --git a/sopare/recorder.py b/sopare/recorder.py index d185edf..db48367 100644 --- a/sopare/recorder.py +++ b/sopare/recorder.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain @@ -24,66 +24,49 @@ import time import sys import io +import sopare.audio_factory import sopare.buffering -import sopare.config -import sopare.hatch import sopare.visual class recorder(): - def __init__(self, hatch): - self.hatch = hatch - self.FORMAT = pyaudio.paInt16 - # mono - self.CHANNELS = 1 - self.pa = pyaudio.PyAudio() + def __init__(self, cfg): + self.cfg = cfg + self.audio_factory = sopare.audio_factory.audio_factory(cfg) self.queue = multiprocessing.JoinableQueue() self.running = True self.visual = sopare.visual.visual() - - # logging ################### - self.logger = self.hatch.get('logger').getlog() + self.logger = self.cfg.getlogger().getlog() self.logger = logging.getLogger(__name__) + self.buffering = sopare.buffering.buffering(self.cfg, self.queue) - self.stream = self.pa.open(format = self.FORMAT, - channels = self.CHANNELS, - rate=sopare.config.SAMPLE_RATE, - input=True, - output=False, - frames_per_buffer = sopare.config.CHUNK) - - self.buffering = sopare.buffering.buffering(self.hatch, self.queue) - if (hatch.get('infile') == None): + if (self.cfg.getoption('cmdlopt', 'infile') == None): self.recording() else: self.readfromfile() def debug_info(self): - defaultCapability = self.pa.get_default_host_api_info() - self.logger.debug(str(defaultCapability)) - self.logger.debug('SAMPLE_RATE: '+str(sopare.config.SAMPLE_RATE)) - self.logger.debug('CHUNK: '+str(sopare.config.CHUNK)) 
- - + self.logger.debug('SAMPLE_RATE: '+str(self.cfg.getintoption('stream', 'SAMPLE_RATE'))) + self.logger.debug('CHUNK: '+str(self.cfg.getintoption('stream', 'CHUNK'))) def readfromfile(self): self.debug_info() - self.logger.info("* reading file " + self.hatch.get('infile')) - file = io.open(self.hatch.get('infile'), 'rb', buffering = sopare.config.CHUNK) + self.logger.info("* reading file " + self.cfg.getoption('cmdlopt', 'infile')) + file = io.open(self.cfg.getoption('cmdlopt', 'infile'), 'rb', buffering = self.cfg.getintoption('stream', 'CHUNK')) while True: - buf = file.read(sopare.config.CHUNK * 2) + buf = file.read(self.cfg.getintoption('stream', 'CHUNK') * 2) if buf: self.queue.put(buf) - if (self.hatch.get('plot') == True): + if (self.cfg.getbool('cmdlopt', 'plot') == True): data = numpy.fromstring(buf, dtype=numpy.int16) - self.hatch.extend_plot_cache(data) + self.visual.extend_plot_cache(data) else: self.queue.close() break file.close() once = False - if (self.hatch.get('plot') == True): - self.visual.create_sample(self.hatch.get_plot_cache(), 'sample.png') + if (self.cfg.getbool('cmdlopt', 'plot') == True): + self.visual.create_sample(self.visual.get_plot_cache(), 'sample.png') while (self.queue.qsize() > 0): if (once == False): self.logger.debug('waiting for queue to finish...') @@ -96,12 +79,13 @@ def readfromfile(self): sys.exit() def recording(self): + self.stream = self.audio_factory.open(self.cfg.getintoption('stream', 'SAMPLE_RATE')) self.debug_info() self.logger.info("start endless recording") while self.running: try: if (self.buffering.is_alive()): - buf = self.stream.read(sopare.config.CHUNK) + buf = self.stream.read(self.cfg.getintoption('stream', 'CHUNK')) self.queue.put(buf) else: self.logger.info("Buffering not alive, stop recording") @@ -109,7 +93,6 @@ def recording(self): break except IOError as e: self.logger.warning("stream read error "+str(e)) - self.stop() sys.exit() @@ -121,6 +104,5 @@ def stop(self): self.buffering.terminate() 
except: pass - self.stream.stop_stream() - self.stream.close() - self.pa.terminate() + self.audio_factory.close() + self.audio_factory.terminate() diff --git a/sopare/stm.py b/sopare/stm.py index 2865e3b..02f020c 100644 --- a/sopare/stm.py +++ b/sopare/stm.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain @@ -19,12 +19,13 @@ import time import logging -import sopare.config class short_term_memory(): - def __init__(self, debug): - self.debug = debug + def __init__(self, cfg): + self.cfg = cfg + self.debug = self.cfg.getbool('cmdlopt', 'debug') + self.last_debug_info = '' self.last_results = [ ] self.last_time = 0 @@ -33,13 +34,18 @@ def get_stm_results(self, results): stm_results.extend(results) return stm_results - def get_results(self, results): + def get_stm_debug_info(self, debug_info): + return self.last_debug_info + debug_info + + def get_results(self, results, debug_info): if (results == None or len(results) == 0): - return results + return results, debug_info if (time.time() < self.last_time): logging.debug('stm input: ' + str(results) + ' ' + str(self.last_results)) results = self.get_stm_results(results) + debug_info = self.get_stm_debug_info(debug_info) logging.debug('stm mnodification: ' + str(results)) self.last_results = results - self.last_time = time.time() + sopare.config.STM_RETENTION - return results + self.last_debug_info = debug_info + self.last_time = time.time() + self.cfg.getfloatoption('compare', 'STM_RETENTION') + return results, debug_info diff --git a/sopare/util.py b/sopare/util.py index 3a4532e..a91af10 100644 --- a/sopare/util.py +++ b/sopare/util.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin 
Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain @@ -31,9 +31,9 @@ class util: - def __init__(self, debug): + def __init__(self, debug, peak_factor): self.debug = debug - self.characteristic = sopare.characteristics.characteristic(debug) + self.characteristic = sopare.characteristics.characteristic(peak_factor) self.cache = { } def showdictentriesbyid(self): @@ -118,7 +118,7 @@ def prepare_dict_model(characteristics): for o in characteristics: characteristic, meta = o for m in meta: - token = m['token'] + token = m['token'] if (token != 'stop'): if (characteristic != None): tokens.append(characteristic) @@ -200,13 +200,13 @@ def saverawwave(filename, start, end, raw): wf.setnchannels(1) wf.setsampwidth(2) wf.setframerate(44100) - data = raw[start:end] + data = raw[start:end] wf.writeframes(b''.join(data)) @staticmethod def savefilteredwave(filename, buffer): scaled = numpy.int16(buffer/numpy.max(numpy.abs(buffer)) * 32767) - write(__wavedestination__+filename+'.wav', 44100, scaled) + write(__wavedestination__+filename+'.wav', 44100, scaled) @staticmethod def manhatten_distance(arr1, arr2): diff --git a/sopare/version.py b/sopare/version.py index 6117d71..3739f6e 100644 --- a/sopare/version.py +++ b/sopare/version.py @@ -1,2 +1,2 @@ -__version__ = "1.4.0" +__version__ = "1.5.0" diff --git a/sopare/visual.py b/sopare/visual.py index a02bc0f..ba6427e 100644 --- a/sopare/visual.py +++ b/sopare/visual.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain @@ -25,10 +25,15 @@ class visual: def __init__(self): - self.init = 1 + self.plot_cache = [ ] def create_sample(self, data, filename): pyplot.plot(data) pyplot.savefig(__plotdestination__+filename) pyplot.clf() - + + def extend_plot_cache(self, data): + self.plot_cache.extend(data) + + def get_plot_cache(self): + return self.plot_cache diff --git a/sopare/worker.py b/sopare/worker.py index 0d98d6a..79f3d0f 100644 --- a/sopare/worker.py +++ b/sopare/worker.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) +Copyright (C) 2015 - 2018 Martin Kauss (yo@bishoph.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain @@ -17,7 +17,7 @@ under the License. """ -import multiprocessing +import multiprocessing import logging import uuid import sopare.util @@ -25,21 +25,19 @@ import sopare.analyze import sopare.characteristics import sopare.comparator -import sopare.config -import sopare.hatch class worker(multiprocessing.Process): - def __init__(self, hatch, queue): + def __init__(self, cfg, queue): multiprocessing.Process.__init__(self, name="worker for filtered data") - self.hatch = hatch + self.cfg = cfg self.queue = queue self.visual = sopare.visual.visual() - self.util = sopare.util.util(self.hatch.get('debug')) - self.logger = self.hatch.get('logger').getlog() + self.util = sopare.util.util(self.cfg.getbool('cmdlopt', 'debug'), self.cfg.getfloatoption('characteristic', 'PEAK_FACTOR')) + self.logger = self.cfg.getlogger().getlog() self.logger = logging.getLogger(__name__) - self.analyze = sopare.analyze.analyze(self.hatch.get('debug')) - self.compare = sopare.comparator.compare(self.hatch.get('debug'), self.util) + self.analyze = sopare.analyze.analyze(self.cfg) + self.compare = sopare.comparator.compare(self.cfg.getbool('cmdlopt', 'debug'), self.util) self.running = True self.counter = 0 self.plot_counter = 0 @@ 
-56,7 +54,7 @@ def __init__(self, hatch, queue): def reset(self): self.counter = 0 - if (self.hatch.get('wave') == True and len(self.rawbuf) > 0): + if (self.cfg.getbool('cmdlopt', 'wave') == True and len(self.rawbuf) > 0): self.save_wave_buf() self.rawbuf = [ ] self.raw = [ ] @@ -73,7 +71,7 @@ def save_wave_buf(self): self.util.savefilteredwave('filtered_results'+str(self.reset_counter), self.rawbuf) def remove_silence(self, m): - silence = ((sopare.config.LONG_SILENCE * sopare.config.CHUNK) / 4096) - 4 # TODO: Find auto value or make configurable + silence = ((self.cfg.getintoption('stream', 'LONG_SILENCE') * self.cfg.getintoption('stream', 'CHUNK')) / 4096) - 4 # TODO: Find auto value or make configurable if (silence < 0): silence = 0 for x in range(len(self.character) - 1, len(self.character) - silence, -1): @@ -90,29 +88,29 @@ def run(self): obj = self.queue.get() if (obj['action'] == 'data'): raw_token = obj['token'] - if (self.hatch.get('wave') == True or True): # TODO: "or True" is just temporary for testing. Must be removed later on! + if (self.cfg.getbool('cmdlopt', 'wave') == True or True): # TODO: "or True" is just temporary for testing. Must be removed later on! 
self.rawbuf.extend(raw_token) fft = obj['fft'] - if (self.hatch.get('plot') == True): + if (self.cfg.getbool('cmdlopt', 'plot') == True): self.rawfft.extend(fft) meta = obj['meta'] norm = obj['norm'] characteristic = obj['characteristic'] self.character.append((characteristic, meta)) self.compare.word(self.character) - if (self.hatch.get('dict') != None): + if (self.cfg.getoption('cmdlopt', 'dict') != None): self.raw_character.append({ 'fft': fft, 'norm': norm, 'meta': meta }) if (characteristic != None): self.logger.debug('characteristic = ' + str(self.counter) + ' ' + str(characteristic)) self.logger.debug('meta = '+str(meta)) - if (self.hatch.get('wave') == True): + if (self.cfg.getbool('cmdlopt', 'wave') == True): self.util.savefilteredwave('token'+str(self.counter)+self.uid, raw_token) - if (self.hatch.get('plot') == True and self.plot_counter < 6): + if (self.cfg.getbool('cmdlopt', 'plot') == True and self.plot_counter < 6): self.visual.create_sample(characteristic['norm'], 'norm'+str(self.plot_counter)+'.png') self.visual.create_sample(fft, 'fft'+str(self.plot_counter)+'.png') self.plot_counter += 1 self.counter += 1 - elif (obj['action'] == 'reset' and self.hatch.get('dict') == None): + elif (obj['action'] == 'reset' and self.cfg.getoption('cmdlopt', 'dict') == None): self.reset() elif (obj['action'] == 'stop'): self.running = False @@ -121,17 +119,17 @@ def run(self): for m in meta: if (m['token'] == 'start analysis'): self.remove_silence(m) - if (self.hatch.get('dict') == None): + if (self.cfg.getoption('cmdlopt', 'dict') == None): self.analyze.do_analysis(self.compare.get_results(), self.character, self.rawbuf) else: - self.util.store_raw_dict_entry(self.hatch.get('dict'), self.raw_character) + self.util.store_raw_dict_entry(self.cfg.getoption('cmdlopt', 'dict'), self.raw_character) self.reset() - if (self.hatch.get('wave') == True and len(self.rawbuf) > 0): + if (self.cfg.getbool('cmdlopt', 'wave') == True and len(self.rawbuf) > 0): self.save_wave_buf() 
self.queue.close() - if (self.hatch.get('plot') == True): + if (self.cfg.getbool('cmdlopt', 'plot') == True): self.visual.create_sample(self.rawfft, 'fft.png') diff --git a/test/test_analyze.py b/test/test_analyze.py index 9cbee81..d8c904d 100644 --- a/test/test_analyze.py +++ b/test/test_analyze.py @@ -19,15 +19,15 @@ import unittest import sopare.util as util -import sopare.config as config import sopare.analyze as analyze class test_analyze(unittest.TestCase): - def __init__(self, debug): + def __init__(self, debug, cfg): print ('analyze test preparation...') - self.util = util.util(debug) - self.analyze = analyze.analyze(debug) + self.cfg = cfg + self.util = util.util(debug, cfg.getfloatoption('characteristic', 'PEAK_FACTOR')) + self.analyze = analyze.analyze(cfg) self.test_dict = self.create_test_dict() self.dict_analysis = self.analyze.prepare_test_analysis(self.test_dict) self.test_analyze_get_match() @@ -39,7 +39,6 @@ def create_test_dict(self): def test_analyze_get_match(self): print ('testing analyze get_match...') - # Normal conditions for t in range(1,4): test_framing, correct_object = self.create_test_framing(t) @@ -50,7 +49,7 @@ def test_analyze_get_match(self): # Testing leading empty results test_framing, correct_object = self.create_test_framing(t) test_framing.insert(0, '') - config.FILL_RESULT_PERCENTAGE = 0.1 + self.cfg.setoption('compare', 'FILL_RESULT_PERCENTAGE', '0.1') result = self.analyze.get_match(test_framing) print ('testing leading space '+str(result) + ' == ' + str(correct_object)) self.assertSequenceEqual(result, correct_object, 'test_analyze_get_match leading results failed!') @@ -69,7 +68,7 @@ def test_analyze_get_match(self): self.assertSequenceEqual(result, correct_object, 'test_analyze_get_match order results failed!') # Testing strict length - config.STRICT_LENGTH_CHECK = True + self.cfg.setoption('compare' , 'STRICT_LENGTH_CHECK', 'True') #config.STRICT_LENGTH_UNDERMINING = 2 test_framing, correct_object = 
self.create_test_framing_order_strict_length() result = self.analyze.get_match(test_framing) @@ -77,12 +76,12 @@ def test_analyze_get_match(self): self.assertSequenceEqual(result, correct_object, 'test_analyze_get_match strict length results failed!') # Testing false leading results - config.STRICT_LENGTH_CHECK = True + self.cfg.setoption('compare' , 'STRICT_LENGTH_CHECK', 'True') test_framing, correct_object = self.create_test_framing_false_leading_results() result = self.analyze.get_match(test_framing) print ('testing false leading results '+str(result) + ' == ' + str(correct_object)) self.assertSequenceEqual(result, correct_object, 'test_analyze_get_match false leading results failed!') - + def create_test_framing(self, number): test_framing = [ ] correct_object = [ ] @@ -116,7 +115,7 @@ def create_test_framing_order_strict_length(self): single_frames.append(frame) test_framing.extend(single_frames[0]) test_framing.extend(single_frames[1]) - too_short = single_frames[0][0: len(single_frames[0]) - (config.STRICT_LENGTH_UNDERMINING + 1)] + too_short = single_frames[0][0: len(single_frames[0]) - (self.cfg.getintoption('compare', 'STRICT_LENGTH_UNDERMINING') + 1)] test_framing.extend(too_short) test_framing.extend(single_frames[2]) correct_object.insert(2, '') diff --git a/test/test_audio.py b/test/test_audio.py index a4cbe6e..c2b1d5b 100644 --- a/test/test_audio.py +++ b/test/test_audio.py @@ -22,6 +22,13 @@ import pyaudio import audioop import math +import time +import sys +import test_multi +sys.path.append('../sopare') +import sopare.log +import sopare.config +import sopare.audio_factory class test_audio(unittest.TestCase): @@ -35,25 +42,14 @@ def __init__(self): self.good_chunks = [ ] self.silence = [ ] self.stream = None - self.pa = pyaudio.PyAudio() - print ('\n\n##### Default input device info #####') - for k, v in self.pa.get_default_input_device_info().iteritems(): - print (str(k) + ': ' + str(v)) - print ('#####################################\n\n') - def 
open(self, sample_rate, chunk): - test_result = False - try: - self.stream = self.pa.open(format = pyaudio.paInt16, - channels = 1, - rate=sample_rate, - input=True, - output=False) - test_result = True - except IOError as e: - test_result = False - print ("Error: " + str(e)) - return test_result + cfg = sopare.config.config() + logger = sopare.log.log(True, False) + cfg.addlogger(logger) + + self.audio_factory = sopare.audio_factory.audio_factory(cfg) + self.queue = multiprocessing.JoinableQueue() + self.multi = test_multi.multi(self.queue) def read(self, chunks, loops): test_result = False @@ -61,6 +57,7 @@ def read(self, chunks, loops): try: for x in range(loops): buf = self.stream.read(chunks) + self.queue.put(buf) current_vol = audioop.rms(buf, 2) if (current_vol > vol): vol = current_vol @@ -78,27 +75,24 @@ def test_environment(self): def test_sample_rates(self): print ('testing different SAMPLE_RATEs ... this may take a while!\n\n') for test_sample_rate in test_audio.SAMPLE_RATES: - test_result = ta.open(test_sample_rate, test_sample_rate) - if (test_result == True): - self.good_sample_rates.append(test_sample_rate) + self.stream = self.audio_factory.open(test_sample_rate) if (self.stream != None): - self.stream.close() + self.good_sample_rates.append(test_sample_rate) + self.audio_factory.close() def test_chunks(self): print ('testing different CHUNK sizes ... 
this may take a while!\n\n') for good_sample_rate in self.good_sample_rates: for chunks in test_audio.CHUNKS: - test_result = ta.open(good_sample_rate, chunks) - if (test_result == True): + self.stream = self.audio_factory.open(good_sample_rate) + if (self.stream != None): if (good_sample_rate not in test_audio.TEST_RESULTS): test_audio.TEST_RESULTS[good_sample_rate] = [ ] read_test_result = ta.read(chunks, 10) if (read_test_result == True): self.good_chunks.append(chunks) test_audio.TEST_RESULTS[good_sample_rate].append(chunks) - - if (self.stream != None): - self.stream.close() + self.audio_factory.close() def test_results(self): recommendations = { } @@ -109,7 +103,7 @@ def test_results(self): found = True print ('\n\n') if (found == True): - best = sorted(recommendations, key=recommendations.__getitem__, reverse=True) + best = sorted(recommendations, key=recommendations.__getitem__, reverse = True) print ('Your sopare/config.py recommendations:\n') print ('SAMPLE_RATE = '+str(max(best))) print ('CHUNK = '+str(min(test_audio.TEST_RESULTS[best[0]]))) @@ -120,8 +114,19 @@ def test_results(self): print ('However, here are the sucessful tested sample rates:') print (str(test_audio.TEST_RESULTS)) + def stop(self): + while (self.queue.qsize() > 0): + time.sleep(.1) # wait for all threads to finish their work + self.queue.close() + self.multi.stop() + self.queue.join_thread() + self.audio_factory.close() + self.audio_factory.terminate() + sys.exit() + ta = test_audio() ta.test_environment() ta.test_sample_rates() ta.test_chunks() ta.test_results() +ta.stop() diff --git a/test/test_filter.py b/test/test_filter.py index c07bb0d..4f38f75 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -18,7 +18,6 @@ """ import unittest -import sopare.hatch as hatch import sopare.util as util import sopare.config as config import sopare.filter as filter @@ -26,15 +25,12 @@ class test_filter(unittest.TestCase): - def __init__(self, debug): + def __init__(self, debug, cfg): 
print ('filter test preparation...') - self.util = util.util(debug) - hatched = hatch.hatch() - hatched.add("debug", debug) - logger = log.log(debug, False) - hatched.add("logger", logger) - self.filter = filter.filtering(hatched) - config.CHUNKS = 10 + self.util = util.util(debug, cfg.getfloatoption('characteristic', 'PEAK_FACTOR')) + cfg.setoption('stream', 'CHUNKS', '10') + self.filter = filter.filtering(cfg) + self.CHUNKS = 10 self.test_filter_n_shift() print ('filter tests run successful.') self.filter.stop() @@ -42,13 +38,13 @@ def __init__(self, debug): def test_filter_n_shift(self): print ('testing filter n_shift...') data_object_array = [ v for v in range(0, 40) ] - for x in xrange(0, len(data_object_array), config.CHUNKS): - data_object = data_object_array[x:x+config.CHUNKS] + for x in xrange(0, len(data_object_array), self.CHUNKS): + data_object = data_object_array[x:x+self.CHUNKS] self.filter.n_shift(data_object) correct_object = [ ] if (x == 0): self.filter.first = False else: - correct_object = data_object_array[x-config.CHUNKS/2:x+config.CHUNKS/2] + correct_object = data_object_array[x-self.CHUNKS/2:x+self.CHUNKS/2] print ('testing n_shift '+str(self.filter.data_shift) + ' == ' + str(correct_object)) self.assertSequenceEqual(self.filter.data_shift, correct_object, 'test_filter_n_shift 0 failed!') diff --git a/sopare/hatch.py b/test/test_multi.py similarity index 53% rename from sopare/hatch.py rename to test/test_multi.py index 669b5be..bc3b6b4 100644 --- a/sopare/hatch.py +++ b/test/test_multi.py @@ -3,13 +3,10 @@ """ Copyright (C) 2015 - 2017 Martin Kauss (yo@bishoph.org) - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the @@ -17,22 +14,23 @@ under the License. """ -class hatch(): - - def __init__(self): - self.plot_cache = [ ] - self.key_value_store = { } - - def add(self, key, value): - self.key_value_store[key] = value - - def get(self, key): - if (key in self.key_value_store): - return self.key_value_store[key] - return None - - def extend_plot_cache(self, data): - self.plot_cache.extend(data) - - def get_plot_cache(self): - return self.plot_cache +import multiprocessing +import numpy + +class multi(multiprocessing.Process): + + def __init__(self, queue): + multiprocessing.Process.__init__(self, name="multiprocessing buffering queue") + self.queue = queue + self.running = True + self.start() + + def run(self): + while (self.running == True or self.queue.is_alive()): + buf = self.queue.get() + data = numpy.fromstring(buf, dtype=numpy.int16) + fft = numpy.fft.rfft(data) + + def stop(self): + self.running = False + self.terminate() diff --git a/test/unit_tests.py b/test/unit_tests.py index ffc4c8a..e2b6b8a 100644 --- a/test/unit_tests.py +++ b/test/unit_tests.py @@ -22,8 +22,8 @@ class unit_tests(): - def __init__(self, debug): + def __init__(self, debug, cfg): print ('starting analyze tests...') - test_analyze.test_analyze(debug) - test_filter.test_filter(debug) + test_analyze.test_analyze(debug, cfg) + test_filter.test_filter(debug, cfg) print ('unit_tests run successful!')