Merge branch 'testing'

Conflicts: sopare/filter.py
bishoph · Jan 28, 2018 · 873242e · 873242e
2 parents a033bf1 + 134f078
commit 873242e
Show file tree

Hide file tree

Showing 27 changed files with 548 additions and 406 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,4 +7,5 @@
 *.png
 error.log
 plugins/*
+config/*
 
diff --git a/NOTICE.txt b/NOTICE.txt
@@ -1,5 +1,5 @@
 SoPaRe
-Copyright 2015-2017
+Copyright 2015-2018
 
 This is the SOund PAttern REcognition project
 developed by Martin Kauss ([email protected]).

diff --git a/config/default.ini b/config/default.ini
@@ -0,0 +1,127 @@
+#########################################################
+# Stream prep and silence configuration options #########
+#########################################################
+
+[stream]
+
+# Read chunk size
+CHUNK = 512
+
+# Sample rate
+SAMPLE_RATE = 48000
+
+# Volume threshold when audio processing starts / silence
+THRESHOLD = 380
+
+# Silence time in seconds when analysis is called
+MAX_SILENCE_AFTER_START = 1.4
+
+# Time in seconds after which the analysis is forced
+MAX_TIME = 2.4
+
+# Start the analysis after reaching LONG_SILENCE
+LONG_SILENCE = 20
+
+# Characteristic length
+CHUNKS = 3072
+
+
+#########################################################
+# Characteristic configuration options ##################
+#########################################################
+
+[characteristic]
+
+# Steps boil down the data into smaller chunks of data.
+# Smaller steps mean more precision but normally require
+# more learned entries in the dictionary.
+# The progressive value is used if you want to pack
+# less relevant frequencies
+PROGRESSIVE_FACTOR = 0
+START_PROGRESSIVE_FACTOR = 600
+MIN_PROGRESSIVE_STEP = 5
+MAX_PROGRESSIVE_STEP = 5
+
+# Specifies the frequency ranges that are kept for further
+# analysis. Frequencies outside of the ranges are set to zero.
+# Human language can be found between 20 and 5000.
+LOW_FREQ = 20
+HIGH_FREQ = 600
+
+# Make use of Hann window function
+HANNING = true
+
+# Range factor for peaks
+PEAK_FACTOR = 0.7
+
+
+
+#########################################################
+# Compare configuration options #########################
+#########################################################
+
+[compare]
+
+# Min. number of tokens to identify the beginning of a word
+MIN_START_TOKENS = 3
+
+# Min. value for potential beginning of a word
+MARGINAL_VALUE = 0.7
+
+# Minimal similarity across all comparisons to
+# identify a complete word across all tokens
+MIN_CROSS_SIMILARITY = 0.8
+
+# Calculation basis for token/word comparison
+SIMILARITY_NORM = 0.6
+SIMILARITY_HEIGHT = 0.4
+SIMILARITY_DOMINANT_FREQUENCY = 0
+
+# Number of best matches to consider.
+# Value must be > 0.
+# If not specified or value < 1, the value is set to 1.
+NUMBER_OF_BEST_MATCHES = 2
+
+# Min. distance to keep a word
+MIN_LEFT_DISTANCE = 0.9
+MIN_RIGHT_DISTANCE = 0.7
+
+# Use the given number of results to assemble the result
+# 0 for all predictions
+MAX_WORD_START_RESULTS = 2
+MAX_TOP_RESULTS = 3
+
+# Enable or disable strict length check for words
+STRICT_LENGTH_CHECK = true
+# Value to soften the strict length check a bit to still
+# get quite precise results but to be less strict
+STRICT_LENGTH_UNDERMINING = 2
+
+# Short term memory retention time in seconds. Zero to disable STM
+STM_RETENTION = 1.2
+
+# Fill result percentage
+# 0.5 means that half of the values can be empty to still get valid results
+# A lower value should theoretically avoid false positives
+FILL_RESULT_PERCENTAGE = 0.1
+
+
+
+#########################################################
+# Misc configuration options ############################
+#########################################################
+
+[misc]
+
+# Loglevel (CRITICAL, ERROR, WARNING, INFO, DEBUG)
+LOGLEVEL = ERROR
+
+
+#########################################################
+# Experimental configuration options ####################
+#########################################################
+
+[experimental]
+
+# Additional FFT analysis and comparison for CHUNKS/2 length
+FFT_SHIFT = false
diff --git a/readme.md b/readme.md
@@ -71,6 +71,7 @@ Next steps/TODOs:
 
   * Optimizations (e.g. word separation, performance)
   * Python3 compatibility and testing, install guides
+  * Individual loglevels per class
 
 
 Project status:
@@ -89,7 +90,8 @@ Usage:
 
  -l --loop           : loop forever
 
- -e --error          : redirect sdterr to error.log
+ -e --error          : redirect output to error.log
+                       loglevel is forced to error!
 
  -p --plot           : plot results (only without loop option)
 
@@ -114,6 +116,8 @@ Usage:
  -d --delete [word]  : delete [word] from dictionary and exits.
                        '*' deletes everything!
 
+ -i --ini    [file]  : use alternative configuration file
+
  -a --analysis       : show dictionary analysis and exits.
 
  -u --unit           : run unit tests
@@ -140,7 +144,7 @@ python2 sopare.py -c
 ./sopare.py -v -l
 
 
-Changing config options and new SOPARE versions requires re-training.
+Changing config options and new SOPARE versions require re-training.
 Delete your training files and the dictionary entries before continuing:
 
 ./sopare.py -d "*"

diff --git a/sopare.py b/sopare.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 """
-Copyright (C) 2015 - 2017 Martin Kauss ([email protected])
+Copyright (C) 2015 - 2018 Martin Kauss ([email protected])
 
 Licensed under the Apache License, Version 2.0 (the "License"); you may
 not use this file except in compliance with the License. You may obtain
@@ -19,9 +19,9 @@
 
 import sys
 import getopt
+import sopare.config as config
 import sopare.util as util
 import sopare.recorder as recorder
-import sopare.hatch as hatch
 import sopare.log as log
 import test.unit_tests as tests
 from sopare.version import __version__
@@ -35,14 +35,18 @@ def main(argv):
     plot = False
     wave = False
     error = False
+    cfg_ini = None
+
+    recreate = False
+    unit = False
 
     print ("sopare "+__version__)
 
     if (len(argv) > 0):
-        try:                                
-            opts, args = getopt.getopt(argv, "ahelpv~cous:w:r:t:d:",
+        try:
+            opts, args = getopt.getopt(argv, "ahelpv~cous:w:r:t:d:i:",
              ["analysis", "help", "error", "loop", "plot", "verbose", "wave", "create", "overview", "unit",
-              "show=", "write=", "read=", "train=", "delete="
+              "show=", "write=", "read=", "train=", "delete=", "ini="
              ])
         except getopt.GetoptError:
             usage()
@@ -66,8 +70,7 @@ def main(argv):
             if (opt in ("-~", "--wave")):
                 wave = True
             if opt in ("-c", "--create"):
-                recreate_dict(debug)
-                sys.exit(0)
+                recreate = True
             if opt in ("-o", "--overview"):
                 show_dict_ids(debug)
                 sys.exit(0)
@@ -86,56 +89,77 @@ def main(argv):
             if opt in ("-d", "--delete"):
                 delete_word(arg, debug)
                 sys.exit(0)
+            if opt in ("-i", "--ini"):
+                cfg_ini = arg
             if opt in ("-u", "--unit"):
-                unit_tests(debug)
-                sys.exit(0)
+                unit = True
+
+    cfg = create_config(cfg_ini, endless_loop, debug, plot, wave, outfile, infile, dict, error)
 
+    if (recreate == True):
+        recreate_dict(debug, cfg)
+        sys.exit(0)
 
-    hatched = hatch.hatch()
-    hatched.add("endless_loop", endless_loop)
-    hatched.add("debug", debug)
-    hatched.add("plot", plot)
-    hatched.add("wave", wave)
-    hatched.add("outfile", outfile)
-    hatched.add("infile",infile )
-    hatched.add("dict", dict)
-    logger = log.log(debug, error) 
-    hatched.add("logger", logger)
-    recorder.recorder(hatched)
+    if (unit == True):
+        unit_tests(debug, cfg)
+        sys.exit(0)
 
-def recreate_dict(debug):
+
+    recorder.recorder(cfg)
+
+def create_config(cfg_ini, endless_loop, debug, plot, wave, outfile, infile, dict, error):
+    if (cfg_ini == None):
+        cfg = config.config()
+    else:
+        cfg = config.config(cfg_ini)
+    logger = log.log(debug, error, cfg)
+    cfg.addsection('cmdlopt')
+    cfg.setoption('cmdlopt', 'endless_loop', str(endless_loop))
+    cfg.setoption('cmdlopt', 'debug', str(debug))
+    cfg.setoption('cmdlopt', 'plot', str(plot))
+    cfg.setoption('cmdlopt', 'wave', str(wave))
+    cfg.setoption('cmdlopt', 'outfile', outfile)
+    cfg.setoption('cmdlopt', 'infile', infile)
+    cfg.setoption('cmdlopt', 'dict', dict)
+    cfg.addlogger(logger)
+    return cfg
+
+def recreate_dict(debug, cfg):
     print ("recreating dictionary from raw input files...")
-    utilities = util.util(debug)
+    utilities = util.util(debug, cfg.getfloatoption('characteristic', 'PEAK_FACTOR'))
     utilities.recreate_dict_from_raw_files()
 
 def delete_word(dict, debug):
     if (dict != "*"):
         print ("deleting "+dict+" from dictionary")
     else:
         print ("deleting all enttries from dictionary")
-    utilities = util.util(debug)
+    utilities = util.util(debug, None)
     utilities.deletefromdict(dict)
 
 def show_word_entries(dict, debug):
     print (dict+" entries in dictionary:")
     print
-    utilities = util.util(debug)
+    utilities = util.util(debug, None)
     utilities.showdictentry(dict)
 
 def show_dict_ids(debug):
     print ("current entries in dictionary:")
-    utilities = util.util(debug)
+    utilities = util.util(debug, None)
     utilities.showdictentriesbyid()
 
 def show_dict_analysis(debug):
     print ("dictionary analysis:")
-    utilities = util.util(debug)
-    print (utilities.compile_analysis(utilities.getDICT()))
-
-def unit_tests(debug):
+    utilities = util.util(debug, None)
+    analysis = utilities.compile_analysis(utilities.getDICT())
+    for id in analysis:
+        print (id)
+        for k, v in analysis[id].iteritems():
+            print (' ' + str(k) + ' ' + str(v))
+
+def unit_tests(debug, cfg):
     print ("starting unit tests...")
-    utilities = util.util(debug)
-    tests.unit_tests(debug)
+    tests.unit_tests(debug, cfg)
     print ("done.")
 
 def usage():
@@ -156,6 +180,7 @@ def usage():
     print (" -t --train  [word]  : add raw data to raw dictionary file\n")
     print (" -d --delete [word]  : delete [word] from dictionary and exits.")
     print ("                       '*' deletes everything!\n")
+    print (" -i --ini    [file]  : use alternative configuration file\n")
     print (" -a --analysis       : show dictionary analysis and exits.\n")
     print (" -u --unit           : run unit tests\n")