[Spring Release] Framework for administering and joining data collection studies on Fusion (#213)

* feat: support for daily insights
* fix: date panning logic
* note: bump build
* misc: build ver & subscription notes
* wip: background insights
* feat: api support for cocoa-pad verb fluency tests
* feat: create & view quests
* feat: edit quest & display quest info on mobile
* build: bump packages
* feat: add health data to quest view
* build: bump packages
* feat: quest routes, wip mail sender
* feat: add organizer name
* feat: present health data
* fix: list quest organizer on edit
* feat: fetch quest subscribers
Showing 50 changed files with 336,299 additions and 293 deletions.
@@ -1,3 +1,4 @@
 temp_unzip
 powerComparisons.png
 powerDistributions.png
+.env
@@ -0,0 +1,211 @@
import requests
import base64
import json
import os
from itertools import groupby

import numpy as np
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn

from dotenv import load_dotenv
load_dotenv()

nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

def lexical_frequency(word):
    # Normalize the raw unigram counts into relative frequencies, then look
    # up the word. The CSV is re-read on every call.
    rec = pd.read_csv("unigram_freq.csv")
    total = np.sum(rec["count"])
    rec["count"] = rec["count"] / total
    rec = rec.set_index("word")

    return float(rec.loc[word]["count"])

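# For reference, lexical_frequency assumes unigram_freq.csv has a "word"
# column and a "count" column (that is what the code above reads).
# A hypothetical two-row sketch, with made-up counts:
#
#   word,count
#   the,1000000
#   cat,5000
#
# lexical_frequency("cat") would then return 5000 / 1005000.
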
def repetition(list_):
    # Count repeated words after lemmatization, so "dogs" repeats "dog".
    _list = [lemmatizer.lemmatize(l) for l in list_]
    words_said = set(_list)
    return len(_list) - len(words_said)

def switching_clustering(list_, word_dict):
    # Map each word to its list of categories; unknown words get ["NA"].
    cat = []
    for l in list_:
        cat.append(word_dict.get(l, ["NA"]))

    # For each adjacent pair of words, record one shared category
    # ("" if the pair shares none).
    inter = []
    for i in range(len(cat) - 1):
        c1, c2 = cat[i], cat[i + 1]
        intersection = list(set(c1) & set(c2))
        inter.append(intersection[0] if intersection else "")

    # Run-length encode the shared-category sequence:
    # ["pets", "2", "", "1"] means a run of 2 "pets" links, then 1 unlinked pair.
    res = []
    for k, g in groupby(inter):
        res.extend([k, str(len(list(g)))])

    nb_clusters = 0
    for k in range(0, len(res) - 1, 2):
        c_name = res[k]
        c_size = res[k + 1]
        if c_name == "":
            # each unlinked pair boundary contributes a singleton cluster
            nb_clusters += int(c_size)
        else:
            nb_clusters += 1

    if len(res) < 1:
        return 0, 0, 0

    # A switch is taken to be a transition between successive clusters.
    nb_switches = nb_clusters - 1

    if res[0] == "" and len(res) == 2:
        # no adjacent words ever shared a category: every word is its own cluster
        nb_clusters = len(cat)
        nb_switches = nb_clusters - 1

    return nb_clusters, nb_switches, res

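# A quick trace with a toy dictionary (categories invented for illustration;
# the real ones come from animal_groups.txt):
#
#   toy_dict = {"cat": ["pets"], "dog": ["pets"],
#               "lion": ["feline"], "tuna": ["fish"]}
#   switching_clustering(["cat", "dog", "lion", "tuna"], toy_dict)
#
# cat/dog share "pets"; the other adjacent pairs share nothing, so
# inter = ["pets", "", ""] and res = ["pets", "1", "", "2"]. The {cat, dog}
# cluster plus the two singletons gives (3, 2, ["pets", "1", "", "2"]).
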
def generate_animal_dict():
    # Each line of animal_groups.txt maps a category to its member words;
    # a word may belong to several categories.
    animal_dict = {}

    with open("animal_groups.txt", "r") as f:
        for line in f:
            split = line.split(":")
            cat, words = split[0], split[1].split(",")
            words = [w.strip() for w in words]

            for w in words:
                if w in animal_dict:
                    animal_dict[w].append(cat)
                else:
                    animal_dict[w] = [cat]

    return animal_dict


animal_dict = generate_animal_dict()

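# The parser above implies a "category: word, word, ..." line format for
# animal_groups.txt. A hypothetical line (not the actual file contents):
#
#   pets: cat, dog, parrot
#
# would yield {"cat": ["pets"], "dog": ["pets"], "parrot": ["pets"]}.
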
def clean_list(list_, word_dict):
    # Keep only the words the category dictionary knows about.
    return [l for l in list_ if l in word_dict]

def animal_task(list_):
    measures = {}
    # discrepancy/asides
    cleaned_list = clean_list(list_, animal_dict)
    measures["total_word_count"] = len(list_)
    measures["lexical_frequency"] = [lexical_frequency(word) for word in cleaned_list]
    measures["repetition"] = repetition(cleaned_list)
    measures["word_count"] = len(cleaned_list)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in cleaned_list]))
    measures["error"] = len([w for w in list_ if w not in animal_dict])
    measures["nb_clusters"], measures["nb_switches"], measures["clusters"] = switching_clustering(cleaned_list, animal_dict)

    return measures


def fruit_veg_task(list_):
    measures = {}
    # discrepancy/asides
    # NOTE: fruit_dict is not defined anywhere in this file; it presumably
    # needs to be built the same way as animal_dict, from a fruit/vegetable
    # groups file.
    measures["lexical_frequency"] = [lexical_frequency(word) for word in list_]
    measures["repetition"] = repetition(list_)
    measures["word_count"] = len(list_)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in list_]))
    measures["error"] = len([w for w in list_ if w not in fruit_dict])
    measures["nb_clusters"], measures["nb_switches"], measures["fruit_veg_categories"] = switching_clustering(list_, fruit_dict)

    return measures


def f_starting_words(list_):
    measures = {}
    # discrepancy/asides
    measures["lexical_frequency"] = [lexical_frequency(word) for word in list_]
    measures["repetition"] = repetition(list_)
    measures["error"] = len([word for word in list_ if word[0].lower() != "f"])
    measures["word_count"] = len(list_)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in list_]))
    # No category dictionary exists for letter fluency; with an empty dict
    # every word maps to "NA", so the whole response counts as one cluster
    # with zero switches (a placeholder until a proper dictionary exists).
    measures["nb_clusters"], measures["nb_switches"], _ = switching_clustering(list_, {})

    return measures

def a_starting_words(list_):
    measures = {}
    # discrepancy/asides
    measures["lexical_frequency"] = [lexical_frequency(word) for word in list_]
    measures["repetition"] = repetition(list_)
    measures["error"] = len([word for word in list_ if word[0].lower() != "a"])
    measures["word_count"] = len(list_)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in list_]))
    # Same empty-dict placeholder as f_starting_words.
    measures["nb_clusters"], measures["nb_switches"], _ = switching_clustering(list_, {})

    return measures


def action_words(list_):
    measures = {}
    # discrepancy/asides
    measures["lexical_frequency"] = [lexical_frequency(word) for word in list_]
    measures["repetition"] = repetition(list_)
    measures["error"] = 0
    measures["word_count"] = len(list_)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in list_]))
    # Same empty-dict placeholder as the letter-fluency tasks.
    measures["nb_clusters"], measures["nb_switches"], _ = switching_clustering(list_, {})

    # A word counts as an error if none of its own WordNet synsets tag it as a verb.
    for w in list_:
        pos_l = []
        for tmp in wn.synsets(w):
            if tmp.name().split('.')[0] == w:
                pos_l.append(tmp.pos())
        if "v" not in pos_l:
            measures["error"] += 1

    return measures

def transcribe(audio_base64):
    try:
        endpoint_url = os.getenv('WHISPER_ENDPOINT')
        azureml_model_token = os.getenv('WHISPER_API_KEY')

        data = {
            "input_data": {
                "audio": [audio_base64],
                "language": ["en"]
            },
        }

        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {azureml_model_token}',  # You might need to manage authentication
            'azureml-model-deployment': 'openai-whisper-large-15'
        }
        response = requests.post(endpoint_url, headers=headers, data=json.dumps(data))

        if response.status_code == 200:
            text_data = response.json()[0]['text']
            return text_data
        else:
            print("error", response.status_code)
            return None
    except Exception as e:
        print(e)
        return None

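# A minimal sketch of driving transcribe() end to end, assuming
# WHISPER_ENDPOINT and WHISPER_API_KEY are set in .env and that sample.wav
# is a local recording (the filename is illustrative):
#
#   with open("sample.wav", "rb") as audio_file:
#       audio_base64 = base64.b64encode(audio_file.read()).decode("utf-8")
#   text = transcribe(audio_base64)  # None on any failure
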
if __name__ == "__main__":
    words = ["cat", "dog", "parrot", "dog", "tuna", "camel", "play"]
    words = [w.lower() for w in words]

    print(animal_task(words))
@@ -5,4 +5,6 @@ flask
 flask_cors
 pillow
 scipy
 fooof
+nltk
+python-dotenv