[Spring Release] Framework for administering and joining data collection studies on Fusion (#213)

* feat: support for daily insights

* fix: date panning logic

* note: bump build

* misc: build ver & subscription notes

* wip: background insights

* feat: api support for cocoa-pad verb fluency tests

* feat: create & view quests

* feat: edit quest & display quest info on mobile

* build: bump packages

* feat: add health data to quest view

* build: bump packages

* feat: quest routes, wip mail sender

* feat: add organizer name

* feat: present health data

* fix: list quest organizer on edit

* feat: fetch quest subscribers
oreHGA authored May 14, 2024
1 parent a57088c commit 74880a9
Showing 50 changed files with 336,299 additions and 293 deletions.
3 changes: 2 additions & 1 deletion analysis_api/.gitignore
@@ -1,3 +1,4 @@
temp_unzip
powerComparisons.png
powerDistributions.png
.env
619 changes: 619 additions & 0 deletions analysis_api/animal_groups.txt

Large diffs are not rendered by default.
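(The file's contents are not shown, but judging from generate_animal_dict in cocoa_pad.py below, each of its 619 lines appears to map a category to comma-separated members, along the lines of:

pets: cat, dog, hamster
fish: tuna, salmon, shark
)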

38 changes: 37 additions & 1 deletion analysis_api/app.py
@@ -8,8 +8,10 @@
from PIL import Image
from flask_cors import CORS
import base64
import re

import eeg
import cocoa_pad

app = Flask(__name__)
CORS(app, resources={r"/api/*": {"origins": "*"}})
@@ -22,7 +24,7 @@ def encode_image_to_base64(image_path):

# TODO: handle multiple files
@app.route('/api/v1/process_eeg', methods=['POST'])
-def process():
+def process_eeg():
try:
# Check if the POST request contains a file with the key 'file'
if 'file' not in request.files:
@@ -95,6 +97,40 @@ def process():
# TODO: endpoint for ERP analysis


# Endpoint for CoCoA-PAD analysis
@app.route('/api/v1/verbal_fluency', methods=['POST'])
def verbal_fluency():
print("request in here")
# incoming request - audio_base64, task_type
try:
# call whisper to get transcript
print("request.json", request.json)
print("request.json['audio_base64']", request.json['audio_base64'])
transcript = cocoa_pad.transcribe(request.json['audio_base64'])

if not transcript:
return jsonify({'error': 'error processing, unable to transcribe'}), 500

# convert transcript to lower case, remove punctuations
clean_transcript = re.sub(r'[^\w\s]', '', transcript.lower()).strip()

# split transcript into words
# TODO: support animals with multiple names
        words = clean_transcript.split()

        if request.json['task_type'] == "animal_task":
            measures = cocoa_pad.animal_task(words)

            return jsonify({'response': measures, 'transcript': transcript}), 200

        return jsonify({'error': 'unsupported task_type'}), 400
    except Exception as e:
        # exceptions are not JSON serializable; send the message string
        return jsonify({'error': 'error processing', 'message': str(e)}), 500


if __name__ == '__main__':
    print("running")
    app.run(debug=True, port=8000)
211 changes: 211 additions & 0 deletions analysis_api/cocoa_pad.py
@@ -0,0 +1,211 @@
import requests
import base64
import json
import os
from itertools import groupby

import numpy as np
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn

from dotenv import load_dotenv
load_dotenv()

nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

def lexical_frequency(word):
    # NOTE: reloads the frequency table on every call; cache it if this gets hot
    rec = pd.read_csv("unigram_freq.csv")
    total = np.sum(rec["count"])
    rec["count"] = rec["count"] / total
    rec = rec.set_index("word")

    return float(rec.loc[word]["count"])
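(unigram_freq.csv ships outside this diff; from the column access above it is assumed to be a two-column frequency table, for example, with illustrative counts:

word,count
the,1000000
cat,20000
)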

def repetition(list_):
    # number of repeated words, counted after lemmatizing so "cats" repeats "cat"
    _list = [lemmatizer.lemmatize(l) for l in list_]
    words_said = set(_list)
    return len(_list) - len(words_said)
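(For example, repetition(["cat", "cats", "dog"]) returns 1, since lemmatization collapses "cats" into "cat" before the duplicates are counted.)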

def switching_clustering(list_, word_dict):
    # map each word to its categories; out-of-dictionary words get ["NA"]
    cat = []
    for l in list_:
        try:
            cat.append(word_dict[l])
        except KeyError:
            cat.append(["NA"])

    # for each adjacent pair of words, record one shared category ("" if none)
    inter = []
    for i in range(len(cat) - 1):
        c1, c2 = cat[i], cat[i + 1]
        intersection = list(set(c1) & set(c2))
        try:
            inter.append(intersection[0])
        except IndexError:
            inter.append("")

    # run-length encode the shared categories: [name, size, name, size, ...]
    res = []
    for k, g in groupby(inter):
        res.extend([k, str(len(list(g)))])

    # a run with a shared category is one cluster; a run of "" contributes
    # one cluster per pair in it
    nb_clusters = 0
    for k in range(0, len(res) - 1, 2):
        c_name = res[k]
        c_size = res[k + 1]
        if c_name == "":
            nb_clusters += int(c_size)
        else:
            nb_clusters += 1

    if len(res) < 1:
        return 0, 0, []

    # a switch is a transition between consecutive clusters
    nb_switches = max(nb_clusters - 1, 0)

    if res[0] == "" and len(res) == 2:
        # no adjacent words shared a category: every word is its own cluster
        nb_clusters = len(cat)
        nb_switches = nb_clusters - 1

    return nb_clusters, nb_switches, res
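(A quick sanity check with a made-up two-category dictionary, not part of the codebase:

word_dict = {"cat": ["pets"], "dog": ["pets"], "tuna": ["fish"]}
print(switching_clustering(["cat", "dog", "tuna"], word_dict))
# cat-dog share "pets", dog-tuna share nothing:
# (2, 1, ['pets', '1', '', '1'])
)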

def generate_animal_dict():
    # each line of animal_groups.txt maps a category to its members,
    # e.g. "category: word1, word2, ..."
    animal_dict = {}

    with open("animal_groups.txt", "r") as f:
        for line in f:
            split = line.split(":")
            cat, words = split[0], split[1].split(",")
            words = [w.strip() for w in words]

            for w in words:
                if w in animal_dict:
                    animal_dict[w].append(cat)
                else:
                    animal_dict[w] = [cat]

    return animal_dict

animal_dict = generate_animal_dict()

def clean_list(list_, word_dict):
return [l for l in list_ if l in word_dict]

def animal_task(list_):
measures = {}
# discrepancy/asides
cleaned_list = clean_list(list_, animal_dict)
measures["total_word_count"] = len(list_)
measures["lexical_frequency"] = [lexical_frequency(word) for word in cleaned_list]
measures["repetition"] = repetition(cleaned_list)
measures["word_count"] = len(cleaned_list)
measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in cleaned_list]))
measures["error"] = len([w for w in list_ if w not in animal_dict])
measures["nb_clusters"], measures["nb_switches"], measures["clusters"] = switching_clustering(cleaned_list, animal_dict)

return measures


# TODO: fruit_dict is not defined anywhere in this module yet; presumably it
# should be generated from a fruit/veg groups file the way animal_dict is.
def fruit_veg_task(list_):
    measures = {}
    # discrepancy/asides
    measures["lexical_frequency"] = [lexical_frequency(word) for word in list_]
    measures["repetition"] = repetition(list_)
    measures["word_count"] = len(list_)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in list_]))
    measures["error"] = len([w for w in list_ if w not in fruit_dict])
    measures["nb_clusters"], measures["nb_switches"], measures["clusters"] = switching_clustering(list_, fruit_dict)

    return measures

def f_starting_words(list_):
    measures = {}
    # discrepancy/asides
    measures["lexical_frequency"] = [lexical_frequency(word) for word in list_]
    measures["repetition"] = repetition(list_)
    measures["error"] = len([word for word in list_ if not word.lower().startswith("f")])
    measures["word_count"] = len(list_)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in list_]))
    # TODO: letter-fluency clustering needs its own category dictionary;
    # the empty dict is a placeholder that keeps this runnable
    measures["nb_clusters"], measures["nb_switches"], measures["clusters"] = switching_clustering(list_, {})

    return measures

def a_starting_words(list_):
    measures = {}
    # discrepancy/asides
    measures["lexical_frequency"] = [lexical_frequency(word) for word in list_]
    measures["repetition"] = repetition(list_)
    measures["error"] = len([word for word in list_ if not word.lower().startswith("a")])
    measures["word_count"] = len(list_)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in list_]))
    # TODO: letter-fluency clustering needs its own category dictionary;
    # the empty dict is a placeholder that keeps this runnable
    measures["nb_clusters"], measures["nb_switches"], measures["clusters"] = switching_clustering(list_, {})

    return measures


def action_words(list_):
    measures = {}
    # discrepancy/asides
    measures["lexical_frequency"] = [lexical_frequency(word) for word in list_]
    measures["repetition"] = repetition(list_)
    measures["error"] = 0
    measures["word_count"] = len(list_)
    measures["unique_word_count"] = len(set([lemmatizer.lemmatize(l) for l in list_]))
    # TODO: verb-fluency clustering needs its own category dictionary;
    # the empty dict is a placeholder that keeps this runnable
    measures["nb_clusters"], measures["nb_switches"], measures["clusters"] = switching_clustering(list_, {})

    # count a word as an error when WordNet never lists it as a verb
    for w in list_:
        pos_l = []
        for tmp in wn.synsets(w):
            if tmp.name().split('.')[0] == w:
                pos_l.append(tmp.pos())
        if "v" not in pos_l:
            measures["error"] += 1

    return measures

def transcribe(audio_base64):
    try:
        endpoint_url = os.getenv('WHISPER_ENDPOINT')
        azureml_model_token = os.getenv('WHISPER_API_KEY')

        data = {
            "input_data": {
                "audio": [audio_base64],
                "language": ["en"]
            },
        }

        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {azureml_model_token}',  # You might need to manage authentication
            'azureml-model-deployment': 'openai-whisper-large-15'
        }
        response = requests.post(endpoint_url, headers=headers, data=json.dumps(data))

        if response.status_code == 200:
            text_data = response.json()[0]['text']
            return text_data
        else:
            print("error", response.status_code)
            return None
    except Exception as e:
        print(e)
        return None
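(For local runs, the two environment variables read above would live in the .env file this commit adds to .gitignore; the values below are placeholders:

WHISPER_ENDPOINT=https://<your-azureml-endpoint>/score
WHISPER_API_KEY=<azureml-model-token>
)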


if __name__ == "__main__":
    # avoid shadowing the built-in list
    words = ["cat", "dog", "parrot", "dog", "tuna", "camel", "play"]
    words = [w.lower() for w in words]

    print(animal_task(words))
4 changes: 3 additions & 1 deletion analysis_api/requirements.txt
@@ -5,4 +5,6 @@ flask
flask_cors
pillow
scipy
fooof
nltk
python-dotenv
