Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed a bunch of bugs and connected functionality to interface #10

Merged
merged 22 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d69a866
Update main.yml
KleinRana Mar 7, 2022
87120cf
Bug fix concerning renaming field weighted Wer to WER
KleinRana Oct 4, 2023
141dcdd
Merge pull request #1 from opensource-spraakherkenning-nl/KleinRana-p…
KleinRana Oct 4, 2023
8abaabf
Update __main__.py
KleinRana Oct 4, 2023
0f00102
Update __main__.py
KleinRana Oct 4, 2023
207c094
Update pipeline.py
KleinRana Oct 4, 2023
d7bdd0b
Update interface.py
KleinRana Oct 4, 2023
ef7401c
Update select_files.html
KleinRana Oct 4, 2023
45166b9
Fix bug in wer results on speaker level
KleinRana Oct 4, 2023
566689c
Merge pull request #2 from opensource-spraakherkenning-nl/KleinRana-p…
greenw0lf Oct 4, 2023
f3ad66c
fix error in parsing the input args
KleinRana Oct 4, 2023
32ad4d5
Merge pull request #4 from opensource-spraakherkenning-nl/KleinRana-s…
greenw0lf Oct 4, 2023
bf59bfa
Merge pull request #5 from opensource-spraakherkenning-nl/KleinRana-f…
KleinRana Oct 5, 2023
969313d
Add speech recognizer to results filename
greenw0lf Oct 5, 2023
cbcd1fc
Merge pull request #6 from opensource-spraakherkenning-nl/greenw0lf-p…
KleinRana Oct 5, 2023
30d4206
Remove duplicate code
greenw0lf Oct 5, 2023
3d7c024
fix code not looking in results
greenw0lf Oct 5, 2023
52c7cc5
Merge pull request #7 from opensource-spraakherkenning-nl/greenw0lf-p…
KleinRana Oct 5, 2023
0447a2f
Remove adding /input/ to the speech recognizer
greenw0lf Oct 5, 2023
f11bfe3
Merge pull request #8 from opensource-spraakherkenning-nl/greenw0lf-p…
KleinRana Oct 5, 2023
f0f0729
Update pipeline.py
greenw0lf Oct 5, 2023
1a4745d
Merge pull request #9 from opensource-spraakherkenning-nl/greenw0lf-p…
KleinRana Oct 5, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
uses: actions/checkout@v2

- name: Build and push Docker images
if: github.ref == 'refs/heads/master'
if: github.ref == 'refs/heads/main'
uses: docker/build-push-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
Expand Down
8 changes: 6 additions & 2 deletions ASR_NL_benchmark/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
metavar=('reffile_name', 'extension'),
default=['ASR_NL_benchmark/data/test_ref.stm', 'stm'],
help='help: path to the reference file and its extension')
parser.add_argument('-kind', '--kind',
metavar=('speechrecognizer'),
default='',
help='help: enter the name of your speech recognizer')
parser.add_argument('-interactive',
metavar='value',
default='',
Expand All @@ -25,7 +29,7 @@
interface.main()
else:
print('Running benchmarking')
benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1])
benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1], kind=args.kind)
benchmarking.main()
pipeline.process_results()
pipeline.process_results(kind=args.kind)

11 changes: 6 additions & 5 deletions ASR_NL_benchmark/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ def upload_page():
if request.method == 'POST':
hyp = os.path.join(os.path.sep,'input',request.form.get('hyp'))
ref = os.path.join(os.path.sep,'input',request.form.get('ref'))
kind = request.form.get('kind')
global benchmarking
benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm')
benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm', kind)
Thread(target=benchmarking.main).start()
return redirect(f'/progress?ref={ref}&hyp={hyp}')
return render_template('select_files.html')
Expand Down Expand Up @@ -62,12 +63,12 @@ def get_dfs():
dfs[index]['cat']['df'] = pandas.read_csv(folder)
dfs[index]['cat']['kind'] = dfs[index]['cat']['df']['kind'].iloc[0]
dfs[index]['cat']['df'] = dfs[index]['cat']['df'].drop('kind',1)
dfs[index]['cat']['df']['product'] = dfs[index]['cat']['df']['Weighted_wer'] * dfs[index]['cat']['df']['ref_words']
dfs[index]['cat']['df']['product'] = dfs[index]['cat']['df']['WER'] * dfs[index]['cat']['df']['ref_words']
dfs[index]['cat']['wer'] = dfs[index]['cat']['df']['product'].sum() / dfs[index]['cat']['df']['ref_words'].sum()
dfs[index]['cat']['df'] = dfs[index]['cat']['df'].drop('product',1)
print(dfs)

speaker_folders = [f.path for f in os.scandir(os.path.join(os.path.sep,'input','')) if
speaker_folders = [f.path for f in os.scandir(os.path.join(os.path.sep,'input','results','')) if
f.is_file() and f.name.startswith('results_speaker') and f.name.endswith('.csv')]

for folder in speaker_folders:
Expand All @@ -77,16 +78,16 @@ def get_dfs():
except KeyError:
dfs[index] = {}
dfs[index]['spk']= {}
dfs[index]['spk'] = {}
dfs[index]['spk']['agregation'] = 'Per spreker'
dfs[index]['spk']['df'] = pandas.read_csv(folder)
dfs[index]['spk']['kind'] = dfs[index]['spk']['df']['kind'].iloc[0]
dfs[index]['spk']['df'] = dfs[index]['spk']['df'].drop('kind', 1)

dfs[index]['spk']['df']['product'] = dfs[index]['spk']['df']['Weighted_wer'] * dfs[index]['spk']['df'][
dfs[index]['spk']['df']['product'] = dfs[index]['spk']['df']['WER'] * dfs[index]['spk']['df'][
'ref_words']
dfs[index]['spk']['wer'] = dfs[index]['spk']['df']['product'].sum() / dfs[index]['spk']['df']['ref_words'].sum()
dfs[index]['spk']['df'] = dfs[index]['spk']['df'].drop('product', 1)
print(dfs)

return dfs

Expand Down
19 changes: 12 additions & 7 deletions ASR_NL_benchmark/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def calculate_wer(df):
wer = float(df['product'].sum()) / float(df['ref_words'].sum())
return df, wer

def calculate_wer_per_cat(df,category='category', id='', kind=False):
def calculate_wer_per_cat(df,category='category', id='', kind=''):
""" Calculates the WER for every unique value for a certain column
Args:
df: the pandas dataframe
Expand All @@ -98,11 +98,11 @@ def calculate_wer_per_cat(df,category='category', id='', kind=False):
1 banaan 2 0.40 False

"""
df_out = df.groupby('category', as_index=False).agg({'ref_words': 'sum', 'product': 'sum'})
df_out = df.groupby(category, as_index=False).agg({'ref_words': 'sum', 'product': 'sum'})
df_out['WER'] = (df_out['product'] / df_out['ref_words']).round(2)
df_out = df_out.drop('product', 1)
df_out['kind'] = kind
df_out.to_csv(os.path.join(os.path.sep, 'input', 'results', f'results_{category}_{id}.csv'), index=False)
df_out.to_csv(os.path.join(os.path.sep, 'input', 'results', f'results_{category}_{id}_{kind}.csv'), index=False)
return df_out

def process_results_dtl_only(path_parts=('input','results'), id='', kind= False):
Expand Down Expand Up @@ -210,14 +210,19 @@ def process_input(hypfile_arg, reffile_arg):


class Pipeline():
def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refextension):
def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refextension, kind):
self.progress = 0
self.failed = 0
self.hypfile_input_path = hypfile_input_path
self.reffile_input_path = reffile_input_path
self.hypfile_input_path = os.path.join(os.path.sep,'input',hypfile_input_path)
self.reffile_input_path = os.path.join(os.path.sep,'input',reffile_input_path)
self.hypextension = hypextension
self.refextension = refextension
self.kind = kind
self.logging = set_logging(logpath=os.path.join(os.path.sep,'input',f'{date.today()}_logging.log'))
self.logging.info(f"hypfile path from terminal: {hypfile_input_path}")
self.logging.info(f"reffile path from terminal: {reffile_input_path}")
self.logging.info(f"Pipeline class' hypfile path: {self.hypfile_input_path}")
self.logging.info(f"Pipeline class' reffile path: {self.reffile_input_path}")

def main(self):
hyp_list, ref_list = process_input(self.hypfile_input_path, self.reffile_input_path)
Expand All @@ -238,7 +243,7 @@ def main(self):
done +=1
self.progress = done/total
self.failed += 1
process_results(path_parts=('input','results'), kind=False)
process_results(path_parts=('input','results'), kind=self.kind)



Expand Down
5 changes: 4 additions & 1 deletion ASR_NL_benchmark/templates/select_files.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ <h1> Select Hypothese and Reference files or folders </h1>
<div class="container pt-3 m-3" width="80%">
<div class="form-group">
<form method="POST">
<label>Name of speech recognizer</label>
<input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
<label>Path to hypothesis file or folder</label>
<input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
<label>Path to reference file or folder</label>
Expand All @@ -38,7 +40,8 @@ <h1> Select Hypothese and Reference files or folders </h1>




<!-- Option 1: Bootstrap Bundle with Popper -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta2/dist/js/bootstrap.bundle.min.js" integrity="sha384-b5kHyXgcpbZJO/tY9Ul7kGkf1S0CWuKcCD38l8YkeH8z8QjE0GmW1gYU5S9FOnJ0" crossorigin="anonymous"></script>
</body>
</html>
</html>
Loading