Minor SDK Release 8.1.0 #845

Merged 4 commits on May 17, 2024
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 8.0.1
current_version = 8.1.0
commit = True
message = Bump version: {current_version} → {new_version} [skip ci]

3 changes: 1 addition & 2 deletions examples/microphone-speech-to-text.py
@@ -72,8 +72,7 @@ def recognize_using_weboscket(*args):
mycallback = MyRecognizeCallback()
speech_to_text.recognize_using_websocket(audio=audio_source,
content_type='audio/l16; rate=44100',
recognize_callback=mycallback,
interim_results=True)
recognize_callback=mycallback)

###############################################
#### Prepare the for recording using Pyaudio ##
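Note: the example above now omits `interim_results`, which this release removes from the websocket adapter. A minimal, self-contained sketch of the updated call shape against the 8.1.0 SDK (the API key, service URL, and file name are placeholders for illustration, not part of this diff):

from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Hypothetical credentials for illustration only.
authenticator = IAMAuthenticator('your-api-key')
speech_to_text = SpeechToTextV1(authenticator=authenticator)
speech_to_text.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com')

class MyRecognizeCallback(RecognizeCallback):
    def on_transcription(self, transcript):
        # Final results arrive here once the service has settled on them.
        print(transcript)

    def on_error(self, error):
        print('Error received: {}'.format(error))

with open('speech.wav', 'rb') as audio_file:
    audio_source = AudioSource(audio_file)
    speech_to_text.recognize_using_websocket(audio=audio_source,
                                             content_type='audio/wav',
                                             recognize_callback=MyRecognizeCallback(),
                                             model='en-US_Telephony')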
425 changes: 274 additions & 151 deletions ibm_watson/discovery_v2.py

Large diffs are not rendered by default.

307 changes: 202 additions & 105 deletions ibm_watson/speech_to_text_v1.py

Large diffs are not rendered by default.

22 changes: 1 addition & 21 deletions ibm_watson/speech_to_text_v1_adapter.py
@@ -1,6 +1,6 @@
# coding: utf-8

# (C) Copyright IBM Corp. 2018, 2021.
# (C) Copyright IBM Corp. 2018, 2024.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -33,7 +33,6 @@ def recognize_using_websocket(self,
customization_weight=None,
base_model_version=None,
inactivity_timeout=None,
interim_results=None,
keywords=None,
keywords_threshold=None,
max_alternatives=None,
@@ -55,7 +54,6 @@ def recognize_using_websocket(self,
split_transcript_at_phrase_end=None,
speech_detector_sensitivity=None,
background_audio_suppression=None,
low_latency=None,
character_insertion_bias=None,
**kwargs):
"""
@@ -271,22 +269,6 @@ def recognize_using_websocket(self,
* 1.0 suppresses all audio (no audio is transcribed).
The values increase on a monotonic curve. See [Background audio
suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
:param bool low_latency: (optional) If `true` for next-generation
`Multimedia` and `Telephony` models that support low latency, directs the
service to produce results even more quickly than it usually does.
Next-generation models produce transcription results faster than
previous-generation models. The `low_latency` parameter causes the models
to produce results even more quickly, though the results might be less
accurate when the parameter is used.
**Note:** The parameter is beta functionality. It is not available for
previous-generation `Broadband` and `Narrowband` models. It is available
only for some next-generation models.
* For a list of next-generation models that support low latency, see
[Supported language
models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
for next-generation models.
* For more information about the `low_latency` parameter, see [Low
latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
:param float character_insertion_bias: (optional) For next-generation
`Multimedia` and `Telephony` models, an indication of whether the service
is biased to recognize shorter or longer strings of characters when
@@ -355,7 +337,6 @@ def recognize_using_websocket(self,
'customization_weight': customization_weight,
'content_type': content_type,
'inactivity_timeout': inactivity_timeout,
'interim_results': interim_results,
'keywords': keywords,
'keywords_threshold': keywords_threshold,
'max_alternatives': max_alternatives,
@@ -375,7 +356,6 @@ def recognize_using_websocket(self,
'split_transcript_at_phrase_end': split_transcript_at_phrase_end,
'speech_detector_sensitivity': speech_detector_sensitivity,
'background_audio_suppression': background_audio_suppression,
'low_latency': low_latency,
'character_insertion_bias': character_insertion_bias
}
options = {k: v for k, v in options.items() if v is not None}
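The adapter keeps its existing pattern of collecting keyword arguments into an `options` dict and dropping `None` entries before opening the websocket, so the two removed parameters simply disappear from the payload. A standalone sketch of that filtering idiom (illustrative function and argument names only):

def build_options(**kwargs):
    # Only parameters the caller actually set are sent to the service;
    # anything left at None is stripped out, mirroring the adapter's
    # {k: v for k, v in options.items() if v is not None} line above.
    return {k: v for k, v in kwargs.items() if v is not None}

options = build_options(model='en-US_Telephony',
                        inactivity_timeout=None,
                        max_alternatives=3)
assert options == {'model': 'en-US_Telephony', 'max_alternatives': 3}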
2 changes: 1 addition & 1 deletion ibm_watson/version.py
@@ -1 +1 @@
__version__ = '8.0.1'
__version__ = '8.1.0'
19 changes: 9 additions & 10 deletions ibm_watson/websocket/recognize_listener.py
@@ -196,16 +196,15 @@ def on_data(self, ws, message, message_type, fin):
# set of transcriptions and send them to the appropriate callbacks.
results = json_object.get('results')
if results:
if (self.options.get('interim_results') is True):
b_final = (results[0].get('final') is True)
alternatives = results[0].get('alternatives')
if alternatives:
hypothesis = alternatives[0].get('transcript')
transcripts = self.extract_transcripts(alternatives)
if b_final:
self.callback.on_transcription(transcripts)
if hypothesis:
self.callback.on_hypothesis(hypothesis)
b_final = (results[0].get('final') is True)
alternatives = results[0].get('alternatives')
if alternatives:
hypothesis = alternatives[0].get('transcript')
transcripts = self.extract_transcripts(alternatives)
if b_final:
self.callback.on_transcription(transcripts)
if hypothesis:
self.callback.on_hypothesis(hypothesis)
else:
final_transcript = []
for result in results:
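With the `interim_results` gate removed, the listener now always routes interim hypotheses to `on_hypothesis` and final alternatives to `on_transcription`. A sketch of a callback that handles both paths (the printed labels are illustrative, not part of this PR):

from ibm_watson.websocket import RecognizeCallback

class LoggingCallback(RecognizeCallback):
    def on_hypothesis(self, hypothesis):
        # Interim transcript text, which may still change.
        print('interim:', hypothesis)

    def on_transcription(self, transcript):
        # List of alternatives for a final result, as extracted by the
        # listener's extract_transcripts() helper.
        print('final:', transcript)

    def on_error(self, error):
        print('error:', error)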
2 changes: 1 addition & 1 deletion setup.py
@@ -16,7 +16,7 @@
from setuptools import setup
from os import path

__version__ = '8.0.1'
__version__ = '8.1.0'

# read contents of README file
this_directory = path.abspath(path.dirname(__file__))
77 changes: 0 additions & 77 deletions test/integration/test_speech_to_text_v1.py
@@ -118,83 +118,6 @@ def on_data(self, data):
assert test_callback.data['results'][0]['alternatives'][0]
['transcript'] == 'thunderstorms could produce large hail isolated tornadoes and heavy rain '

def test_on_transcription_interim_results_false(self):

class MyRecognizeCallback(RecognizeCallback):

def __init__(self):
RecognizeCallback.__init__(self)
self.error = None
self.transcript = None

def on_error(self, error):
self.error = error

def on_transcription(self, transcript):
self.transcript = transcript

test_callback = MyRecognizeCallback()
with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
audio_source = AudioSource(audio_file, False)
self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
interim_results=False, low_latency=False)
assert test_callback.error is None
assert test_callback.transcript is not None
assert test_callback.transcript[0][0]['transcript'] in ['isolated tornadoes ', 'isolated tornados ']
assert test_callback.transcript[1][0]['transcript'] == 'and heavy rain '

def test_on_transcription_interim_results_true(self):

class MyRecognizeCallback(RecognizeCallback):

def __init__(self):
RecognizeCallback.__init__(self)
self.error = None
self.transcript = None

def on_error(self, error):
self.error = error

def on_transcription(self, transcript):
self.transcript = transcript
assert transcript[0]['confidence'] is not None
assert transcript[0]['transcript'] is not None

test_callback = MyRecognizeCallback()
with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
audio_source = AudioSource(audio_file, False)
self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
interim_results=True, low_latency=True)
assert test_callback.error is None
assert test_callback.transcript is not None
assert test_callback.transcript[0]['transcript'] == 'and heavy rain '

def test_on_transcription_interim_results_true_low_latency_false(self):

class MyRecognizeCallback(RecognizeCallback):

def __init__(self):
RecognizeCallback.__init__(self)
self.error = None
self.transcript = None

def on_error(self, error):
self.error = error

def on_transcription(self, transcript):
self.transcript = transcript
assert transcript[0]['confidence'] is not None
assert transcript[0]['transcript'] is not None

test_callback = MyRecognizeCallback()
with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
audio_source = AudioSource(audio_file, False)
self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
interim_results=True, low_latency=False)
assert test_callback.error is None
assert test_callback.transcript is not None
assert test_callback.transcript[0]['transcript'] == 'and heavy rain '

def test_custom_grammars(self):
customization_id = None
for custom_model in self.custom_models.get('customizations'):
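The three deleted tests only exercised the removed `interim_results`/`low_latency` keyword arguments. A sketch of how an equivalent integration test could look after this PR, reusing the suite's existing imports and `speech_with_pause.wav` fixture (the test name is hypothetical, not code from this diff):

def test_on_transcription_default_options(self):

    class MyRecognizeCallback(RecognizeCallback):

        def __init__(self):
            RecognizeCallback.__init__(self)
            self.error = None
            self.transcript = None

        def on_error(self, error):
            self.error = error

        def on_transcription(self, transcript):
            self.transcript = transcript

    test_callback = MyRecognizeCallback()
    with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
        audio_source = AudioSource(audio_file, False)
        # No interim_results or low_latency arguments: the adapter no longer accepts them.
        self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback,
                                                      model="en-US_Telephony")
    assert test_callback.error is None
    assert test_callback.transcript is not None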