-
Notifications
You must be signed in to change notification settings - Fork 0
/
ReadTranslateSay.py
136 lines (100 loc) · 4.71 KB
/
ReadTranslateSay.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/python3.6
import http.client, urllib.request, urllib.parse, urllib.error, base64, json
import requests
import configparser
import sys
from xml.etree import ElementTree
# Read the Cognitive Services credentials from the settings file.
appConfig = configparser.ConfigParser()
appConfig.read("out/settings.ini")

# One subscription key per service, all stored under a "key" entry.
subscription_key, subscription_key_speech, subscription_key_translation = (
    appConfig.get(section, "key")
    for section in ("vision", "speech", "translation")
)

# Regional endpoint host for the Computer Vision API.
uri_base = 'westus.api.cognitive.microsoft.com'
def ReadTextFromImage(URLImagePath):
    """Run the Azure Computer Vision OCR service on an image URL.

    URLImagePath: publicly reachable URL of the image to OCR.
    Returns the recognized words joined by spaces (with a leading space,
    matching the original output shape), or False on any failure.
    """
    headers = {
        'Content-Type': 'application/json',
        'Ocp-Apim-Subscription-Key': subscription_key,
    }
    params = urllib.parse.urlencode({
        # "unk" asks the service to auto-detect the language.
        'language': 'unk',
        # Fixed: this key previously had a trailing space ('detectOrientation ')
        # which the service would not recognize as the real parameter.
        'detectOrientation': 'true',
    })
    # Fixed: build the JSON body with json.dumps instead of string
    # concatenation, so quotes/special characters in the URL cannot
    # break (or inject into) the payload.
    body = json.dumps({'url': URLImagePath})
    conn = None
    try:
        # Execute the REST API call and get the response.
        # Use the shared uri_base constant instead of re-hard-coding the host.
        conn = http.client.HTTPSConnection(uri_base)
        conn.request("POST", "/vision/v1.0/ocr?%s" % params, body, headers)
        response = conn.getresponse()
        parsed = json.loads(response.read())
        # Fixed: removed eval(str(json.dumps(...))) — json.loads already
        # returns the dict; eval on service output was both pointless and
        # a code-execution hazard.
        textout = ""
        # Walk regions -> lines -> words directly instead of range(len(...)).
        for region in parsed["regions"]:
            for line in region["lines"]:
                for word in line["words"]:
                    textout = textout + " " + word["text"]
        return textout
    except Exception as e:
        print('Error:')
        print(e)
        return False
    finally:
        # Fixed: the connection now closes on the error path too.
        if conn is not None:
            conn.close()
def GetAudioFromText(text2narrate, Language, VoiceStyle, audiofileoutput):
    """Synthesize speech for a text and write the audio to a file.

    Exchanges the speech subscription key for a bearer token, builds an
    SSML request, posts it to the Bing Speech /synthesize endpoint, and
    writes the returned riff-16khz-16bit-mono-pcm audio to
    audiofileoutput.

    text2narrate:    text to speak.
    Language:        locale tag placed in the SSML, e.g. 'en-US'.
    VoiceStyle:      full Microsoft voice name for the 'name' attribute.
    audiofileoutput: path of the file the audio bytes are written to.
    Returns 1 on success, False on any failure.
    """
    headers_speech = {"Ocp-Apim-Subscription-Key": subscription_key_speech}
    try:
        # Step 1: trade the subscription key for a short-lived access token.
        conn = http.client.HTTPSConnection("api.cognitive.microsoft.com")
        conn.request("POST", "/sts/v1.0/issueToken", "", headers_speech)
        response = conn.getresponse()
        accesstoken = response.read().decode("UTF-8")
        conn.close()

        # Step 2: compose the SSML body carrying the text to narrate.
        body = ElementTree.Element('speak', version='1.0')
        body.set('{http://www.w3.org/XML/1998/namespace}lang', Language)
        voice = ElementTree.SubElement(body, 'voice')
        voice.set('{http://www.w3.org/XML/1998/namespace}lang', Language)
        voice.set('{http://www.w3.org/XML/1998/namespace}gender', 'Female')
        voice.set('name', VoiceStyle)
        voice.text = text2narrate

        headers = {"Content-type": "application/ssml+xml",
                   "X-Microsoft-OutputFormat": "riff-16khz-16bit-mono-pcm",
                   "Authorization": "Bearer " + accesstoken,
                   "X-Search-AppId": "07D3234E49CE426DAA29772419F436CA",
                   "X-Search-ClientID": "1ECFAE91408841A480F00935DC390960",
                   "User-Agent": "TTSForPython"}

        # Step 3: request the synthesized waveform.
        conn = http.client.HTTPSConnection("speech.platform.bing.com")
        conn.request("POST", "/synthesize", ElementTree.tostring(body), headers)
        response = conn.getresponse()
        data = response.read()
        conn.close()

        # Fixed: the output file was previously opened but never closed;
        # a context manager guarantees the handle is released and flushed.
        with open(audiofileoutput, 'wb') as outfile:
            outfile.write(data)
        return 1
    except Exception as e:
        print('Error:')
        print(e)
        return False
def Translate(Text2Translate, TargetLanguage):
    """Translate a text with the Microsoft Translator V2 HTTP API.

    Text2Translate: text to translate.
    TargetLanguage: target language code, e.g. 'pt'.
    Returns the translated string parsed from the XML response
    (None if the service returns an empty element).
    """
    request_headers = {'Ocp-Apim-Subscription-Key': subscription_key_translation}
    # Fixed: use HTTPS (the key was previously sent over plain HTTP) and
    # let requests URL-encode the query parameters — the original built
    # the URL with raw .format(), which broke on spaces, '&', '#', etc.
    translateUrl = "https://api.microsofttranslator.com/v2/Http.svc/Translate"
    translationData = requests.get(
        translateUrl,
        params={'text': Text2Translate, 'to': TargetLanguage},
        headers=request_headers,
    )
    # The V2 endpoint answers with a single XML <string> element.
    translation = ElementTree.fromstring(translationData.text.encode('utf-8'))
    return translation.text
# Script entry point (kept as flat statements, matching the original
# structure): OCR a sample image, echo the text, translate it to
# Portuguese, then narrate the translation into an audio file.
IMAGE_URL = 'http://www.funpedia.net/imgs/may11/very-funny-signs-01.jpg'

text2read = ReadTextFromImage(IMAGE_URL)
print(text2read)

# ReadTextFromImage returns False on failure, so only continue on success.
if text2read:
    Translation2pt = Translate(text2read, 'pt')
    print(Translation2pt)
    GetAudioFromText(
        Translation2pt,
        'en-US',
        'Microsoft Server Speech Text to Speech Voice (pt-BR, HeloisaRUS)',
        'saida.audio',
    )