Skip to content

Commit

Permalink
Refactoring + UDP
Browse files Browse the repository at this point in the history
  • Loading branch information
synesthesiam committed Aug 24, 2023
1 parent 96a0b8a commit f5372f7
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 46 deletions.
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.DS_Store
.idea
*.log
tmp/

*.py[cod]
*.egg
build
htmlcov

.venv/
8 changes: 7 additions & 1 deletion assist_microphone/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
version: 0.2.9
version: 0.2.10
slug: assist_microphone
name: assist_microphone
description: Stream microphone audio to Assist
Expand All @@ -16,6 +16,8 @@ options:
awake_sound: true
done_sound: true
wake_buffer_seconds: 0.0
udp_mic: false
udp_snd: false
debug_logging: false
schema:
token: str
Expand All @@ -26,8 +28,12 @@ schema:
awake_sound: bool
done_sound: bool
wake_buffer_seconds: float
udp_mic: bool
udp_snd: bool
debug_logging: bool
init: false
audio: true
homeassistant_api: true
ports:
"5000/udp": null
homeassistant: 2023.9.0.dev20230809
87 changes: 48 additions & 39 deletions assist_microphone/hass_satellite/__main__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
#!/usr/bin/env python3
import argparse
import asyncio
import contextlib
import functools
import logging
import shutil
import socket
import sys
import threading
from collections import deque
from dataclasses import dataclass
from enum import Enum, auto
from typing import Deque, Optional, Tuple

import sounddevice as sd

from .mic import record
from .mic import record_stream, record_udp
from .remote import stream
from .snd import play
from .snd import play_stream, play_udp
from .state import State, MicState
from .vad import (
SileroVoiceActivityDetector,
VoiceActivityDetector,
Expand All @@ -24,18 +26,6 @@
_LOGGER = logging.getLogger(__name__)


class MicState(str, Enum):
NOT_RECORDING = auto()
WAIT_FOR_VAD = auto()
RECORDING = auto()


@dataclass
class State:
is_running: bool = True
mic: MicState = MicState.NOT_RECORDING


async def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("host", help="Home Assistant server host")
Expand Down Expand Up @@ -76,6 +66,9 @@ async def main() -> None:
#
parser.add_argument("--wake-buffer-seconds", type=float, default=0)
#
parser.add_argument("--udp-mic", type=int, help="UDP port to receive input audio")
parser.add_argument("--udp-snd", type=int, help="UDP port to send output audio")
#
parser.add_argument(
"--debug", action="store_true", help="Print DEBUG messages to the console"
)
Expand Down Expand Up @@ -130,21 +123,45 @@ async def main() -> None:
)
mic_thread.start()

# Audio output
snd_socket: Optional[socket.socket] = None

try:
while True:
try:
if args.udp_snd is not None:
if snd_socket is None:
snd_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
snd_stream = contextlib.nullcontext()
play = functools.partial(
play_udp,
udp_socket=snd_socket,
udp_port=args.udp_snd,
state=state,
sample_rate=16000,
volume=args.volume,
)
else:
snd_stream = sd.RawOutputStream(
device=args.snd_device,
samplerate=snd_sample_rate,
channels=1,
dtype="int16",
)
play = functools.partial(
play_stream,
stream=snd_stream,
sample_rate=snd_sample_rate,
volume=args.volume,
)

if args.vad:
_LOGGER.debug("Waiting for speech")
await speech_detected.wait()
speech_detected.clear()
_LOGGER.debug("Speech detected")

with sd.RawOutputStream(
device=args.snd_device,
samplerate=snd_sample_rate,
channels=1,
dtype="int16",
) as snd_stream:
with snd_stream:
async for _timestamp, event_type, event_data in stream(
host=args.host,
token=args.token,
Expand All @@ -157,32 +174,19 @@ async def main() -> None:
if event_type == "wake_word-end":
if args.awake_sound:
state.mic = MicState.NOT_RECORDING
play(
media=args.awake_sound,
stream=snd_stream,
sample_rate=snd_sample_rate,
volume=args.volume,
)
play(media=args.awake_sound)
state.mic = MicState.RECORDING
elif event_type == "stt-end":
# Stop recording until run ends
state.mic = MicState.NOT_RECORDING
if args.done_sound:
play(
media=args.done_sound,
stream=snd_stream,
sample_rate=snd_sample_rate,
volume=args.volume,
)
play(media=args.done_sound)
elif event_type == "tts-end":
# Play TTS output
tts_url = event_data.get("tts_output", {}).get("url")
if tts_url:
play(
media=f"{args.protocol}://{args.host}:{args.port}{tts_url}",
stream=snd_stream,
sample_rate=snd_sample_rate,
volume=args.volume,
media=f"{args.protocol}://{args.host}:{args.port}{tts_url}"
)
elif event_type in ("run-end", "error"):
# Start recording for next wake word
Expand Down Expand Up @@ -220,7 +224,12 @@ def _mic_proc(
else:
_LOGGER.debug("No VAD")

for ts_chunk in record(args.mic_device):
if args.udp_mic is not None:
mic_stream = record_udp(args.udp_mic, state)
else:
mic_stream = record_stream(args.mic_device)

for ts_chunk in mic_stream:
if not state.is_running:
break

Expand Down
27 changes: 23 additions & 4 deletions assist_microphone/hass_satellite/mic.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import argparse
import asyncio
import sys
import socket
import time
from typing import Final, Iterable, Optional, Tuple, Union

import sounddevice as sd

from .state import State

_RATE: Final = 16000
_WIDTH: Final = 2
_CHANNELS: Final = 1
_SAMPLES_PER_CHUNK = int(0.03 * _RATE) # 30ms


def record(
def record_stream(
device: Optional[Union[str, int]],
samples_per_chunk: int = _SAMPLES_PER_CHUNK,
) -> Iterable[Tuple[int, bytes]]:
Expand All @@ -27,3 +28,21 @@ def record(
chunk, _overflowed = stream.read(samples_per_chunk)
chunk = bytes(chunk)
yield time.monotonic_ns(), chunk


def record_udp(
    port: int,
    state: State,
    host: str = "0.0.0.0",
    samples_per_chunk: int = _SAMPLES_PER_CHUNK,
) -> Iterable[Tuple[int, bytes]]:
    """Yield timestamped audio chunks received as UDP datagrams.

    Binds a UDP socket to ``(host, port)`` and yields one item per received
    datagram, forever. The first time a datagram arrives while
    ``state.mic_host`` is ``None``, the sender's IP address is stored there.

    Args:
        port: UDP port to listen on.
        state: Shared state object; ``mic_host`` is filled in from the first
            sender seen.
        host: Local address to bind to (all IPv4 interfaces by default).
        samples_per_chunk: Number of samples used to size the receive buffer
            (``samples_per_chunk * _WIDTH`` bytes per ``recvfrom`` call).

    Yields:
        ``(time.monotonic_ns(), payload)`` for each datagram received.
    """
    # Loop-invariant: largest payload accepted per datagram, in bytes.
    max_bytes = samples_per_chunk * _WIDTH

    # The context manager guarantees the socket is closed when binding fails,
    # when the consumer stops iterating and the generator is closed, or when
    # an error propagates out of the loop.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as udp_socket:
        udp_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        udp_socket.bind((host, port))

        while True:
            chunk, addr = udp_socket.recvfrom(max_bytes)
            if state.mic_host is None:
                # Remember who is sending us audio (IP only, not the port).
                state.mic_host = addr[0]

            yield time.monotonic_ns(), chunk
48 changes: 46 additions & 2 deletions assist_microphone/hass_satellite/snd.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import logging
import socket
import subprocess
import wave

import sounddevice as sd

_LOGGER = logging.getLogger()
from .state import State

def play(
_LOGGER = logging.getLogger()


def play_stream(
media: str,
stream: sd.RawOutputStream,
sample_rate: int,
Expand Down Expand Up @@ -40,3 +44,43 @@ def play(
while chunk:
stream.write(chunk)
chunk = wav_file.readframes(samples_per_chunk)


def play_udp(
    media: str,
    udp_socket: socket.socket,
    udp_port: int,
    state: State,
    sample_rate: int,
    samples_per_chunk: int = 1024,
    volume: float = 1.0,
) -> None:
    """Convert ``media`` with ffmpeg and send the audio frames over UDP.

    ffmpeg is asked for mono WAV output at ``sample_rate`` with the given
    ``volume`` filter, written to its stdout. The WAV frames are then read
    ``samples_per_chunk`` at a time and each batch is sent as one datagram
    to ``(state.mic_host, udp_port)``.

    Args:
        media: Input passed to ffmpeg's ``-i`` option.
        udp_socket: Socket used for ``sendto``.
        udp_port: Destination UDP port.
        state: Shared state; ``mic_host`` must already be set.
        sample_rate: Output sample rate passed to ffmpeg's ``-ar`` option.
        samples_per_chunk: Frames read from the WAV stream per datagram.
        volume: Value for ffmpeg's ``volume`` audio filter.
    """
    assert state.mic_host is not None

    # Build the ffmpeg command piece by piece: input, format, then filters.
    ffmpeg_cmd = ["ffmpeg", "-i", media]
    ffmpeg_cmd += ["-f", "wav"]
    ffmpeg_cmd += ["-ar", str(sample_rate)]
    ffmpeg_cmd += ["-ac", "1"]
    ffmpeg_cmd += ["-filter:a", f"volume={volume}"]
    ffmpeg_cmd.append("-")  # write to stdout
    _LOGGER.debug("play: %s", ffmpeg_cmd)

    with subprocess.Popen(
        ffmpeg_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    ) as ffmpeg_proc:
        with wave.open(ffmpeg_proc.stdout, "rb") as wav_reader:
            # Only 16-bit samples are expected from ffmpeg here.
            assert wav_reader.getsampwidth() == 2

            # readframes() returns b"" once the stream is exhausted.
            read_frames = lambda: wav_reader.readframes(samples_per_chunk)
            for frames in iter(read_frames, b""):
                udp_socket.sendto(frames, (state.mic_host, udp_port))
16 changes: 16 additions & 0 deletions assist_microphone/hass_satellite/state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from dataclasses import dataclass
from enum import Enum, auto
from typing import Optional


class MicState(str, Enum):
    """Recording state of the microphone.

    Member values are generated by ``auto()``; because of the ``str`` mixin
    every member is also a ``str`` instance.
    """

    # Microphone audio is not being recorded (e.g. while a sound is played).
    NOT_RECORDING = auto()
    # Presumably: waiting for voice activity detection before recording
    # starts -- TODO confirm against the microphone thread.
    WAIT_FOR_VAD = auto()
    # Microphone audio is being recorded.
    RECORDING = auto()


@dataclass
class State:
    """Mutable state shared between the main loop and the audio helpers."""

    # The microphone loop stops iterating once this is False.
    is_running: bool = True
    # Current recording state of the microphone.
    mic: MicState = MicState.NOT_RECORDING
    # IP address of the host sending microphone audio over UDP; filled in by
    # record_udp from the first datagram and used by play_udp as the
    # destination for output audio. None until a datagram has been received.
    mic_host: Optional[str] = None
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ if bashio::config.true 'done_sound'; then
extra_args+=('--done-sound' '/usr/src/sounds/done.wav')
fi

if bashio::config.true 'udp_mic'; then
extra_args+=('--udp-mic' 5000)
fi

if bashio::config.true 'udp_snd'; then
extra_args+=('--udp-snd' 6055)
fi

exec python3 -m hass_satellite \
'homeassistant' \
"$(bashio::config 'token')" \
Expand Down

0 comments on commit f5372f7

Please sign in to comment.