Skip to content

Commit

Permalink
Merge branch 'master' into panorama-sample-launch-refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
knorth55 authored Dec 22, 2022
2 parents 5a28530 + c4df514 commit 0e0cea3
Show file tree
Hide file tree
Showing 9 changed files with 315 additions and 42 deletions.
14 changes: 10 additions & 4 deletions audio_to_spectrogram/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
cmake_minimum_required(VERSION 2.8.3)
project(audio_to_spectrogram)

find_package(catkin)
if(NOT $ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason...
catkin_python_setup()
endif()
find_package(catkin REQUIRED COMPONENTS
dynamic_reconfigure
)

catkin_python_setup()

generate_dynamic_reconfigure_options(
cfg/AudioAmplitudePlot.cfg
)

catkin_package()

install(DIRECTORY launch sample test
Expand Down
63 changes: 59 additions & 4 deletions audio_to_spectrogram/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ Here is an example using rosbag with 300Hz audio.
roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
```

|Spectrum|Spectrogram|
|---|---|
|![](https://user-images.githubusercontent.com/19769486/82075694-9a7ac300-9717-11ea-899c-db6119a76d52.png)|![](https://user-images.githubusercontent.com/19769486/82075685-96e73c00-9717-11ea-9abc-e6e74104d666.png)|
|Audio Amplitude|Spectrum|Spectrogram|
|---|---|---|
|<img src="docs/images/audio_amplitude.jpg" width="429">|![](https://user-images.githubusercontent.com/19769486/82075694-9a7ac300-9717-11ea-899c-db6119a76d52.png)|![](https://user-images.githubusercontent.com/19769486/82075685-96e73c00-9717-11ea-9abc-e6e74104d666.png)|

# Scripts

Expand All @@ -43,7 +43,15 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch

Period [s] to sample audio data for one FFT.

- `~depth` (`Int`, default: `16`)
- `~n_channel` (`Int`, default: `1`)

Number of channels of the microphone.

- `~target_channel` (`Int`, default: `0`)

Index of the target audio channel to extract.

- `~bitdepth` (`Int`, default: `16`)

Number of bits per audio data.

Expand Down Expand Up @@ -85,6 +93,53 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch

Publish rate [Hz] of spectrogram topic.

## audio_amplitude_plot.py

A script to publish audio amplitude plot image.

![](docs/images/audio_amplitude.jpg)

- ### Publishing topics

- `~output/viz` (`sensor_msgs/Image`)

Audio amplitude plot image.

- ### Subscribing topics

- `~audio` (`audio_common_msgs/AudioData`)

Audio stream data from microphone. The audio format must be `wave`.

- ### Parameters
- `~mic_sampling_rate` (`Int`, default: `16000`)

Sampling rate [Hz] of microphone. Namely, sampling rate of audio topic.

- `~n_channel` (`Int`, default: `1`)

Number of channels of the microphone.

- `~target_channel` (`Int`, default: `0`)

Index of the target audio channel to extract.

- `~bitdepth` (`Int`, default: `16`)

Number of bits per audio data.

- `~maximum_amplitude` (`Int`, default: `10000`)

Maximum range of amplitude to plot.

- `~window_size` (`Double`, default: `10.0`)

Window size [s] of the sound input to plot.

- `~rate` (`Double`, default: `10.0`)

Publish rate [Hz] of audio amplitude image topic.

## spectrum_plot.py

A script to publish frequency vs amplitude plot image.
Expand Down
13 changes: 13 additions & 0 deletions audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#! /usr/bin/env python
# Dynamic-reconfigure definition for audio_amplitude_plot.py.

PACKAGE = 'audio_to_spectrogram'

from dynamic_reconfigure.parameter_generator_catkin import *


# Parameters that can be tuned at runtime through dynamic_reconfigure.
pg = ParameterGenerator()

#      name                 type      level  description                            default  min  max
pg.add("maximum_amplitude", int_t,    0,     "Maximum range of amplitude to plot",  10000,   1,   100000)
pg.add("window_size",       double_t, 0,     "Window size of sound input to plot",  10.0,    0.1, 10000)

exit(pg.generate(PACKAGE, PACKAGE, "AudioAmplitudePlot"))
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 15 additions & 0 deletions audio_to_spectrogram/launch/audio_to_spectrogram.launch
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,28 @@
</rosparam>
</node>

<!-- publish amplitude image -->
<node name="audio_amplitude_plot"
pkg="audio_to_spectrogram" type="audio_amplitude_plot.py" output="screen">
<remap from="~audio" to="$(arg audio_topic)" />
<rosparam subst_value="true">
n_channel: $(arg n_channel)
mic_sampling_rate: $(arg mic_sampling_rate)
bitdepth: $(arg bitdepth)
</rosparam>
</node>

<!-- publish frequency vs amplitude image -->
<node name="spectrum_plot"
pkg="audio_to_spectrogram" type="spectrum_plot.py" >
<remap from="~spectrum" to="/audio_to_spectrum/spectrum_filtered" />
</node>

<group if="$(arg gui)">
<node pkg="image_view" type="image_view" name="audio_amplitude_view" >
<remap from="image" to="/audio_amplitude_plot/output/viz" />
</node>

<node pkg="image_view" type="image_view" name="spectrum_view" >
<remap from="image" to="/spectrum_plot/output/viz" />
</node>
Expand Down
3 changes: 3 additions & 0 deletions audio_to_spectrogram/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@

<buildtool_depend>catkin</buildtool_depend>

<build_depend>dynamic_reconfigure</build_depend>

<exec_depend>audio_capture</exec_depend>
<exec_depend>audio_common_msgs</exec_depend>
<exec_depend>cv_bridge</exec_depend>
<exec_depend>dynamic_reconfigure</exec_depend>
<exec_depend>image_view</exec_depend>
<exec_depend>jsk_recognition_msgs</exec_depend>
<exec_depend>jsk_topic_tools</exec_depend>
Expand Down
88 changes: 88 additions & 0 deletions audio_to_spectrogram/scripts/audio_amplitude_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division

import cv_bridge
from dynamic_reconfigure.server import Server
from jsk_topic_tools import ConnectionBasedTransport
import matplotlib.pyplot as plt
import numpy as np
import rospy
import sensor_msgs.msg

from audio_to_spectrogram import AudioBuffer
from audio_to_spectrogram.cfg import AudioAmplitudePlotConfig as Config
from audio_to_spectrogram import convert_matplotlib_to_img


class AudioAmplitudePlot(ConnectionBasedTransport):
    """Publish a matplotlib plot of the buffered audio amplitude as an image.

    Audio samples are accumulated by an ``AudioBuffer`` (configured from
    rosparams) and periodically rendered as a waveform-vs-time plot on
    ``~output/viz`` (``sensor_msgs/Image``, rgb8). Plot range and window
    length are tunable through dynamic_reconfigure.
    """

    def __init__(self):
        super(AudioAmplitudePlot, self).__init__()

        # Ring buffer of raw audio samples; its parameters
        # (~mic_sampling_rate, ~n_channel, ~bitdepth, ...) come from
        # the parameter server.
        self.audio_buffer = AudioBuffer.from_rosparam()

        # Set matplotlib config
        self.fig = plt.figure(figsize=(8, 5))
        self.ax = self.fig.add_subplot(1, 1, 1)
        self.ax.grid(True)
        self.ax.set_xlabel('Time [s]', fontsize=12)
        self.ax.set_ylabel('Amplitude', fontsize=12)
        self.line, = self.ax.plot([0, 0], label='Amplitude of Audio')
        # The set of plotted artists never changes, so the legend only
        # needs to be created once here rather than on every timer tick.
        self.ax.legend(loc='upper right')

        # Reuse a single CvBridge instead of constructing a new one in
        # every timer callback.
        self._bridge = cv_bridge.CvBridge()

        # ROS dynamic reconfigure; the Server invokes config_callback
        # immediately, which initializes self.maximum_amplitude and
        # self.window_size before the timer can fire.
        self.srv = Server(Config, self.config_callback)

        self.pub_img = self.advertise(
            '~output/viz', sensor_msgs.msg.Image, queue_size=1)

    def start_timer(self):
        """Start the periodic plotting timer at ``~rate`` Hz (default 10)."""
        rate = rospy.get_param('~rate', 10)
        if rate == 0:
            # A zero rate would divide by zero below; fall back to the default.
            rospy.logwarn('You cannot set 0 as the rate; change it to 10.')
            rate = 10
        self.timer = rospy.Timer(rospy.Duration(1.0 / rate), self.timer_cb)

    def stop_timer(self):
        """Stop the periodic plotting timer."""
        self.timer.shutdown()

    def subscribe(self):
        # Called by ConnectionBasedTransport when the first peer connects.
        self.audio_buffer.subscribe()
        self.start_timer()

    def unsubscribe(self):
        # Called by ConnectionBasedTransport when the last peer disconnects.
        self.audio_buffer.unsubscribe()
        self.stop_timer()

    def config_callback(self, config, level):
        """Apply dynamic_reconfigure updates (amplitude range, window size)."""
        self.maximum_amplitude = config.maximum_amplitude
        self.window_size = config.window_size
        # Keep the buffer length in sync so read() returns one window.
        self.audio_buffer.window_size = self.window_size
        return config

    def timer_cb(self, timer):
        """Render the latest audio window and publish it as an image."""
        window_size = self.window_size
        amp = self.audio_buffer.read(window_size)
        if len(amp) == 0:
            # No audio received yet; nothing to plot.
            return
        # Time axis ends at "now" (0) and extends window_size seconds back.
        times = np.linspace(-window_size, 0.0, len(amp))

        # Plot audio amplitude.
        self.line.set_data(times, amp)
        self.ax.set_xlim((times.min(), times.max()))
        self.ax.set_ylim((-self.maximum_amplitude, self.maximum_amplitude))

        # Only pay the rendering/conversion cost when someone is listening.
        if self.pub_img.get_num_connections() > 0:
            img = convert_matplotlib_to_img(self.fig)
            img_msg = self._bridge.cv2_to_imgmsg(img, encoding='rgb8')
            img_msg.header.stamp = rospy.Time.now()
            self.pub_img.publish(img_msg)


if __name__ == '__main__':
    # Initialize the node, build the plotter, and hand control to ROS.
    rospy.init_node('audio_amplitude_plot')
    node = AudioAmplitudePlot()  # keep a reference for the node's lifetime
    rospy.spin()
43 changes: 9 additions & 34 deletions audio_to_spectrogram/scripts/audio_to_spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from audio_common_msgs.msg import AudioData
from jsk_recognition_msgs.msg import Spectrum

from audio_to_spectrogram import AudioBuffer

# This node execute FFT to audio (audio_common_msgs/AudioData)
# The format of audio topic is assumed to be wave.
Expand All @@ -15,27 +16,14 @@ class AudioToSpectrum(object):
def __init__(self):
super(AudioToSpectrum, self).__init__()

# Audio topic config
# The number of channels in audio data
self.n_channel = rospy.get_param('~n_channel', 1)
# Sampling rate of microphone (namely audio topic).
mic_sampling_rate = rospy.get_param('~mic_sampling_rate', 16000)
# Period[s] to sample audio data for one fft
self.audio_buffer = AudioBuffer.from_rosparam(auto_start=True)
fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3)
# Bits per one audio data
bitdepth = rospy.get_param('~bitdepth', 16)
if bitdepth == 16:
self.dtype = 'int16'
else:
rospy.logerr("'~bitdepth' {} is unsupported.".format(bitdepth))
# Audio topic buffer
self.audio_buffer = np.array([], dtype=self.dtype)
# How long audio_buffer should be for one audio topic
self.audio_buffer_len = int(mic_sampling_rate * fft_sampling_period)
self.audio_buffer.window_size = fft_sampling_period
mic_sampling_rate = self.audio_buffer.input_sample_rate

# fft config
window_function = np.arange(
0.0, 1.0, 1.0 / self.audio_buffer_len)
0.0, 1.0, 1.0 / self.audio_buffer.audio_buffer_len)
self.window_function = 0.54 - 0.46 * np.cos(
2 * np.pi * window_function)
high_cut_freq = rospy.get_param('~high_cut_freq', 800)
Expand All @@ -44,7 +32,7 @@ def __init__(self):
mic_sampling_rate / 2))
low_cut_freq = rospy.get_param('~low_cut_freq', 1) # remove 0 Hz
self.freq = np.fft.fftfreq(
self.audio_buffer_len, d=1./mic_sampling_rate)
self.audio_buffer.audio_buffer_len, d=1./mic_sampling_rate)
self.cutoff_mask = np.where(
(low_cut_freq <= self.freq) & (self.freq <= high_cut_freq),
True, False)
Expand All @@ -53,31 +41,18 @@ def __init__(self):
self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50)

# Publisher and Subscriber
rospy.Subscriber(
'~audio', AudioData, self.audio_cb)
self.pub_spectrum = rospy.Publisher(
'~spectrum', Spectrum, queue_size=1)
self.pub_spectrum_filtered = rospy.Publisher(
'~spectrum_filtered', Spectrum, queue_size=1)
rospy.Timer(rospy.Duration(1. / self.fft_exec_rate), self.timer_cb)

def audio_cb(self, msg):
# Convert audio buffer to int array
data = msg.data
audio_buffer = np.frombuffer(data, dtype=self.dtype)
# Retreive one channel data
audio_buffer = audio_buffer[0::self.n_channel]
# Save audio msg to audio_buffer
self.audio_buffer = np.append(
self.audio_buffer, audio_buffer)
self.audio_buffer = self.audio_buffer[
-self.audio_buffer_len:]

def timer_cb(self, timer):
if len(self.audio_buffer) != self.audio_buffer_len:
if len(self.audio_buffer) != self.audio_buffer.audio_buffer_len:
return
audio_data = self.audio_buffer.read()
# Calc spectrum by fft
amplitude = np.fft.fft(self.audio_buffer * self.window_function)
amplitude = np.fft.fft(audio_data * self.window_function)
amplitude = np.log(np.abs(amplitude))
spectrum_msg = Spectrum()
spectrum_msg.header.stamp = rospy.Time.now()
Expand Down
Loading

0 comments on commit 0e0cea3

Please sign in to comment.