feat: stream audio to CB

Uses PyAudio and ZeroMQ to publish audio chunks. ref: N25B-119
2025-10-01 10:50:53 +02:00
parent da99b5cd62
commit afae6fc331
6 changed files with 179 additions and 12 deletions
--- a/README.md
+++ b/README.md
@@ -24,7 +24,9 @@ python -m virtualenv .venv
 source .venv/bin/activate
 ```

-Install the required packages with
+Before installing the Python packages, you'll need to have the `portaudio` system package installed.
+
+Then you can install the required packages with

 ```bash
 pip install -r requirements.txt
--- a/main.py
+++ b/main.py
@@ -1,27 +1,51 @@
 import qi
 import zmq
-import time
+
+from src.audio_streaming import AudioStreaming
+from state import state


 def say(session, message):
    tts = session.service("ALTextToSpeech")
    tts.say(message)

-if __name__ == "__main__":
-    app = qi.Application()
-    app.start()
-    session = app.session

+def listen_for_messages(session):
    context = zmq.Context()
    socket = context.socket(zmq.SUB)
    socket.connect("tcp://localhost:5556")
-    socket.setsockopt_string(zmq.SUBSCRIBE, u"") # u because Python 2 shenanigans
+    socket.setsockopt_string(zmq.SUBSCRIBE, u"")  # u because Python 2 shenanigans

-    while True:
-        print("Listening for message")
+    poller = zmq.Poller()
+    poller.register(socket, zmq.POLLIN)
+
+    print("Listening for messages")
+    while not state.exit_event.is_set():
+        if not poller.poll(200): continue  # At most 200 ms delay after CTRL+C
+        # We now know there's a message waiting for us
        message = socket.recv_string()
        print("Received message: {}".format(message))

-        say(session, message)
+        if session: say(session, message)

-        time.sleep(1)
+
+def main():
+    try:
+        app = qi.Application()
+        app.start()
+        session = app.session
+    except RuntimeError:
+        session = None
+
+    audio_streamer = AudioStreaming()
+    audio_streamer.run()
+
+    listen_for_messages(session)  # Runs indefinitely, until CTRL+C
+
+
+if __name__ == "__main__":
+    try:
+        state.initialize()
+        main()
+    finally:
+        state.deinitialize()
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
 pyzmq<16
+pyaudio<=0.2.11
--- a/src/init.py
+++ b/src/init.py
--- a/src/audio_streaming.py
+++ b/src/audio_streaming.py
@@ -0,0 +1,93 @@
+import threading
+
+import pyaudio
+import zmq
+
+from state import state
+
+
+def choose_mic_interactive(audio):
+    """Choose a microphone to use. The `audio` parameter is an instance of PyAudio. Returns a dict."""
+    device_count = audio.get_device_count()
+    print("Found {} audio devices:".format(device_count))
+    for i in range(device_count):
+        print("- {}: {}".format(i, audio.get_device_info_by_index(i)["name"]))
+
+    microphone_index = None
+    while microphone_index is None:
+        chosen = input("Which device would you like to use?\n> ")
+        try:
+            chosen = int(chosen)
+            if chosen < 0 or chosen > device_count: raise ValueError()
+            microphone_index = chosen
+        except ValueError:
+            print("Please enter a number between 0 and {}".format(device_count))
+
+    chosen_microphone = audio.get_device_info_by_index(microphone_index)
+    print("Chose microphone \"{}\"".format(chosen_microphone["name"]))
+    return chosen_microphone
+
+
+def choose_mic_default(audio):
+    """Choose a microphone to use based on defaults. The `audio` parameter is a PyAudio. Returns a dict."""
+    default_device = audio.get_default_input_device_info()
+    return default_device
+
+
+class AudioStreaming:
+    def __init__(self, port=5557):
+        self.port = port
+        self.audio = pyaudio.PyAudio()
+        self.microphone = choose_mic_default(self.audio)
+        self.thread = None
+
+    def run(self):
+        self.thread = threading.Thread(target=self._stream)
+        self.thread.start()
+
+    def wait_until_done(self):
+        if not self.thread: return
+        self.thread.join()
+
+    def _stream(self):
+        context = zmq.Context()
+        socket = context.socket(zmq.PUB)
+        socket.bind("tcp://*:{}".format(self.port))
+
+        chunk = 512  # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
+
+        stream = self.audio.open(
+            format=pyaudio.paFloat32,
+            channels=self.microphone["maxInputChannels"],
+            rate=16000,
+            input=True,
+            input_device_index=self.microphone["index"],
+            frames_per_buffer=chunk,
+        )
+
+        try:
+            while not state.exit_event.is_set():
+                data = stream.read(chunk)
+                socket.send(data)
+        finally:
+            stream.stop_stream()
+            stream.close()
+
+
+if __name__ == "__main__":
+    state.initialize()
+    try:
+        audio = AudioStreaming()
+        print("Starting audio streaming...")
+        audio.run()
+
+        import time
+        end = time.time() + 10
+        while not state.exit_event.is_set() and time.time() < end:
+            print "\rExiting in {:.2f} seconds".format(end - time.time()),
+            time.sleep(0.05)
+
+        state.exit_event.set()
+        audio.wait_until_done()
+    finally:
+        state.deinitialize()
--- a/state.py
+++ b/state.py
@@ -0,0 +1,47 @@
+import signal
+import threading
+
+
+class State(object):
+    """
+    Do not create an instance of this class directly: use the instance `state` below. This state must be initiated once,
+    probably when your program starts.
+
+    This class is used to share state between threads. For example, when the program is quit, that all threads can
+    detect this via the `exit_event` property being set.
+    """
+    def __init__(self):
+        self.is_initialized = False
+        self.exit_event = None
+
+    def initialize(self):
+        if self.is_initialized:
+            print("Already initialized")
+            return
+
+        self.exit_event = threading.Event()
+        def handle_exit(_, __):
+            print("Exiting.")
+            self.exit_event.set()
+        signal.signal(signal.SIGINT, handle_exit)
+        signal.signal(signal.SIGTERM, handle_exit)
+
+        self.is_initialized = True
+
+    def deinitialize(self):
+        if not self.is_initialized: return
+        self.is_initialized = False
+
+    def __getattribute__(self, name):
+        # Enforce that the state is initialized before accessing any property (aside from the basic ones)
+        if name in ("initialize", "deinitialize", "is_initialized", "__dict__", "__class__"):
+            return object.__getattribute__(self, name)
+
+        if not self.is_initialized:
+            raise RuntimeError("State must be initialized before accessing '%s'" % name)
+
+        return object.__getattribute__(self, name)
+
+
+# Must call `.initialize` before use
+state = State()