feat: stream audio to CB

Uses PyAudio and ZeroMQ to publish audio chunks. ref: N25B-119
2025-10-01 10:50:53 +02:00
parent da99b5cd62
commit afae6fc331
6 changed files with 179 additions and 12 deletions
--- a/src/init.py
+++ b/src/init.py
--- a/src/audio_streaming.py
+++ b/src/audio_streaming.py
@@ -0,0 +1,93 @@
+import threading
+
+import pyaudio
+import zmq
+
+from state import state
+
+
+def choose_mic_interactive(audio):
+    """Ask the user on the console which audio device to use.
+
+    Lists every device reported by `audio` (an instance of PyAudio) and keeps
+    prompting until the user enters a valid device index.
+
+    Returns the device info dict of the chosen device.
+    Raises IOError if `audio` reports no devices at all.
+    """
+    device_count = audio.get_device_count()
+    print("Found {} audio devices:".format(device_count))
+    for i in range(device_count):
+        print("- {}: {}".format(i, audio.get_device_info_by_index(i)["name"]))
+
+    if device_count < 1:
+        # Without this guard the prompt below could never be answered validly.
+        raise IOError("No audio devices found")
+
+    # On Python 2, input() evaluates whatever the user types; prefer
+    # raw_input() when it exists so the answer is always read as plain text.
+    try:
+        read_line = raw_input
+    except NameError:  # Python 3: input() already returns the raw text
+        read_line = input
+
+    while True:
+        answer = read_line("Which device would you like to use?\n> ")
+        try:
+            microphone_index = int(answer)
+        except ValueError:
+            microphone_index = -1  # not a number: force a re-prompt
+        # Valid indices run from 0 up to and including device_count - 1.
+        if 0 <= microphone_index < device_count:
+            break
+        print("Please enter a number between 0 and {}".format(device_count - 1))
+
+    chosen_microphone = audio.get_device_info_by_index(microphone_index)
+    print("Chose microphone \"{}\"".format(chosen_microphone["name"]))
+    return chosen_microphone
+
+
+def choose_mic_default(audio):
+    """Return the info dict of the default input device reported by `audio` (a PyAudio instance)."""
+    return audio.get_default_input_device_info()
+
+
+class AudioStreaming:
+    """Publish microphone audio over a ZeroMQ PUB socket from a background thread.
+
+    The default input device reported by PyAudio is selected at construction
+    time. Streaming continues until `state.exit_event` is set.
+    """
+
+    def __init__(self, port=5557):
+        """Create the PyAudio instance and select the default input device.
+
+        `port` is the TCP port the PUB socket binds to once streaming starts.
+        """
+        self.port = port
+        self.audio = pyaudio.PyAudio()
+        self.microphone = choose_mic_default(self.audio)
+        self.thread = None
+
+    def run(self):
+        """Start streaming in a new thread and return immediately."""
+        self.thread = threading.Thread(target=self._stream)
+        self.thread.start()
+
+    def wait_until_done(self):
+        """Block until the streaming thread finishes; does nothing if `run` was never called."""
+        if not self.thread:
+            return
+        self.thread.join()
+
+    def _stream(self):
+        """Read chunks from the microphone and publish each one as a ZeroMQ message.
+
+        Loops until `state.exit_event` is set. The audio stream, the socket and
+        the ZeroMQ context are released on the way out, including when binding,
+        opening the stream or reading from it raises.
+        """
+        context = zmq.Context()
+        socket = context.socket(zmq.PUB)
+        try:
+            socket.bind("tcp://*:{}".format(self.port))
+
+            chunk = 512  # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
+
+            # NOTE(review): all input channels of the device are opened, so each
+            # message holds `chunk` frames for every channel - confirm the
+            # subscriber expects that layout.
+            stream = self.audio.open(
+                format=pyaudio.paFloat32,
+                channels=self.microphone["maxInputChannels"],
+                rate=16000,
+                input=True,
+                input_device_index=self.microphone["index"],
+                frames_per_buffer=chunk,
+            )
+
+            try:
+                while not state.exit_event.is_set():
+                    data = stream.read(chunk)
+                    socket.send(data)
+            finally:
+                stream.stop_stream()
+                stream.close()
+        finally:
+            # linger=0 discards unsent chunks so shutdown cannot block on a
+            # slow subscriber; the context can only terminate once the socket
+            # is closed.
+            socket.close(linger=0)
+            context.term()
+
+
+if __name__ == "__main__":
+    # Manual smoke test: stream audio for up to ten seconds, then shut down.
+    import sys
+    import time
+
+    state.initialize()
+    try:
+        audio = AudioStreaming()
+        print("Starting audio streaming...")
+        audio.run()
+
+        try:
+            end = time.time() + 10
+            while not state.exit_event.is_set() and time.time() < end:
+                # write() + flush() works on Python 2 and 3 alike, and makes the
+                # newline-less countdown show up immediately on a buffered stdout.
+                sys.stdout.write("\rExiting in {:.2f} seconds".format(end - time.time()))
+                sys.stdout.flush()
+                time.sleep(0.05)
+        finally:
+            # Always stop and join the streaming thread, even when the countdown
+            # is interrupted; otherwise the thread would keep the process alive.
+            state.exit_event.set()
+            audio.wait_until_done()
+    finally:
+        state.deinitialize()