// File: onnx-voice-changer/frontend/src/hooks/useAudioPipeline.ts
// Language: TypeScript (listed by the repository viewer as 321 lines, 10 KiB)

import { useEffect, useRef, useState, useCallback } from 'react';
import { AudioConfig, ConnectionStatus, HardwareDevice } from '../types/audio';
/** Number of samples kept in each rolling waveform display buffer. */
const DISPLAY_BUFFER_SIZE = 4096;

/** Buffer size (in sample frames) requested from the ScriptProcessorNode. */
const PROCESSOR_BUFFER_SIZE = 4096;

/** Upper bound on unanswered send timestamps kept for round-trip pairing. */
const MAX_PENDING_TIMESTAMPS = 256;

/**
 * Shifts `display` left by `samples.length` and writes `samples` at the end,
 * so the buffer always holds the newest values. When `samples` is at least as
 * long as `display`, only its tail is kept.
 */
const appendToRollingBuffer = (display: Float32Array, samples: Float32Array): void => {
  if (samples.length >= display.length) {
    display.set(samples.subarray(samples.length - display.length));
    return;
  }
  display.copyWithin(0, samples.length);
  display.set(samples, display.length - samples.length);
};

/**
 * Copies a JSON-decoded visualizer frame to the start of `display`.
 * Non-array frames are ignored; frames longer than `display` are truncated to
 * their tail instead of throwing a RangeError.
 */
const writeVisualizerFrame = (display: Float32Array, frame: unknown): void => {
  if (!Array.isArray(frame)) return;
  const samples = Float32Array.from(frame);
  display.set(
    samples.length > display.length
      ? samples.subarray(samples.length - display.length)
      : samples
  );
};

/**
 * React hook that owns the client side of the voice-changer audio pipeline.
 *
 * It manages the WebSocket to the server, captures microphone audio with a
 * ScriptProcessorNode, streams fixed-size Float32 chunks to the server,
 * schedules playback of the returned audio, and maintains rolling buffers for
 * waveform display.
 *
 * @param wsUrl - WebSocket endpoint of the server.
 * @param config - Current audio configuration; it is sent to the server on
 *   connect and again whenever it changes.
 * @param onConfigSync - Invoked when the server sends an `init_devices`
 *   message, with the target sample rate (40000 if absent or invalid) and the
 *   device list (empty if absent).
 * @returns Connection/stream state, control functions (`connect`,
 *   `disconnect`, `startStream`, `stopStream`, `setPlayOutput`) and refs to
 *   the input/output display buffers.
 */
export const useAudioPipeline = (
  wsUrl: string,
  config: AudioConfig,
  onConfigSync: (sr: number, list: HardwareDevice[]) => void
) => {
  const [status, setStatus] = useState<ConnectionStatus>('disconnected');
  const [rtt, setRtt] = useState<number | null>(null);
  const [processingTime, setProcessingTime] = useState<number | null>(null);
  const [isTalking, setIsTalking] = useState<boolean>(false);
  const [isStreaming, setIsStreaming] = useState<boolean>(false);
  const [playOutput, setPlayOutput] = useState<boolean>(true);

  const socketRef = useRef<WebSocket | null>(null);
  const audioCtxRef = useRef<AudioContext | null>(null);
  const micStreamRef = useRef<MediaStream | null>(null);
  const micSourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  // Sample rate used to build playback buffers; updated by `config_success`.
  const sampleRateRef = useRef<number>(40000);

  // Rolling buffers read by the waveform canvases.
  const inputDisplayBuf = useRef<Float32Array>(new Float32Array(DISPLAY_BUFFER_SIZE));
  const outputDisplayBuf = useRef<Float32Array>(new Float32Array(DISPLAY_BUFFER_SIZE));
  // Microphone samples not yet large enough to form a full chunk.
  const micAccumulator = useRef<Float32Array>(new Float32Array(0));

  // Playback scheduling & timing.
  const sentTimestamps = useRef<{ id: number; sent: number }[]>([]);
  const nextPlaybackTime = useRef<number>(0);
  const outputChunkQueue = useRef<{ data: Float32Array; startTime: number }[]>([]);

  // Incremented whenever capture is started or released, so an in-flight
  // getUserMedia() can detect that it was cancelled while awaiting.
  const captureSession = useRef<number>(0);

  // Mirrors of the latest props/state. Long-lived handlers (WebSocket events,
  // onaudioprocess) read these instead of closing over values that would go
  // stale after a re-render.
  const configRef = useRef<AudioConfig>(config);
  configRef.current = config;
  const playOutputRef = useRef<boolean>(playOutput);
  playOutputRef.current = playOutput;
  const onConfigSyncRef = useRef(onConfigSync);
  onConfigSyncRef.current = onConfigSync;

  /** Serializes the current config and sends it if the socket is open. */
  const sendConfig = useCallback(() => {
    const socket = socketRef.current;
    if (!socket || socket.readyState !== WebSocket.OPEN) return;
    socket.send(JSON.stringify({
      type: 'config',
      model_name: config.model_name,
      device: config.device,
      f0_method: config.f0_method,
      f0_up_key: config.f0_up_key,
      noise_gate: config.noise_gate,
      input_gain: config.input_gain,
      output_gain: config.output_gain,
      // Falls back to 44100 until an AudioContext exists.
      input_sr: audioCtxRef.current ? audioCtxRef.current.sampleRate : 44100,
      routing_mode: config.routing_mode,
      input_device: config.input_device,
      output_device: config.output_device,
      chunk_size: config.chunk_size
    }));
  }, [config]);
  const sendConfigRef = useRef(sendConfig);
  sendConfigRef.current = sendConfig;

  /**
   * Handles one binary frame from the server: element 0 is read as the
   * processing time and the remaining elements as mono Float32 PCM, which is
   * scheduled for playback and pushed to the output display buffer.
   */
  const handleServerAudio = useCallback((arrayBuffer: ArrayBuffer) => {
    const ctx = audioCtxRef.current;
    if (!ctx) return;

    // Pair this reply with the oldest unanswered chunk for the RTT estimate.
    const oldest = sentTimestamps.current.shift();
    if (oldest) {
      setRtt(Math.round(performance.now() - oldest.sent));
    }

    // A Float32Array view requires a byte length that is a multiple of 4, and
    // the frame must contain at least the header element.
    const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
    if (arrayBuffer.byteLength < bytesPerSample || arrayBuffer.byteLength % bytesPerSample !== 0) {
      console.warn('Ignoring malformed audio frame:', arrayBuffer.byteLength, 'bytes');
      return;
    }

    const payload = new Float32Array(arrayBuffer);
    const procTime = payload[0];
    const pcmData = payload.subarray(1);
    if (Number.isFinite(procTime)) {
      setProcessingTime(Math.max(0, Math.round(procTime)));
    }
    // createBuffer() rejects a zero-length buffer, so a header-only frame has
    // nothing to play.
    if (pcmData.length === 0) return;

    const audioBuf = ctx.createBuffer(1, pcmData.length, sampleRateRef.current);
    audioBuf.getChannelData(0).set(pcmData);
    const source = ctx.createBufferSource();
    source.buffer = audioBuf;
    // Only route to the speakers when local listening is enabled. Read from
    // the ref so toggling takes effect on an already-open connection.
    if (playOutputRef.current) {
      source.connect(ctx.destination);
    }

    // Schedule back-to-back with the previous chunk. Re-anchor with a small
    // lead when playback has fallen behind or has drifted too far ahead.
    const currentTime = ctx.currentTime;
    const duration = audioBuf.duration;
    const adaptiveBuf = Math.min(duration * 2.5, 0.50);
    const fellBehind = nextPlaybackTime.current < currentTime;
    const tooFarAhead = nextPlaybackTime.current > currentTime + duration * 5.0;
    if (fellBehind || tooFarAhead) {
      nextPlaybackTime.current = currentTime + adaptiveBuf;
    }
    const startSchedule = nextPlaybackTime.current;
    source.start(startSchedule);
    nextPlaybackTime.current += duration;

    // Remember scheduled chunks; drop those that ended more than 2 s ago.
    const queue = outputChunkQueue.current;
    queue.push({ data: pcmData, startTime: startSchedule });
    while (queue.length > 0) {
      const head = queue[0];
      const endTime = head.startTime + head.data.length / sampleRateRef.current;
      if (endTime >= ctx.currentTime - 2.0) break;
      queue.shift();
    }

    appendToRollingBuffer(outputDisplayBuf.current, pcmData);
  }, []);

  /** Closes the current socket, if any, and marks the hook as disconnected. */
  const disconnect = useCallback(() => {
    const socket = socketRef.current;
    socketRef.current = null;
    if (socket) {
      // Detach handlers first so late events from this socket cannot overwrite
      // the state of a connection opened afterwards.
      socket.onopen = null;
      socket.onclose = null;
      socket.onerror = null;
      socket.onmessage = null;
      try {
        socket.close();
      } catch (e) {
        console.warn('WS close failed:', e);
      }
    }
    setStatus('disconnected');
  }, []);

  /**
   * Opens a new WebSocket to `wsUrl`, replacing any existing connection, and
   * sends the latest config once the socket is open.
   */
  const connect = useCallback(() => {
    disconnect();
    setStatus('connecting');

    let ws: WebSocket;
    try {
      ws = new WebSocket(wsUrl);
    } catch (e) {
      console.error('WS Connection failed:', e);
      setStatus('disconnected');
      return;
    }
    ws.binaryType = 'arraybuffer';
    // Track the socket immediately so disconnect() can abort a pending
    // connection; all senders check readyState before using it.
    socketRef.current = ws;
    const isCurrent = () => socketRef.current === ws;

    ws.onopen = () => {
      if (!isCurrent()) return;
      setStatus('connected');
      sendConfigRef.current();
    };

    const handleConnectionLost = () => {
      if (!isCurrent()) return;
      socketRef.current = null;
      setStatus('disconnected');
    };
    ws.onclose = handleConnectionLost;
    ws.onerror = handleConnectionLost;

    ws.onmessage = (event: MessageEvent) => {
      if (!isCurrent()) return;
      if (event.data instanceof ArrayBuffer) {
        handleServerAudio(event.data);
        return;
      }
      if (typeof event.data !== 'string') return;

      let parsed: unknown;
      try {
        parsed = JSON.parse(event.data);
      } catch (e) {
        console.error('WS JSON parse error:', e);
        return;
      }
      if (typeof parsed !== 'object' || parsed === null) return;
      const data = parsed as Record<string, unknown>;
      const targetSr =
        typeof data.target_sr === 'number' && Number.isFinite(data.target_sr) && data.target_sr > 0
          ? data.target_sr
          : null;

      try {
        if (data.type === 'config_success') {
          // Keep the previous rate when the server omits or garbles it; an
          // invalid rate would make every later createBuffer() call throw.
          if (targetSr !== null) {
            sampleRateRef.current = targetSr;
          } else {
            console.warn('config_success without a valid target_sr:', data.target_sr);
          }
        } else if (data.type === 'init_devices') {
          const devices = Array.isArray(data.devices) ? (data.devices as HardwareDevice[]) : [];
          onConfigSyncRef.current(targetSr !== null ? targetSr : 40000, devices);
        } else if (data.type === 'visualizer') {
          // Hardware mode visualizer data stream.
          writeVisualizerFrame(inputDisplayBuf.current, data.input);
          writeVisualizerFrame(outputDisplayBuf.current, data.output);
        }
      } catch (e) {
        console.error('WS message handling error:', e);
      }
    };
  }, [wsUrl, disconnect, handleServerAudio]);

  /** Tears down microphone capture and resets per-stream bookkeeping. */
  const releaseCapture = useCallback(() => {
    captureSession.current += 1;
    if (micStreamRef.current) {
      micStreamRef.current.getTracks().forEach(t => t.stop());
      micStreamRef.current = null;
    }
    if (micSourceRef.current) {
      micSourceRef.current.disconnect();
      micSourceRef.current = null;
    }
    if (processorRef.current) {
      processorRef.current.onaudioprocess = null;
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    micAccumulator.current = new Float32Array(0);
    // Drop unanswered timestamps so the next stream's RTT is not computed
    // against chunks sent by this one.
    sentTimestamps.current = [];
    outputChunkQueue.current = [];
    nextPlaybackTime.current = 0;
  }, []);

  /** Stops streaming in either routing mode and clears the latency readouts. */
  const stopStream = useCallback(() => {
    setIsStreaming(false);
    setIsTalking(false);
    if (configRef.current.routing_mode === 'hardware') {
      const socket = socketRef.current;
      if (socket && socket.readyState === WebSocket.OPEN) {
        socket.send(JSON.stringify({
          type: 'config',
          routing_mode: 'browser' // tells server hardware stream to stop
        }));
      }
    }
    releaseCapture();
    setRtt(null);
    setProcessingTime(null);
  }, [releaseCapture]);

  /**
   * Starts streaming. In hardware mode this only re-sends the config; in
   * browser mode it opens the microphone and streams chunks of
   * `config.chunk_size` samples to the server.
   */
  const startStream = useCallback(async () => {
    if (configRef.current.routing_mode === 'hardware') {
      setIsStreaming(true);
      sendConfigRef.current();
      return;
    }

    // Release any previous capture so a repeated start does not leak the old
    // microphone stream and processor node.
    releaseCapture();
    const session = captureSession.current;

    try {
      let ctx = audioCtxRef.current;
      if (!ctx || ctx.state === 'closed') {
        const AudioContextCtor =
          window.AudioContext ||
          (window as Window & { webkitAudioContext?: typeof AudioContext }).webkitAudioContext;
        if (!AudioContextCtor) {
          throw new Error('Web Audio API is not supported in this browser');
        }
        ctx = new AudioContextCtor({ latencyHint: 'interactive' });
        audioCtxRef.current = ctx;
      }
      if (ctx.state === 'suspended') {
        await ctx.resume();
      }
      // The config sent on connect may have carried the 44100 fallback; resend
      // now that the real context sample rate is known.
      sendConfigRef.current();

      const micStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true,
        },
      });
      if (session !== captureSession.current) {
        // Stopped (or unmounted) while waiting for the permission prompt.
        micStream.getTracks().forEach(t => t.stop());
        return;
      }

      const micSource = ctx.createMediaStreamSource(micStream);
      const processor = ctx.createScriptProcessor(PROCESSOR_BUFFER_SIZE, 1, 1);
      micStreamRef.current = micStream;
      micSourceRef.current = micSource;
      processorRef.current = processor;

      processor.onaudioprocess = (e) => {
        const inputData = e.inputBuffer.getChannelData(0);
        appendToRollingBuffer(inputDisplayBuf.current, inputData);

        // Append the new samples to whatever was left from the last callback.
        const pending = micAccumulator.current;
        const merged = new Float32Array(pending.length + inputData.length);
        merged.set(pending);
        merged.set(inputData, pending.length);

        // Read the latest chunk size on every callback. A non-positive or
        // non-numeric value would otherwise loop forever or never drain.
        const requested = Math.floor(Number(configRef.current.chunk_size));
        const size = Number.isFinite(requested) && requested > 0 ? requested : inputData.length;

        let offset = 0;
        let talking: boolean | null = null;
        while (merged.length - offset >= size) {
          const chunk = merged.slice(offset, offset + size);
          offset += size;

          // Simple RMS for the voice-activity badge.
          let sum = 0;
          for (let i = 0; i < chunk.length; i++) sum += chunk[i] * chunk[i];
          talking = Math.sqrt(sum / chunk.length) > 0.005;

          // Stream raw float PCM bytes.
          const ws = socketRef.current;
          if (ws && ws.readyState === WebSocket.OPEN) {
            const time = performance.now();
            const pendingStamps = sentTimestamps.current;
            pendingStamps.push({ id: time, sent: time });
            // Bound the list in case the server stops answering.
            if (pendingStamps.length > MAX_PENDING_TIMESTAMPS) pendingStamps.shift();
            ws.send(chunk.buffer);
          }
        }
        micAccumulator.current = offset === 0 ? merged : merged.slice(offset);
        if (talking !== null) setIsTalking(talking);
      };

      micSource.connect(processor);
      // A ScriptProcessorNode only fires while connected to a destination.
      processor.connect(ctx.destination);
      nextPlaybackTime.current = 0;
      setIsStreaming(true);
    } catch (e) {
      console.error('Failed to start microphone streaming:', e);
      alert('Microphone access failed: ' + (e instanceof Error ? e.message : String(e)));
      stopStream();
    }
  }, [releaseCapture, stopStream]);

  // Sync config whenever the config prop changes.
  useEffect(() => {
    sendConfig();
  }, [config, sendConfig]);

  // Unmount cleanup. `disconnect` and `stopStream` are referentially stable,
  // so this does not run on ordinary config changes.
  useEffect(() => {
    return () => {
      // Stop first so the hardware-mode stop message can still be sent.
      stopStream();
      disconnect();
      const ctx = audioCtxRef.current;
      audioCtxRef.current = null;
      if (ctx && ctx.state !== 'closed') {
        // Best effort: nothing useful can be done if closing fails here.
        ctx.close().catch(() => {});
      }
    };
  }, [disconnect, stopStream]);

  return {
    status,
    rtt,
    processingTime,
    isTalking,
    isStreaming,
    playOutput,
    setPlayOutput,
    connect,
    disconnect,
    startStream,
    stopStream,
    inputBuffer: inputDisplayBuf,
    outputBuffer: outputDisplayBuf
  };
};
export default useAudioPipeline;