/**
 * Omni Real-Time Voice Changer - Client App
 * High-performance browser-based mic streaming and RVC playback.
 */

// --- UI Elements ---
const wsUrlInput = document.getElementById('ws_url');
const connectionStatus = document.getElementById('connection_status');
const connectBtn = document.getElementById('connect_btn');
const streamBtn = document.getElementById('stream_btn');
const playToggleBtn = document.getElementById('play_toggle_btn');
const modelSelect = document.getElementById('model_select');
const deviceSelect = document.getElementById('device_select');
const transposeSlider = document.getElementById('transpose_slider');
const transposeVal = document.getElementById('transpose_val');
const gateSlider = document.getElementById('gate_slider');
const gateVal = document.getElementById('gate_val');
const inputGainSlider = document.getElementById('input_gain_slider');
const inputGainVal = document.getElementById('input_gain_val');
const outputGainSlider = document.getElementById('output_gain_slider');
const outputGainVal = document.getElementById('output_gain_val');
const chunkSelect = document.getElementById('chunk_select');
const noiseCancelCheckbox = document.getElementById('noise_cancel_checkbox');
const routingModeSelect = document.getElementById('routing_mode_select');
const hardwareDevicesPanel = document.getElementById('hardware_devices_panel');
const serverInputSelect = document.getElementById('server_input_select');
const serverOutputSelect = document.getElementById('server_output_select');
const browserNoiseCancelGroup = document.getElementById('browser_noise_cancel_group');
const presetLatencyBtn = document.getElementById('preset_latency_btn');
const presetQualityBtn = document.getElementById('preset_quality_btn');
const inputCanvas = document.getElementById('input_canvas');
const outputCanvas = document.getElementById('output_canvas');
const hudLatency = document.getElementById('hud_latency');
const hudTime = document.getElementById('hud_time');
const hudGateStatus = document.getElementById('hud_gate_status');
const hudSr = document.getElementById('hud_sr');

// --- Audio Visualizer Contexts ---
const inputCtx = inputCanvas.getContext('2d');
const outputCtx = outputCanvas.getContext('2d');

// --- Web Audio State ---
let audioContext = null;
let micStream = null;
let micSourceNode = null;
let scriptProcessorNode = null;
let micAccumulator = new Float32Array(0); // Accumulates audio for large/custom chunk sizes

// --- WebSocket State ---
let socket = null;
let isStreaming = false;
let playOutput = true;
let targetSampleRate = 40000; // RVC model default; overwritten when the server reports its rate

// --- Playback Sync State ---
let nextPlaybackTime = 0;
// 100 ms jitter buffer. NOTE(review): not referenced anywhere in the visible code.
const safetyDelay = 0.10;

// --- Latency Tracking Queues ---
let sentTimestamps = [];
const maxSentLogs = 50;

// --- SMOOTH VISUALIZER (rolling display buffers + requestAnimationFrame loop) ---
// Fixed number of samples shown per waveform, independent of the streaming chunk size.
const VIS_DISPLAY_SIZE = 4096;
let inputDisplayBuf = new Float32Array(VIS_DISPLAY_SIZE); // rolling window of the latest input samples
let outputDisplayBuf = new Float32Array(VIS_DISPLAY_SIZE); // fallback output window (hardware mode)
let rafHandle = null;

// Time-synced output queue: each entry = { data: Float32Array, startTime: number (audioContext seconds) }
let outputChunkQueue = [];

/**
 * Scrolls `newSamples` into the tail of a rolling display buffer, discarding
 * the oldest samples at the front.
 *
 * The capacity is taken from `displayBuf.length`, so the helper works for a
 * buffer of any size rather than only VIS_DISPLAY_SIZE.
 *
 * @param {Float32Array} displayBuf - Rolling buffer, mutated in place.
 * @param {Float32Array|number[]} newSamples - Samples to append (newest last).
 */
function pushToDisplayBuf(displayBuf, newSamples) {
  const capacity = displayBuf.length;
  const incoming = newSamples.length;
  if (incoming === 0) return;

  if (incoming >= capacity) {
    // More data than fits: keep only the newest `capacity` samples.
    const start = incoming - capacity;
    // subarray() is a zero-copy view; plain arrays fall back to slice().
    const tail = typeof newSamples.subarray === 'function'
      ? newSamples.subarray(start)
      : newSamples.slice(start);
    displayBuf.set(tail);
    return;
  }

  // Shift existing content left by `incoming`, then write the new samples at the end.
  displayBuf.copyWithin(0, incoming);
  displayBuf.set(newSamples, capacity - incoming);
}

/**
 * Builds a VIS_DISPLAY_SIZE window of output samples ending at
 * `audioContext.currentTime`, assembled from the scheduled chunks in
 * `outputChunkQueue`. Chunks that ended before the window are removed from
 * the queue as a side effect.
 *
 * @returns {Float32Array} A fresh window, or `outputDisplayBuf` when there is
 *   no audio context or nothing is queued.
 */
function buildTimeSyncedOutputBuf() {
  if (!audioContext || outputChunkQueue.length === 0) return outputDisplayBuf;

  const now = audioContext.currentTime;
  const windowStart = now - VIS_DISPLAY_SIZE / targetSampleRate;

  // Drop chunks that finished playing before the window begins.
  while (outputChunkQueue.length > 0) {
    const oldest = outputChunkQueue[0];
    const oldestEnd = oldest.startTime + oldest.data.length / targetSampleRate;
    if (!(oldestEnd < windowStart)) break;
    outputChunkQueue.shift();
  }

  const out = new Float32Array(VIS_DISPLAY_SIZE);
  for (const chunk of outputChunkQueue) {
    const chunkEnd = chunk.startTime + chunk.data.length / targetSampleRate;

    // Intersection of [windowStart, now] and [chunk.startTime, chunkEnd].
    const overlapStart = Math.max(windowStart, chunk.startTime);
    const overlapEnd = Math.min(now, chunkEnd);
    if (overlapStart >= overlapEnd) continue;

    const srcOffset = Math.floor((overlapStart - chunk.startTime) * targetSampleRate);
    const destOffset = Math.floor((overlapStart - windowStart) * targetSampleRate);
    const count = Math.floor((overlapEnd - overlapStart) * targetSampleRate);

    // Clamp so rounding can never read past the chunk or write past the window.
    const safeCount = Math.min(
      count,
      chunk.data.length - srcOffset,
      VIS_DISPLAY_SIZE - destOffset
    );
    if (safeCount > 0) {
      out.set(chunk.data.subarray(srcOffset, srcOffset + safeCount), destOffset);
    }
  }
  return out;
}

/**
 * Starts the requestAnimationFrame drawing loop for both waveforms.
 * Does nothing when a loop is already running.
 */
function startVisualizerLoop() {
  if (rafHandle) return;

  const frame = () => {
    drawWaveform(inputDisplayBuf, inputCanvas, '#6366f1');
    // Output is time-synced: scrub through queued chunks using the audioContext clock.
    drawWaveform(buildTimeSyncedOutputBuf(), outputCanvas, '#a855f7');
    rafHandle = requestAnimationFrame(frame);
  };
  rafHandle = requestAnimationFrame(frame);
}

/**
 * Cancels the drawing loop (if any) and empties the time-synced output queue.
 */
function stopVisualizerLoop() {
  if (rafHandle) {
    cancelAnimationFrame(rafHandle);
    rafHandle = null;
  }
  outputChunkQueue = [];
}

/**
 * Matches each canvas' backing-store size to its CSS size multiplied by the
 * device pixel ratio.
 */
function resizeCanvases() {
  // Fall back to 1 when the ratio is unavailable, and round so fractional
  // ratios (e.g. 1.25) yield an explicit integer pixel size.
  const dpr = window.devicePixelRatio || 1;
  for (const canvas of [inputCanvas, outputCanvas]) {
    canvas.width = Math.round(canvas.clientWidth * dpr);
    canvas.height = Math.round(canvas.clientHeight * dpr);
  }
}

resizeCanvases();
window.addEventListener('resize', resizeCanvases);

// Connect / Disconnect WebSocket
connectBtn.addEventListener('click', () => {
  const isOpenOrOpening = socket &&
    (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING);
  if (isOpenOrOpening) {
    disconnectServer();
  } else {
    connectServer();
  }
});

/**
 * Opens a WebSocket to the URL in the address field and wires its handlers.
 *
 * Every handler first checks that its socket is still the active one. Close
 * and error events arrive asynchronously, so without that check a late event
 * from a previous socket would tear down a newer connection.
 */
function connectServer() {
  const url = wsUrlInput.value.trim();
  updateConnectionStatus('connecting');

  let ws;
  try {
    ws = new WebSocket(url);
  } catch (e) {
    // The constructor throws synchronously on a malformed URL.
    console.error('Connection failed:', e);
    disconnectServer();
    return;
  }

  ws.binaryType = 'arraybuffer';
  socket = ws;
  const isCurrent = () => socket === ws;

  ws.onopen = () => {
    if (!isCurrent()) return;
    console.log('Connected to RVC Server');
    updateConnectionStatus('connected');
    sendConfigToServer(); // Send initial configuration
    streamBtn.disabled = false;
    playToggleBtn.disabled = false;
  };

  ws.onclose = () => {
    console.log('WebSocket Connection Closed');
    if (isCurrent()) disconnectServer();
  };

  ws.onerror = (err) => {
    console.error('WebSocket Error:', err);
    if (isCurrent()) disconnectServer();
  };

  ws.onmessage = (event) => {
    if (!isCurrent()) return;
    if (typeof event.data === 'string') {
      handleServerTextMessage(event.data);
    } else if (event.data instanceof ArrayBuffer) {
      // Binary processed PCM audio chunk returned from the server (browser mode only)
      handleServerAudioChunk(event.data);
    }
  };
}

/**
 * Handles a JSON text frame from the server (config acknowledgement, device
 * list, visualizer data or error report).
 *
 * @param {string} text - Raw text payload of the WebSocket message.
 */
function handleServerTextMessage(text) {
  try {
    const response = JSON.parse(text);
    switch (response.type) {
      case 'config_success': {
        const sampleRate = Number(response.target_sr);
        // Only accept a usable rate; a missing or garbage value would break
        // buffer creation and the visualizer timing maths.
        if (Number.isFinite(sampleRate) && sampleRate > 0) {
          targetSampleRate = sampleRate;
        } else {
          console.warn('Ignoring invalid target_sr from server:', response.target_sr);
        }
        console.log('Server configuration synced successfully:', response);
        break;
      }
      case 'init_devices':
        populateServerDevices(response.devices, response.default_input, response.default_output);
        break;
      case 'visualizer':
        // Feed the rolling display buffers; the animation loop does the drawing.
        pushToDisplayBuf(inputDisplayBuf, new Float32Array(response.input));
        pushToDisplayBuf(outputDisplayBuf, new Float32Array(response.output));
        if (!rafHandle) startVisualizerLoop();
        break;
      case 'error':
        alert('Server Error: ' + response.message);
        break;
      default:
        break;
    }
  } catch (e) {
    console.error('Error parsing text message:', e);
  }
}

/**
 * Stops any active stream, closes the current socket and resets the
 * connection UI to the disconnected state.
 */
function disconnectServer() {
  if (isStreaming) {
    stopStreaming();
  }

  if (socket) {
    const closing = socket;
    // Clear the reference first so late events from this socket are seen as stale.
    socket = null;
    try {
      closing.close();
    } catch (e) {
      // Best effort: the socket is being discarded either way.
      console.warn('Error while closing WebSocket:', e);
    }
  }

  updateConnectionStatus('disconnected');
  streamBtn.disabled = true;
  playToggleBtn.disabled = true;
}

/**
 * Updates the status badge and the connect button for a connection state.
 *
 * @param {string} status - 'connected', 'connecting', or anything else for disconnected.
 */
function updateConnectionStatus(status) {
  connectionStatus.className = 'status-badge ' + status;

  switch (status) {
    case 'connected':
      connectionStatus.textContent = 'Terhubung';
      connectBtn.textContent = 'Putuskan Server';
      connectBtn.className = 'btn btn-primary';
      break;
    case 'connecting':
      connectionStatus.textContent = 'Menghubungkan';
      connectBtn.textContent = 'Batal';
      break;
    default:
      connectionStatus.textContent = 'Terputus';
      connectBtn.textContent = 'Hubungkan Server';
      connectBtn.className = 'btn btn-primary';
      break;
  }
}

/**
 * Converts a device <select> value to a numeric id.
 *
 * @param {string} rawValue - The select's current value.
 * @returns {number|null} The id, or null when empty or not numeric.
 */
function parseDeviceId(rawValue) {
  if (!rawValue) return null;
  const id = Number.parseInt(rawValue, 10);
  return Number.isNaN(id) ? null : id;
}

// Config synchronization
/**
 * Sends the full current UI configuration to the server as a JSON 'config'
 * message. Does nothing unless the socket is open.
 */
function sendConfigToServer() {
  if (!socket || socket.readyState !== WebSocket.OPEN) return;

  const checkedF0 = document.querySelector('input[name="f0_method"]:checked');
  const config = {
    type: 'config',
    model_name: modelSelect.value,
    device: deviceSelect.value,
    // With no radio checked the key is left undefined, which JSON.stringify
    // omits, instead of throwing and sending nothing at all.
    f0_method: checkedF0 ? checkedF0.value : undefined,
    f0_up_key: Number.parseInt(transposeSlider.value, 10),
    noise_gate: parseFloat(gateSlider.value),
    input_gain: parseFloat(inputGainSlider.value),
    output_gain: parseFloat(outputGainSlider.value),
    input_sr: audioContext ? audioContext.sampleRate : 44100,
    routing_mode: routingModeSelect.value,
    input_device: parseDeviceId(serverInputSelect.value),
    output_device: parseDeviceId(serverOutputSelect.value),
    chunk_size: Number.parseInt(chunkSelect.value, 10)
  };

  socket.send(jsonEncode(config));
  console.log('Sent configuration change:', config);
}

/**
 * Appends an <option> to a select element.
 *
 * @param {HTMLSelectElement} select - Target dropdown.
 * @param {string} label - Visible text (assigned via textContent).
 * @param {*} [value] - Option value; left unset when undefined.
 * @param {boolean} [selected=false] - Whether to mark the option selected.
 */
function appendDeviceOption(select, label, value, selected = false) {
  const opt = document.createElement('option');
  if (value !== undefined) opt.value = value;
  opt.textContent = label;
  if (selected) opt.selected = true;
  select.appendChild(opt);
}

// Populate Server Audio Devices dropdowns
/**
 * Rebuilds the server input/output device dropdowns.
 *
 * @param {Array<Object>} devices - Entries carrying `id`, `name`,
 *   `max_input_channels` and `max_output_channels`.
 * @param {number} defaultInput - Id of the input device to preselect.
 * @param {number} defaultOutput - Id of the output device to preselect.
 */
function populateServerDevices(devices, defaultInput, defaultOutput) {
  serverInputSelect.innerHTML = '';
  serverOutputSelect.innerHTML = '';

  // A missing or malformed list is treated the same as an empty one.
  if (!Array.isArray(devices) || devices.length === 0) {
    appendDeviceOption(serverInputSelect, 'Tidak ada mic terdeteksi di server');
    appendDeviceOption(serverOutputSelect, 'Tidak ada output terdeteksi di server');
    return;
  }

  for (const device of devices) {
    const label = `[ID ${device.id}] ${device.name}`;
    if (device.max_input_channels > 0) {
      appendDeviceOption(serverInputSelect, label, device.id, device.id === defaultInput);
    }
    if (device.max_output_channels > 0) {
      appendDeviceOption(serverOutputSelect, label, device.id, device.id === defaultOutput);
    }
  }
  console.log('Successfully populated server hardware devices in UI.');
}

/**
 * Restarts the stream when one is active, so a changed setting takes effect.
 * The promise from startStreaming() is observed so a failure is logged
 * rather than becoming an unhandled rejection.
 */
function restartStreamIfActive() {
  if (!isStreaming) return;
  stopStreaming();
  startStreaming().catch((e) => console.error('Failed to restart streaming:', e));
}

// UI Event Listeners to trigger instant sync
modelSelect.addEventListener('change', sendConfigToServer);
deviceSelect.addEventListener('change', sendConfigToServer);
document.querySelectorAll('input[name="f0_method"]').forEach((radio) => {
  radio.addEventListener('change', sendConfigToServer);
});

// Sliders update their label while dragging ('input') and sync on release ('change').
transposeSlider.addEventListener('input', () => {
  const sign = Number(transposeSlider.value) >= 0 ? '+' : '';
  transposeVal.textContent = sign + transposeSlider.value + ' semitone';
});
transposeSlider.addEventListener('change', sendConfigToServer);

gateSlider.addEventListener('input', () => {
  gateVal.textContent = gateSlider.value + ' dB';
});
gateSlider.addEventListener('change', sendConfigToServer);

inputGainSlider.addEventListener('input', () => {
  inputGainVal.textContent = parseFloat(inputGainSlider.value).toFixed(1) + 'x';
});
inputGainSlider.addEventListener('change', sendConfigToServer);

outputGainSlider.addEventListener('input', () => {
  outputGainVal.textContent = parseFloat(outputGainSlider.value).toFixed(1) + 'x';
});
outputGainSlider.addEventListener('change', sendConfigToServer);

// Reinitialize the stream if the chunk size changes during active streaming
chunkSelect.addEventListener('change', restartStreamIfActive);

// Reinitialize the microphone with new noise-cancellation constraints if streaming
noiseCancelCheckbox.addEventListener('change', restartStreamIfActive);

// Helper to dynamically adjust UI layout based on Routing Mode
/**
 * Shows the server hardware device panel in 'hardware' routing mode and the
 * browser-only controls (listen toggle, noise-cancel checkbox) otherwise.
 */
function applyAudioRoutingUI() {
  const isHardware = routingModeSelect.value === 'hardware';
  hardwareDevicesPanel.style.display = isHardware ? 'block' : 'none';
  playToggleBtn.style.display = isHardware ? 'none' : 'inline-block'; // browser-only "Mendengarkan" button
  browserNoiseCancelGroup.style.display = isHardware ? 'none' : 'block'; // browser-only Noise Cancel checkbox
}

// Routing Mode Event Listeners
routingModeSelect.addEventListener('change', () => {
  applyAudioRoutingUI();
  sendConfigToServer();
  restartStreamIfActive();
});
serverInputSelect.addEventListener('change', sendConfigToServer);
serverOutputSelect.addEventListener('change', sendConfigToServer);

// Quick Presets Event
// Listeners

/**
 * Applies a quick preset: selects an f0 method radio, sets the chunk size,
 * syncs the configuration and restarts the stream if one is active.
 *
 * @param {string} f0Method - Value of the f0_method radio to check.
 * @param {string} chunkSize - Value to select in the chunk-size dropdown.
 * @param {string} logMessage - Message written to the console.
 */
function applyPreset(f0Method, chunkSize, logMessage) {
  const radio = document.querySelector(`input[name="f0_method"][value="${f0Method}"]`);
  if (radio) radio.checked = true;
  chunkSelect.value = chunkSize;
  console.log(logMessage);
  sendConfigToServer();
  if (isStreaming) {
    stopStreaming();
    startStreaming().catch((e) => console.error('Failed to restart streaming:', e));
  }
}

presetLatencyBtn.addEventListener('click', () => {
  applyPreset('pm', '8192', 'Preset loaded: Latency (PM + 8192)');
});

presetQualityBtn.addEventListener('click', () => {
  applyPreset('rmvpe', '16384', 'Preset loaded: Quality (RMVPE + 16384)');
});

/**
 * Serializes an object to the JSON text sent over the WebSocket.
 *
 * @param {*} obj - Value to serialize.
 * @returns {string} JSON text.
 */
function jsonEncode(obj) {
  return JSON.stringify(obj);
}

// Toggle whether processed audio is routed to the speakers
playToggleBtn.addEventListener('click', () => {
  playOutput = !playOutput;
  if (playOutput) {
    playToggleBtn.textContent = '🔊 Mendengarkan: AKTIF';
    playToggleBtn.className = 'btn btn-primary';
  } else {
    playToggleBtn.textContent = '🔇 Mendengarkan: SENYAP';
    playToggleBtn.className = 'btn btn-accent';
  }
});

// Stream Toggle
streamBtn.addEventListener('click', () => {
  if (isStreaming) {
    stopStreaming();
  } else {
    startStreaming().catch((e) => console.error('Failed to start streaming:', e));
  }
});

// createScriptProcessor requires a power-of-two buffer size between 256 and 16384.
// A fixed 4096 is used for capture; samples are accumulated in memory so any
// user-selected chunk size (including sizes above 16384) can be sent.
const RECORD_BUFFER_SIZE = 4096;

// Routing mode the active stream was started in ('hardware' | 'browser'); null when idle.
// stopStreaming() tears down according to this, not the dropdown, because the
// dropdown may already show a different mode by the time the stream is stopped.
let activeStreamMode = null;

// Incremented by every startStreaming()/stopStreaming() call, so a start that is
// still awaiting a promise can tell it has been stopped or replaced.
let streamGeneration = 0;

/** Replaces both rolling display buffers with silent ones. */
function resetDisplayBuffers() {
  inputDisplayBuf = new Float32Array(VIS_DISPLAY_SIZE);
  outputDisplayBuf = new Float32Array(VIS_DISPLAY_SIZE);
}

/**
 * Sets the gate status badge.
 *
 * @param {boolean} isSpeaking - True for the "speaking" state, false for idle.
 */
function setGateStatus(isSpeaking) {
  hudGateStatus.textContent = isSpeaking ? 'Bicara' : 'Berdiam';
  hudGateStatus.className = isSpeaking ? 'hud-value active-badge' : 'hud-value text-muted';
}

/**
 * ScriptProcessorNode callback: feeds the input visualizer, accumulates mic
 * samples and sends every complete chunk of the selected size to the server.
 *
 * @param {AudioProcessingEvent} event - Capture event carrying the mic samples.
 */
function handleMicAudioProcess(event) {
  if (!isStreaming) return;

  const inputData = event.inputBuffer.getChannelData(0);

  // Push the latest mic samples into the rolling display buffer every callback.
  pushToDisplayBuf(inputDisplayBuf, inputData);

  const targetChunkSize = Number.parseInt(chunkSelect.value, 10);
  if (!(targetChunkSize > 0)) {
    // NaN would grow the accumulator forever and 0 would spin the loop below
    // forever, so drop the audio until a valid size is selected.
    micAccumulator = new Float32Array(0);
    return;
  }

  // Append the incoming samples to the accumulator.
  const merged = new Float32Array(micAccumulator.length + inputData.length);
  merged.set(micAccumulator);
  merged.set(inputData, micAccumulator.length);
  micAccumulator = merged;

  // Send as many complete chunks as are available; keep the remainder.
  while (micAccumulator.length >= targetChunkSize) {
    const chunkToSend = micAccumulator.slice(0, targetChunkSize);
    micAccumulator = micAccumulator.slice(targetChunkSize);

    // Simple peak detector driving the gate status badge.
    let peak = 0;
    for (let i = 0; i < chunkToSend.length; i++) {
      const magnitude = Math.abs(chunkToSend[i]);
      if (magnitude > peak) peak = magnitude;
    }
    setGateStatus(peak > 0.005);

    // Send the raw Float32 PCM chunk to the server.
    if (socket && socket.readyState === WebSocket.OPEN) {
      const packetTime = performance.now();
      sentTimestamps.push({ id: packetTime, sent: packetTime });
      if (sentTimestamps.length > maxSentLogs) {
        sentTimestamps.shift();
      }
      socket.send(chunkToSend.buffer); // slice() made a copy, so the buffer holds exactly this chunk
    }
  }
}

/**
 * Starts streaming in the selected routing mode.
 *
 * Hardware mode only syncs the configuration (the server does the capture and
 * playback). Browser mode creates or resumes the AudioContext, opens the
 * microphone and wires the capture graph.
 *
 * If stopStreaming() or another startStreaming() runs while this call is
 * awaiting, the call abandons its setup and releases any microphone stream it
 * obtained.
 *
 * @returns {Promise<void>}
 */
async function startStreaming() {
  streamGeneration += 1;
  const generation = streamGeneration;
  const isSuperseded = () => generation !== streamGeneration;

  isStreaming = true;
  streamBtn.textContent = 'Hentikan Pengubah Suara';
  streamBtn.className = 'btn btn-primary';

  if (routingModeSelect.value === 'hardware') {
    // --- SERVER HARDWARE ROUTING MODE ---
    activeStreamMode = 'hardware';
    resetDisplayBuffers();
    startVisualizerLoop();
    sendConfigToServer(); // config with routing_mode 'hardware' triggers the stream start on the server
    console.log('Server Hardware Mode initialized.');
    return;
  }

  // --- CLIENT BROWSER MODE ---
  activeStreamMode = 'browser';

  // 1. Create the AudioContext if needed and make sure it is running.
  try {
    if (!audioContext) {
      audioContext = new (window.AudioContext || window.webkitAudioContext)({
        latencyHint: 'interactive'
      });
    }
    if (audioContext.state === 'suspended') {
      await audioContext.resume();
    }
  } catch (e) {
    if (!isSuperseded()) {
      console.error('Failed to start AudioContext:', e);
      stopStreaming(); // reset the UI instead of leaving it stuck in the streaming state
    }
    return;
  }
  if (isSuperseded()) return;

  hudSr.textContent = audioContext.sampleRate + ' Hz';
  sendConfigToServer(); // sync the actual input sample rate

  // 2. Request the microphone and build the capture graph.
  try {
    const useNoiseCancel = noiseCancelCheckbox.checked;
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        echoCancellation: useNoiseCancel,
        noiseSuppression: useNoiseCancel,
        autoGainControl: useNoiseCancel
      }
    });

    if (isSuperseded()) {
      // Stopped or restarted while the permission prompt was open:
      // release this stream so the microphone does not stay active.
      stream.getTracks().forEach((track) => track.stop());
      return;
    }

    micStream = stream;
    micSourceNode = audioContext.createMediaStreamSource(micStream);

    // 3. Capture node (ScriptProcessorNode).
    scriptProcessorNode = audioContext.createScriptProcessor(RECORD_BUFFER_SIZE, 1, 1);
    scriptProcessorNode.onaudioprocess = handleMicAudioProcess;

    // Fresh session state: playback clock, accumulator, RTT log, display buffers.
    nextPlaybackTime = 0;
    micAccumulator = new Float32Array(0);
    sentTimestamps.length = 0; // leftovers from a previous session would skew the first RTT readings
    resetDisplayBuffers();

    micSourceNode.connect(scriptProcessorNode);
    scriptProcessorNode.connect(audioContext.destination); // required to trigger onaudioprocess

    startVisualizerLoop();
    console.log('Browser Streaming active. Recording buffer size: 4096 | Target chunk size:', chunkSelect.value);
  } catch (e) {
    if (isSuperseded()) return; // a newer session owns the UI now
    console.error('Failed to access microphone:', e);
    alert('Gagal mengakses mikrofon Anda: ' + e.message);
    stopStreaming();
  }
}

/**
 * Stops streaming and resets the streaming UI, HUD and visualizer.
 *
 * Browser capture resources are always released when present. The hardware
 * stop message is sent when the stream being stopped was started in hardware
 * mode.
 */
function stopStreaming() {
  streamGeneration += 1; // invalidates any startStreaming() call still awaiting

  // When no start was recorded, fall back to the current selection.
  const stoppedMode = activeStreamMode !== null ? activeStreamMode : routingModeSelect.value;
  activeStreamMode = null;

  isStreaming = false;
  streamBtn.textContent = 'Mulai Mengubah Suara';
  streamBtn.className = 'btn btn-accent';

  playOutput = true;
  playToggleBtn.textContent = '🔊 Mendengarkan: AKTIF';
  playToggleBtn.className = 'btn btn-primary';

  if (stoppedMode === 'hardware') {
    // --- SERVER HARDWARE ROUTING MODE ---
    if (socket && socket.readyState === WebSocket.OPEN) {
      const config = {
        type: 'config',
        routing_mode: 'browser' // tells the server to stop its local hardware stream
      };
      socket.send(jsonEncode(config));
    }
    console.log('Server Hardware Mode stopped.');
  }

  // --- CLIENT BROWSER MODE resources (no-ops when nothing was captured) ---
  if (micStream) {
    micStream.getTracks().forEach((track) => track.stop());
    micStream = null;
  }
  if (micSourceNode) {
    micSourceNode.disconnect();
    micSourceNode = null;
  }
  if (scriptProcessorNode) {
    scriptProcessorNode.disconnect();
    scriptProcessorNode = null;
  }
  micAccumulator = new Float32Array(0);

  // --- Shared teardown ---
  stopVisualizerLoop();
  resetDisplayBuffers();
  setGateStatus(false);
  hudLatency.textContent = '-- ms';
  hudTime.textContent = '-- ms';
  clearCanvas(inputCanvas);
  clearCanvas(outputCanvas);
}

// Seamless Audio Playback Scheduler (absorbs WebSocket & processing jitter)
/**
 * Schedules a processed audio chunk from the server for gapless playback and
 * queues it for the time-synced output visualizer.
 *
 * Payload layout: Float32 values, where the first is the server processing
 * time in ms and the rest are mono PCM samples at `targetSampleRate`.
 *
 * @param {ArrayBuffer} arrayBuffer - Binary WebSocket payload.
 */
function handleServerAudioChunk(arrayBuffer) {
  // A binary frame can arrive when no AudioContext exists; there is nothing to play it on.
  if (!isStreaming || !audioContext) return;

  // 1. Round-trip time: pair this reply with the oldest unanswered send.
  const receivedAt = performance.now();
  if (sentTimestamps.length > 0) {
    const oldestSent = sentTimestamps.shift();
    hudLatency.textContent = Math.round(receivedAt - oldestSent.sent) + ' ms';
  }

  // A Float32Array view needs a byte length divisible by 4, and createBuffer
  // needs at least one sample after the header float.
  if (arrayBuffer.byteLength % 4 !== 0 || arrayBuffer.byteLength < 8) {
    console.warn('Dropping malformed audio chunk of', arrayBuffer.byteLength, 'bytes');
    return;
  }

  const payload = new Float32Array(arrayBuffer);
  const processingTime = payload[0]; // server processing time in ms
  const pcmData = payload.subarray(1); // the audio samples

  // 2. Wrap the samples in an AudioBuffer and a one-shot source node.
  const audioBuf = audioContext.createBuffer(1, pcmData.length, targetSampleRate);
  audioBuf.getChannelData(0).set(pcmData);

  const source = audioContext.createBufferSource();
  source.buffer = audioBuf;
  if (playOutput) {
    source.connect(audioContext.destination);
  }

  const currentTime = audioContext.currentTime;
  const chunkDuration = audioBuf.duration; // seconds

  // Headroom so the next chunk arrives before this one ends: 2.5x chunk, capped at 500 ms.
  const adaptiveBuf = Math.min(chunkDuration * 2.5, 0.50);

  if (nextPlaybackTime < currentTime) {
    // Clock is behind (first chunk or dropout recovery): rebuild the full headroom.
    nextPlaybackTime = currentTime + adaptiveBuf;
  } else if (nextPlaybackTime > currentTime + chunkDuration * 5.0) {
    // --- ADAPTIVE LATENCY BUSTER ---
    // Snap back only when more than 5 chunk-durations are queued ahead
    // (a genuine backlog, not normal look-ahead).
    console.log(`Latency Buster: ${Math.round((nextPlaybackTime-currentTime)*1000)}ms → ${Math.round(adaptiveBuf*1000)}ms`);
    nextPlaybackTime = currentTime + adaptiveBuf;
  }

  // Capture the start time BEFORE advancing the clock (used by the visualizer).
  const scheduleStartTime = nextPlaybackTime;
  source.start(scheduleStartTime);

  hudTime.textContent = Math.max(0, Math.round(processingTime)) + ' ms';

  // Advance the playback clock by exactly this chunk's length.
  nextPlaybackTime += chunkDuration;

  // Queue for the time-synced visualizer, keyed by when the audio actually plays.
  outputChunkQueue.push({ data: pcmData, startTime: scheduleStartTime });

  // Bound the queue: drop chunks that ended more than 2 seconds before the playhead.
  const cutoff = audioContext.currentTime - 2.0;
  while (outputChunkQueue.length > 0) {
    const oldest = outputChunkQueue[0];
    if (oldest.startTime + oldest.data.length / targetSampleRate < cutoff) {
      outputChunkQueue.shift();
    } else {
      break;
    }
  }
}

// --- VISUALIZATION / DRAWING ROUTINES ---
/**
 * Draws one frame of a waveform onto a canvas, over a translucent fill that
 * leaves a fading trail of previous frames.
 *
 * @param {Float32Array} dataArray - Samples to plot (scaled by 1.5 around mid-height).
 * @param {HTMLCanvasElement} canvas - Target canvas.
 * @param {string} strokeColor - CSS colour of the waveform line.
 */
function drawWaveform(dataArray, canvas, strokeColor) {
  const ctx = canvas.getContext('2d');
  const { width, height } = canvas;
  const midY = height / 2;

  // Translucent dark fill instead of a hard clear, for the trace/motion-blur effect.
  ctx.fillStyle = 'rgba(11, 12, 19, 0.4)';
  ctx.fillRect(0, 0, width, height);

  ctx.lineWidth = 2 * window.devicePixelRatio;
  ctx.strokeStyle = strokeColor;
  ctx.beginPath();

  const sliceWidth = width / dataArray.length;
  let x = 0;
  for (let i = 0; i < dataArray.length; i++) {
    // Centre the wave on mid-height and scale its amplitude.
    const y = dataArray[i] * 1.5 * midY + midY;
    if (i === 0) {
      ctx.moveTo(x, y);
    } else {
      ctx.lineTo(x, y);
    }
    x += sliceWidth;
  }
  ctx.lineTo(width, midY);
  ctx.stroke();

  // Subtle centre baseline.
  ctx.strokeStyle = 'rgba(255, 255, 255, 0.05)';
  ctx.lineWidth = 1;
  ctx.beginPath();
  ctx.moveTo(0, midY);
  ctx.lineTo(width, midY);
  ctx.stroke();
}
/**
 * Paints the whole canvas with the opaque background colour, erasing any
 * waveform currently drawn on it.
 *
 * @param {HTMLCanvasElement} canvas - Canvas to wipe.
 */
function clearCanvas(canvas) {
  const { width, height } = canvas;
  const context2d = canvas.getContext('2d');
  context2d.fillStyle = '#0b0c13';
  context2d.fillRect(0, 0, width, height);
}

// Apply the initial routing-dependent UI layout on startup
applyAudioRoutingUI();