feat(voice): implement voice input

- Add a generic WebSocket utility class
- Integrate the recorder-core library for audio recording
- Add audio waveform display and real-time transcript display
- Stream voice data to the WebSocket server in real time
- Refine the voice input UI and interaction logic
Lexcubia 2025-04-27 11:51:47 +08:00
parent 6a20e26e5d
commit 15514759e2
4 changed files with 395 additions and 107 deletions
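At a high level, the changes below implement a simple streaming-recognition exchange: when the socket opens the client sends a JSON handshake (chunk size, mode, ITN flag), PCM frames from the recorder are forwarded while the user speaks, and an is_speaking: false message marks the end of the utterance. A minimal sketch of that message sequence, assuming a bare WebSocket and a placeholder server URL; the real implementation lives in src/utils/websocket.js and the voice input component:

    // Sketch only; mirrors the message flow added in this commit. The URL is a placeholder.
    const ws = new WebSocket('wss://asr.example.com/')

    ws.onopen = () => {
      // Handshake sent once the connection opens (see WebSocketClient._onOpen below)
      ws.send(JSON.stringify({
        chunk_size: [5, 10, 5],
        wav_name: 'h5',
        is_speaking: true,
        chunk_interval: 10,
        mode: '2pass',
        itn: false
      }))
    }

    // While recording, each PCM frame produced by recorder-core is forwarded as it arrives
    const sendFrame = (pcmFrame) => {
      ws.send(JSON.stringify({ is_speaking: true, audio_data: pcmFrame }))
    }

    // When recording stops, signal the end of the utterance
    const endUtterance = () => {
      ws.send(JSON.stringify({ is_speaking: false }))
    }

    ws.onmessage = (event) => {
      // The server streams back incremental recognition results
      const { text } = JSON.parse(event.data)
      if (text) console.log('partial transcript:', text)
    }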

package.json

@@ -42,10 +42,13 @@
     "echarts": "^5.6.0",
     "element-plus": "^2.9.7",
     "marked": "^15.0.8",
+    "pcm": "^1.0.3",
     "pinia": "^3.0.2",
     "pinia-plugin-persistedstate": "^4.2.0",
+    "recorder-core": "^1.3.25011100",
     "vue": "^3.5.13",
-    "vue-router": "^4.5.0"
+    "vue-router": "^4.5.0",
+    "wav": "^1.0.2"
   },
   "devDependencies": {
     "@types/node": "^20.11.19",

src/utils/websocket.js (new file, 128 lines)

@@ -0,0 +1,128 @@
/**
 * Generic WebSocket utility class
 * @author Lexcubia
 * @date 2024-04-25
 */
class WebSocketClient {
  constructor(config) {
    this.socket = null;
    this.msgHandle = config.msgHandle;
    this.stateHandle = config.stateHandle;
    this.config = config;
  }

  /**
   * Open the WebSocket connection
   * @param {string} url - WebSocket server address
   * @returns {boolean} - whether the connection attempt succeeded
   */
  connect(url) {
    if (!url) {
      console.error('WebSocket URL 不能为空');
      return false;
    }
    if (!url.match(/wss?:\S*/)) {
      console.error('WebSocket URL 格式不正确');
      return false;
    }
    if (!('WebSocket' in window)) {
      console.error('当前浏览器不支持 WebSocket');
      return false;
    }
    try {
      this.socket = new WebSocket(url);
      this.socket.onopen = this._onOpen.bind(this);
      this.socket.onclose = this._onClose.bind(this);
      this.socket.onmessage = this._onMessage.bind(this);
      this.socket.onerror = this._onError.bind(this);
      return true;
    } catch (error) {
      console.error('WebSocket 连接失败:', error);
      return false;
    }
  }

  /**
   * Close the WebSocket connection
   */
  disconnect() {
    if (this.socket) {
      this.socket.close();
      this.socket = null;
    }
  }

  /**
   * Send data over the socket
   * @param {any} data - data to send; objects are JSON-stringified first
   */
  send(data) {
    if (!this.socket || this.socket.readyState !== WebSocket.OPEN) {
      console.error('WebSocket 未连接');
      return;
    }
    try {
      if (typeof data === 'object') {
        data = JSON.stringify(data);
      }
      this.socket.send(data);
    } catch (error) {
      console.error('发送数据失败:', error);
    }
  }

  /**
   * Called when the connection opens
   * @private
   */
  _onOpen() {
    console.log('WebSocket 连接成功');
    this.stateHandle(0);
    // Send the initial recognition config
    const initConfig = {
      chunk_size: [5, 10, 5],
      wav_name: 'h5',
      is_speaking: true,
      chunk_interval: 10,
      itn: this.config.itn || false,
      mode: this.config.mode || '2pass',
      ...this.config.initConfig
    };
    this.send(initConfig);
  }

  /**
   * Called when the connection closes
   * @private
   */
  _onClose() {
    console.log('WebSocket 连接关闭');
    this.stateHandle(1);
  }

  /**
   * Called when a message is received
   * @private
   */
  _onMessage(event) {
    this.msgHandle(event);
  }

  /**
   * Called when an error occurs
   * @private
   */
  _onError(error) {
    console.error('WebSocket 错误:', error);
    this.stateHandle(2);
  }
}

export default WebSocketClient;
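A minimal usage sketch of the class above; the server URL, handlers, and logging are illustrative placeholders, not part of the commit:

    import WebSocketClient from '@/utils/websocket'

    const client = new WebSocketClient({
      mode: '2pass',
      itn: true,
      msgHandle: (event) => {
        // the ASR server pushes incremental recognition results as JSON
        const data = JSON.parse(event.data)
        if (data.text) console.log('partial text:', data.text)
      },
      stateHandle: (state) => {
        // 0 = opened (init config already sent), 1 = closed, 2 = error
        if (state === 0) {
          // safe to stream audio frames from here on
          client.send({ is_speaking: true, audio_data: [/* PCM samples */] })
          client.send({ is_speaking: false }) // end of utterance
          client.disconnect()
        }
      }
    })

    client.connect('wss://asr.example.com/')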

Voice input modal component (.vue)

@@ -15,8 +15,16 @@
       <!-- Voice Input Modal -->
       <div class="voice-input-modal-overlay" v-if="showVoiceModal">
         <div class="voice-bubble">
-          <!-- TODO: Add voice visualizer here -->
-          <div class="voice-wave-placeholder">语音波形区域</div>
+          <!-- Voice Visualizer -->
+          <AudioWaveform :pcmData="pcmData" />
+          <!-- Timer Display -->
+          <div class="timer" :class="{ 'warning': remainingTime <= 5 }">
+            {{ formatTime(remainingTime) }}
+          </div>
+          <!-- Real-time Text Display -->
+          <div class="real-time-text" v-if="realTimeText">
+            {{ realTimeText }}
+          </div>
         </div>
         <div class="voice-modal-controls">
           <div class="modal-control-item">
@@ -36,12 +44,12 @@
           >
           </el-button>
         </div>
-        <div class="modal-control-item">
+        <!-- <div class="modal-control-item">
           <span class="modal-btn-label">转文字</span>
           <el-button class="modal-btn text-send-btn" circle @click="sendAsText">
             <el-icon><ChatDotSquare /></el-icon>
           </el-button>
-        </div>
+        </div> -->
       </div>
       <div class="voice-modal-tip">{{ isRecording ? '正在录音...' : '点击麦克风开始录音' }}</div>
     </div>
@@ -61,6 +69,11 @@
 import { ref, onMounted, onUnmounted } from 'vue'
 import { ElButton, ElMessage, ElIcon } from 'element-plus'
 import { Microphone, ChatDotSquare, Close } from '@element-plus/icons-vue'
+import Recorder from 'recorder-core'
+import 'recorder-core/src/engine/pcm'
+import 'recorder-core/src/engine/wav'
+import WebSocketClient from '@/utils/websocket'
+import AudioWaveform from '@/components/AudioWaveform.vue'

 // Props definition
 const props = defineProps({
@@ -71,137 +84,224 @@ const props = defineProps({
 })

 // Emits definition
-const emit = defineEmits(['sendText'])
+const emit = defineEmits(['sendText', 'sendVoice'])

 // Refs related to voice input
 const isRecording = ref(false)
 const isVoiceToTextConverting = ref(false)
 const showVoiceModal = ref(false)
-let mediaRecorder = null
+const realTimeText = ref('')
+const remainingTime = ref(30)
+const pcmData = ref([])
+let recorder = null
+let websocketClient = null
+let timer = null
 let audioChunks = []

+// WebSocket configuration
+const wsConfig = {
+  url: 'wss://160.202.224.52:10096/',
+  initConfig: {
+    mode: '2pass',
+    wav_format: 'pcm',
+    chunk_size: [5, 10, 5],
+    itn: true
+  },
+  msgHandle: (event) => {
+    try {
+      const data = JSON.parse(event.data)
+      if (data.text) {
+        realTimeText.value = data.text
+      }
+    } catch (error) {
+      console.error('WebSocket message parse error:', error)
+    }
+  },
+  stateHandle: (state) => {
+    console.log('WebSocket state:', state)
+  }
+}
+
 // --- Voice Input Logic ---

-const openVoiceModal = () => {
-  if (props.isDisabled) return;
-  showVoiceModal.value = true;
-  // Reset state in case modal was closed unexpectedly before
-  isRecording.value = false;
-  audioChunks = [];
-}
-
-const closeVoiceModal = () => {
-  showVoiceModal.value = false;
-  if (isRecording.value) {
-    stopRecording(); // Stop recording if modal is closed while recording
-    isRecording.value = false;
-  }
-}
+const openVoiceModal = async () => {
+  if (props.isDisabled) return
+  showVoiceModal.value = true
+  isRecording.value = false
+  realTimeText.value = ''
+  remainingTime.value = 30
+  audioChunks = []
+  // Request microphone access and start recording immediately
+  try {
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+    startRecording(stream)
+    isRecording.value = true
+    startTimer()
+    connectWebSocket()
+  } catch (err) {
+    ElMessage.error('无法访问麦克风,请检查权限设置')
+    console.error('获取麦克风权限失败:', err)
+    closeVoiceModal()
+  }
+}
+
+const closeVoiceModal = () => {
+  showVoiceModal.value = false
+  if (isRecording.value) {
+    stopRecording()
+    isRecording.value = false
+  }
+  clearInterval(timer)
+  // Release the recorder and its media stream
+  if (recorder) {
+    recorder.close(() => {
+      // Stop the underlying MediaStream tracks
+      if (recorder.stream) {
+        recorder.stream.getTracks().forEach(track => track.stop())
+      }
+      recorder = null
+    })
+  }
+  // Disconnect the WebSocket
+  if (websocketClient) {
+    websocketClient.disconnect()
+    websocketClient = null
+  }
+  // Reset transient state
+  audioChunks = []
+  realTimeText.value = ''
+  remainingTime.value = 30
+}

 const cancelVoiceInputFromModal = () => {
-  closeVoiceModal();
+  closeVoiceModal()
 }

 const confirmVoiceInput = async () => {
-  if (!isRecording.value) {
-    // Start recording
-    try {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      startRecording(stream);
-      isRecording.value = true;
-    } catch (err) {
-      ElMessage.error('无法访问麦克风,请检查权限设置');
-      console.error('获取麦克风权限失败:', err);
-      closeVoiceModal(); // Close modal on error
-    }
-  } else {
-    // Stop recording and process
-    stopRecording();
-    isRecording.value = false;
+  if (isRecording.value) {
+    stopRecording()
+    isRecording.value = false
+    clearInterval(timer)
     if (audioChunks.length === 0) {
-      ElMessage.warning('录音时间太短');
-      closeVoiceModal();
-      return;
+      ElMessage.warning('录音时间太短')
+      closeVoiceModal()
+      return
     }
-    isVoiceToTextConverting.value = true; // Show converting overlay
-    closeVoiceModal(); // Close the modal after stopping
-
-    // Simulate conversion & emit result
-    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
-    console.log('录音完成,模拟转换,音频大小:', audioBlob.size);
-    setTimeout(() => {
-      const simulatedText = "模拟语音识别结果" + Date.now(); // Example text
-      emit('sendText', simulatedText);
-      isVoiceToTextConverting.value = false;
-      audioChunks = []; // Clear chunks after processing
-    }, 1500);
+    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' })
+    emit('sendVoice', audioBlob)
+    closeVoiceModal()
   }
 }

 const sendAsText = () => {
-  // Currently, just acts like confirm/stop
   if (isRecording.value) {
-    confirmVoiceInput();
+    stopRecording()
+    isRecording.value = false
+    clearInterval(timer)
+    if (realTimeText.value) {
+      emit('sendText', realTimeText.value)
+    } else {
+      ElMessage.warning('没有识别到文字')
+    }
+    closeVoiceModal()
   } else {
-    // If not recording, maybe do nothing or just close?
-    closeVoiceModal();
+    closeVoiceModal()
   }
 }

 const startRecording = (stream) => {
-  audioChunks = []; // Clear previous chunks
-  mediaRecorder = new MediaRecorder(stream);
-
-  mediaRecorder.ondataavailable = (event) => {
-    if (event.data.size > 0) {
-      audioChunks.push(event.data);
-    }
-  };
-
-  mediaRecorder.onstop = () => {
-    // Stop associated media stream tracks
-    mediaRecorder.stream.getTracks().forEach(track => track.stop());
-    // Note: Actual processing is now triggered in confirmVoiceInput after calling stopRecording
-  };
-
-  mediaRecorder.onerror = (event) => {
-    console.error('MediaRecorder error:', event.error);
-    ElMessage.error('录音出错');
-    isRecording.value = false; // Reset state on error
-    closeVoiceModal();
-  };
-
-  mediaRecorder.start();
+  audioChunks = []
+  recorder = new Recorder({
+    type: 'pcm',        // raw PCM output
+    sampleRate: 16000,  // 16 kHz sample rate
+    bitRate: 16,        // 16-bit samples
+    onProcess: (buffers, powerLevel, duration, sampleRate) => {
+      const pcm = buffers[buffers.length - 1]
+      if (pcm && pcm.length > 0) {
+        pcmData.value = Array.from(pcm)
+      }
+      // Forward the latest PCM frame over the WebSocket
+      if (websocketClient && websocketClient.socket.readyState === WebSocket.OPEN) {
+        const pcmDataSend = buffers[buffers.length - 1]
+        websocketClient.send({
+          is_speaking: true,
+          audio_data: pcmDataSend
+        })
+      }
+    }
+  })

-}
+  recorder.open(() => {
+    recorder.start()
+  }, (msg, isUserNotAllow) => {
+    ElMessage.error('录音失败: ' + msg)
+    closeVoiceModal()
+  })
+}

 const stopRecording = () => {
-  if (mediaRecorder && mediaRecorder.state === 'recording') {
-    try {
-      mediaRecorder.stop();
-    } catch (error) {
-      console.error("Error stopping MediaRecorder:", error);
-      // Attempt to stop tracks directly as a fallback
-      if (mediaRecorder.stream) {
-        mediaRecorder.stream.getTracks().forEach(track => track.stop());
-      }
-    }
-  }
-  // Reset recorder instance
-  mediaRecorder = null;
+  if (recorder) {
+    recorder.stop((blob, duration) => {
+      // Convert PCM to WAV before saving
+      Recorder.pcm2wav({
+        blob: blob,
+        sampleRate: 16000,
+        bitRate: 16
+      }, (wavBlob) => {
+        audioChunks.push(wavBlob)
+        // Send end signal to WebSocket
+        if (websocketClient && websocketClient.socket.readyState === WebSocket.OPEN) {
+          websocketClient.send({
+            is_speaking: false
+          })
+        }
+      }, (msg) => {
+        console.error('PCM转WAV失败:', msg)
+      })
+    }, (msg) => {
+      console.error('录音停止失败:', msg)
+    })
+  }
+}
+
+const startTimer = () => {
+  timer = setInterval(() => {
+    remainingTime.value--
+    if (remainingTime.value <= 0) {
+      stopRecording()
+      isRecording.value = false
+      clearInterval(timer)
+    }
+  }, 1000)
+}
+
+const formatTime = (seconds) => {
+  const mins = Math.floor(seconds / 60)
+  const secs = seconds % 60
+  return `${mins}:${secs.toString().padStart(2, '0')}`
+}
+
+const connectWebSocket = () => {
+  websocketClient = new WebSocketClient(wsConfig)
+  websocketClient.connect(wsConfig.url)
 }

 // --- Lifecycle hooks ---
 onMounted(() => {
   if (!('MediaRecorder' in window) || !navigator.mediaDevices) {
-    console.warn('浏览器不支持语音录制功能');
-    // Optionally disable voice functionality entirely
+    console.warn('浏览器不支持语音录制功能')
   }
 })

 onUnmounted(() => {
-  stopRecording(); // Ensure recording is stopped on component unmount
+  closeVoiceModal()
 })
@@ -247,8 +347,40 @@ onUnmounted(() => {
   text-align: center;
   position: relative;

-  .voice-wave-placeholder {
+  .voice-wave {
+    width: 100%;
+    height: 48px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    margin: 10px 0;
+    background: transparent;
+  }
+
+  .voice-wave canvas {
+    width: 240px;
+    height: 48px;
+    display: block;
+    background: transparent;
+  }
+
+  .timer {
     font-size: 14px;
+    color: white;
+    margin: 5px 0;
+    &.warning {
+      color: #ff4d4f;
+      animation: blink 1s infinite;
+    }
+  }
+
+  .real-time-text {
+    font-size: 14px;
+    color: white;
+    margin: 10px 0;
+    min-height: 20px;
+    word-break: break-all;
   }

   &::after {
@@ -395,4 +527,29 @@ html.dark .voice-to-text-content {
 html.dark .converting-text {
   color: #eee;
 }
+
+.timer {
+  font-size: 14px;
+  color: white;
+  margin: 5px 0;
+  &.warning {
+    color: #ff4d4f;
+    animation: blink 1s infinite;
+  }
+}
+
+.real-time-text {
+  font-size: 14px;
+  color: white;
+  margin: 10px 0;
+  min-height: 20px;
+  word-break: break-all;
+}
+
+@keyframes blink {
+  0% { opacity: 1; }
+  50% { opacity: 0.5; }
+  100% { opacity: 1; }
+}

 </style>
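For reference, the component now emits sendText (the recognized transcript) and sendVoice (the finished recording as a WAV Blob). A hypothetical parent usage sketch; the component name, import path, and handler names are invented for illustration:

    <!-- Hypothetical parent component; <VoiceInput> stands in for the component changed above. -->
    <template>
      <VoiceInput :isDisabled="sending" @send-text="handleText" @send-voice="handleVoice" />
    </template>

    <script setup>
    import { ref } from 'vue'
    import VoiceInput from '@/components/VoiceInput.vue' // illustrative path

    const sending = ref(false)

    // Recognized text streamed back from the ASR server
    const handleText = (text) => {
      console.log('voice-to-text result:', text)
    }

    // The full recording as a WAV Blob, e.g. for upload or local playback
    const handleVoice = (wavBlob) => {
      const url = URL.createObjectURL(wavBlob)
      new Audio(url).play()
    }
    </script>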

vite.config.js

@@ -58,21 +58,21 @@ export default defineConfig(({ command, mode }) => {
         rewrite: (path) => path.replace(/^\/api/, ''),
         secure: false,
         configure: (proxy, options) => {
-          proxy.on('proxyReq', (proxyReq, req, res) => {
-            const targetOrigin = new URL(env.VITE_APP_BASE_API).origin;
-            proxyReq.setHeader('Origin', targetOrigin);
-            if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
-              proxyReq.setHeader('Accept', 'text/event-stream');
-            }
-          });
-          proxy.on('proxyRes', (proxyRes, req, res) => {
-            if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
-              proxyRes.headers['content-type'] = 'text/event-stream';
-              proxyRes.headers['Cache-Control'] = 'no-cache';
-              proxyRes.headers['Connection'] = 'keep-alive';
-            }
-          });
+          // proxy.on('proxyReq', (proxyReq, req, res) => {
+          //   const targetOrigin = new URL(env.VITE_APP_BASE_API).origin;
+          //   proxyReq.setHeader('Origin', targetOrigin);
+          //   if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
+          //     proxyReq.setHeader('Accept', 'text/event-stream');
+          //   }
+          // });
+          // proxy.on('proxyRes', (proxyRes, req, res) => {
+          //   if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
+          //     proxyRes.headers['content-type'] = 'text/event-stream';
+          //     proxyRes.headers['Cache-Control'] = 'no-cache';
+          //     proxyRes.headers['Connection'] = 'keep-alive';
+          //   }
+          // });
         }
       }
     }