wx/src/views/chat/components/VoiceInput.vue

560 lines
12 KiB
Vue

<template>
<div class="voice-input-container">
<!-- Trigger button: calls openVoiceModal; rendered only while the modal is closed and disabled through the isDisabled prop -->
<el-button
class="voice-trigger-btn"
@click="openVoiceModal"
v-if="!showVoiceModal"
:disabled="isDisabled"
>
点击 说话
</el-button>
<!-- Teleport the modal and the conversion overlay to the document body -->
<Teleport to="body">
<!-- Voice input modal: shown while showVoiceModal is true -->
<div class="voice-input-modal-overlay" v-if="showVoiceModal">
<div class="voice-bubble">
<!-- Waveform visualizer fed with the pcmData ref -->
<AudioWaveform :pcmData="pcmData" />
<!-- Countdown display; gets the "warning" class once 5 seconds or fewer remain -->
<div class="timer" :class="{ 'warning': remainingTime <= 5 }">
{{ formatTime(remainingTime) }}
</div>
<!-- Live transcript; rendered only when realTimeText is non-empty -->
<div class="real-time-text" v-if="realTimeText">
{{ realTimeText }}
</div>
</div>
<div class="voice-modal-controls">
<!-- Cancel control: calls cancelVoiceInputFromModal -->
<div class="modal-control-item">
<span class="modal-btn-label">取消</span>
<el-button class="modal-btn cancel-btn" circle @click="cancelVoiceInputFromModal">
<el-icon><Close /></el-icon>
</el-button>
</div>
<!-- Confirm control: calls confirmVoiceInput; label and icon switch on isRecording -->
<div class="modal-control-item">
<span class="modal-btn-label record-label">{{ isRecording ? '完成' : ' ' }}</span>
<el-button
class="modal-btn confirm-btn"
type="primary"
circle
@click="confirmVoiceInput"
:icon="isRecording ? Check : Microphone"
>
</el-button>
</div>
<!-- Disabled "send as text" control. NOTE(review): sendAsText and ChatDotSquare are not defined or imported in the visible script, so this cannot be re-enabled as-is. -->
<!-- <div class="modal-control-item">
<span class="modal-btn-label">转文字</span>
<el-button class="modal-btn text-send-btn" circle @click="sendAsText">
<el-icon><ChatDotSquare /></el-icon>
</el-button>
</div> -->
</div>
<!-- Status hint below the controls; text switches on isRecording -->
<div class="voice-modal-tip">{{ isRecording ? '正在录音...' : '点击麦克风开始录音' }}</div>
</div>
<!-- Voice-to-text conversion overlay: shown while isVoiceToTextConverting is true -->
<div class="voice-to-text-overlay" v-if="isVoiceToTextConverting">
<div class="voice-to-text-content">
<div class="converting-indicator"></div>
<div class="converting-text">转文字发送</div>
</div>
</div>
</Teleport>
</div>
</template>
<script setup>
import { ref, onMounted, onUnmounted } from 'vue'
import { ElButton, ElMessage, ElIcon } from 'element-plus'
import { Microphone, Check, Close } from '@element-plus/icons-vue'
import Recorder from 'recorder-core'
import 'recorder-core/src/engine/pcm'
import 'recorder-core/src/engine/wav'
import WebSocketClient from '@/utils/websocket'
import AudioWaveform from '@/components/AudioWaveform.vue'
import { useSpeakStore } from '@/store/speak'
// Props definition
// isDisabled: disables the trigger button and makes openVoiceModal return immediately.
const props = defineProps({
isDisabled: {
type: Boolean,
default: false
}
})
// Emits definition
// Both events carry the recognized text; confirmVoiceInput chooses between them
// based on speakStore.sendDirectly.
const emit = defineEmits(['sendText', 'fillInput'])
// Speech settings store (read for its sendDirectly flag)
const speakStore = useSpeakStore()
// Refs related to voice input
// isRecording: true while a capture session is considered active.
const isRecording = ref(false)
// isVoiceToTextConverting: drives the conversion overlay in the template.
// NOTE(review): nothing in the visible script ever sets this to true.
const isVoiceToTextConverting = ref(false)
// showVoiceModal: controls visibility of the modal (and hides the trigger button).
const showVoiceModal = ref(false)
// realTimeText: transcript accumulated from WebSocket messages (see wsConfig.msgHandle).
const realTimeText = ref('')
// remainingTime: countdown in seconds; reset to 30 when the modal opens or closes.
const remainingTime = ref(30)
// pcmData: most recent resampled samples, passed to AudioWaveform.
const pcmData = ref([])
// Non-reactive session handles.
let recorder = null
let websocketClient = null
// Handle returned by setInterval in startTimer.
let timer = null
// WAV blobs pushed by stopRecording. NOTE(review): never read in the visible script.
let audioChunks = []
// 16 kHz samples that have been captured but not yet sent as a full frame.
let sampleBuf = new Int16Array(0)
const chunk_size = 960 // 960 samples = one 60 ms frame at 16 kHz
// WebSocket configuration: passed to the WebSocketClient constructor, and its
// `url` to connect(), in connectWebSocket.
const wsConfig = {
  url: 'wss://fire.lexcubia.com:31003/ws',
  initConfig: {
    mode: '2pass',
    wav_format: 'pcm',
    chunk_size: [5, 10, 5],
    itn: true
  },
  // Parse each incoming message as JSON and append its non-empty `text` field
  // to the live transcript. Any failure while handling the message is logged.
  // NOTE(review): in '2pass' mode the server presumably sends both partial and
  // corrected results; plain appending may then duplicate text — confirm
  // against the server protocol.
  msgHandle: (event) => {
    try {
      const { text } = JSON.parse(event.data)
      if (!text) {
        return
      }
      realTimeText.value = realTimeText.value + text
    } catch (parseError) {
      console.error('WebSocket message parse error:', parseError)
    }
  },
  // Connection state changes are only logged.
  stateHandle: (state) => {
    console.log('WebSocket state:', state)
  }
}
// --- Voice Input Logic ---
/**
 * Open the voice modal and immediately start a recording session.
 *
 * Resets the per-session state, asks the browser for microphone access, then
 * starts the recorder, the countdown timer and the WebSocket connection.
 * On permission failure an error toast is shown and the modal is closed.
 *
 * @returns {Promise<void>}
 */
const openVoiceModal = async () => {
  if (props.isDisabled) return
  showVoiceModal.value = true
  isRecording.value = false
  realTimeText.value = ''
  remainingTime.value = 30
  audioChunks = []
  // Start recording automatically.
  let probeStream = null
  try {
    probeStream = await navigator.mediaDevices.getUserMedia({ audio: true })
    // The user may have cancelled the modal while the permission prompt was
    // showing; do not start a session for a modal that is already closed.
    if (!showVoiceModal.value) return
    startRecording(probeStream)
    // startRecording closes the modal from its failure callback; if that
    // already happened, do not start a timer/socket for a dead session.
    if (!showVoiceModal.value) return
    isRecording.value = true
    startTimer()
    connectWebSocket()
  } catch (err) {
    ElMessage.error('无法访问麦克风,请检查权限设置')
    console.error('获取麦克风权限失败:', err)
    closeVoiceModal()
  } finally {
    // startRecording does not consume this stream (the Recorder opens its
    // own), so it only serves as a permission probe. Release its tracks,
    // otherwise the microphone stays captured after the modal closes.
    if (probeStream) {
      probeStream.getTracks().forEach((track) => track.stop())
    }
  }
}
/**
 * Tear down the current voice session and hide the modal.
 *
 * Order: disconnect the WebSocket, stop and release the recorder, clear the
 * countdown timer, reset per-session data, and finally hide the modal.
 */
const closeVoiceModal = () => {
  // 1. Close the WebSocket connection.
  if (websocketClient) {
    websocketClient.disconnect()
    websocketClient = null
  }
  // 2. Stop recording and release resources.
  if (isRecording.value) {
    stopRecording()
    isRecording.value = false
  }
  // Detach the shared reference synchronously and work on a local one: the
  // close callback must not dereference (or null out) whatever `recorder`
  // points to by the time it fires, which could be a newer session.
  const closingRecorder = recorder
  recorder = null
  if (closingRecorder) {
    closingRecorder.close(() => {
      if (closingRecorder.stream) {
        closingRecorder.stream.getTracks().forEach((track) => track.stop())
      }
    })
  }
  // 3. Clear the countdown timer.
  if (timer) {
    clearInterval(timer)
    timer = null
  }
  // 4. Reset per-session data.
  audioChunks = []
  realTimeText.value = ''
  remainingTime.value = 30
  sampleBuf = new Int16Array(0)
  pcmData.value = []
  // 5. Hide the modal last.
  showVoiceModal.value = false
}
// Cancel button handler: discards the session by closing the modal.
const cancelVoiceInputFromModal = () => {
  closeVoiceModal()
  return undefined
}
/**
 * Confirm button handler: finish the session and deliver the transcript.
 *
 * If recording is still running it is stopped first. Any recognized text is
 * then emitted as 'sendText' (when speakStore.sendDirectly is set) or
 * 'fillInput'. The text is delivered even when recording already ended on its
 * own (the countdown reaching zero), so a timed-out recording is not silently
 * discarded. The "nothing recognized" warning is only shown when this click
 * is what ended the recording, matching the previous behavior.
 *
 * @returns {Promise<void>}
 */
const confirmVoiceInput = async () => {
  const wasRecording = isRecording.value
  if (wasRecording) {
    stopRecording()
    isRecording.value = false
  }
  // Read the transcript before closeVoiceModal clears it.
  const recognizedText = realTimeText.value
  if (recognizedText) {
    const eventName = speakStore.sendDirectly ? 'sendText' : 'fillInput'
    emit(eventName, recognizedText)
  } else if (wasRecording) {
    ElMessage.warning('没有识别到文字')
  }
  closeVoiceModal()
}
/**
 * Create and open a PCM recorder that streams 16 kHz audio to the WebSocket.
 *
 * @param {MediaStream} stream - Accepted for interface compatibility; it is
 *   not used, the Recorder opens its own input stream.
 *
 * On every onProcess callback the newly captured buffers are resampled to
 * 16 kHz, appended to `sampleBuf`, and sent in frames of `chunk_size`
 * samples while the socket is open. Frames produced while the socket is not
 * open are dropped. The resampled data also feeds the waveform via `pcmData`.
 */
const startRecording = (stream) => {
  audioChunks = []
  // Previous SampleData result; lets the next call continue from the first
  // unprocessed buffer. A callback may add zero or several buffers, so
  // reading only the last one can skip or repeat audio.
  let prevChunk = null
  const activeRecorder = new Recorder({
    type: 'pcm', // audio format
    sampleRate: 16000, // sample rate
    bitRate: 16, // bit depth
    onProcess: (buffers, powerLevel, duration, sampleRate) => {
      // 1. Resample everything captured since the last callback to 16 kHz.
      const resampled = Recorder.SampleData(buffers, sampleRate, 16000, prevChunk)
      prevChunk = resampled
      const data16k = resampled.data
      if (!data16k || data16k.length === 0) return
      // 2. Append to the pending samples.
      const pending = new Int16Array(sampleBuf.length + data16k.length)
      pending.set(sampleBuf)
      pending.set(data16k, sampleBuf.length)
      // 3. Send complete frames. slice() copies, so each frame's buffer
      //    holds exactly chunk_size samples.
      const socket = websocketClient && websocketClient.socket
      const canSend = Boolean(socket) && socket.readyState === WebSocket.OPEN
      let offset = 0
      while (pending.length - offset >= chunk_size) {
        const frame = pending.slice(offset, offset + chunk_size)
        offset += chunk_size
        if (canSend) {
          websocketClient.send(frame.buffer)
        }
      }
      // Keep the incomplete tail for the next callback (single copy).
      sampleBuf = pending.slice(offset)
      // 4. Waveform display.
      pcmData.value = Array.from(data16k)
    }
  })
  recorder = activeRecorder
  activeRecorder.open(() => {
    // The session may have been closed (or replaced) while the recorder was
    // opening; release this instance instead of starting an orphan.
    if (recorder !== activeRecorder) {
      activeRecorder.close()
      return
    }
    activeRecorder.start()
  }, (msg, isUserNotAllow) => {
    // Ignore failures of a recorder that is no longer the current one, so a
    // stale error cannot close a newer session.
    if (recorder !== activeRecorder) return
    ElMessage.error('录音失败: ' + msg)
    closeVoiceModal()
  })
}
/**
 * Stop the active recorder, if any.
 *
 * Recordings shorter than 500 ms only produce a warning. Otherwise the PCM
 * blob is converted to WAV, stored in `audioChunks`, and an end-of-speech
 * message is sent when the WebSocket is open. Failures are logged.
 */
const stopRecording = () => {
  if (!recorder) {
    return
  }

  // Keep the WAV result and tell the server that speaking has ended.
  const onWavReady = (wavBlob) => {
    audioChunks.push(wavBlob)
    const socketIsOpen = websocketClient && websocketClient.socket.readyState === WebSocket.OPEN
    if (socketIsOpen) {
      websocketClient.send({
        is_speaking: false
      })
    }
  }

  const onWavFailed = (msg) => {
    console.error('PCM转WAV失败:', msg)
  }

  const onStopped = (blob, duration) => {
    // Shorter than 0.5 seconds.
    if (duration < 500) {
      ElMessage.warning('录音时间太短')
      return
    }
    const wavOptions = { blob, sampleRate: 16000, bitRate: 16 }
    Recorder.pcm2wav(wavOptions, onWavReady, onWavFailed)
  }

  const onStopFailed = (msg) => {
    console.error('录音停止失败:', msg)
  }

  recorder.stop(onStopped, onStopFailed)
}
/**
 * Start the one-second countdown on `remainingTime`.
 *
 * When the countdown reaches zero the interval is cleared, recording is
 * stopped and `isRecording` is reset. Calling this while a countdown is
 * already running replaces it instead of leaking the old interval.
 */
const startTimer = () => {
  if (timer) {
    clearInterval(timer)
  }
  const handle = setInterval(() => {
    remainingTime.value--
    if (remainingTime.value <= 0) {
      // Clear first so a failure in stopRecording cannot leave the interval
      // ticking (and the counter running negative).
      clearInterval(handle)
      if (timer === handle) {
        timer = null
      }
      stopRecording()
      isRecording.value = false
    }
  }, 1000)
  timer = handle
}
/**
 * Format a number of seconds as `m:ss` for the countdown display.
 *
 * @param {number} seconds - Remaining seconds.
 * @returns {string} Minutes, a colon, and the seconds left-padded to two digits.
 */
const formatTime = (seconds) => {
  const wholeMinutes = Math.floor(seconds / 60)
  const paddedSeconds = String(seconds % 60).padStart(2, '0')
  return [wholeMinutes, paddedSeconds].join(':')
}
// Create the WebSocket client from wsConfig, keep it in `websocketClient`,
// and connect it to the configured URL.
const connectWebSocket = () => {
  const client = new WebSocketClient(wsConfig)
  websocketClient = client
  client.connect(wsConfig.url)
}
// --- Lifecycle hooks ---
// On mount, warn (console only) when the browser cannot record audio.
// The check covers what this component actually relies on:
// navigator.mediaDevices.getUserMedia (called in openVoiceModal) and the
// recorder library's own capability check. MediaRecorder is not used here,
// so its absence alone is not a reason to warn.
onMounted(() => {
  const mediaDevices = navigator.mediaDevices
  const hasGetUserMedia = Boolean(mediaDevices) && typeof mediaDevices.getUserMedia === 'function'
  if (!hasGetUserMedia || !Recorder.Support()) {
    console.warn('浏览器不支持语音录制功能')
  }
})
// On unmount, release the recorder, socket and timer of any open session.
onUnmounted(() => {
  closeVoiceModal()
})
</script>
<style lang="scss" scoped>
.voice-input-container {
  width: 100%;
}

.voice-trigger-btn {
  display: flex;
  align-items: center;
  justify-content: center;
  font-size: 14px;
  min-height: 40px;
  width: 100%;
  height: 40px;
  box-sizing: border-box;
}

// Full-screen overlay for the voice input modal; content sits at the bottom.
.voice-input-modal-overlay {
  position: fixed;
  bottom: 0;
  left: 0;
  right: 0;
  top: 0;
  background-color: rgba(0, 0, 0, 0.7);
  z-index: 3000;
  display: flex;
  flex-direction: column;
  justify-content: flex-end;
  align-items: center;
  padding-bottom: 30px;
}

// Green speech bubble holding the waveform, countdown and live transcript.
.voice-bubble {
  background-color: #4CAF50;
  color: white;
  padding: 15px 25px;
  border-radius: 15px;
  margin-bottom: 40px;
  min-width: 150px;
  text-align: center;
  position: relative;

  // NOTE(review): .voice-wave is not used in this component's template;
  // presumably it targets the AudioWaveform child — confirm it still applies
  // under `scoped`.
  .voice-wave {
    width: 100%;
    height: 48px;
    display: flex;
    align-items: center;
    justify-content: center;
    margin: 10px 0;
    background: transparent;
  }

  .voice-wave canvas {
    width: 240px;
    height: 48px;
    display: block;
    background: transparent;
  }

  .timer {
    font-size: 14px;
    color: white;
    margin: 5px 0;

    // Applied by the template once 5 seconds or fewer remain.
    &.warning {
      color: #ff4d4f;
      animation: blink 1s infinite;
    }
  }

  .real-time-text {
    font-size: 14px;
    color: white;
    margin: 10px 0;
    min-height: 20px;
    word-break: break-all;
  }

  // Downward-pointing tail of the bubble.
  &::after {
    content: '';
    position: absolute;
    bottom: -10px;
    left: 50%;
    transform: translateX(-50%);
    width: 0;
    height: 0;
    border-left: 10px solid transparent;
    border-right: 10px solid transparent;
    border-top: 10px solid #4CAF50;
  }
}

.voice-modal-controls {
  display: flex;
  justify-content: space-around;
  align-items: flex-start;
  width: 100%;
  max-width: 350px;
  margin-bottom: 15px;
}

.modal-control-item {
  display: flex;
  flex-direction: column;
  align-items: center;
  text-align: center;
}

.modal-btn-label {
  font-size: 12px;
  color: #b0b0b0;
  margin-bottom: 8px;
  min-height: 18px;
}

.record-label {
  color: white;
}

.modal-btn {
  width: 60px;
  height: 60px;
  font-size: 24px;
}

// The confirm button's icon is rendered inside the el-button child component
// (via its `icon` prop), so a plain scoped selector cannot reach it.
.confirm-btn :deep(.el-icon) {
  font-size: 28px;
}

.cancel-btn,
.text-send-btn {
  background-color: rgba(255, 255, 255, 0.2);
  border: none;
  color: white;

  &:hover {
    background-color: rgba(255, 255, 255, 0.3);
  }
}

.voice-modal-tip {
  color: #a0a0a0;
  font-size: 13px;
  min-height: 20px;
}

// Voice-to-text conversion overlay.
.voice-to-text-overlay {
  position: fixed;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  background-color: rgba(0, 0, 0, 0.5);
  z-index: 3001; // Must exceed the voice modal overlay (3000) to sit above it
  display: flex;
  align-items: center;
  justify-content: center;
}

.voice-to-text-content {
  background-color: white;
  border-radius: 12px;
  padding: 20px;
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 16px;
}

.converting-indicator {
  width: 40px;
  height: 40px;
  border: 4px solid #f3f3f3;
  border-top: 4px solid var(--el-color-primary);
  border-radius: 50%;
  animation: spin 1s linear infinite;
}

.converting-text {
  font-size: 16px;
  color: #333;
}

@keyframes spin {
  0% { transform: rotate(0deg); }
  100% { transform: rotate(360deg); }
}

// Dark theme overrides (active when <html> has the `dark` class).
html.dark .modal-btn-label {
  color: #707070;
}

html.dark .record-label {
  color: #d0d0d0;
}

html.dark .cancel-btn,
html.dark .text-send-btn {
  background-color: rgba(0, 0, 0, 0.3);
  color: #ccc;

  &:hover {
    background-color: rgba(0, 0, 0, 0.4);
  }
}

html.dark .voice-modal-tip {
  color: #707070;
}

html.dark .voice-to-text-content {
  background-color: #333;
}

html.dark .converting-text {
  color: #eee;
}

// Used by .timer.warning.
@keyframes blink {
  0% { opacity: 1; }
  50% { opacity: 0.5; }
  100% { opacity: 1; }
}
</style>