From 99b06b574b4e18312bc09125054699c811face9b Mon Sep 17 00:00:00 2001 From: neru Date: Mon, 9 Feb 2026 04:49:59 -0300 Subject: [PATCH] feat: refactor everything, handle websocket close --- src/modules/tts-modes/azure.ts | 219 ++++++++++++++++++--------------- 1 file changed, 120 insertions(+), 99 deletions(-) diff --git a/src/modules/tts-modes/azure.ts b/src/modules/tts-modes/azure.ts index 95907bb..60fb008 100644 --- a/src/modules/tts-modes/azure.ts +++ b/src/modules/tts-modes/azure.ts @@ -19,6 +19,15 @@ const SEC_VERSION = `1-${CHROME_VERSION}`; const USER_AGENT = `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${CHROME_VERSION.split('.')[0]}.0.0.0 Safari/537.36 Edg/${CHROME_VERSION.split('.')[0]}.0.0.0`; const WIN_EPOCH = 11644473600; +const WS_RECONNECT_DELAY = 2000; +const MAX_RECONNECT_ATTEMPTS = 5; + +interface PendingRequest { + resolve: (value: TTSResponse) => void; + reject: (reason: Error) => void; + audioBuff: Buffer[]; +} + class AzureTTS implements TTSModule { private voices: Array | undefined = undefined; @@ -26,61 +35,20 @@ class AzureTTS implements TTSModule { public defaultVoice: string = 'en-US-AvaNeural'; private ready: boolean = false; - private readyPromise: Promise; + private readyPromise: Promise | null = null; + private readyResolve: (() => void) | null = null; private ws: WebSocket | undefined = undefined; + private reconnectAttempts: number = 0; + private reconnectTimer: NodeJS.Timeout | null = null; + private isReconnecting: boolean = false; private log: Logger; - + // Map keyed by X-RequestId + private pendingRequests: Map = new Map(); constructor() { this.log = new Logger('Azure TTS'); - - this.readyPromise = new Promise((resolve, reject) => { - this.ws = new WebSocket(`${WEBSOCKET_URL}&Sec-MS-GEC=${this.genSecToken()}&Sec-MS-GEC-Version=${SEC_VERSION}`, { - host: 'speech.platform.bing.com', - origin: 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold', - headers: { - 'Pragma': 'no-cache', - 'Cache-Control': 'no-cache', - 'User-Agent': USER_AGENT, - 'Accept-Encoding': 'gzip, deflate, br, zstd', - 'Accept-Language': 'en-US,en;q=0.9' - } - }); - - this.ws.on('open', () => { - this.log.verbose('WebSocket open'); - - const config = `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n - { - "context": { - "synthesis": { - "audio": { - "metadataoptions": { - "sentenceBoundaryEnabled": "false", - "wordBoundaryEnabled": "true" - }, - "outputFormat": "audio-24khz-48kbitrate-mono-mp3" - } - } - } - }`; - - this.ws?.send(config.trim()); - this.ready = true; - resolve(); - }); - - this.ws.on('error', (err) => { - this.log.error('WebSocket error:', err); - reject(err); - }); - - this.ws.on('close', (code: number, reason: Buffer) => { - this.log.verbose('WebSocket closed (%d, %s)', code, reason.toString()); - this.ready = false; - }); - }); + this.initializeConnection(); } async getVoices(): Promise | undefined> { @@ -129,63 +97,20 @@ class AzureTTS implements TTSModule { async generate(voice: string, text: string): Promise { await this.readyPromise; - if (!this.ready || !this.ws || this.ws.readyState !== WebSocket.OPEN) - return { error: 'Not initialized' }; + if (!this.ready || !this.ws) return { error: 'Not initialized' }; + + const reqId = randomBytes(16).toString('hex'); + const lang = voice.split('-').slice(0, 2).join('-'); return new Promise((resolve, reject) => { - const audioBuff: Buffer[] = []; - - const msgHandler = async (data: Buffer, isBinary: boolean) => { - this.log.verbose('msg %s', data.toString()); - if (isBinary) { - const separator = 'Path:audio\r\n'; - let index = data.indexOf(separator) + separator.length; - let audioData = data.subarray(index); - audioBuff.push(audioData); - } else { - let message = data.toString(); - - if (message.includes('Path:turn.end')) { - this.ws?.off('message', msgHandler); - if (audioBuff.length > 0) - resolve({ data: Buffer.concat(audioBuff) }); - else { - this.log.error("Generation error (Azure returned no data)"); - reject(new Error('No audio data received from Azure')); - } - } else if (message.includes('Path:error') || message.includes('Path:turn.error')) { - this.log.error('Generation error %s', message); - reject(new Error('Generation error (Azure returned error)')); - } - } - } - - this.ws?.on('message', msgHandler); - - let reqId = randomBytes(16).toString('hex') - const lang = voice.split('-').slice(0, 2).join('-'); + this.pendingRequests.set(reqId, { resolve, reject, audioBuff: [] }); const headers = `X-RequestId:${reqId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n`; - const ssml = `${this.escapeXml(text)}`; + const ssml = `${this.escapeXml(text)}`; - console.log('=== DEBUG SSML ==='); - console.log('Headers length:', headers.length); - console.log('SSML length:', ssml.length); - console.log('Full message (escaped):'); - console.log(JSON.stringify(headers + ssml).substring(0, 500)); - console.log('SSML content:'); - console.log(ssml); - console.log('=== END DEBUG ==='); - - // Also log the escaped text - console.log('Escaped text:', JSON.stringify(this.escapeXml(text))); - - - this.log.verbose('WS Generation send'); this.ws?.send(headers + ssml, (err) => { if (err) { - this.ws?.off('message', msgHandler); - this.log.error('ws error'); + this.pendingRequests.delete(reqId); reject(err); } }); @@ -196,6 +121,102 @@ class AzureTTS implements TTSModule { return true; } + private initializeConnection(): void { + this.ready = false; + this.readyPromise = new Promise((resolve) => { + this.readyResolve = resolve; + this.connect(); + }); + } + + private connect(): void { + const url = `${WEBSOCKET_URL}&Sec-MS-GEC=${this.genSecToken()}&Sec-MS-GEC-Version=${SEC_VERSION}`; + + this.ws = new WebSocket(url, { + host: 'speech.platform.bing.com', + origin: 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold', + headers: { + 'Pragma': 'no-cache', + 'User-Agent': USER_AGENT, + } + }); + + this.ws.on('open', () => { + this.log.verbose('WebSocket open'); + this.reconnectAttempts = 0; + this.isReconnecting = false; + + const config = `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n + { + "context": { + "synthesis": { + "audio": { + "metadataoptions": { "sentenceBoundaryEnabled": "false", "wordBoundaryEnabled": "true" }, + "outputFormat": "audio-24khz-48kbitrate-mono-mp3" + } + } + } + }`; + this.ws?.send(config.trim()); + this.ready = true; + this.readyResolve?.(); + }); + + this.ws.on('message', (data: Buffer, isBinary: boolean) => { + this.handleIncomingMessage(data, isBinary); + }); + + this.ws.on('close', (code/*, reason*/) => { + this.ready = false; + this.log.verbose(`WS Closed: ${code}`); + this.rejectAllPending(new Error("Connection closed")); + this.scheduleReconnect(); + }); + + this.ws.on('error', (err) => { + this.log.error('WS Error:', err); + }); + } + + private scheduleReconnect() { + if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) return; + + const delay = WS_RECONNECT_DELAY * Math.pow(2, this.reconnectAttempts++); + setTimeout(() => this.connect(), delay); + } + + private handleIncomingMessage(data: Buffer, isBinary: boolean) { + const message = data.toString(); + const reqId = message.match(/X-RequestId:(.*?)\r\n/)?.[1]; + if (!reqId) return; + + const request = this.pendingRequests.get(reqId); + if (!request) return; + + if (isBinary) { + const separator = 'Path:audio\r\n'; + const index = data.indexOf(separator); + if (index !== -1) { + request.audioBuff.push(data.subarray(index + separator.length)); + } + } else { + if (message.includes('Path:turn.end')) { + request.resolve({ data: Buffer.concat(request.audioBuff) }); + this.pendingRequests.delete(reqId); + } else if (message.includes('Path:turn.error') || message.includes('Path:error')) { + request.reject(new Error("Azure synthesis error")); + this.pendingRequests.delete(reqId); + } + } + } + + private rejectAllPending(err: Error) { + for (const [id, req] of this.pendingRequests) { + req.reject(err); + this.pendingRequests.delete(id); + } + } + private genSecToken(): string { const ticks = BigInt(Math.floor((Date.now() / 1000) + Number(WIN_EPOCH))) * 10000000n const roundedTicks = ticks - (ticks % 3000000000n)