feat: refactor everything, handle websocket close

This commit is contained in:
2026-02-09 04:49:59 -03:00
parent 7c3a5f6b56
commit 99b06b574b
+120 -99
View File
@@ -19,6 +19,15 @@ const SEC_VERSION = `1-${CHROME_VERSION}`;
const USER_AGENT = `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${CHROME_VERSION.split('.')[0]}.0.0.0 Safari/537.36 Edg/${CHROME_VERSION.split('.')[0]}.0.0.0`; const USER_AGENT = `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${CHROME_VERSION.split('.')[0]}.0.0.0 Safari/537.36 Edg/${CHROME_VERSION.split('.')[0]}.0.0.0`;
const WIN_EPOCH = 11644473600; const WIN_EPOCH = 11644473600;
const WS_RECONNECT_DELAY = 2000;
const MAX_RECONNECT_ATTEMPTS = 5;
/**
 * State kept for one synthesis request while it is in flight.
 *
 * Entries live in a map keyed by the request's X-RequestId and are removed
 * once the request is settled.
 */
interface PendingRequest {
    /** Audio chunks collected so far, in arrival order. */
    audioBuff: Array<Buffer>;
    /** Settles the caller's promise with the final response. */
    resolve: (response: TTSResponse) => void;
    /** Fails the caller's promise. */
    reject: (error: Error) => void;
}
class AzureTTS implements TTSModule { class AzureTTS implements TTSModule {
private voices: Array<string> | undefined = undefined; private voices: Array<string> | undefined = undefined;
@@ -26,61 +35,20 @@ class AzureTTS implements TTSModule {
public defaultVoice: string = 'en-US-AvaNeural'; public defaultVoice: string = 'en-US-AvaNeural';
private ready: boolean = false; private ready: boolean = false;
private readyPromise: Promise<void>; private readyPromise: Promise<void> | null = null;
private readyResolve: (() => void) | null = null;
private ws: WebSocket | undefined = undefined; private ws: WebSocket | undefined = undefined;
private reconnectAttempts: number = 0;
private reconnectTimer: NodeJS.Timeout | null = null;
private isReconnecting: boolean = false;
private log: Logger; private log: Logger;
// Map keyed by X-RequestId
private pendingRequests: Map<string, PendingRequest> = new Map();
constructor() { constructor() {
this.log = new Logger('Azure TTS'); this.log = new Logger('Azure TTS');
this.initializeConnection();
this.readyPromise = new Promise((resolve, reject) => {
this.ws = new WebSocket(`${WEBSOCKET_URL}&Sec-MS-GEC=${this.genSecToken()}&Sec-MS-GEC-Version=${SEC_VERSION}`, {
host: 'speech.platform.bing.com',
origin: 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
headers: {
'Pragma': 'no-cache',
'Cache-Control': 'no-cache',
'User-Agent': USER_AGENT,
'Accept-Encoding': 'gzip, deflate, br, zstd',
'Accept-Language': 'en-US,en;q=0.9'
}
});
this.ws.on('open', () => {
this.log.verbose('WebSocket open');
const config = `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n
{
"context": {
"synthesis": {
"audio": {
"metadataoptions": {
"sentenceBoundaryEnabled": "false",
"wordBoundaryEnabled": "true"
},
"outputFormat": "audio-24khz-48kbitrate-mono-mp3"
}
}
}
}`;
this.ws?.send(config.trim());
this.ready = true;
resolve();
});
this.ws.on('error', (err) => {
this.log.error('WebSocket error:', err);
reject(err);
});
this.ws.on('close', (code: number, reason: Buffer) => {
this.log.verbose('WebSocket closed (%d, %s)', code, reason.toString());
this.ready = false;
});
});
} }
async getVoices(): Promise<Array<string> | undefined> { async getVoices(): Promise<Array<string> | undefined> {
@@ -129,63 +97,20 @@ class AzureTTS implements TTSModule {
async generate(voice: string, text: string): Promise<TTSResponse> { async generate(voice: string, text: string): Promise<TTSResponse> {
await this.readyPromise; await this.readyPromise;
if (!this.ready || !this.ws || this.ws.readyState !== WebSocket.OPEN) if (!this.ready || !this.ws) return { error: 'Not initialized' };
return { error: 'Not initialized' };
return new Promise((resolve, reject) => { const reqId = randomBytes(16).toString('hex');
const audioBuff: Buffer[] = [];
const msgHandler = async (data: Buffer, isBinary: boolean) => {
this.log.verbose('msg %s', data.toString());
if (isBinary) {
const separator = 'Path:audio\r\n';
let index = data.indexOf(separator) + separator.length;
let audioData = data.subarray(index);
audioBuff.push(audioData);
} else {
let message = data.toString();
if (message.includes('Path:turn.end')) {
this.ws?.off('message', msgHandler);
if (audioBuff.length > 0)
resolve({ data: Buffer.concat(audioBuff) });
else {
this.log.error("Generation error (Azure returned no data)");
reject(new Error('No audio data received from Azure'));
}
} else if (message.includes('Path:error') || message.includes('Path:turn.error')) {
this.log.error('Generation error %s', message);
reject(new Error('Generation error (Azure returned error)'));
}
}
}
this.ws?.on('message', msgHandler);
let reqId = randomBytes(16).toString('hex')
const lang = voice.split('-').slice(0, 2).join('-'); const lang = voice.split('-').slice(0, 2).join('-');
return new Promise((resolve, reject) => {
this.pendingRequests.set(reqId, { resolve, reject, audioBuff: [] });
const headers = `X-RequestId:${reqId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n`; const headers = `X-RequestId:${reqId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n`;
const ssml = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${lang}"><voice name="${voice}"><prosody rate="default" pitch="default" volume="default">${this.escapeXml(text)}</prosody></voice></speak>`; const ssml = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="${lang}"><voice name="${voice}"><prosody rate="default" pitch="default">${this.escapeXml(text)}</prosody></voice></speak>`;
console.log('=== DEBUG SSML ===');
console.log('Headers length:', headers.length);
console.log('SSML length:', ssml.length);
console.log('Full message (escaped):');
console.log(JSON.stringify(headers + ssml).substring(0, 500));
console.log('SSML content:');
console.log(ssml);
console.log('=== END DEBUG ===');
// Also log the escaped text
console.log('Escaped text:', JSON.stringify(this.escapeXml(text)));
this.log.verbose('WS Generation send');
this.ws?.send(headers + ssml, (err) => { this.ws?.send(headers + ssml, (err) => {
if (err) { if (err) {
this.ws?.off('message', msgHandler); this.pendingRequests.delete(reqId);
this.log.error('ws error');
reject(err); reject(err);
} }
}); });
@@ -196,6 +121,102 @@ class AzureTTS implements TTSModule {
return true; return true;
} }
/**
 * Marks the module as not ready, creates a fresh readiness promise and
 * starts the first connection attempt.
 *
 * The promise's resolver is stored in `readyResolve` so that it can be
 * called later, once the socket reports that it is open.
 */
private initializeConnection(): void {
    this.ready = false;

    const readiness = new Promise<void>((markReady) => {
        this.readyResolve = markReady;
        // Started from inside the executor so that a synchronous failure
        // while connecting is captured by the promise.
        this.connect();
    });

    this.readyPromise = readiness;
}
/**
 * Opens a new WebSocket to the speech endpoint, stores it in `this.ws`
 * and attaches the open / message / close / error handlers.
 *
 * - open: resets the reconnect counters, sends the speech configuration,
 *   flags the module as ready and resolves the readiness promise.
 * - message: forwarded to `handleIncomingMessage`.
 * - close: clears the ready flag, rejects every pending request and asks
 *   for a reconnect.
 * - error: logged only; no reconnect is requested from this handler.
 */
private connect(): void {
    const endpoint = `${WEBSOCKET_URL}&Sec-MS-GEC=${this.genSecToken()}&Sec-MS-GEC-Version=${SEC_VERSION}`;

    const socket = new WebSocket(endpoint, {
        host: 'speech.platform.bing.com',
        origin: 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
        headers: {
            'Pragma': 'no-cache',
            'User-Agent': USER_AGENT,
        }
    });
    this.ws = socket;

    const onOpen = (): void => {
        this.log.verbose('WebSocket open');
        this.reconnectAttempts = 0;
        this.isReconnecting = false;

        // The literal below is sent verbatim (after trimming), so its line
        // layout is intentionally left untouched.
        const speechConfig = `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n
{
"context": {
"synthesis": {
"audio": {
"metadataoptions": { "sentenceBoundaryEnabled": "false", "wordBoundaryEnabled": "true" },
"outputFormat": "audio-24khz-48kbitrate-mono-mp3"
}
}
}
}`;
        this.ws?.send(speechConfig.trim());

        this.ready = true;
        this.readyResolve?.();
    };

    const onMessage = (data: Buffer, isBinary: boolean): void => {
        this.handleIncomingMessage(data, isBinary);
    };

    const onClose = (code: number): void => {
        this.ready = false;
        this.log.verbose(`WS Closed: ${code}`);
        // Nothing more will arrive on this socket, so fail the waiters now.
        this.rejectAllPending(new Error("Connection closed"));
        this.scheduleReconnect();
    };

    const onError = (err: Error): void => {
        this.log.error('WS Error:', err);
    };

    socket.on('open', onOpen);
    socket.on('message', onMessage);
    socket.on('close', onClose);
    socket.on('error', onError);
}
/**
 * Queues a single reconnect attempt with exponential backoff.
 *
 * The delay is `WS_RECONNECT_DELAY * 2^attempt`, where `attempt` is the
 * number of reconnects already tried since the last successful open.
 * After `MAX_RECONNECT_ATTEMPTS` tries no further attempt is queued.
 *
 * At most one timer is outstanding at a time: its handle is kept in
 * `reconnectTimer`, and a call made while a timer is pending is ignored so
 * that overlapping triggers cannot open more than one socket.
 */
private scheduleReconnect(): void {
    // A reconnect is already queued; a second one would duplicate the socket.
    if (this.reconnectTimer !== null) return;

    if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
        this.isReconnecting = false;
        this.log.error(`WS reconnect abandoned after ${MAX_RECONNECT_ATTEMPTS} attempts`);
        // Settle the readiness promise so that anything awaiting it stops
        // waiting; `ready` is still false, so such callers can see that the
        // connection is unavailable. This is a no-op if it already resolved.
        this.readyResolve?.();
        return;
    }

    const delay = WS_RECONNECT_DELAY * 2 ** this.reconnectAttempts;
    this.reconnectAttempts += 1;
    this.isReconnecting = true;

    this.reconnectTimer = setTimeout(() => {
        // Clear the handle first so the next close can schedule again.
        this.reconnectTimer = null;
        this.connect();
    }, delay);
}
/**
 * Routes one WebSocket frame to the pending request it belongs to.
 *
 * The request is identified by the `X-RequestId` header carried in the
 * frame; frames without that header, or for an unknown id, are ignored.
 *
 * - Binary frames: the bytes following the `Path:audio\r\n` header are
 *   appended to the request's audio chunks. Binary frames without that
 *   marker are ignored.
 * - Text frame containing `Path:turn.end`: the request is finished. It is
 *   resolved with the concatenated audio, or rejected when no audio bytes
 *   were received.
 * - Text frame containing `Path:turn.error` or `Path:error`: the message is
 *   logged and the request is rejected.
 *
 * @param data     Raw frame payload.
 * @param isBinary Whether the frame was sent as binary.
 */
private handleIncomingMessage(data: Buffer, isBinary: boolean): void {
    const audioMarker = 'Path:audio\r\n';

    let headerText: string;
    let audioStart = -1;
    if (isBinary) {
        const markerIndex = data.indexOf(audioMarker);
        if (markerIndex === -1) return;
        audioStart = markerIndex + audioMarker.length;
        // Decode only the header section; everything after the marker is
        // audio and must not be run through string decoding.
        headerText = data.subarray(0, audioStart).toString();
    } else {
        headerText = data.toString();
    }

    // Trim so an optional space after the colon does not break the lookup.
    const reqId = /X-RequestId:(.*?)\r\n/.exec(headerText)?.[1]?.trim();
    if (!reqId) return;

    const request = this.pendingRequests.get(reqId);
    if (!request) return;

    if (isBinary) {
        request.audioBuff.push(data.subarray(audioStart));
        return;
    }

    if (headerText.includes('Path:turn.end')) {
        this.pendingRequests.delete(reqId);
        const audio = Buffer.concat(request.audioBuff);
        if (audio.length > 0) {
            request.resolve({ data: audio });
        } else {
            // An empty result is not playable audio; report it as a failure
            // instead of handing back a zero-length buffer.
            this.log.error("Generation error (Azure returned no data)");
            request.reject(new Error('No audio data received from Azure'));
        }
    } else if (headerText.includes('Path:turn.error') || headerText.includes('Path:error')) {
        this.pendingRequests.delete(reqId);
        // Keep the server's own message in the log for diagnosis.
        this.log.error('Generation error %s', headerText);
        request.reject(new Error("Azure synthesis error"));
    }
}
/**
 * Fails every request that is still waiting for a response and empties
 * the pending-request map.
 *
 * @param err Error passed to each request's `reject` callback.
 */
private rejectAllPending(err: Error) {
    this.pendingRequests.forEach((pending, requestId) => {
        pending.reject(err);
        // Removing the entry currently being visited is safe for a Map.
        this.pendingRequests.delete(requestId);
    });
}
private genSecToken(): string { private genSecToken(): string {
const ticks = BigInt(Math.floor((Date.now() / 1000) + Number(WIN_EPOCH))) * 10000000n const ticks = BigInt(Math.floor((Date.now() / 1000) + Number(WIN_EPOCH))) * 10000000n
const roundedTicks = ticks - (ticks % 3000000000n) const roundedTicks = ticks - (ticks % 3000000000n)