import { createHash, randomBytes } from 'crypto';
import * as https from 'https';
import * as zlib from 'zlib';
import { WebSocket } from 'ws';
import { TTSModule, TTSResponse } from '../tts';
import { Logger } from '../../utils/log';

const CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
const AZURE_ENDPOINT = "speech.platform.bing.com";
const READALOUD_PATH = `/consumer/speech/synthesize/readaloud`;
const WEBSOCKET_URL = `wss://${AZURE_ENDPOINT}${READALOUD_PATH}/edge/v1?TrustedClientToken=${CLIENT_TOKEN}`;
const VOICES_PATH = `${READALOUD_PATH}/voices/list?TrustedClientToken=${CLIENT_TOKEN}`;
const CHROME_VERSION = '138.0.7204.157';
const CHROME_MAJOR = CHROME_VERSION.split('.')[0];
const SEC_VERSION = `1-${CHROME_VERSION}`;
const USER_AGENT = `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${CHROME_MAJOR}.0.0.0 Safari/537.36 Edg/${CHROME_MAJOR}.0.0.0`;
/** Offset in seconds added to the Unix time before it is converted to ticks for the Sec-MS-GEC token. */
const WIN_EPOCH = 11644473600;
/** Base delay (ms) of the exponential reconnect back-off. */
const WS_RECONNECT_DELAY = 2000;
const MAX_RECONNECT_ATTEMPTS = 5;

/** Header line that terminates the textual header of a binary audio frame. */
const AUDIO_PATH_SEPARATOR = 'Path:audio\r\n';

/** Replacement entity for each character that is special in XML text and attribute values. */
const XML_ENTITIES: Readonly<Record<string, string>> = {
    '<': '&lt;',
    '>': '&gt;',
    '&': '&amp;',
    '"': '&quot;',
    "'": '&apos;',
};

/** Book-keeping for one in-flight synthesis request. */
interface PendingRequest {
    resolve: (value: TTSResponse) => void;
    reject: (reason: Error) => void;
    /** Audio chunks received so far, in arrival order. */
    audioBuff: Buffer[];
}

/**
 * Undoes the HTTP content coding of a response body.
 *
 * The voice-list request advertises `gzip, deflate, br`, and Node's `https`
 * client does not decompress responses on its own.
 *
 * @param raw      the concatenated response body bytes
 * @param encoding value of the `Content-Encoding` response header, if any
 * @returns the decoded body; `raw` itself when no known coding was applied
 */
function decodeBody(raw: Buffer, encoding: string | undefined): Buffer {
    switch ((encoding ?? '').trim().toLowerCase()) {
        case 'gzip':
        case 'x-gzip':
            return zlib.gunzipSync(raw);
        case 'deflate':
            return zlib.inflateSync(raw);
        case 'br':
            return zlib.brotliDecompressSync(raw);
        default:
            return raw;
    }
}

/**
 * TTS backend that talks to the "read aloud" speech endpoint over a single,
 * long-lived WebSocket. Requests are multiplexed on that socket and matched
 * to their responses through the `X-RequestId` header.
 */
class AzureTTS implements TTSModule {
    private voices: string[] | undefined = undefined;
    public name: string = 'Azure';
    public defaultVoice: string = 'en-US-AvaNeural';

    private ready: boolean = false;
    private readyPromise: Promise<void> | null = null;
    private readyResolve: (() => void) | null = null;
    private ws: WebSocket | undefined = undefined;
    private reconnectAttempts: number = 0;
    private reconnectTimer: NodeJS.Timeout | null = null;
    private isReconnecting: boolean = false;
    private log: Logger;

    // Map keyed by X-RequestId
    private pendingRequests: Map<string, PendingRequest> = new Map();

    constructor() {
        this.log = new Logger('Azure TTS');
        this.initializeConnection();
    }

    /**
     * Fetches the list of available voice short names (e.g. `en-US-AvaNeural`).
     * The list is cached after the first successful call.
     *
     * @returns the voice short names
     * @throws (rejects) on network errors, aborted responses, non-2xx status
     *         codes, or a body that is not a JSON array
     */
    async getVoices(): Promise<string[] | undefined> {
        if (this.voices) return this.voices;

        const options: https.RequestOptions = {
            hostname: AZURE_ENDPOINT,
            path: `${VOICES_PATH}&Sec-MS-GEC=${this.genSecToken()}&Sec-MS-GEC-Version=${SEC_VERSION}`,
            method: 'GET',
            headers: {
                'Pragma': 'no-cache',
                'Cache-Control': 'no-cache',
                'User-Agent': USER_AGENT,
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "en-US,en;q=0.9",
                "Authority": AZURE_ENDPOINT,
                "Sec-CH-UA": `" Not;A Brand";v="99", "Microsoft Edge";v="${CHROME_MAJOR}", "Chromium";v="${CHROME_MAJOR}"`,
                "Sec-CH-UA-Mobile": "?0",
                "Accept": "*/*",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-Mode": "cors",
                "Sec-Fetch-Dest": "empty",
            }
        };

        return new Promise<string[]>((resolve, reject) => {
            const req = https.request(options, (res) => {
                const chunks: Buffer[] = [];
                res.on('data', (chunk: Buffer) => chunks.push(chunk));
                res.on('error', reject);
                res.on('aborted', () => reject(new Error('Response aborted')));
                res.on('end', () => {
                    try {
                        const status = res.statusCode ?? 0;
                        if (status < 200 || status >= 300) {
                            throw new Error(`Voice list request failed with HTTP ${status}`);
                        }
                        const body = decodeBody(Buffer.concat(chunks), res.headers['content-encoding']).toString();
                        const parsed: unknown = JSON.parse(body);
                        if (!Array.isArray(parsed)) {
                            throw new Error('Unexpected voice list payload');
                        }
                        // Keep only entries that actually carry a string ShortName.
                        const names = parsed.flatMap((entry: unknown) => {
                            const shortName = (entry as { ShortName?: unknown } | null)?.ShortName;
                            return typeof shortName === 'string' ? [shortName] : [];
                        });
                        this.voices = names;
                        resolve(names);
                    } catch (err: unknown) {
                        reject(err instanceof Error ? err : new Error(String(err)));
                    }
                });
            });
            // Registered on the request itself so connection failures that
            // happen before any response arrives are reported as well.
            req.on('error', reject);
            req.end();
        });
    }

    /**
     * Synthesizes `text` with the given voice.
     *
     * @param voice voice short name such as `en-US-AvaNeural`; the first two
     *              dash-separated parts are used as the SSML language
     * @param text  plain text to speak (XML-escaped before sending)
     * @returns `{ data }` with the concatenated audio on success, or
     *          `{ error: 'Not initialized' }` when no connection is available
     * @throws (rejects) when sending fails, the service reports an error for
     *         this request, or the connection closes before the turn ends
     */
    async generate(voice: string, text: string): Promise<TTSResponse> {
        await this.readyPromise;
        const socket = this.ws;
        if (!this.ready || !socket) return { error: 'Not initialized' };

        const reqId = randomBytes(16).toString('hex');
        const voiceName = voice || this.defaultVoice;
        const lang = voiceName.split('-').slice(0, 2).join('-');

        return new Promise<TTSResponse>((resolve, reject) => {
            this.pendingRequests.set(reqId, { resolve, reject, audioBuff: [] });

            const headers = `X-RequestId:${reqId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n`;
            // The payload is declared as SSML, so wrap the escaped text in a
            // speak/voice envelope that carries the requested voice and language.
            const ssml =
                `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='${this.escapeXml(lang)}'>` +
                `<voice name='${this.escapeXml(voiceName)}'>${this.escapeXml(text)}</voice>` +
                `</speak>`;

            socket.send(headers + ssml, (err?: Error) => {
                if (err) {
                    this.pendingRequests.delete(reqId);
                    reject(err);
                }
            });
        });
    }

    canBeUsed(): boolean {
        return true;
    }

    /** Creates the promise `generate()` waits on and starts the first connection attempt. */
    private initializeConnection(): void {
        this.ready = false;
        this.readyPromise = new Promise<void>((resolve) => {
            this.readyResolve = resolve;
            this.connect();
        });
    }

    /** Opens the WebSocket and wires up its open/message/close/error handlers. */
    private connect(): void {
        const url = `${WEBSOCKET_URL}&Sec-MS-GEC=${this.genSecToken()}&Sec-MS-GEC-Version=${SEC_VERSION}`;
        const socket = new WebSocket(url, {
            host: AZURE_ENDPOINT,
            origin: 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
            headers: {
                'Pragma': 'no-cache',
                'User-Agent': USER_AGENT,
            }
        });
        this.ws = socket;

        socket.on('open', () => {
            this.reconnectAttempts = 0;
            this.isReconnecting = false;

            // Audio format / metadata configuration, sent once per connection.
            const config = `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n { "context": { "synthesis": { "audio": { "metadataoptions": { "sentenceBoundaryEnabled": "false", "wordBoundaryEnabled": "true" }, "outputFormat": "audio-24khz-48kbitrate-mono-mp3" } } } }`;
            socket.send(config.trim());

            this.ready = true;
            this.readyResolve?.();
        });

        socket.on('message', (data: Buffer, isBinary: boolean) => {
            this.handleIncomingMessage(data, isBinary);
        });

        socket.on('close', () => {
            this.ready = false;
            this.rejectAllPending(new Error("Connection closed"));
            this.scheduleReconnect();
        });

        socket.on('error', (err) => {
            this.log.error('WS Error:', err);
        });
    }

    /**
     * Schedules one reconnect attempt with exponential back-off
     * (`WS_RECONNECT_DELAY * 2^attempt`). Does nothing while an attempt is
     * already scheduled. After `MAX_RECONNECT_ATTEMPTS` consecutive failures
     * it gives up and releases anyone waiting on `readyPromise`, so that
     * `generate()` reports "Not initialized" instead of hanging forever.
     */
    private scheduleReconnect(): void {
        if (this.reconnectTimer !== null) return;

        if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
            this.isReconnecting = false;
            this.log.error('WS Error:', new Error('Reconnect attempts exhausted'));
            this.readyResolve?.();
            return;
        }

        const delay = WS_RECONNECT_DELAY * Math.pow(2, this.reconnectAttempts++);
        this.isReconnecting = true;
        this.reconnectTimer = setTimeout(() => {
            this.reconnectTimer = null;
            this.connect();
        }, delay);
    }

    /**
     * Routes one WebSocket frame to the pending request named in its
     * `X-RequestId` header.
     *
     * Binary frames contribute the bytes following `Path:audio\r\n` to the
     * request's audio buffer. Text frames finish the request on
     * `Path:turn.end` and fail it on `Path:turn.error` / `Path:error`.
     * Frames without a known request id are ignored.
     */
    private handleIncomingMessage(data: Buffer, isBinary: boolean): void {
        let headerText: string;
        let audioStart = -1;

        if (isBinary) {
            const index = data.indexOf(AUDIO_PATH_SEPARATOR);
            if (index === -1) return;
            audioStart = index + AUDIO_PATH_SEPARATOR.length;
            // Decode only the header region; the audio payload is not text.
            headerText = data.subarray(0, audioStart).toString();
        } else {
            headerText = data.toString();
        }

        const reqId = headerText.match(/X-RequestId:(.*?)\r\n/)?.[1];
        if (!reqId) return;

        const request = this.pendingRequests.get(reqId);
        if (!request) return;

        if (isBinary) {
            request.audioBuff.push(data.subarray(audioStart));
            return;
        }

        if (headerText.includes('Path:turn.end')) {
            this.pendingRequests.delete(reqId);
            request.resolve({ data: Buffer.concat(request.audioBuff) });
        } else if (headerText.includes('Path:turn.error') || headerText.includes('Path:error')) {
            this.pendingRequests.delete(reqId);
            request.reject(new Error("Azure synthesis error"));
        }
    }

    /** Rejects every in-flight request with `err` and empties the pending map. */
    private rejectAllPending(err: Error): void {
        const inFlight = [...this.pendingRequests.values()];
        this.pendingRequests.clear();
        for (const req of inFlight) {
            req.reject(err);
        }
    }

    /**
     * Builds the `Sec-MS-GEC` token: the upper-case hex SHA-256 of the current
     * time (in 100 ns ticks, offset by `WIN_EPOCH` and rounded down to a
     * 3,000,000,000-tick window) concatenated with the client token.
     */
    private genSecToken(): string {
        const unixSeconds = Math.floor(Date.now() / 1000);
        const ticks = BigInt(unixSeconds + WIN_EPOCH) * 10000000n;
        const roundedTicks = ticks - (ticks % 3000000000n);
        const strToHash = `${roundedTicks}${CLIENT_TOKEN}`;
        return createHash('sha256').update(strToHash, 'ascii').digest('hex').toUpperCase();
    }

    /**
     * Escapes the five XML special characters (`< > & " '`) so `unsafe` can be
     * embedded in SSML text or attribute values.
     */
    private escapeXml(unsafe: string): string {
        return unsafe.replace(/[<>&"']/g, (c) => XML_ENTITIES[c] ?? c);
    }
}

export default new AzureTTS();