fix: discard lib, implement azure tts
This commit is contained in:
+199
-16
@@ -1,6 +1,23 @@
|
||||
import { createHash, randomBytes } from 'crypto';
import * as https from 'https';
import * as zlib from 'zlib';

import { VoicesManager, Communicate } from 'edge-tts-universal';
import { WebSocket } from 'ws'

import { Logger } from '../../utils/log';
import { TTSModule, TTSResponse } from '../tts';
|
||||
|
||||
/** Client token sent as the `TrustedClientToken` query parameter and hashed into the `Sec-MS-GEC` token. */
const CLIENT_TOKEN = '6A5AA1D4EAFF4E9FB37E23D68491D6F4';
/** Host that serves both the synthesis WebSocket and the voice list. */
const AZURE_ENDPOINT = 'speech.platform.bing.com';

/** Common path prefix of the read-aloud endpoints. */
const READALOUD_PATH = '/consumer/speech/synthesize/readaloud';
/** Synthesis WebSocket URL; the `Sec-MS-GEC*` query parameters are appended per connection. */
const WEBSOCKET_URL = `wss://${AZURE_ENDPOINT}${READALOUD_PATH}/edge/v1?TrustedClientToken=${CLIENT_TOKEN}`;
/** Request path (relative to `AZURE_ENDPOINT`) of the voice list. */
const VOICES_PATH = `${READALOUD_PATH}/voices/list?TrustedClientToken=${CLIENT_TOKEN}`;

/** Browser version advertised to the service. */
const CHROME_VERSION = '138.0.7204.157';
/** Value of the `Sec-MS-GEC-Version` query parameter. */
const SEC_VERSION = `1-${CHROME_VERSION}`;

/** Major component of `CHROME_VERSION`, computed once for the User-Agent string. */
const CHROME_MAJOR = CHROME_VERSION.split('.')[0];
/** User-Agent header value; only the major browser version is exposed. */
const USER_AGENT = `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${CHROME_MAJOR}.0.0.0 Safari/537.36 Edg/${CHROME_MAJOR}.0.0.0`;
/** Seconds between the Windows epoch (1601-01-01) and the Unix epoch (1970-01-01). */
const WIN_EPOCH = 11644473600;
|
||||
|
||||
/**
 * TTS module that synthesizes speech through the read-aloud WebSocket endpoint
 * on `AZURE_ENDPOINT` and lists voices over HTTPS from the same host.
 *
 * One WebSocket is opened in the constructor and reused for every `generate`
 * call.
 *
 * NOTE(review): nothing reconnects after the socket closes; once `close` fires,
 * `generate` keeps returning `{ error: 'Not initialized' }`. Concurrent
 * `generate` calls also share the single socket without per-request routing.
 */
class AzureTTS implements TTSModule {
    /** Replacement text for each character that is special in XML. */
    private static readonly XML_ENTITIES: Readonly<Record<string, string>> = {
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '"': '&quot;',
        "'": '&apos;',
    };

    /** Marker that ends the textual header of a binary audio frame. */
    private static readonly AUDIO_SEPARATOR = 'Path:audio\r\n';

    /** Cached voice short names; filled by the first successful `getVoices` call. */
    private voices: Array<string> | undefined = undefined;

    public name: string = 'Azure';
    public defaultVoice: string = 'en-US-AvaNeural';

    /** True between the socket's `open` and `close` events. */
    private ready: boolean = false;
    /** Settles when the initial connection attempt succeeds (resolve) or fails (reject). */
    private readyPromise: Promise<void>;

    private ws: WebSocket | undefined = undefined;

    private log: Logger;

    /** Opens the WebSocket and sends the speech configuration once it is connected. */
    constructor() {
        this.log = new Logger('Azure TTS');

        this.readyPromise = new Promise<void>((resolve, reject) => {
            const url = `${WEBSOCKET_URL}&Sec-MS-GEC=${this.genSecToken()}&Sec-MS-GEC-Version=${SEC_VERSION}`;
            const socket = new WebSocket(url, {
                host: AZURE_ENDPOINT,
                origin: 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
                headers: {
                    'Pragma': 'no-cache',
                    'Cache-Control': 'no-cache',
                    'User-Agent': USER_AGENT,
                    'Accept-Encoding': 'gzip, deflate, br, zstd',
                    'Accept-Language': 'en-US,en;q=0.9'
                }
            });
            this.ws = socket;

            socket.on('open', () => {
                this.log.verbose('WebSocket open');
                socket.send(AzureTTS.buildSpeechConfig());
                this.ready = true;
                resolve();
            });

            socket.on('error', (err: Error) => {
                this.log.error('WebSocket error:', err);
                // No-op once the promise has already resolved.
                reject(err);
            });

            socket.on('close', (code: number, reason: Buffer) => {
                this.log.verbose('WebSocket closed (%d, %s)', code, reason.toString());
                this.ready = false;
            });
        });

        // A failed connection must not surface as an unhandled rejection when
        // nobody is awaiting yet; `generate` still observes the rejection.
        this.readyPromise.catch(() => undefined);
    }

    /**
     * Returns the short names of the available voices.
     *
     * The list is fetched once over HTTPS and cached on the instance.
     *
     * @returns The cached or freshly fetched voice short names.
     * @throws Error when the request fails, the response is aborted, the status
     *         is not 2xx, the body uses an unsupported content encoding, or the
     *         body is not a JSON array.
     */
    async getVoices(): Promise<Array<string> | undefined> {
        if (this.voices)
            return this.voices;

        const chromeMajor = CHROME_VERSION.split('.')[0];
        const options: https.RequestOptions = {
            hostname: AZURE_ENDPOINT,
            path: `${VOICES_PATH}&Sec-MS-GEC=${this.genSecToken()}&Sec-MS-GEC-Version=${SEC_VERSION}`,
            method: 'GET',
            headers: {
                'Pragma': 'no-cache',
                'Cache-Control': 'no-cache',
                'User-Agent': USER_AGENT,
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'en-US,en;q=0.9',
                'Authority': AZURE_ENDPOINT,
                'Sec-CH-UA': `" Not;A Brand";v="99", "Microsoft Edge";v="${chromeMajor}", "Chromium";v="${chromeMajor}"`,
                'Sec-CH-UA-Mobile': '?0',
                'Accept': '*/*',
                'Sec-Fetch-Site': 'none',
                'Sec-Fetch-Mode': 'cors',
                'Sec-Fetch-Dest': 'empty',
            }
        };

        const body = await new Promise<Buffer>((resolve, reject) => {
            const req = https.request(options, (res) => {
                const chunks: Buffer[] = [];

                res.on('data', (chunk: Buffer) => chunks.push(chunk));
                res.on('aborted', () => reject(new Error('Response aborted')));
                res.on('error', reject);
                res.on('end', () => {
                    const status = res.statusCode ?? 0;
                    if (status < 200 || status >= 300) {
                        reject(new Error(`Voice list request failed with HTTP status ${status}`));
                        return;
                    }

                    try {
                        // The request advertises compressed encodings and Node's
                        // https client does not decompress automatically.
                        resolve(AzureTTS.decodeBody(Buffer.concat(chunks), res.headers['content-encoding']));
                    } catch (err: unknown) {
                        reject(err);
                    }
                });
            });

            // Registered on the request itself so that connection failures
            // that happen before any response are reported too.
            req.on('error', reject);
            req.end();
        });

        this.voices = AzureTTS.parseVoiceNames(body.toString('utf8'));
        return this.voices;
    }

    /**
     * Synthesizes `text` with the given voice.
     *
     * @param voice Voice short name; its first two dash-separated parts are used as `xml:lang`.
     * @param text  Plain text to speak; it is XML-escaped before being embedded in SSML.
     * @returns `{ data }` with the concatenated audio, or `{ error: 'Not initialized' }`
     *          when the socket is not open.
     * @throws Error (as a rejection) when the service reports an error, returns no
     *         audio, the send fails, or the socket closes before the turn ends.
     */
    async generate(voice: string, text: string): Promise<TTSResponse> {
        try {
            await this.readyPromise;
        } catch {
            return { error: 'Not initialized' };
        }

        const socket = this.ws;
        if (!this.ready || !socket || socket.readyState !== WebSocket.OPEN)
            return { error: 'Not initialized' };

        return new Promise<TTSResponse>((resolve, reject) => {
            const audioChunks: Buffer[] = [];

            // Every exit path detaches both listeners so none outlive the request.
            const cleanup = (): void => {
                socket.off('message', onMessage);
                socket.off('close', onClose);
            };

            const fail = (err: Error): void => {
                cleanup();
                reject(err);
            };

            const onClose = (): void => {
                fail(new Error('WebSocket closed before generation finished'));
            };

            const onMessage = (data: Buffer, isBinary: boolean): void => {
                if (isBinary) {
                    const audio = AzureTTS.extractAudio(data);
                    if (audio.length > 0)
                        audioChunks.push(audio);
                    return;
                }

                const message = data.toString();
                this.log.verbose('msg %s', message);

                if (message.includes('Path:turn.end')) {
                    if (audioChunks.length > 0) {
                        cleanup();
                        resolve({ data: Buffer.concat(audioChunks) });
                    } else {
                        this.log.error("Generation error (Azure returned no data)");
                        fail(new Error('No audio data received from Azure'));
                    }
                } else if (message.includes('Path:error') || message.includes('Path:turn.error')) {
                    this.log.error('Generation error %s', message);
                    fail(new Error('Generation error (Azure returned error)'));
                }
            };

            socket.on('message', onMessage);
            socket.on('close', onClose);

            const reqId = randomBytes(16).toString('hex');
            const headers = `X-RequestId:${reqId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n`;
            const ssml = this.buildSsml(voice, text);

            this.log.verbose('WS Generation send');
            socket.send(headers + ssml, (err) => {
                if (err) {
                    this.log.error('ws error');
                    fail(err);
                }
            });
        });
    }

    /** Always reports the module as usable. */
    canBeUsed(): boolean {
        return true;
    }

    /**
     * Builds the `Sec-MS-GEC` token: the upper-case hex SHA-256 of the current
     * time followed by `CLIENT_TOKEN`. The time is expressed in 100 ns ticks
     * since the Windows epoch, rounded down to a 5-minute boundary.
     */
    private genSecToken(): string {
        const ticksPerSecond = 10000000n;
        const windowTicks = 300n * ticksPerSecond;

        const windowsSeconds = BigInt(Math.floor(Date.now() / 1000 + WIN_EPOCH));
        const ticks = windowsSeconds * ticksPerSecond;
        const roundedTicks = ticks - (ticks % windowTicks);

        return createHash('sha256')
            .update(`${roundedTicks}${CLIENT_TOKEN}`, 'ascii')
            .digest('hex')
            .toUpperCase();
    }

    /** Replaces the five XML special characters (`< > & " '`) with their entities. */
    private escapeXml(unsafe: string): string {
        return unsafe.replace(/[<>&"']/g, (c) => AzureTTS.XML_ENTITIES[c] ?? c);
    }

    /** Builds the SSML document for one request; every interpolated value is XML-escaped. */
    private buildSsml(voice: string, text: string): string {
        const lang = voice.split('-').slice(0, 2).join('-');
        return `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${this.escapeXml(lang)}"><voice name="${this.escapeXml(voice)}"><prosody rate="default" pitch="default" volume="default">${this.escapeXml(text)}</prosody></voice></speak>`;
    }

    /** Builds the `speech.config` message sent right after the socket opens. */
    private static buildSpeechConfig(): string {
        const payload = {
            context: {
                synthesis: {
                    audio: {
                        metadataoptions: {
                            sentenceBoundaryEnabled: 'false',
                            wordBoundaryEnabled: 'true',
                        },
                        outputFormat: 'audio-24khz-48kbitrate-mono-mp3',
                    },
                },
            },
        };
        // Serialized programmatically so the bytes on the wire do not depend
        // on how this source file happens to be indented.
        return `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n${JSON.stringify(payload)}`;
    }

    /**
     * Returns the audio payload that follows the `Path:audio` header of a binary
     * frame, or an empty buffer when the frame carries no such header.
     */
    private static extractAudio(frame: Buffer): Buffer {
        const at = frame.indexOf(AzureTTS.AUDIO_SEPARATOR);
        if (at === -1)
            return Buffer.alloc(0);
        return frame.subarray(at + AzureTTS.AUDIO_SEPARATOR.length);
    }

    /**
     * Undoes the HTTP content encoding of a response body.
     *
     * @throws Error for an encoding other than gzip, deflate, br or identity.
     */
    private static decodeBody(raw: Buffer, encoding: string | undefined): Buffer {
        switch ((encoding ?? 'identity').trim().toLowerCase()) {
            case '':
            case 'identity':
                return raw;
            case 'gzip':
            case 'x-gzip':
                return zlib.gunzipSync(raw);
            case 'deflate':
                return zlib.inflateSync(raw);
            case 'br':
                return zlib.brotliDecompressSync(raw);
            default:
                throw new Error(`Unsupported content encoding: ${encoding}`);
        }
    }

    /**
     * Extracts every string `ShortName` from the voice-list JSON.
     *
     * @throws Error when the JSON is malformed or is not an array.
     */
    private static parseVoiceNames(json: string): string[] {
        const parsed: unknown = JSON.parse(json);
        if (!Array.isArray(parsed))
            throw new Error('Unexpected voice list format');

        const names: string[] = [];
        for (const entry of parsed) {
            const shortName = (entry as { ShortName?: unknown } | null)?.ShortName;
            if (typeof shortName === 'string')
                names.push(shortName);
        }
        return names;
    }
}
|
||||
|
||||
/** Shared module-level instance; it is constructed as soon as this module is imported. */
const azureTts = new AzureTTS();

export default azureTts;
|
||||
|
||||
Reference in New Issue
Block a user