// 277 lines · 7.0 KiB · TypeScript
import { config } from '../../utils/config';
|
|
import { TTSModule, TTSResponse } from '../tts';
|
|
|
|
import * as https from 'https';
|
|
|
|
/** Hostname that every ElevenLabs API request in this module is sent to. */
const ELEVENLABS_API_ENDPOINT = 'api.elevenlabs.io';

/**
 * API key passed as the `key` query parameter of the token-refresh URL below.
 * NOTE(review): hard-coded here; presumably the public web key of the
 * ElevenLabs Firebase project - confirm, and consider moving it to config.
 */
const FIREBASE_API_KEY = 'AIzaSyBSsRE_1Os04-bxpd5JTLIniy3UK4OqKys';

/** Endpoint that exchanges a refresh token for a fresh ID token. */
const FIREBASE_URL = `https://securetoken.googleapis.com/v1/token?key=${FIREBASE_API_KEY}`;
|
|
|
|
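/*
  TODO: Implement previous text
  (presumably the `previous_text` field of the ElevenLabs text-to-speech
  request - confirm against the API reference).
*/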
|
|
|
|
/**
 * One voice entry as returned by the ElevenLabs voices endpoint.
 * Only the fields this module reads are declared.
 */
interface ElevenLabsVoice {
  /** Identifier used in the text-to-speech request path. */
  voice_id: string;
  /** Display name; voices are looked up by this value. */
  name: string;
  // ... (remaining response fields are not declared)
}
|
|
|
|
/**
 * One model entry as returned by the ElevenLabs models endpoint.
 * Only the fields this module reads are declared.
 */
interface ElevenLabsModel {
  /** Identifier sent as `model_id` in text-to-speech requests. */
  model_id: string;
  /** Display name; models are selected by this value. */
  name: string;
  // ... (remaining response fields are not declared)
}
|
|
|
|
/**
 * Body of the voices endpoint response. `voices` is optional because the
 * response may not contain it (the module treats that case as a failure).
 */
interface ElevenLabsVoicesRes {
  voices?: Array<ElevenLabsVoice>;
}
|
|
|
|
/**
 * Voice tuning parameters sent as `voice_settings` with every
 * text-to-speech request.
 */
interface ElevenLabsVoiceSettings {
  stability: number;
  similarity_boost: number;
  style: number;
  speed: number;
  // NOTE(review): the public ElevenLabs API documents this setting as
  // `use_speaker_boost`; `user_speaker_boost` looks like a typo that the API
  // would silently ignore. Kept as-is because callers may already pass this
  // key - confirm and rename together with all call sites.
  user_speaker_boost: boolean;
}
|
|
|
|
/** JSON body posted to the text-to-speech stream endpoint. */
interface ElevenLabsStreamRequest {
  /** Text to synthesize. */
  text: string;
  /** Identifier of the model to synthesize with. */
  model_id: string;
  /** Voice tuning parameters applied to this request. */
  voice_settings: ElevenLabsVoiceSettings;
}
|
|
|
|
/** Cached authentication state obtained from the token-refresh endpoint. */
interface FirebaseSession {
  /** Bearer token sent in the `Authorization` header of API requests. */
  idToken: string;
  /** Token used to obtain the next `idToken`. */
  refreshToken: string;
  /** Expiry of `idToken` as a `Date.now()`-style timestamp in milliseconds. */
  expiresAt: number;
}
|
|
|
|
/**
 * Text-to-speech module backed by the ElevenLabs HTTP API.
 *
 * Authentication uses a session whose ID token is obtained (and refreshed)
 * from the refresh token in `config.tts_elevenlabs_refreshtoken`. Voices and
 * models are fetched once, at construction time, when the module is usable.
 */
export class ElevenLabsTTS implements TTSModule {
  /** Refresh the ID token this many milliseconds before it expires. */
  private static readonly SESSION_REFRESH_MARGIN_MS = 300000;

  private voices: Array<ElevenLabsVoice> | undefined = undefined;
  private models: Array<ElevenLabsModel> | undefined = undefined;

  public name: string = 'ElevenLabs';

  public settings: ElevenLabsVoiceSettings;
  public modelId: string;

  private session: FirebaseSession | undefined = undefined;

  /** Settles once start-up initialization has finished; never rejects. */
  private initializationPromise: Promise<void> | undefined = undefined;

  // NOTE(review): `user_speaker_boost` is probably meant to be the API's
  // `use_speaker_boost`; see the public voice-settings reference - confirm.
  public static readonly DEFAULT_SETTINGS: ElevenLabsVoiceSettings = {
    stability: 0.0,
    similarity_boost: 0.5,
    style: 1.0,
    speed: 1.0,
    user_speaker_boost: true
  };

  constructor() {
    // Copy so that mutating `settings` can never alter the shared defaults.
    this.settings = { ...ElevenLabsTTS.DEFAULT_SETTINGS };
    this.modelId = 'eleven_v3';

    if (this.canBeUsed()) this.initializationPromise = this.init();

    this.setSettings = this.setSettings.bind(this);
    this.setModel = this.setModel.bind(this);
    this.getModels = this.getModels.bind(this);
  }

  /**
   * Establishes the session and loads voices and models.
   *
   * Failures are logged rather than propagated: this runs from the
   * constructor, where a rejection would be unhandled. After a failure the
   * voice/model lists simply stay unset.
   */
  private async init(): Promise<void> {
    try {
      await this.ensureSession();
      await Promise.all([this.fetchVoices(), this.fetchModels()]);
    } catch (err: unknown) {
      console.error('Failed to initialize ElevenLabs TTS:', err);
    }
  }

  /*
    TTSModule methods
  */

  /**
   * @returns The names of the loaded voices, or `undefined` when no voices
   *   have been loaded (yet). Does not wait for initialization.
   */
  async getVoices(): Promise<Array<string> | undefined> {
    return this.voices?.map((voice) => voice.name);
  }

  /**
   * Synthesizes `text` with the voice named `voice`.
   *
   * @param voice Display name of a loaded voice.
   * @param text Text to synthesize.
   * @returns `{ data }` with the response body on success, otherwise
   *   `{ error }`. Authentication failures, transport failures and non-2xx
   *   HTTP responses are all reported through `error`; this never rejects.
   */
  async generate(voice: string, text: string): Promise<TTSResponse> {
    try {
      await this.initializationPromise;
      await this.ensureSession();
    } catch (err: unknown) {
      return { error: ElevenLabsTTS.describeError(err) };
    }

    if (!this.voices) return { error: 'no voices' };
    const session = this.session;
    if (!session) return { error: 'no session' };

    const voiceData = this.voices.find((entry) => entry.name === voice);
    if (!voiceData) return { error: 'Invalid voice' };

    const options: https.RequestOptions = {
      hostname: ELEVENLABS_API_ENDPOINT,
      path: `/v1/text-to-speech/${voiceData.voice_id}/stream`,
      method: 'POST',
      headers: {
        accept: 'application/json',
        'Content-Type': 'application/json',
        origin: 'https://elevenlabs.io',
        'user-agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
        'Sec-Ch-Ua': '"Not)A;Brand";v="8", "Chromium";v="138"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        host: 'api.elevenlabs.io',
        Authorization: `Bearer ${session.idToken}`
      }
    };

    const body: ElevenLabsStreamRequest = {
      text: text,
      model_id: this.modelId,
      voice_settings: this.settings
    };

    return new Promise<TTSResponse>((resolve) => {
      const req = https.request(options, (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (chunk: Buffer) => chunks.push(chunk));
        res.on('error', (error) => resolve({ error: error.message }));
        res.on('end', () => {
          const payload = Buffer.concat(chunks);
          const status = res.statusCode ?? 0;

          // A non-2xx body is an error description, not audio.
          if (status < 200 || status >= 300) {
            resolve({
              error: `ElevenLabs request failed (HTTP ${status}): ${payload.toString('utf-8')}`
            });
            return;
          }

          resolve({ data: payload });
        });
      });

      req.on('error', (error) => resolve({ error: error.message }));

      req.write(JSON.stringify(body));
      req.end();
    });
  }

  /** @returns Whether a refresh token is configured. */
  canBeUsed(): boolean {
    return config.tts_elevenlabs_refreshtoken != undefined;
  }

  /*
    ElevenLabs specific methods
  */

  /** Merges `settings` over the current voice settings. */
  public setSettings(settings: Partial<ElevenLabsVoiceSettings>) {
    this.settings = { ...this.settings, ...settings };
  }

  /**
   * Selects the model whose display name is `name`. Does nothing when models
   * have not been loaded or no model has that name.
   */
  public setModel(name: string) {
    const model = this.models?.find((mod) => mod.name === name);
    if (!model) return;

    this.modelId = model.model_id;
  }

  /** @returns The display names of the loaded models (empty if none). */
  public getModels(): Array<string> {
    return this.models?.map((mod) => mod.name) ?? [];
  }

  /**
   * Loads the voice list into `this.voices`. No-op without a session.
   *
   * NOTE(review): only a single request is made; if `/v2/voices` is
   * paginated this loads just the first page - confirm.
   *
   * @throws When the request fails or the response carries no voice array.
   */
  private async fetchVoices(): Promise<void> {
    if (!this.session) return;

    let response: { raw: string; parsed: unknown };
    try {
      response = await this.fetchJson('/v2/voices', this.session.idToken);
    } catch (err: unknown) {
      console.error('Failed to get ElevenLabs voices:', err);
      throw err;
    }

    const voices = (response.parsed as ElevenLabsVoicesRes | null)?.voices;
    if (!Array.isArray(voices)) {
      console.error('ElevenLabs voice fetch responded:', response.raw);
      throw new Error('Failed to get ElevenLabs voices');
    }

    this.voices = voices;
  }

  /**
   * Loads the model list into `this.models`. No-op without a session.
   *
   * @throws When the request fails or the response is not an array.
   */
  private async fetchModels(): Promise<void> {
    if (!this.session) return;

    let response: { raw: string; parsed: unknown };
    try {
      response = await this.fetchJson('/v1/models', this.session.idToken);
    } catch (err: unknown) {
      console.error('Failed to get ElevenLabs models:', err);
      throw err;
    }

    // An error payload is an object, not an array; storing it would make
    // later `.find` / `.map` calls on `models` throw.
    if (!Array.isArray(response.parsed)) {
      console.error('ElevenLabs model fetch responded:', response.raw);
      throw new Error('Failed to get ElevenLabs models');
    }

    this.models = response.parsed as Array<ElevenLabsModel>;
  }

  /**
   * Performs an authenticated GET against the ElevenLabs API and parses the
   * body as JSON.
   *
   * Every failure (transport error, unparsable body) rejects the returned
   * promise; nothing is thrown from inside the event callbacks, where it
   * would surface as an uncaught exception and leave the promise pending.
   */
  private fetchJson(
    path: string,
    idToken: string
  ): Promise<{ raw: string; parsed: unknown }> {
    const opt: https.RequestOptions = {
      hostname: ELEVENLABS_API_ENDPOINT,
      path,
      method: 'GET',
      headers: {
        Accept: 'application/json',
        Authorization: `Bearer ${idToken}`,
        'Content-Type': 'application/json'
      }
    };

    return new Promise((resolve, reject) => {
      const req = https.get(opt, (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (chunk: Buffer) => chunks.push(chunk));
        res.on('error', reject);
        res.on('end', () => {
          const raw = Buffer.concat(chunks).toString('utf-8');
          try {
            resolve({ raw, parsed: JSON.parse(raw) as unknown });
          } catch {
            reject(new Error(`Invalid JSON from ElevenLabs ${path}: ${raw}`));
          }
        });
      });

      req.on('error', reject);
    });
  }

  /**
   * Makes sure `this.session` holds an ID token that is valid for at least
   * the refresh margin, refreshing it when necessary.
   *
   * @throws When no refresh token is available, the refresh request fails,
   *   or the response lacks the expected fields.
   */
  private async ensureSession(): Promise<void> {
    if (
      this.session &&
      Date.now() < this.session.expiresAt - ElevenLabsTTS.SESSION_REFRESH_MARGIN_MS
    ) {
      return;
    }

    const refreshToken =
      this.session?.refreshToken || config.tts_elevenlabs_refreshtoken;
    if (!refreshToken) throw new Error('No refresh token available');

    const response = await fetch(FIREBASE_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
        Referer: 'https://elevenlabs.io/',
        Origin: 'https://elevenlabs.io'
      },
      body: new URLSearchParams({
        grant_type: 'refresh_token',
        refresh_token: refreshToken
      })
    });

    if (!response.ok)
      throw new Error(`Auth Refresh Failed: ${await response.text()}`);

    const data = (await response.json()) as {
      id_token?: unknown;
      refresh_token?: unknown;
      expires_in?: unknown;
    } | null;

    const idToken = data?.id_token;
    const nextRefreshToken = data?.refresh_token;
    const expiresInSeconds = Number.parseInt(String(data?.expires_in), 10);

    // Reject malformed responses instead of caching a session whose expiry
    // is NaN (which would force a refresh on every call).
    if (
      typeof idToken !== 'string' ||
      typeof nextRefreshToken !== 'string' ||
      !Number.isFinite(expiresInSeconds)
    ) {
      throw new Error('Auth Refresh Failed: unexpected response shape');
    }

    this.session = {
      idToken,
      refreshToken: nextRefreshToken,
      expiresAt: Date.now() + expiresInSeconds * 1000
    };
  }

  /** Extracts a printable message from a caught value of unknown type. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }
}
|
|
|
|
// Shared module instance. Constructing it starts initialization immediately
// when a refresh token is configured (see the constructor).
export default new ElevenLabsTTS();
|