// luma/src/modules/tts-modes/elevenlabs.ts

import { config } from '../../utils/config';
import { TTSModule, TTSResponse } from '../tts';

import * as https from 'https';

/** Hostname of the ElevenLabs API; used as the `hostname` of every HTTPS request made in this module. */
const ELEVENLABS_API_ENDPOINT = 'api.elevenlabs.io';

/**
 * A voice entry from the ElevenLabs voice-list response.
 * Only the fields this module reads are declared.
 */
interface ElevenLabsVoice {
	/** Identifier inserted into the text-to-speech request path. */
	voice_id: string;
	/** Name used to look the voice up and to list it to callers. */
	name: string;
	// ... (further response fields are not declared here)
}

/**
 * A model entry from the parsed `/v1/models` response.
 * Only the fields this module reads are declared.
 */
interface ElevenLabsModel {
	/** Identifier stored as the active model and sent as `model_id` in synthesis requests. */
	model_id: string;
	/** Name that `setModel` matches against. */
	name: string;
	// ... (further response fields are not declared here)
}

/**
 * Parsed body of the voice-list response.
 * `voices` is optional because the fetch code treats its absence as a failed request.
 */
interface ElevenLabsVoicesRes {
	voices?: Array<ElevenLabsVoice>;
}

/**
 * Voice tuning values carried as `voice_settings` in the text-to-speech request body.
 */
interface ElevenLabsVoiceSettings {
	stability: number;
	similarity_boost: number;
	style: number;
	speed: number;
	// NOTE(review): the ElevenLabs API documents this setting as `use_speaker_boost`;
	// the spelling here differs. Renaming it would affect every user of this
	// interface, so confirm before changing.
	user_speaker_boost: boolean;
}

/** JSON body sent with the POST to the text-to-speech stream endpoint. */
interface ElevenLabsStreamRequest {
	/** Text to synthesize. */
	text: string;
	/** Identifier of the model to synthesize with. */
	model_id: string;
	/** Voice tuning values applied to this request. */
	voice_settings: ElevenLabsVoiceSettings;
}

/**
 * TTS module backed by the ElevenLabs HTTP API.
 *
 * When an API key is configured, the constructor starts loading the voice and
 * model lists in the background. Until the voice list has arrived,
 * `getVoices()` resolves to `undefined` and `generate()` resolves to an empty
 * response. Failed list fetches are logged and leave the module without
 * voices/models instead of throwing from inside an event callback.
 */
export class ElevenLabsTTS implements TTSModule {
	private voices: Array<ElevenLabsVoice> | undefined = undefined;
	private models: Array<ElevenLabsModel> | undefined = undefined;

	public name: string = 'ElevenLabs';

	/** Voice settings sent with every synthesis request. */
	public settings: ElevenLabsVoiceSettings;
	/** `model_id` sent with every synthesis request. */
	public modelId: string;

	public static readonly DEFAULT_SETTINGS: ElevenLabsVoiceSettings = {
		stability: 0.5,
		similarity_boost: 0.5,
		style: 0.0,
		speed: 1.0,
		user_speaker_boost: true
	};

	constructor() {
		// Copy the defaults so that mutating `instance.settings` in place can never
		// alter the shared static DEFAULT_SETTINGS object.
		this.settings = { ...ElevenLabsTTS.DEFAULT_SETTINGS };

		this.modelId = 'eleven_flash_v2_5';

		this.setSettings = this.setSettings.bind(this);
		this.setModel = this.setModel.bind(this);
		this.getModels = this.getModels.bind(this);

		if (this.canBeUsed()) {
			// Fire-and-forget: both methods catch and log their own failures, so
			// these promises never reject.
			void this.fetchVoices();
			void this.fetchModels();
		}
	}

	/*
        TTSModule methods
    */

	/**
	 * Lists the names of the loaded voices.
	 *
	 * @returns The voice names, or `undefined` while no voice list has been loaded.
	 */
	async getVoices(): Promise<Array<string> | undefined> {
		if (!this.voices) return undefined;
		return this.voices.map((voice) => voice.name);
	}

	/**
	 * Synthesizes `text` with the voice called `voice`.
	 *
	 * @param voice Name of a voice as returned by `getVoices()`.
	 * @param text Text to synthesize.
	 * @returns `{ data }` holding the response bytes on success, `{ error }` when
	 * the voice is unknown, the request fails or the API answers with a non-2xx
	 * status, and `{}` while no voice list has been loaded.
	 */
	async generate(voice: string, text: string): Promise<TTSResponse> {
		if (!this.voices) return {};

		const voiceData = this.voices.find((entry) => entry.name === voice);
		if (!voiceData) return { error: 'Invalid voice' };

		const options: https.RequestOptions = {
			hostname: ELEVENLABS_API_ENDPOINT,
			path: `/v1/text-to-speech/${voiceData.voice_id}/stream`,
			method: 'POST',
			headers: {
				Accept: 'application/json',
				'xi-api-key': config.tts_elevenlabs_key,
				'Content-Type': 'application/json'
			}
		};

		// The ElevenLabs API names the speaker-boost setting `use_speaker_boost`.
		// The local settings type spells it `user_speaker_boost`, so the value is
		// additionally sent under the documented key; the original key is kept so
		// the payload stays a superset of what was sent before.
		const voiceSettings = {
			...this.settings,
			use_speaker_boost: this.settings.user_speaker_boost
		};

		const body: ElevenLabsStreamRequest = {
			text: text,
			model_id: this.modelId,
			voice_settings: voiceSettings
		};

		return new Promise((resolve) => {
			const req = https.request(options, (res) => {
				const chunks: Buffer[] = [];
				res.on('data', (chunk: Buffer) => chunks.push(chunk));
				// Without this handler a broken response stream would leave the promise pending.
				res.on('error', (error) => resolve({ error: error.message }));
				res.on('end', () => {
					const payload = Buffer.concat(chunks);
					const status = res.statusCode;

					// A non-2xx answer carries an error document, not audio: report it
					// as an error instead of handing it to the caller as `data`.
					if (status !== undefined && (status < 200 || status >= 300)) {
						resolve({
							error: `ElevenLabs request failed (${status}): ${payload.toString('utf-8')}`
						});
						return;
					}

					resolve({
						data: payload
					});
				});
			});

			req.on('error', (error) => resolve({ error: error.message }));

			req.write(JSON.stringify(body));
			req.end();
		});
	}

	/** @returns Whether an ElevenLabs API key is present in the configuration. */
	canBeUsed(): boolean {
		return config.tts_elevenlabs_key != null;
	}

	/*
        ElevenLabs specific methods
    */

	/**
	 * Merges `settings` into the current voice settings; keys that are not
	 * given keep their current value.
	 */
	public setSettings(settings: Partial<ElevenLabsVoiceSettings>): void {
		this.settings = { ...this.settings, ...settings };
	}

	/**
	 * Selects the model used for synthesis.
	 *
	 * @param name Display name of a loaded model, or its `model_id` (the value
	 * `getModels()` returns). Display names are matched first. Unknown values,
	 * and calls made before the model list has loaded, are ignored.
	 */
	public setModel(name: string): void {
		if (!this.models) return;

		const model =
			this.models.find((mod) => mod.name === name) ||
			this.models.find((mod) => mod.model_id === name);
		if (!model) return;

		this.modelId = model.model_id;
	}

	/** @returns The `model_id` of every loaded model; empty while none are loaded. */
	public getModels(): Array<string> {
		if (!this.models) return [];
		return this.models.map((mod) => mod.model_id);
	}

	/**
	 * Performs an authenticated GET against the ElevenLabs API and parses the
	 * response body as JSON.
	 *
	 * @param path Request path, e.g. `/v1/models`.
	 * @returns The raw body text together with the parsed value. The promise
	 * rejects on request/response errors and when the body is not valid JSON.
	 */
	private fetchJSON(path: string): Promise<{ raw: string; parsed: unknown }> {
		const opt: https.RequestOptions = {
			hostname: ELEVENLABS_API_ENDPOINT,
			path: path,
			method: 'GET',
			headers: {
				Accept: 'application/json',
				'xi-api-key': config.tts_elevenlabs_key,
				'Content-Type': 'application/json'
			}
		};

		return new Promise((resolve, reject) => {
			const req = https.get(opt, (res) => {
				const chunks: Buffer[] = [];
				res.on('data', (chunk: Buffer) => chunks.push(chunk));
				res.on('error', reject);
				res.on('end', () => {
					const raw = Buffer.concat(chunks).toString('utf-8');
					// A throw inside an event callback would surface as an uncaught
					// exception rather than a rejection, so convert it explicitly.
					try {
						resolve({ raw: raw, parsed: JSON.parse(raw) });
					} catch (err) {
						reject(err);
					}
				});
			});

			req.on('error', reject);
		});
	}

	/**
	 * Loads the voice list into `this.voices`. Failures are logged and leave
	 * `this.voices` untouched; the returned promise never rejects.
	 */
	private async fetchVoices(): Promise<void> {
		try {
			const { raw, parsed } = await this.fetchJSON('/v2/voices');
			const voices =
				typeof parsed === 'object' && parsed !== null
					? (parsed as ElevenLabsVoicesRes).voices
					: undefined;

			if (!Array.isArray(voices)) {
				console.error('ElevenLabs voice fetch responded:', raw);
				return;
			}

			this.voices = voices;
		} catch (err) {
			console.error('Failed to get ElevenLabs voices:', err);
		}
	}

	/**
	 * Loads the model list into `this.models`. Failures are logged and leave
	 * `this.models` untouched; the returned promise never rejects.
	 */
	private async fetchModels(): Promise<void> {
		try {
			const { raw, parsed } = await this.fetchJSON('/v1/models');

			// An error answer is a JSON object, not an array; storing it would make
			// later `.find` / `.map` calls throw.
			if (!Array.isArray(parsed)) {
				console.error('ElevenLabs model fetch responded:', raw);
				return;
			}

			this.models = parsed as Array<ElevenLabsModel>;
		} catch (err) {
			console.error('Failed to get ElevenLabs models:', err);
		}
	}
}

/**
 * Module-level instance shared by every importer of the default export.
 * It is constructed when this module is first evaluated, so the constructor's
 * voice/model loading starts at import time when an API key is configured.
 */
const elevenLabsTTS = new ElevenLabsTTS();

export default elevenLabsTTS;