From 7cbb5f3a9fa90b7c4f607079cf8010439900c9fc Mon Sep 17 00:00:00 2001
From: neru
Date: Fri, 23 Jan 2026 13:52:12 -0300
Subject: [PATCH] feat: add ElevenLabs

---
 src/commands/tts/elevenlabs-settings.ts | 123 +++++++++++
 src/modules/tts-modes/elevenlabs.ts     | 253 ++++++++++++++++++++++++
 2 files changed, 376 insertions(+)
 create mode 100644 src/commands/tts/elevenlabs-settings.ts
 create mode 100644 src/modules/tts-modes/elevenlabs.ts

diff --git a/src/commands/tts/elevenlabs-settings.ts b/src/commands/tts/elevenlabs-settings.ts
new file mode 100644
index 0000000..27ba9ae
--- /dev/null
+++ b/src/commands/tts/elevenlabs-settings.ts
@@ -0,0 +1,123 @@
+import {
+  AutocompleteInteraction,
+  ChatInputCommandInteraction,
+  SlashCommandBuilder
+} from 'discord.js';
+import { Command } from '../../commands';
+import { TTSManager } from '../../modules/tts';
+import { ElevenLabsTTS } from '../../modules/tts-modes/elevenlabs';
+import { config } from '../../utils/config';
+
+// Slash-command definition. None of the options is marked as required.
+const builder = new SlashCommandBuilder()
+  .setName('elevenlabs-settings')
+  .setDescription('Configures ElevenLabs generation')
+  .addNumberOption((opt) =>
+    opt
+      .setName('stability')
+      .setDescription('Determines whether to be stable or more variable')
+      .setMaxValue(1)
+      .setMinValue(0)
+  )
+  .addNumberOption((opt) =>
+    opt
+      .setName('similarity-boost')
+      .setDescription('Boosts clarity and target voice similarity')
+      .setMaxValue(1.0)
+      .setMinValue(0)
+  )
+  .addNumberOption((opt) =>
+    opt
+      .setName('style')
+      .setDescription('How much should the style be exaggerated')
+      .setMaxValue(1.0)
+      .setMinValue(0)
+  )
+  .addNumberOption((opt) =>
+    opt
+      .setName('speed')
+      .setDescription('The speed at which the text should be read')
+      .setMaxValue(1.2)
+      .setMinValue(0.7)
+  )
+  .addBooleanOption((opt) =>
+    opt
+      .setName('speaker-boost')
+      .setDescription('Should speaker boost be enabled?')
+  )
+  .addStringOption((opt) =>
+    opt
+      .setName('model')
+      .setDescription('Which generation model to use')
+      .setAutocomplete(true)
+  );
+
+const cmd: Command = {
+  name: builder.name,
+  builder: builder,
+  ownerOnly: true,
+
+  /**
+   * Applies the supplied voice settings (and optionally the model) to the
+   * ElevenLabs module. Options left out of the invocation fall back to
+   * `ElevenLabsTTS.DEFAULT_SETTINGS`.
+   */
+  execute: async (interaction: ChatInputCommandInteraction): Promise<void> => {
+    const mod = TTSManager.get.getModule('ElevenLabs') as
+      | ElevenLabsTTS
+      | undefined;
+    if (!mod) {
+      await interaction.reply('ElevenLabs module is not available');
+      return;
+    }
+
+    const options = interaction.options;
+    const defaults = ElevenLabsTTS.DEFAULT_SETTINGS;
+
+    // `??` rather than `||`: 0 and false are legitimate choices and must not
+    // be replaced by the defaults (e.g. speaker-boost: false, stability: 0).
+    mod.setSettings({
+      stability: options.getNumber('stability') ?? defaults.stability,
+      similarity_boost:
+        options.getNumber('similarity-boost') ?? defaults.similarity_boost,
+      style: options.getNumber('style') ?? defaults.style,
+      speed: options.getNumber('speed') ?? defaults.speed,
+      user_speaker_boost:
+        options.getBoolean('speaker-boost') ?? defaults.user_speaker_boost
+    });
+
+    const model = options.getString('model');
+    if (model) mod.setModel(model);
+
+    await interaction.reply('ElevenLabs settings applied');
+  },
+
+  /**
+   * Suggests model ids for the `model` option, filtered by the prefix typed so
+   * far and capped at 25 entries.
+   */
+  autocomplete: async (interaction: AutocompleteInteraction): Promise<void> => {
+    const focused = interaction.options.getFocused(true);
+    if (focused.name !== 'model') return;
+
+    const mod = TTSManager.get.getModule('ElevenLabs') as
+      | ElevenLabsTTS
+      | undefined;
+    if (!mod) {
+      await interaction.respond([]);
+      return;
+    }
+
+    const typed = focused.value.toLowerCase();
+    const filtered = mod
+      .getModels()
+      .filter((model) => model.toLowerCase().startsWith(typed))
+      .slice(0, 25);
+
+    await interaction.respond(
+      filtered.map((choice) => ({ name: choice, value: choice }))
+    );
+  }
+};
+
+export default cmd;
diff --git a/src/modules/tts-modes/elevenlabs.ts b/src/modules/tts-modes/elevenlabs.ts
new file mode 100644
index 0000000..607f7b1
--- /dev/null
+++ b/src/modules/tts-modes/elevenlabs.ts
@@ -0,0 +1,253 @@
+import { config } from '../../utils/config';
+import { TTSModule, TTSResponse } from '../tts';
+
+import * as https from 'https';
+
+const ELEVENLABS_API_ENDPOINT = 'api.elevenlabs.io';
+
+interface ElevenLabsVoice {
+  voice_id: string;
+  name: string;
+  // ...
+}
+
+interface ElevenLabsModel {
+  model_id: string;
+  name: string;
+  // ...
+}
+
+interface ElevenLabsVoicesRes {
+  voices?: Array<ElevenLabsVoice>;
+}
+
+interface ElevenLabsVoiceSettings {
+  stability: number;
+  similarity_boost: number;
+  style: number;
+  speed: number;
+  user_speaker_boost: boolean;
+}
+
+/**
+ * Voice settings in the shape sent to the API, which names the flag
+ * `use_speaker_boost`. `user_speaker_boost` stays the in-process name so
+ * existing users of `settings` / `DEFAULT_SETTINGS` keep working.
+ */
+type ElevenLabsWireVoiceSettings = Omit<
+  ElevenLabsVoiceSettings,
+  'user_speaker_boost'
+> & { use_speaker_boost: boolean };
+
+interface ElevenLabsStreamRequest {
+  text: string;
+  model_id: string;
+  voice_settings: ElevenLabsWireVoiceSettings;
+}
+
+/** TTS module backed by the ElevenLabs HTTP API. */
+export class ElevenLabsTTS implements TTSModule {
+  private voices: Array<ElevenLabsVoice> | undefined = undefined;
+  private models: Array<ElevenLabsModel> | undefined = undefined;
+
+  public name: string = 'ElevenLabs';
+
+  public settings: ElevenLabsVoiceSettings;
+  public modelId: string;
+
+  public static readonly DEFAULT_SETTINGS: ElevenLabsVoiceSettings = {
+    stability: 0.5,
+    similarity_boost: 0.5,
+    style: 0.0,
+    speed: 1.0,
+    user_speaker_boost: true
+  };
+
+  constructor() {
+    // Copy so instance state never aliases the shared defaults object.
+    this.settings = { ...ElevenLabsTTS.DEFAULT_SETTINGS };
+
+    this.modelId = 'eleven_flash_v2_5';
+
+    this.setSettings = this.setSettings.bind(this);
+    this.setModel = this.setModel.bind(this);
+    this.getModels = this.getModels.bind(this);
+
+    if (this.canBeUsed()) {
+      // Fire-and-forget: a failed fetch is logged and leaves the list
+      // undefined instead of becoming an unhandled rejection.
+      void this.fetchVoices().catch((err: unknown) =>
+        console.error('Failed to get ElevenLabs voices:', err)
+      );
+      void this.fetchModels().catch((err: unknown) =>
+        console.error('Failed to get ElevenLabs models:', err)
+      );
+    }
+  }
+
+  /*
+    TTSModule methods
+  */
+
+  /** Names of the fetched voices, or `undefined` until they are loaded. */
+  async getVoices(): Promise<Array<string> | undefined> {
+    if (!this.voices) return undefined;
+    return this.voices.map((voice) => voice.name);
+  }
+
+  /**
+   * Synthesises `text` with the voice named `voice`.
+   *
+   * @returns `{ data }` holding the response bytes on success, `{ error }`
+   * when the voice is unknown or the request fails, and `{}` while the voice
+   * list has not been loaded.
+   */
+  async generate(voice: string, text: string): Promise<TTSResponse> {
+    if (!this.voices) return {};
+
+    const voiceData = this.voices.find((entry) => entry.name === voice);
+    if (!voiceData) return { error: 'Invalid voice' };
+
+    const options: https.RequestOptions = {
+      hostname: ELEVENLABS_API_ENDPOINT,
+      path: `/v1/text-to-speech/${voiceData.voice_id}/stream`,
+      method: 'POST',
+      headers: {
+        Accept: 'application/json',
+        'xi-api-key': config.tts_elevenlabs_key,
+        'Content-Type': 'application/json'
+      }
+    };
+
+    // The API expects `use_speaker_boost`; translate the internal name.
+    const { user_speaker_boost: speakerBoost, ...sharedSettings } =
+      this.settings;
+    const body: ElevenLabsStreamRequest = {
+      text: text,
+      model_id: this.modelId,
+      voice_settings: { ...sharedSettings, use_speaker_boost: speakerBoost }
+    };
+
+    return new Promise((resolve) => {
+      const req = https.request(options, (res) => {
+        const chunks: Buffer[] = [];
+        res.on('data', (chunk: Buffer) => chunks.push(chunk));
+        res.on('error', (error) => resolve({ error: error.message }));
+        res.on('end', () => {
+          const payload = Buffer.concat(chunks);
+          const status = res.statusCode ?? 0;
+
+          // Report non-2xx responses as failures instead of handing their
+          // body back as if it were audio.
+          if (status < 200 || status >= 300) {
+            const detail = payload.toString('utf-8');
+            resolve({
+              error: `ElevenLabs request failed (HTTP ${status}): ${detail}`
+            });
+            return;
+          }
+
+          resolve({ data: payload });
+        });
+      });
+
+      req.on('error', (error) => resolve({ error: error.message }));
+
+      req.write(JSON.stringify(body));
+      req.end();
+    });
+  }
+
+  canBeUsed(): boolean {
+    return config.tts_elevenlabs_key != undefined;
+  }
+
+  /*
+    ElevenLabs specific methods
+  */
+
+  /** Merges `settings` over the current voice settings. */
+  public setSettings(settings: Partial<ElevenLabsVoiceSettings>) {
+    this.settings = { ...this.settings, ...settings };
+  }
+
+  /**
+   * Selects the generation model. `name` may be a model id (which is what
+   * `getModels()` returns) or a display name; unknown values are ignored.
+   */
+  public setModel(name: string) {
+    if (!this.models) return;
+
+    const model = this.models.find(
+      (mod) => mod.model_id === name || mod.name === name
+    );
+    if (!model) return;
+
+    this.modelId = model.model_id;
+  }
+
+  /** Ids of the fetched models; empty until the list is loaded. */
+  public getModels(): Array<string> {
+    if (!this.models) return [];
+    return this.models.map((mod) => mod.model_id);
+  }
+
+  /** GETs `path` from the API and resolves with the parsed JSON body. */
+  private getJSON(path: string): Promise<unknown> {
+    const opt: https.RequestOptions = {
+      hostname: ELEVENLABS_API_ENDPOINT,
+      path: path,
+      method: 'GET',
+      headers: {
+        Accept: 'application/json',
+        'xi-api-key': config.tts_elevenlabs_key,
+        'Content-Type': 'application/json'
+      }
+    };
+
+    return new Promise((resolve, reject) => {
+      const req = https.get(opt, (res) => {
+        const chunks: Buffer[] = [];
+        res.on('data', (chunk: Buffer) => chunks.push(chunk));
+        res.on('error', reject);
+        res.on('end', () => {
+          // Reject instead of throwing: an exception raised inside this
+          // callback would not settle the promise.
+          try {
+            resolve(JSON.parse(Buffer.concat(chunks).toString('utf-8')));
+          } catch (err) {
+            reject(err);
+          }
+        });
+      });
+
+      req.on('error', reject);
+    });
+  }
+
+  /** Loads the voice list into `this.voices`; rejects if it is missing. */
+  private async fetchVoices(): Promise<void> {
+    // NOTE(review): /v2/voices looks paginated - confirm whether one request
+    // returns every voice.
+    const parsed = (await this.getJSON('/v2/voices')) as
+      | ElevenLabsVoicesRes
+      | null;
+    if (!parsed || !Array.isArray(parsed.voices)) {
+      console.error('ElevenLabs voice fetch responded:', parsed);
+      throw new Error('Failed to get ElevenLabs voices');
+    }
+    this.voices = parsed.voices;
+  }
+
+  /** Loads the model list into `this.models`; rejects if it is not a list. */
+  private async fetchModels(): Promise<void> {
+    const parsed = await this.getJSON('/v1/models');
+    if (!Array.isArray(parsed)) {
+      console.error('ElevenLabs model fetch responded:', parsed);
+      throw new Error('Failed to get ElevenLabs models');
+    }
+    this.models = parsed as Array<ElevenLabsModel>;
+  }
+}
+
+export default new ElevenLabsTTS();