Compare commits

..

4 Commits

Author SHA1 Message Date
neru feabc732cf style: run lint and format 2026-01-23 14:03:45 -03:00
neru bfc749a034 fix: remove unneeded cast 2026-01-23 14:03:15 -03:00
neru 11539d149b fix: misc style / variable consistency changes 2026-01-23 14:03:02 -03:00
neru 7cbb5f3a9f feat: add ElevenLabs 2026-01-23 13:52:12 -03:00
5 changed files with 338 additions and 15 deletions
+118
View File
@@ -0,0 +1,118 @@
import {
AutocompleteInteraction,
ChatInputCommandInteraction,
SlashCommandBuilder
} from 'discord.js';
import { Command } from '../../commands';
import { TTSManager } from '../../modules/tts';
import { ElevenLabsTTS } from '../../modules/tts-modes/elevenlabs';
// Slash-command definition for `/elevenlabs-settings`.
// Every option is optional; the numeric ones carry the min/max bounds that
// Discord enforces client-side, and `model` is completed through autocomplete.
const builder = new SlashCommandBuilder()
  .setName('elevenlabs-settings')
  .setDescription('Configures ElevenLabs generation')
  // Voice stability, bounded to [0, 1].
  .addNumberOption((option) => {
    return option
      .setName('stability')
      .setDescription('Determines whether to be stable or more variable')
      .setMinValue(0)
      .setMaxValue(1);
  })
  // Similarity boost, bounded to [0, 1].
  .addNumberOption((option) => {
    return option
      .setName('similarity-boost')
      .setDescription('Boosts clarity and target voice similarity')
      .setMinValue(0)
      .setMaxValue(1);
  })
  // Style exaggeration, bounded to [0, 1].
  .addNumberOption((option) => {
    return option
      .setName('style')
      .setDescription('How much should the style be exaggerated')
      .setMinValue(0)
      .setMaxValue(1);
  })
  // Reading speed, bounded to [0.7, 1.2].
  .addNumberOption((option) => {
    return option
      .setName('speed')
      .setDescription('The speed at which the text should be read')
      .setMinValue(0.7)
      .setMaxValue(1.2);
  })
  // Speaker boost toggle.
  .addBooleanOption((option) => {
    return option
      .setName('speaker-boost')
      .setDescription('Should speaker boost be enabled?');
  })
  // Generation model; choices are supplied by the command's autocomplete handler.
  .addStringOption((option) => {
    return option
      .setName('model')
      .setDescription('Which generation model to use')
      .setAutocomplete(true);
  });
/**
 * Owner-only `/elevenlabs-settings` command.
 *
 * `execute` pushes the supplied voice settings (and optionally a model) into
 * the loaded ElevenLabs TTS module. `autocomplete` offers the module's known
 * models for the `model` option.
 */
const cmd: Command = {
  name: builder.name,
  builder: builder,
  ownerOnly: true,

  /**
   * Applies the settings passed with the command.
   *
   * Options the user left out fall back to `ElevenLabsTTS.DEFAULT_SETTINGS`
   * (i.e. omitted options are reset, not kept at their current value).
   */
  execute: async (interaction: ChatInputCommandInteraction): Promise<void> => {
    const mod = TTSManager.get.getModule('ElevenLabs') as
      | ElevenLabsTTS
      | undefined;
    if (!mod) {
      // Always acknowledge the interaction, otherwise Discord shows
      // "The application did not respond" to the user.
      await interaction.reply('ElevenLabs is not available');
      return;
    }

    const { options } = interaction;
    const defaults = ElevenLabsTTS.DEFAULT_SETTINGS;

    // The option getters return `null` when the option was omitted. `??` is
    // required here: `||` would also discard legitimate falsy inputs such as
    // `stability: 0` or `speaker-boost: false`.
    mod.setSettings({
      stability: options.getNumber('stability') ?? defaults.stability,
      style: options.getNumber('style') ?? defaults.style,
      speed: options.getNumber('speed') ?? defaults.speed,
      user_speaker_boost:
        options.getBoolean('speaker-boost') ?? defaults.user_speaker_boost,
      similarity_boost:
        options.getNumber('similarity-boost') ?? defaults.similarity_boost
    });

    const model = options.getString('model');
    if (model) mod.setModel(model);

    await interaction.reply('ElevenLabs settings applied');
  },

  /**
   * Responds with up to 25 model identifiers whose prefix matches what the
   * user has typed so far (case-insensitive). 25 is the slice limit applied
   * before responding.
   */
  autocomplete: async (interaction: AutocompleteInteraction): Promise<void> => {
    const focused = interaction.options.getFocused(true);
    if (focused.name !== 'model') return;

    const mod = TTSManager.get.getModule('ElevenLabs') as
      | ElevenLabsTTS
      | undefined;
    if (!mod) {
      // Nothing to suggest, but still answer so the client stops waiting.
      await interaction.respond([]);
      return;
    }

    const needle = focused.value.toLowerCase();
    const filtered: string[] = mod
      .getModels()
      .filter((model) => model.toLowerCase().startsWith(needle))
      .slice(0, 25);

    await interaction.respond(
      filtered.map((choice) => ({ name: choice, value: choice }))
    );
  }
};
export default cmd;
+210
View File
@@ -0,0 +1,210 @@
import { config } from '../../utils/config';
import { TTSModule, TTSResponse } from '../tts';
import * as https from 'https';
/** Hostname (no scheme, no path) used for every HTTPS request this module makes to ElevenLabs. */
const ELEVENLABS_API_ENDPOINT = 'api.elevenlabs.io';
/**
 * A voice entry from the ElevenLabs voices listing.
 * Only the fields this module reads are declared: `voice_id` is placed in the
 * text-to-speech request path and `name` is what callers select a voice by.
 */
interface ElevenLabsVoice {
voice_id: string;
name: string;
// ...
}
/**
 * A model entry from the ElevenLabs models listing.
 * Only the fields this module reads are declared: `model_id` is what gets
 * sent with a synthesis request, `name` is the accompanying label.
 */
interface ElevenLabsModel {
model_id: string;
name: string;
// ...
}
/**
 * Parsed body of the `/v2/voices` response.
 * `voices` is optional because a body without it is treated as a failed fetch.
 */
interface ElevenLabsVoicesRes {
voices?: Array<ElevenLabsVoice>;
}
/**
 * Generation parameters that `ElevenLabsTTS` stores and forwards with every
 * synthesis request.
 *
 * NOTE(review): the public ElevenLabs API names the speaker-boost flag
 * `use_speaker_boost`, not `user_speaker_boost` — verify the key is translated
 * before it goes on the wire, otherwise the API will not see it.
 */
interface ElevenLabsVoiceSettings {
stability: number;
similarity_boost: number;
style: number;
speed: number;
user_speaker_boost: boolean;
}
/**
 * JSON payload sent to the ElevenLabs text-to-speech streaming endpoint.
 *
 * The API spells the speaker-boost flag `use_speaker_boost`, while the
 * module's own settings object exposes it as `user_speaker_boost`; the wire
 * type therefore swaps that single key.
 */
interface ElevenLabsStreamRequest {
  text: string;
  model_id: string;
  voice_settings: Omit<ElevenLabsVoiceSettings, 'user_speaker_boost'> & {
    use_speaker_boost: boolean;
  };
}

/**
 * TTS module backed by the ElevenLabs HTTP API.
 *
 * Voices and models are fetched once, in the background, when the module is
 * constructed with an API key configured. Until the voice list has arrived
 * `getVoices()` resolves to `undefined` and `generate()` resolves to `{}`.
 */
export class ElevenLabsTTS implements TTSModule {
  private voices: Array<ElevenLabsVoice> | undefined = undefined;
  private models: Array<ElevenLabsModel> | undefined = undefined;

  public name: string = 'ElevenLabs';
  public settings: ElevenLabsVoiceSettings;
  public modelId: string;

  /** Settings used until `setSettings` overrides them. */
  public static readonly DEFAULT_SETTINGS: ElevenLabsVoiceSettings = {
    stability: 0.5,
    similarity_boost: 0.5,
    style: 0.0,
    speed: 1.0,
    user_speaker_boost: true
  };

  constructor() {
    // Copy rather than alias, so the shared defaults can never be mutated
    // through `this.settings`.
    this.settings = { ...ElevenLabsTTS.DEFAULT_SETTINGS };
    this.modelId = 'eleven_flash_v2_5';

    this.setSettings = this.setSettings.bind(this);
    this.setModel = this.setModel.bind(this);
    this.getModels = this.getModels.bind(this);

    if (this.canBeUsed()) {
      // Fire-and-forget: a failed fetch must not crash the process, it only
      // leaves the corresponding list unset.
      this.fetchVoices().catch((err: unknown) =>
        console.error('Failed to get ElevenLabs voices:', err)
      );
      this.fetchModels().catch((err: unknown) =>
        console.error('Failed to get ElevenLabs models:', err)
      );
    }
  }

  /*
  TTSModule methods
  */

  /**
   * @returns The names of all known voices, or `undefined` while the voice
   * list has not been fetched.
   */
  async getVoices(): Promise<Array<string> | undefined> {
    return this.voices?.map((voice) => voice.name);
  }

  /**
   * Synthesises `text` with the voice called `voice`.
   *
   * @param voice Voice name as returned by `getVoices()`.
   * @param text Text to read.
   * @returns `{ data }` with the audio bytes on success, `{ error }` when the
   * voice is unknown, the request fails or the API answers with a non-2xx
   * status, and `{}` while the voice list has not been fetched.
   */
  async generate(voice: string, text: string): Promise<TTSResponse> {
    if (!this.voices) return {};

    const voiceData = this.voices.find((entry) => entry.name === voice);
    if (!voiceData) return { error: 'Invalid voice' };

    const options: https.RequestOptions = {
      hostname: ELEVENLABS_API_ENDPOINT,
      path: `/v1/text-to-speech/${voiceData.voice_id}/stream`,
      method: 'POST',
      headers: {
        Accept: 'application/json',
        'xi-api-key': config.tts_elevenlabs_key,
        'Content-Type': 'application/json'
      }
    };

    // Translate the internal key to the name the API actually understands.
    const { user_speaker_boost: useSpeakerBoost, ...sharedSettings } =
      this.settings;
    const body: ElevenLabsStreamRequest = {
      text: text,
      model_id: this.modelId,
      voice_settings: { ...sharedSettings, use_speaker_boost: useSpeakerBoost }
    };

    return new Promise((resolve) => {
      const req = https.request(options, (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (chunk: Buffer) => chunks.push(chunk));
        res.on('error', (error) => resolve({ error: error.message }));
        res.on('end', () => {
          const payload = Buffer.concat(chunks);
          const status = res.statusCode ?? 0;
          // On failure the body is an error document, not audio; never hand
          // it to the caller as `data`.
          if (status < 200 || status >= 300) {
            resolve({
              error: `ElevenLabs request failed (${status}): ${payload.toString('utf-8')}`
            });
            return;
          }
          resolve({ data: payload });
        });
      });
      req.on('error', (error) => resolve({ error: error.message }));
      req.write(JSON.stringify(body));
      req.end();
    });
  }

  /** @returns Whether an ElevenLabs API key is configured. */
  canBeUsed(): boolean {
    return config.tts_elevenlabs_key != undefined;
  }

  /*
  ElevenLabs specific methods
  */

  /** Merges `settings` over the current settings; omitted keys are kept. */
  public setSettings(settings: Partial<ElevenLabsVoiceSettings>) {
    this.settings = { ...this.settings, ...settings };
  }

  /**
   * Selects the generation model.
   *
   * @param name Either a model id (what `getModels()` returns) or a model's
   * display name. Ids take precedence. Unknown values, and calls made before
   * the model list has been fetched, are ignored.
   */
  public setModel(name: string) {
    if (!this.models) return;
    const model =
      this.models.find((mod) => mod.model_id === name) ??
      this.models.find((mod) => mod.name === name);
    if (!model) return;
    this.modelId = model.model_id;
  }

  /** @returns The ids of all known models; empty until they have been fetched. */
  public getModels(): Array<string> {
    if (!this.models) return [];
    return this.models.map((mod) => mod.model_id);
  }

  /**
   * Performs an authenticated GET against the ElevenLabs API and resolves
   * with the raw response body. Rejects on request or response stream errors.
   */
  private fetchBody(path: string): Promise<string> {
    const opt: https.RequestOptions = {
      hostname: ELEVENLABS_API_ENDPOINT,
      path: path,
      method: 'GET',
      headers: {
        Accept: 'application/json',
        'xi-api-key': config.tts_elevenlabs_key,
        'Content-Type': 'application/json'
      }
    };

    return new Promise((resolve, reject) => {
      const req = https.get(opt, (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (chunk: Buffer) => chunks.push(chunk));
        res.on('error', reject);
        res.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
      });
      // Reject instead of throwing: a throw inside an event handler would be
      // an uncaught exception and would leave this promise pending forever.
      req.on('error', reject);
    });
  }

  /**
   * Loads the voice list into `this.voices`.
   * Rejects when the request fails or the body carries no `voices` array.
   */
  private async fetchVoices(): Promise<void> {
    const voicesJSON = await this.fetchBody('/v2/voices');
    const voicesParsed = JSON.parse(voicesJSON) as ElevenLabsVoicesRes | null;
    if (!voicesParsed || !Array.isArray(voicesParsed.voices)) {
      console.error('ElevenLabs voice fetch responded:', voicesJSON);
      throw new Error('Failed to get ElevenLabs voices');
    }
    this.voices = voicesParsed.voices;
  }

  /**
   * Loads the model list into `this.models`.
   * Rejects when the request fails or the body is not an array (e.g. an
   * error document), so later lookups never run against a non-array.
   */
  private async fetchModels(): Promise<void> {
    const modelsJSON = await this.fetchBody('/v1/models');
    const modelsParsed: unknown = JSON.parse(modelsJSON);
    if (!Array.isArray(modelsParsed)) {
      console.error('ElevenLabs model fetch responded:', modelsJSON);
      throw new Error('Failed to get ElevenLabs models');
    }
    this.models = modelsParsed as Array<ElevenLabsModel>;
  }
}
// Default export is a single module-level instance; constructing it starts the
// voice and model fetches when an ElevenLabs API key is configured.
export default new ElevenLabsTTS();
+6 -9
View File
@@ -5,8 +5,7 @@ import * as https from 'https';
import * as zlib from 'zlib';
import TIKTOK_TTS_VOICES from './tiktok_voices.json';
const TIKTOK_TTS_ENDPOINT =
'https://api16-normal-v6.tiktokv.com/media/api/text/speech/invoke';
const TIKTOK_API_ENDPOINT = 'api16-normal-v6.tiktokv.com';
class TikTokTTS implements TTSModule {
public name: string = 'TikTok';
@@ -18,18 +17,16 @@ class TikTokTTS implements TTSModule {
async generate(voice: string, text: string): Promise<TTSResponse> {
const reqText = encodeURIComponent(text);
const path = `/?text_speaker=${voice}&req_text=${reqText}&speaker_map_type=0&aid=1233`;
const endpoint = new URL(TIKTOK_TTS_ENDPOINT);
const path = `/media/api/text/speech/invoke/?text_speaker=${voice}&req_text=${reqText}&speaker_map_type=0&aid=1233`;
const options: https.RequestOptions = {
hostname: endpoint.hostname,
path: endpoint.pathname + path,
hostname: TIKTOK_API_ENDPOINT,
path: path,
method: 'POST',
headers: {
'User-Agent':
'com.zhiliaoapp.musically/2022600030 (Linux; U; Android 7.1.2; es_ES; SM-G988N; Build/NRD90M;tt-ok/3.12.13.1)',
Cookie: `sessionid=${config.tiktok_session_id}`,
Cookie: `sessionid=${config.tts_tiktok_sessionid}`,
'Accept-Encoding': 'gzip,deflate,compress',
'Content-Type': 'application/x-www-form-urlencoded'
}
@@ -97,7 +94,7 @@ class TikTokTTS implements TTSModule {
}
canBeUsed(): boolean {
return config.tiktok_session_id != undefined;
return config.tts_tiktok_sessionid != undefined;
}
handleStatusError(code: number): string {
+2 -3
View File
@@ -54,9 +54,8 @@ export class TTSManager {
return;
}
const mod = (modRaw.default?.default ||
modRaw.default ||
modRaw) as TTSModule;
const mod = modRaw.default?.default || modRaw.default || modRaw;
if (!mod.name || typeof mod.generate !== 'function') {
this.log.warning('Invalid module format in %s', filePath);
return;
+2 -3
View File
@@ -8,13 +8,12 @@ export interface Config {
tts_azure_key: string | undefined;
tts_elevenlabs_key: string | undefined;
tts_tiktok_sessionid: string | undefined;
steam_webapi_key: string | undefined;
aws_access_id: string | undefined;
aws_access_key: string | undefined;
tiktok_session_id: string | undefined;
}
function loadConfig(): Config {
@@ -36,7 +35,7 @@ function loadConfig(): Config {
steam_webapi_key: process.env.STEAM_WEBAPI_KEY,
aws_access_id: process.env.AWS_ACCESS_ID,
aws_access_key: process.env.AWS_ACCESS_KEY,
tiktok_session_id: process.env.TIKTOK_SESSION_ID
tts_tiktok_sessionid: process.env.TTS_TIKTOK_SESSIONID
};
}