diff --git a/src/modules/tts-modes/polly.ts b/src/modules/tts-modes/polly.ts
new file mode 100644
index 0000000..7846b80
--- /dev/null
+++ b/src/modules/tts-modes/polly.ts
@@ -0,0 +1,142 @@
+import {
+  PollyClient,
+  DescribeVoicesCommand,
+  Voice,
+  SynthesizeSpeechCommand,
+  Engine
+} from '@aws-sdk/client-polly';
+import { TTSModule, TTSResponse } from '../tts';
+import { config } from '../../utils/config';
+
+/** Engine preference order: the first entry a voice supports is used. */
+const ENGINE_PRIORITY: Engine[] = [
+  'generative',
+  'neural',
+  'standard',
+  'long-form'
+];
+
+/**
+ * TTS module backed by AWS Polly: lists voices and synthesizes MP3 speech.
+ *
+ * The Polly client is created only when both AWS credentials are present in
+ * the config; without it every method returns an empty result.
+ */
+class PollyTTS implements TTSModule {
+  private client: PollyClient | undefined = undefined;
+  // Voice descriptions, cached after the first successful DescribeVoices call.
+  private voices: Array<Voice> | undefined = undefined;
+
+  public name: string = 'AWS Polly';
+
+  constructor() {
+    if (!config.aws_access_id || !config.aws_access_key) return;
+
+    this.client = new PollyClient({
+      credentials: {
+        accessKeyId: config.aws_access_id,
+        secretAccessKey: config.aws_access_key
+      }
+    });
+  }
+
+  /**
+   * Lists the available voices as `<LanguageCode> <Id>` strings.
+   *
+   * @returns An empty array when no client is configured, or `undefined`
+   * when the voice list could not be fetched.
+   */
+  async getVoices(): Promise<Array<string> | undefined> {
+    if (!this.client) return [];
+
+    const voices = await this.loadVoices();
+    if (!voices) return undefined;
+
+    return voices.map((voice) => `${voice.LanguageCode} ${voice.Id}`);
+  }
+
+  /**
+   * Synthesizes `text` as MP3 audio with the given voice.
+   *
+   * @param voice Voice string in the format produced by `getVoices()`.
+   * @param text Text to speak.
+   * @returns The audio in `data`. `data` is an empty buffer when the client
+   * or the voice list is unavailable; the result is an empty object when the
+   * voice is unknown, supports no known engine, or synthesis fails.
+   */
+  async generate(voice: string, text: string): Promise<TTSResponse> {
+    const client = this.client;
+    if (!client) return { data: Buffer.from([]) };
+
+    // Load the voice list on demand so this also works before getVoices()
+    // has been called.
+    const voices = await this.loadVoices();
+    if (!voices) return { data: Buffer.from([]) };
+
+    // Drop the leading "<LanguageCode> " prefix that getVoices() adds.
+    const voiceKey = voice.split(' ').slice(1).join(' ');
+    // getVoices() emits the voice Id, which is not always equal to the
+    // display Name, so match on Id first and fall back to Name.
+    const voiceData =
+      voices.find((voiceDesc) => voiceDesc.Id === voiceKey) ??
+      voices.find((voiceDesc) => voiceDesc.Name === voiceKey);
+    if (!voiceData) return {};
+
+    const bestEngine = this.getBestEngine(voiceData);
+    if (!bestEngine) return {};
+
+    const cmd = new SynthesizeSpeechCommand({
+      Engine: bestEngine,
+      LanguageCode: voiceData.LanguageCode,
+      OutputFormat: 'mp3',
+      Text: text,
+      VoiceId: voiceData.Id
+    });
+
+    try {
+      const res = await client.send(cmd);
+      if (!res.AudioStream) return {};
+
+      const buffer = Buffer.from(await res.AudioStream.transformToByteArray());
+
+      return { data: buffer };
+    } catch (err) {
+      console.error('AWS Polly gen error:', err);
+    }
+
+    return {};
+  }
+
+  /** Reports whether both AWS credentials are configured. */
+  async canBeUsed(): Promise<boolean> {
+    return Boolean(config.aws_access_id && config.aws_access_key);
+  }
+
+  /** Returns the cached voice list, fetching it from Polly on first use. */
+  private async loadVoices(): Promise<Array<Voice> | undefined> {
+    if (this.voices || !this.client) return this.voices;
+
+    try {
+      const res = await this.client.send(new DescribeVoicesCommand({}));
+      if (res.Voices) this.voices = res.Voices;
+    } catch (err) {
+      console.error('AWS Polly getVoices error:', err);
+    }
+
+    return this.voices;
+  }
+
+  /**
+   * Picks the most preferred engine the voice supports, or `null` when it
+   * supports none of the known engines.
+   */
+  private getBestEngine(voice: Voice): Engine | null {
+    if (!voice.SupportedEngines || voice.SupportedEngines.length === 0) {
+      return null;
+    }
+    const supportedSet = new Set(voice.SupportedEngines);
+    return ENGINE_PRIORITY.find((engine) => supportedSet.has(engine)) ?? null;
+  }
+}
+
+export default new PollyTTS();
diff --git a/src/utils/config.ts b/src/utils/config.ts
index ed7a44f..3cb15bf 100644
--- a/src/utils/config.ts
+++ b/src/utils/config.ts
@@ -10,6 +10,9 @@ export interface Config {
   tts_elevenlabs_key: string | undefined;
 
   steam_webapi_key: string | undefined;
+
+  aws_access_id: string | undefined;
+  aws_access_key: string | undefined;
 }
 
 function loadConfig(): Config {
@@ -28,7 +31,9 @@ function loadConfig(): Config {
     tts_default_voice: process.env.DEFAULT_TTS_VOICE,
     tts_azure_key: process.env.TTS_AZURE_KEY,
     tts_elevenlabs_key: process.env.TTS_ELEVENLABS_KEY,
-    steam_webapi_key: process.env.STEAM_WEBAPI_KEY
+    steam_webapi_key: process.env.STEAM_WEBAPI_KEY,
+    aws_access_id: process.env.AWS_ACCESS_ID,
+    aws_access_key: process.env.AWS_ACCESS_KEY
   };
 }
 