AudioPod AI’s Text to Speech service provides unified text-to-speech capabilities for both standard pre-built voices and custom voice clones. Generate speech with any voice in 60+ supported languages using our advanced AI models.
# 1. List available voices to get a voice UUID
curl -X GET "https://api.audiopod.ai/api/v1/voice/voice-profiles?include_public=true&limit=10" \
  -H "X-API-Key: $AUDIOPOD_API_KEY"

# 2. Generate speech (replace VOICE_UUID with actual UUID from step 1)
#    --data-urlencode keeps characters such as "&", "+" and "=" in the text intact.
curl -X POST "https://api.audiopod.ai/api/v1/voice/voices/VOICE_UUID/generate" \
  -H "X-API-Key: $AUDIOPOD_API_KEY" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  --data-urlencode "input_text=Hello, this is a test." \
  -d "audio_format=mp3" \
  -d "speed=1.0"
# Returns: {"job_id": 1234, "status": "pending", ...}

# 3. Poll job status until completed
curl -X GET "https://api.audiopod.ai/api/v1/voice/tts-jobs/1234/status" \
  -H "X-API-Key: $AUDIOPOD_API_KEY"
# Returns: {"status": "completed", "output_url": "https://...", ...}

# 4. Download the audio from output_url
curl -o output.mp3 "OUTPUT_URL_FROM_STEP_3"
Generate speech from text using any voice (standard or custom) by voice UUID, ID, or name. All generation is processed asynchronously with job tracking.
Python
Node.js
Raw HTTP
cURL
import time

import requests
from audiopod import Client

# Initialize client
client = Client()

# Generate speech using voice UUID
job = client.voice.generate_speech(
    voice_id="550e8400-e29b-41d4-a716-446655440000",  # Voice UUID
    text="Hello! This is AudioPod AI generating natural speech.",
    audio_format="mp3",
    language="en",
    speed=1.0,
)
print(f"Generation job created: {job.id}")
print(f"Status: {job.status}")

# Check job status until completion
while True:
    status = client.voice.get_job_status(job.id)
    print(f"Job status: {status.status}")

    if status.status == 'completed':
        # Get output URL from job result
        if status.result and 'output_url' in status.result:
            audio_url = status.result['output_url']
            print(f"Audio ready: {audio_url}")

            # Download the audio file; fail loudly on an HTTP error instead
            # of writing an error page to disk as if it were audio.
            audio_response = requests.get(audio_url, timeout=60)
            audio_response.raise_for_status()
            with open("generated_speech.mp3", "wb") as f:
                f.write(audio_response.content)
            print("Audio saved as generated_speech.mp3")
        else:
            print("Job completed but no output_url was returned")
        break
    elif status.status == 'failed':
        print(f"Job failed: {status.error_message}")
        break

    time.sleep(2)  # Wait 2 seconds before checking again

# Alternative: Generate using voice name (for standard voices)
job = client.voice.generate_speech(
    voice_id="aura",  # Voice name
    text="Hello! This uses a standard voice by name.",
    audio_format="mp3",
    speed=1.0,
)
const fs = require('fs');
const fetch = require('node-fetch'); // loaded once, not on every poll iteration
const { AudioPodClient } = require('audiopod-js');

// Initialize client
const client = new AudioPodClient();

/**
 * Create a TTS job, poll it every 2 seconds until it finishes, and save the
 * resulting audio to generated_speech.mp3.
 */
async function generateSpeech() {
  try {
    // Generate speech using voice UUID
    const job = await client.voice.generateSpeech(
      '550e8400-e29b-41d4-a716-446655440000', // Voice ID/UUID
      'Hello! This is AudioPod AI generating natural speech.', // Text
      {
        language: 'en',
        audioFormat: 'mp3',
        generationParams: { speed: 1.0 }
      }
    );

    console.log(`Generation job created: ${job.id}`);
    console.log(`Status: ${job.status}`);

    // Check job status until completion
    while (true) {
      const status = await client.voice.getJobStatus(job.id);
      console.log(`Job status: ${status.status}`);

      if (status.status === 'completed') {
        const audioUrl = status.output_url;
        console.log(`Audio ready: ${audioUrl}`);

        // Download the audio file
        const audioResponse = await fetch(audioUrl);
        if (!audioResponse.ok) {
          throw new Error(`Download failed: HTTP ${audioResponse.status}`);
        }
        const buffer = await audioResponse.buffer();
        fs.writeFileSync('generated_speech.mp3', buffer);
        console.log('Audio saved as generated_speech.mp3');
        break;
      } else if (status.status === 'failed') {
        console.log(`Job failed: ${status.error_message}`);
        break;
      }

      await new Promise(resolve => setTimeout(resolve, 2000)); // Wait 2 seconds
    }
  } catch (error) {
    console.error('Error:', error.message);
  }
}

generateSpeech();
import os
import time

import requests

# Read the API key from the environment (same variable the cURL examples use)
api_key = os.environ["AUDIOPOD_API_KEY"]

# Create TTS job using voice UUID
response = requests.post(
    "https://api.audiopod.ai/api/v1/voice/voices/550e8400-e29b-41d4-a716-446655440000/generate",
    headers={"X-API-Key": api_key},
    data={
        "input_text": "Hello! This is AudioPod AI generating natural speech.",
        "audio_format": "mp3",
        "speed": 1.0,
        "language": "en",
    },
    timeout=30,
)

if response.status_code == 200:
    job_data = response.json()
    job_id = job_data["job_id"]
    print(f"Voice generation job created: {job_id}")
    print(f"Credits reserved: {job_data.get('credits_reserved')}")

    # Poll job status until completion
    while True:
        status_response = requests.get(
            f"https://api.audiopod.ai/api/v1/voice/tts-jobs/{job_id}/status",
            headers={"X-API-Key": api_key},
            timeout=30,
        )

        if status_response.status_code != 200:
            # Stop instead of polling forever on a persistent error
            print(f"Status check failed: HTTP {status_response.status_code}")
            break

        status_data = status_response.json()
        print(f"Job status: {status_data['status']}")

        if status_data['status'] == 'completed':
            audio_url = status_data['output_url']
            print(f"Audio ready: {audio_url}")

            # Download the audio file
            audio_response = requests.get(audio_url, timeout=60)
            audio_response.raise_for_status()
            with open("generated_speech.mp3", "wb") as f:
                f.write(audio_response.content)
            print("Audio saved as generated_speech.mp3")
            break
        elif status_data['status'] == 'failed':
            print(f"Job failed: {status_data.get('error_message')}")
            break

        time.sleep(2)  # Wait 2 seconds before checking again
else:
    print(f"Job creation failed: HTTP {response.status_code} {response.text}")
# Create TTS job using voice UUID
JOB_RESPONSE=$(curl -s -X POST "https://api.audiopod.ai/api/v1/voice/voices/550e8400-e29b-41d4-a716-446655440000/generate" \
  -H "X-API-Key: $AUDIOPOD_API_KEY" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  --data-urlencode 'input_text=Hello! This is AudioPod AI generating natural speech.' \
  -d 'audio_format=mp3' \
  -d 'speed=1.0' \
  -d 'language=en')

# Extract job ID (quote the variable so the JSON is passed to jq unmodified)
JOB_ID=$(echo "$JOB_RESPONSE" | jq -r '.job_id')
if [ -z "$JOB_ID" ] || [ "$JOB_ID" = "null" ]; then
  echo "Job creation failed: $JOB_RESPONSE" >&2
  exit 1
fi
echo "Job created: $JOB_ID"

# Poll job status
while true; do
  STATUS_RESPONSE=$(curl -s -X GET "https://api.audiopod.ai/api/v1/voice/tts-jobs/$JOB_ID/status" \
    -H "X-API-Key: $AUDIOPOD_API_KEY")

  STATUS=$(echo "$STATUS_RESPONSE" | jq -r '.status')
  echo "Job status: $STATUS"

  if [ "$STATUS" = "completed" ]; then
    AUDIO_URL=$(echo "$STATUS_RESPONSE" | jq -r '.output_url')
    echo "Audio ready: $AUDIO_URL"

    # Download the audio (-f: fail on HTTP errors instead of saving the error body)
    curl -f -o generated_speech.mp3 "$AUDIO_URL"
    echo "Audio saved as generated_speech.mp3"
    break
  elif [ "$STATUS" = "failed" ]; then
    echo "Job failed"
    break
  fi

  sleep 2
done
Parameters:
voice_id (required): Voice UUID, ID, or name from your voice collection
text (required): Text to convert to speech (max 5000 characters)
audio_format (optional): Output format - mp3, wav, ogg (default: mp3)
from audiopod import Client

client = Client()

# Using standard pre-built voice by name
job1 = client.voice.generate_speech(
    voice_identifier="aura",  # Standard voice name
    input_text="Hello! This uses a standard pre-built voice.",
    audio_format="mp3",
    speed=1.0,
)
print(f"Standard voice job: {job1.job_id}")

# Using custom voice clone (by integer ID)
job2 = client.voice.generate_speech(
    voice_identifier=123,  # Your custom voice ID
    input_text="Hello! This uses my custom cloned voice.",
    audio_format="mp3",
    speed=1.0,
)
print(f"Custom voice job: {job2.job_id}")

# Using voice via UUID (works for both standard and custom voices)
job3 = client.voice.generate_speech(
    voice_identifier="550e8400-e29b-41d4-a716-446655440000",  # Voice UUID
    input_text="Hello! This uses voice identification via UUID.",
    audio_format="mp3",
    speed=1.0,
)
print(f"UUID voice job: {job3.job_id}")

# List available voices
voices = client.voice.list_voices(include_public=True)
print("Available voices:")
for voice in voices:
    print(f"- {voice.name} (UUID: {voice.uuid}): {voice.description}")
const { AudioPodClient } = require('audiopod-js');

const client = new AudioPodClient();

/**
 * Create one job per way of identifying a voice (name, integer ID, UUID),
 * then list the voices available to the account.
 */
async function demonstrateVoiceIdentification() {
  try {
    // Using standard pre-built voice by name
    const job1 = await client.voice.generateSpeech({
      voiceIdentifier: 'aura', // Standard voice name
      inputText: 'Hello! This uses a standard pre-built voice.',
      audioFormat: 'mp3',
      speed: 1.0
    });
    console.log(`Standard voice job: ${job1.jobId}`);

    // Using custom voice clone (by integer ID)
    const job2 = await client.voice.generateSpeech({
      voiceIdentifier: 123, // Your custom voice ID
      inputText: 'Hello! This uses my custom cloned voice.',
      audioFormat: 'mp3',
      speed: 1.0
    });
    console.log(`Custom voice job: ${job2.jobId}`);

    // Using voice via UUID (works for both standard and custom voices)
    const job3 = await client.voice.generateSpeech({
      voiceIdentifier: '550e8400-e29b-41d4-a716-446655440000', // Voice UUID
      inputText: 'Hello! This uses voice identification via UUID.',
      audioFormat: 'mp3',
      speed: 1.0
    });
    console.log(`UUID voice job: ${job3.jobId}`);

    // List available voices
    const voices = await client.voice.listVoices({ includePublic: true });
    console.log('Available voices:');
    voices.forEach(voice => {
      console.log(`- ${voice.name} (UUID: ${voice.uuid}): ${voice.description}`);
    });
  } catch (error) {
    console.error('Error:', error.message);
  }
}

demonstrateVoiceIdentification();
import os

import requests

# Read the API key from the environment (same variable the cURL examples use)
api_key = os.environ["AUDIOPOD_API_KEY"]
headers = {"X-API-Key": api_key}

# Generate with a standard voice by name
response1 = requests.post(
    "https://api.audiopod.ai/api/v1/voice/voices/aura/generate",
    headers=headers,
    data={
        "input_text": "Hello! This uses a standard pre-built voice.",
        "audio_format": "mp3",
        "speed": 1.0,
    },
)
print(f"Standard voice job: {response1.json()['job_id']}")

# Generate with your custom voice by ID
response2 = requests.post(
    "https://api.audiopod.ai/api/v1/voice/voices/123/generate",  # Your custom voice ID
    headers=headers,
    data={
        "input_text": "Hello! This uses my custom cloned voice.",
        "audio_format": "mp3",
        "speed": 1.0,
    },
)
print(f"Custom voice job: {response2.json()['job_id']}")

# Generate with voice using UUID
response3 = requests.post(
    "https://api.audiopod.ai/api/v1/voice/voices/550e8400-e29b-41d4-a716-446655440000/generate",
    headers=headers,
    data={
        "input_text": "Hello! This uses voice identification via UUID.",
        "audio_format": "mp3",
        "speed": 1.0,
    },
)
print(f"UUID voice job: {response3.json()['job_id']}")

# List available voices
voices_response = requests.get(
    "https://api.audiopod.ai/api/v1/voice/voice-profiles?include_public=true",
    headers=headers,
)
voices = voices_response.json()
print("Available voices:")
for voice in voices:
    print(f"- {voice['name']} (UUID: {voice['uuid']}): {voice['description']}")
Use the voice profiles endpoint to discover all available voices:
# List all voices (including your custom voices)
voices = client.voice.list_voices(include_public=True)

# Filter by voice type
standard_voices = [v for v in voices if v.voice_type == "standard"]
custom_voices = [v for v in voices if v.voice_type == "custom"]

print(f"Found {len(standard_voices)} standard voices")
print(f"Found {len(custom_voices)} custom voices")
# Generate with custom settings
job = client.voice.generate_speech(
    voice_identifier="aura",
    input_text="Customized voice output with speed control",
    audio_format="wav",  # High quality WAV format
    speed=1.1,  # 10% faster than normal
    language="en",  # English language
)
AudioPod AI automatically detects the language of your input text, but you can specify it explicitly for better results:
# Automatic language detection
job1 = client.voice.generate_speech(
    voice_identifier="aura",
    input_text="Hello, how are you today?",
    # Language will be auto-detected as English
)

# Explicit language specification
job2 = client.voice.generate_speech(
    voice_identifier="aura",
    input_text="Bonjour, comment allez-vous?",
    language="fr",  # Specify French
)
# Get supported languages for a specific voice
supported_languages = client.voice.get_supported_languages(voice_identifier="aura")

print(f"Voice supports {len(supported_languages)} languages:")
for code, name in supported_languages.items():
    print(f"- {code}: {name}")
import re

from audiopod import Client

client = Client()


def split_text_by_sentences(text, max_length=2000):
    """Split text into chunks of whole sentences, respecting max_length.

    Sentences keep their original terminator (".", "!" or "?") and are
    joined with a single space inside a chunk. A single sentence longer
    than max_length is not broken up; it is returned as its own chunk.

    Args:
        text: The text to split.
        max_length: Target maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings, in original order.
    """
    # Split *after* a sentence terminator so the punctuation stays attached
    # to its sentence instead of being discarded.
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())

    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if not sentence:
            continue
        candidate = f"{current_chunk} {sentence}" if current_chunk else sentence
        if current_chunk and len(candidate) > max_length:
            chunks.append(current_chunk)
            current_chunk = sentence
        else:
            current_chunk = candidate

    if current_chunk:
        chunks.append(current_chunk)
    return chunks


def create_audiobook_chapter(chapter_text, narrator_voice="sage"):
    """Create audiobook chapter with professional narration.

    Generates one audio file per text chunk, sequentially, and returns
    the list of output URLs in chunk order.
    """
    # Split long text into manageable chunks
    chunks = split_text_by_sentences(chapter_text, max_length=2000)
    audio_urls = []

    for i, chunk in enumerate(chunks):
        print(f"Processing chunk {i+1}/{len(chunks)}: {chunk[:50]}...")

        result = client.voice.generate_speech(
            voice_id=narrator_voice,
            text=chunk,
            audio_format="wav",  # High quality for audiobooks
            speed=0.95,  # Slightly slower for audiobooks
            wait_for_completion=True,
        )

        audio_urls.append(result.output_url)
        print(f"Generated audio for chunk {i+1}")

    return audio_urls


# Example usage
chapter_text = """
Chapter 1: The Beginning

It was the best of times, it was the worst of times. The era was filled with
contradictions that would shape the destiny of nations. In this tumultuous
period, heroes would rise and fall, love would conquer fear, and the very
fabric of society would be tested by forces beyond imagination.

As dawn broke over the ancient city, our protagonist began a journey that
would change everything they thought they knew about the world.
"""

audio_urls = create_audiobook_chapter(chapter_text, "sage")
print(f"Audiobook chapter complete! Generated {len(audio_urls)} audio segments.")
const { AudioPodClient } = require('audiopod-js');

const client = new AudioPodClient();

/**
 * Split text into chunks of whole sentences, respecting maxLength.
 * Sentences keep their original terminator and are joined with a single
 * space. A single sentence longer than maxLength becomes its own chunk.
 */
function splitTextBySentences(text, maxLength = 2000) {
  // Split *after* a sentence terminator so the punctuation is preserved.
  const sentences = text.trim().split(/(?<=[.!?])\s+/);
  const chunks = [];
  let currentChunk = '';

  for (const sentence of sentences) {
    if (!sentence) {
      continue;
    }
    const candidate = currentChunk ? `${currentChunk} ${sentence}` : sentence;
    if (currentChunk && candidate.length > maxLength) {
      chunks.push(currentChunk);
      currentChunk = sentence;
    } else {
      currentChunk = candidate;
    }
  }

  if (currentChunk) {
    chunks.push(currentChunk);
  }
  return chunks;
}

/**
 * Generate one audio file per text chunk, sequentially, and resolve to the
 * list of output URLs in chunk order. Errors are logged and re-thrown.
 */
async function createAudiobookChapter(chapterText, narratorVoice = 'sage') {
  try {
    // Split long text into manageable chunks
    const chunks = splitTextBySentences(chapterText, 2000);
    const audioUrls = [];

    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      console.log(`Processing chunk ${i+1}/${chunks.length}: ${chunk.substring(0, 50)}...`);

      const result = await client.voice.generateSpeech({
        voiceId: narratorVoice,
        text: chunk,
        audioFormat: 'wav', // High quality for audiobooks
        speed: 0.95, // Slightly slower for audiobooks
        waitForCompletion: true
      });

      audioUrls.push(result.outputUrl);
      console.log(`Generated audio for chunk ${i+1}`);
    }

    return audioUrls;
  } catch (error) {
    console.error('Error creating audiobook:', error.message);
    throw error;
  }
}

// Example usage
const chapterText = `
Chapter 1: The Beginning

It was the best of times, it was the worst of times. The era was filled with
contradictions that would shape the destiny of nations. In this tumultuous
period, heroes would rise and fall, love would conquer fear, and the very
fabric of society would be tested by forces beyond imagination.

As dawn broke over the ancient city, our protagonist began a journey that
would change everything they thought they knew about the world.
`;

createAudiobookChapter(chapterText, 'sage')
  .then(audioUrls => {
    console.log(`Audiobook chapter complete! Generated ${audioUrls.length} audio segments.`);
    audioUrls.forEach((url, index) => {
      console.log(`Segment ${index + 1}: ${url}`);
    });
  })
  .catch(error => {
    console.error('Failed to create audiobook:', error);
  });
from audiopod import Client
import asyncio
import aiohttp
import json

client = Client()

# NOTE: split_text_by_sentences() is the helper defined in the basic
# audiobook example; it must be in scope for the functions below.


async def create_audiobook_chapter_parallel(chapter_text, narrator_voice="sage"):
    """Create audiobook with parallel processing for faster generation.

    All jobs are submitted up front without waiting for any of them to
    finish, then each job is polled in order. URLs of failed jobs are
    omitted from the returned list.
    """
    chunks = split_text_by_sentences(chapter_text, max_length=2000)

    # Submit every job first; wait_for_completion=False returns right after
    # the job is created, so the jobs are all in flight before polling starts.
    jobs = []
    for chunk in chunks:
        job = client.voice.generate_speech(
            voice_id=narrator_voice,
            text=chunk,
            audio_format="wav",
            speed=0.95,
            wait_for_completion=False,  # Don't wait, create jobs in parallel
        )
        jobs.append(job)

    print(f"Created {len(jobs)} generation jobs")

    # Wait for all jobs to complete
    audio_urls = []
    for i, job in enumerate(jobs):
        print(f"Waiting for job {i+1}/{len(jobs)}")

        # Poll job status
        while True:
            status = client.voice.get_job_status(job.id)
            if status.status == 'completed':
                audio_urls.append(status.output_url)
                break
            elif status.status == 'failed':
                print(f"Job {job.id} failed: {status.error_message}")
                break

            await asyncio.sleep(2)

    return audio_urls


# Example with error handling and progress tracking
def create_audiobook_with_progress(chapter_text, narrator_voice="sage"):
    """Create audiobook with detailed progress tracking.

    Chunks are generated one at a time. A chunk that raises is reported
    and skipped, so the returned list may be shorter than the chunk count.
    """
    chunks = split_text_by_sentences(chapter_text, max_length=2000)
    total_chunks = len(chunks)
    completed_chunks = 0
    audio_urls = []

    print(f"Starting audiobook generation: {total_chunks} chunks")

    for i, chunk in enumerate(chunks):
        try:
            print(f"[{i+1}/{total_chunks}] Processing: {chunk[:50]}...")

            result = client.voice.generate_speech(
                voice_id=narrator_voice,
                text=chunk,
                audio_format="wav",
                speed=0.95,
                wait_for_completion=True,
            )

            audio_urls.append(result.output_url)
            completed_chunks += 1

            progress = (completed_chunks / total_chunks) * 100
            print(f"[{i+1}/{total_chunks}] ✅ Complete ({progress:.1f}%)")
        except Exception as e:
            # Deliberate best-effort: report the failure and continue with
            # the remaining chunks.
            print(f"[{i+1}/{total_chunks}] ❌ Failed: {e}")
            # You could implement retry logic here

    print(f"Audiobook generation complete! {completed_chunks}/{total_chunks} chunks successful")
    return audio_urls
# SSML script: <emphasis> stresses a phrase, <break> inserts a pause
intro_script = '''<speak>
  <p><emphasis level="strong">Welcome back to the Tech Talk Podcast!</emphasis></p>
  <break time="1s"/>
  <p>I'm your host, and today we're diving deep into <emphasis>artificial intelligence</emphasis> and its impact on content creation.</p>
  <break time="0.5s"/>
  <p>Let's get started!</p>
</speak>'''

intro_audio = client.text_to_speech.create(
    text=intro_script,
    voice_id="ava",
    text_format="ssml",
    emotion="professional",
    quality="high",
)
# Prompt name -> text to synthesize; each prompt is saved as ivr_<name>.wav
ivr_prompts = {
    "welcome": "Thank you for calling AudioPod AI. Your call is important to us.",
    "menu": "Press 1 for sales, 2 for support, or 3 for billing.",
    "hold": "Please hold while we connect you to the next available agent.",
}

for prompt_name, text in ivr_prompts.items():
    audio = client.text_to_speech.create(
        text=text,
        voice_id="ava",  # Professional voice for business
        quality="standard",  # Lower quality for phone systems
        sample_rate=8000,  # Phone quality
        output_format="wav",
    )

    with open(f"ivr_{prompt_name}.wav", "wb") as f:
        f.write(audio.audio_data)
# Efficient: Single request for multiple sentences
long_text = "First sentence. Second sentence. Third sentence."
job = client.voice.generate_speech(
    voice_identifier="aura",
    input_text=long_text,
    audio_format="mp3",
)

# Inefficient: Multiple requests for short texts
# This creates multiple jobs and uses more credits due to per-job overhead
sentences = ["First sentence.", "Second sentence.", "Third sentence."]
for sentence in sentences:
    job = client.voice.generate_speech(
        voice_identifier="aura",
        input_text=sentence,
        audio_format="mp3",
    )
Causes:
- Text too long (>5000 characters)
- Invalid characters or encoding
- Empty text field
Solutions:
- Split long text into chunks
- Check text encoding (UTF-8)
- Validate text is not empty
404 Not Found - Invalid Voice
Causes:
- Voice identifier doesn’t exist
- Voice not accessible by user
- Voice UUID format invalid
Solutions:
- Check available voices with the voice profiles endpoint
- Verify voice UUID format
- Ensure voice is public or owned by user
402 Payment Required - Insufficient Credits
Causes:
- Not enough credits for audio generation
- Credit limit exceeded
Solutions:
- Check credit balance
- Purchase more credits
- Wait for credit reset
429 Too Many Requests
Causes:
- Rate limit exceeded
- Too many concurrent requests
Solutions:
- Implement exponential backoff
- Use request queuing
- Upgrade to higher rate limits