import requests
import json
from pathlib import Path
class NanoGPTTTS:
    """Client for the nano-gpt.com text-to-speech API.

    Wraps the ``/api/tts`` endpoint and normalizes the differences between
    the supported models: JSON-with-URL vs. raw binary responses, per-model
    parameter support, and per-model character limits.
    """

    # Default network timeout (seconds) so requests can never hang forever.
    DEFAULT_TIMEOUT = 30

    def __init__(self, api_key):
        """
        Args:
            api_key: nano-gpt API key, sent in the ``x-api-key`` header.
        """
        self.api_key = api_key
        self.base_url = "https://nano-gpt.com/api"
        # Per-model capabilities used for validation and payload building.
        self.model_info = {
            "Kokoro-82m": {
                "cost_per_1k": 0.001,
                "max_chars": 10000,
                "supports_speed": True,
                "output_format": "wav",
                "binary_response": False,
            },
            "Elevenlabs-Turbo-V2.5": {
                "cost_per_1k": 0.06,
                "max_chars": 10000,
                "supports_speed": True,
                "output_format": "mp3",
                "binary_response": False,
                "supports_voice_controls": True,
            },
            "tts-1": {
                "cost_per_1k": 0.015,
                "max_chars": 4096,
                "supports_speed": True,
                "binary_response": True,
                "supports_formats": True,
            },
            "tts-1-hd": {
                "cost_per_1k": 0.030,
                "max_chars": 4096,
                "supports_speed": True,
                "binary_response": True,
                "supports_formats": True,
                "supports_instructions": True,
            },
            "gpt-4o-mini-tts": {
                "cost_per_1k": 0.0006,
                "max_chars": 4096,
                "supports_speed": False,  # Speed ignored
                "binary_response": True,
                "supports_formats": True,
                "supports_instructions": True,
            },
        }

    def synthesize(self, text, model="Kokoro-82m", output_file=None, **kwargs):
        """Synthesize speech for *text* with the given model.

        Args:
            text: Text to speak; validated against the model's char limit.
            model: One of the keys in ``self.model_info``.
            output_file: Optional path; when given, the audio is saved there.
            **kwargs: Model-dependent options (``voice``, ``speed``,
                ``instructions``, ``response_format``, ``stability``,
                ``similarity_boost``, ``style``) plus the client-side
                ``timeout`` in seconds (default ``DEFAULT_TIMEOUT``).

        Returns:
            dict describing the result (see ``_handle_response``).

        Raises:
            ValueError: On invalid input or a 4xx API error.
            Exception: On any other API error.
        """
        # Pop client-side options first so they never leak into the payload.
        timeout = kwargs.pop("timeout", self.DEFAULT_TIMEOUT)
        # Validate inputs
        self._validate_request(text, model, **kwargs)
        # Prepare request
        headers = {
            "x-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        payload = self._build_payload(text, model, **kwargs)
        # Make request; an explicit timeout avoids hanging on network issues.
        response = requests.post(
            f"{self.base_url}/tts",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        if response.status_code == 200:
            return self._handle_response(response, model, output_file, timeout)
        self._handle_error(response)

    def _validate_request(self, text, model, **kwargs):
        """Validate text, model name, and warn about ignored options.

        Raises:
            ValueError: If the text is empty/whitespace, the model is
                unknown, or the text exceeds the model's character limit.
        """
        if not text.strip():
            raise ValueError("Text cannot be empty")
        if model not in self.model_info:
            raise ValueError(f"Unsupported model: {model}")
        model_config = self.model_info[model]
        if len(text) > model_config["max_chars"]:
            raise ValueError(f"Text too long for {model}. Max: {model_config['max_chars']} chars")
        # `is not None` so an explicit speed of 0 is still reported as ignored.
        if kwargs.get("speed") is not None and not model_config.get("supports_speed", True):
            print(f"Warning: Speed parameter ignored for {model}")

    def _build_payload(self, text, model, **kwargs):
        """Build the request payload based on the model's capabilities."""
        payload = {
            "text": text,
            "model": model,
        }
        model_config = self.model_info[model]
        # Add voice if specified
        if kwargs.get("voice"):
            payload["voice"] = kwargs["voice"]
        # Add speed if supported (`is not None` keeps explicit 0 consistent
        # with the validation check above).
        if kwargs.get("speed") is not None and model_config.get("supports_speed"):
            payload["speed"] = kwargs["speed"]
        # Add OpenAI-specific parameters
        if model.startswith(("tts-", "gpt-")):
            if kwargs.get("response_format"):
                payload["response_format"] = kwargs["response_format"]
            if kwargs.get("instructions") and model_config.get("supports_instructions"):
                payload["instructions"] = kwargs["instructions"]
        # Add Elevenlabs-specific parameters; `is not None` keeps valid
        # zero values such as style=0.0.
        elif model == "Elevenlabs-Turbo-V2.5":
            for param in ["stability", "similarity_boost", "style"]:
                if kwargs.get(param) is not None:
                    payload[param] = kwargs[param]
        return payload

    def _handle_response(self, response, model, output_file, timeout=DEFAULT_TIMEOUT):
        """Handle the two response shapes the API produces.

        Binary models return raw audio bytes; URL models return JSON with
        an ``audioUrl`` that must be downloaded separately.

        Returns:
            dict: For binary models, ``{"audio_file", "size"}`` or
            ``{"audio_data", "size"}``. For URL models, the API's JSON dict,
            plus ``"local_file"`` when the audio was saved.
        """
        model_config = self.model_info[model]
        if model_config.get("binary_response"):
            # Binary audio data (OpenAI models)
            audio_data = response.content
            if output_file:
                with open(output_file, 'wb') as f:
                    f.write(audio_data)
                return {"audio_file": output_file, "size": len(audio_data)}
            return {"audio_data": audio_data, "size": len(audio_data)}
        # JSON response with URL
        data = response.json()
        if output_file:
            # Download and save audio; fail loudly on a bad download rather
            # than silently writing an error page to disk.
            audio_response = requests.get(data['audioUrl'], timeout=timeout)
            audio_response.raise_for_status()
            with open(output_file, 'wb') as f:
                f.write(audio_response.content)
            data["local_file"] = output_file
        return data

    def _handle_error(self, response):
        """Translate an error response into a raised exception.

        Raises:
            ValueError: For the known 4xx statuses.
            Exception: For anything else.
        """
        try:
            error_data = response.json()
            error_msg = error_data.get('error', 'Unknown error')
        except ValueError:  # body is not JSON (JSONDecodeError subclasses ValueError)
            error_msg = f"HTTP {response.status_code}"
        if response.status_code == 400:
            raise ValueError(f"Bad request: {error_msg}")
        elif response.status_code == 401:
            raise ValueError("Unauthorized: Check your API key")
        elif response.status_code == 402:
            raise ValueError("Insufficient balance")
        elif response.status_code == 413:
            raise ValueError("Text too long")
        else:
            raise Exception(f"API Error: {error_msg}")

    def get_model_info(self, model=None):
        """Return capability info for *model*, or the full table if None.

        Unknown model names return an empty dict rather than raising.
        """
        if model:
            return self.model_info.get(model, {})
        return self.model_info

    def batch_synthesize(self, texts, model="Kokoro-82m", **kwargs):
        """Synthesize multiple texts; one failure never aborts the batch.

        Pass ``save_files=True`` to write each clip to
        ``batch_output_<n>.<ext>``, where the extension matches the model's
        output format (the old code always wrote ``.wav``, even for mp3).

        Returns:
            list[dict]: One entry per text with ``index``, ``success``,
            and either ``result`` or ``error``.
        """
        # Client-side flag: pop it so it is never forwarded to the API.
        save_files = kwargs.pop("save_files", False)
        ext = self.model_info.get(model, {}).get("output_format", "mp3")
        results = []
        for i, text in enumerate(texts):
            try:
                output_file = f"batch_output_{i + 1}.{ext}" if save_files else None
                result = self.synthesize(text, model, output_file, **kwargs)
                results.append({"index": i, "success": True, "result": result})
            except Exception as e:  # record per-item failure and keep going
                results.append({"index": i, "success": False, "error": str(e)})
        return results
# Usage examples (these hit the network — run only as a script, not on import).
def main():
    """Demonstrate the NanoGPTTTS client against each model family."""
    tts = NanoGPTTTS("YOUR_API_KEY")

    # Simple usage: Kokoro returns JSON containing an audio URL.
    result = tts.synthesize(
        "Hello world!",
        model="Kokoro-82m",
        voice="af_bella",
        output_file="hello.wav",
    )

    # Advanced Elevenlabs usage with voice-control parameters.
    result = tts.synthesize(
        "This is an expressive voice demonstration!",
        model="Elevenlabs-Turbo-V2.5",
        voice="Rachel",
        stability=0.3,
        similarity_boost=0.8,
        style=0.7,
        speed=1.1,
        output_file="expressive.mp3",
    )

    # OpenAI model with style instructions and a non-default format.
    result = tts.synthesize(
        "Welcome to our premium service.",
        model="tts-1-hd",
        voice="nova",
        instructions="Speak like a luxury brand representative",
        response_format="flac",
        output_file="premium.flac",
    )

    # Batch processing.
    texts = [
        "First audio clip.",
        "Second audio clip.",
        "Third audio clip.",
    ]
    batch_results = tts.batch_synthesize(
        texts,
        model="gpt-4o-mini-tts",
        voice="alloy",
        save_files=True,
    )
    for result in batch_results:
        if result["success"]:
            # Binary-response models report the path as "audio_file";
            # URL-based models add "local_file". The old code only looked
            # at "local_file" and always printed None for binary models.
            saved = result["result"].get("local_file") or result["result"].get("audio_file")
            print(f"Generated file {result['index']}: {saved}")
        else:
            print(f"Failed file {result['index']}: {result['error']}")


if __name__ == "__main__":
    main()