import requests
import time
import json
from pathlib import Path
class NanoGPTTranscriber:
    """Small client for the NanoGPT ``/transcribe`` HTTP endpoint.

    Audio is submitted either as a direct multipart file upload or as a URL.
    A ``200`` response is returned as parsed JSON; a ``202`` response is
    treated as an asynchronous job that is polled via ``/transcribe/status``
    until it reports ``completed`` or ``failed``.
    """

    # Largest file, in bytes, that is sent through the direct-upload path.
    MAX_UPLOAD_BYTES = 3 * 1024 * 1024

    def __init__(self, api_key, timeout=300, poll_interval=5,
                 max_poll_attempts=60):
        """Store credentials and request settings.

        Args:
            api_key: Value sent in the ``x-api-key`` header of every request.
            timeout: Passed as ``timeout`` to every ``requests.post`` call so
                a stalled connection cannot block forever. ``None`` disables
                the timeout.
            poll_interval: Seconds slept before each async status request.
            max_poll_attempts: Number of status requests made before an
                async job is considered timed out.
        """
        self.api_key = api_key
        self.base_url = "https://nano-gpt.com/api"
        self.timeout = timeout
        self.poll_interval = poll_interval
        self.max_poll_attempts = max_poll_attempts

    def transcribe(self, audio_path=None, audio_url=None, **kwargs):
        """Transcribe audio, choosing the upload method from the arguments.

        Args:
            audio_path: Path of a local file to upload directly.
            audio_url: URL of the audio, sent to the API as ``audioUrl``.
            **kwargs: Request options read by ``_prepare_request_data``:
                ``model``, ``language``, ``diarize``, ``tagAudioEvents``
                and ``actualDuration``.

        Returns:
            The parsed JSON body of the final API response.

        Raises:
            ValueError: If both or neither of ``audio_path`` and
                ``audio_url`` are given.
        """
        if audio_path and audio_url:
            raise ValueError("Specify either audio_path or audio_url, not both")
        if audio_path:
            return self._transcribe_file(audio_path, **kwargs)
        if audio_url:
            return self._transcribe_url(audio_url, **kwargs)
        raise ValueError("Either audio_path or audio_url must be provided")

    def _transcribe_file(self, audio_path, **kwargs):
        """Upload a local file as multipart form data and return the result.

        Raises:
            ValueError: If the file is larger than ``MAX_UPLOAD_BYTES``.
            OSError: If the file cannot be stat'ed or read.
        """
        path = Path(audio_path)
        if path.stat().st_size > self.MAX_UPLOAD_BYTES:
            raise ValueError("File too large for direct upload. Use audio_url method.")

        # NOTE(review): the part is always declared as 'audio/mpeg', whatever
        # the real file type is -- confirm the API ignores or tolerates this
        # for non-MP3 uploads.
        files = {'audio': (path.name, path.read_bytes(), 'audio/mpeg')}
        response = requests.post(
            f"{self.base_url}/transcribe",
            headers={"x-api-key": self.api_key},
            files=files,
            data=self._prepare_request_data(**kwargs),
            timeout=self.timeout,
        )
        return self._handle_response(response)

    def _transcribe_url(self, audio_url, **kwargs):
        """Submit an audio URL as a JSON request and return the result."""
        headers = {
            "x-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        payload = {"audioUrl": audio_url}
        payload.update(self._prepare_request_data(**kwargs))
        response = requests.post(
            f"{self.base_url}/transcribe",
            headers=headers,
            json=payload,
            timeout=self.timeout,
        )
        return self._handle_response(response)

    def _prepare_request_data(self, **kwargs):
        """Build the request fields shared by both submission methods.

        ``model`` defaults to ``"Whisper-Large-V3"`` and ``language`` to
        ``"auto"``. Optional fields are only included when truthy: boolean
        flags are sent as the string ``"true"`` (other truthy values are
        passed through unchanged) and ``actualDuration`` is stringified.
        """
        data = {
            "model": kwargs.get("model", "Whisper-Large-V3"),
            "language": kwargs.get("language", "auto"),
        }
        for flag in ("diarize", "tagAudioEvents"):
            value = kwargs.get(flag)
            if value:
                data[flag] = "true" if isinstance(value, bool) else value
        if kwargs.get("actualDuration"):
            data["actualDuration"] = str(kwargs["actualDuration"])
        return data

    def _handle_response(self, response):
        """Turn an HTTP response into a result or raise a descriptive error.

        Returns:
            Parsed JSON for a ``200`` response, or the result of polling the
            async job for a ``202`` response.

        Raises:
            RuntimeError: For any other status code. The message is
                ``"API Error: ..."`` when the body is JSON, otherwise
                ``"HTTP Error: <status>"``.
        """
        if response.status_code == 200:
            return response.json()
        if response.status_code == 202:
            return self._poll_async_job(response.json())

        try:
            error_data = response.json()
        except ValueError as err:
            # json.JSONDecodeError and requests' own JSONDecodeError are both
            # ValueError subclasses, so this covers every decoding backend.
            raise RuntimeError(f"HTTP Error: {response.status_code}") from err

        if isinstance(error_data, dict):
            detail = error_data.get('error', 'Unknown error')
        else:
            # A JSON body that is not an object has no 'error' key to read.
            detail = error_data
        raise RuntimeError(f"API Error: {detail}")

    def _poll_async_job(self, job_data):
        """Poll ``/transcribe/status`` until the async job finishes.

        Args:
            job_data: Parsed body of the ``202`` response. ``runId`` is
                required; the other fields are echoed back when present.

        Returns:
            The status payload whose ``status`` is ``"completed"``.

        Raises:
            RuntimeError: If the job reports ``status == "failed"``.
            TimeoutError: If the job has not completed after
                ``max_poll_attempts`` status requests.
        """
        headers = {
            "x-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        status_data = {
            "runId": job_data['runId'],
            "cost": job_data.get('cost'),
            "paymentSource": job_data.get('paymentSource'),
            "isApiRequest": True,
            "fileName": job_data.get('fileName'),
            "fileSize": job_data.get('fileSize'),
            "chargedDuration": job_data.get('chargedDuration'),
            "diarize": job_data.get('diarize', False),
        }
        for _ in range(self.max_poll_attempts):
            # Sleep first: the job was only just accepted by the server.
            time.sleep(self.poll_interval)
            response = requests.post(
                f"{self.base_url}/transcribe/status",
                headers=headers,
                json=status_data,
                timeout=self.timeout,
            )
            # Non-200 status responses are ignored and retried on the next
            # iteration; only an explicit 'failed' status aborts early.
            if response.status_code != 200:
                continue
            result = response.json()
            status = result.get('status')
            if status == 'completed':
                return result
            if status == 'failed':
                raise RuntimeError(f"Transcription failed: {result.get('error')}")
        raise TimeoutError("Transcription timed out")

    def format_diarization(self, result):
        """Format a transcription result with one speaker label per segment.

        Returns:
            ``"<speaker>: <text>"`` blocks separated by blank lines when
            ``result['diarization']['segments']`` exists; otherwise
            ``result['transcription']`` (or ``''`` if that is missing too).
        """
        diarization = result.get('diarization')
        # A missing or null 'diarization' entry falls back to the plain text.
        if isinstance(diarization, dict) and 'segments' in diarization:
            return '\n\n'.join(
                f"{seg['speaker']}: {seg['text']}"
                for seg in diarization['segments']
            )
        return result.get('transcription', '')
# Usage examples
transcriber = NanoGPTTranscriber("YOUR_API_KEY")

# Simple transcription: passing audio_path selects the direct file upload.
result = transcriber.transcribe(audio_path="meeting.mp3", model="Whisper-Large-V3", language="en")
print("Transcription:", result['transcription'])

# Advanced: passing audio_url selects URL submission; diarize and
# tagAudioEvents are forwarded as request options.
result = transcriber.transcribe(
    audio_url="https://example.com/conversation.mp3",
    model="Elevenlabs-STT",
    diarize=True,
    tagAudioEvents=True,
)
print("Formatted conversation:")
# format_diarization renders one "<speaker>: <text>" block per segment.
print(transcriber.format_diarization(result))