Compare commits

..

10 Commits

Author SHA1 Message Date
YuanHui
1fa8bdec1d fix docker build problem
Some checks failed
CodeQL / Analyze (python) (push) Failing after 10m45s
Check code quality / Lint (push) Failing after 1h0m6s
2025-12-02 13:11:48 +08:00
YuanHui
9d1429eeb2 add web ui 2025-12-02 12:22:06 +08:00
rany2
27d6248833 Bump edge_tts version to 7.2.3 (#418)
Signed-off-by: rany <rany2@riseup.net>
2025-08-29 00:11:57 +03:00
rany2
6630cb31f6 Set Edge Browser version to 140.0.3485.14 (#417)
Signed-off-by: rany <rany2@riseup.net>
2025-08-29 00:10:38 +03:00
rany2
4c363a650c Make edge-playback quiet when playback tts result (#415)
* Make edge-playback quiet when playback tts result
* Only show duration and captions

Signed-off-by: rany <rany2@riseup.net>
Co-authored-by: Xing Shi Cai <newptcai@tutanota.com>
2025-08-28 20:06:16 +03:00
rany2
d78d1faf66 Fix __list_voices type hint issue (#414)
Signed-off-by: rany <rany2@riseup.net>
2025-08-28 19:35:05 +03:00
rany2
065ccd05a6 Bump edge_tts version to 7.2.2 (#413)
Signed-off-by: rany <rany2@riseup.net>
2025-08-28 19:27:25 +03:00
rany2
bfc54f4acd Update API endpoints used by Edge TTS (#412)
Fixes https://github.com/rany2/edge-tts/issues/411

Signed-off-by: rany <rany2@riseup.net>
2025-08-28 19:26:07 +03:00
rany2
7c5eb17a14 Bump edge_tts version to 7.2.1 (#405)
Signed-off-by: rany <rany2@riseup.net>
2025-08-20 17:26:19 +03:00
rany2
cbd375c8e3 Fix Python 3.7 compatibility issue (#404)
Fixes https://github.com/rany2/edge-tts/issues/403

Signed-off-by: rany <rany2@riseup.net>
2025-08-20 17:25:38 +03:00
32 changed files with 3302 additions and 117 deletions

1
.gitignore vendored
View File

@@ -163,3 +163,4 @@ cython_debug/
*.mp3
*.srt
/.idea/
.DS_Store

1
.python-version Normal file
View File

@@ -0,0 +1 @@
3.12

6
main.py Normal file
View File

@@ -0,0 +1,6 @@
def main():
    """Print the project greeting to standard output."""
    greeting = "Hello from edge-tts!"
    print(greeting)


# Only run the greeting when this file is executed as a script.
if __name__ == "__main__":
    main()

7
pyproject.toml Normal file
View File

@@ -0,0 +1,7 @@
[project]
name = "edge-tts"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = []

View File

@@ -6,13 +6,12 @@ import subprocess
import sys
import tempfile
from shutil import which
from typing import List, Optional, Tuple
from .util import pr_err
def _main() -> None:
depcheck_failed = False
def _parse_args() -> Tuple[bool, List[str]]:
parser = argparse.ArgumentParser(
prog="edge-playback",
description="Speak text using Microsoft Edge's online text-to-speech API",
@@ -24,9 +23,12 @@ def _main() -> None:
help="Use mpv to play audio. By default, false on Windows and true on all other platforms",
)
args, tts_args = parser.parse_known_args()
use_mpv = sys.platform != "win32" or args.mpv
return use_mpv, tts_args
def _check_deps(use_mpv: bool) -> None:
depcheck_failed = False
deps = ["edge-tts"]
if use_mpv:
deps.append("mpv")
@@ -40,54 +42,88 @@ def _main() -> None:
pr_err("Please install the missing dependencies.")
sys.exit(1)
# Resolve the media (.mp3) and subtitle (.srt) file names used for playback.
#
# Any name that is missing (None or empty) is replaced by the path of a freshly
# created temporary file; names supplied by the caller are returned unchanged.
# A subtitle temp file is only created when `use_mpv` is true, so the returned
# subtitle name may still be None. When `debug` is true the file names are
# printed.
#
# Returns a `(mp3_fname, srt_fname)` tuple.
#
# NOTE(review): leading indentation is missing in this view, so it is unclear
# whether the "Media file" debug print below is nested inside the
# `if not mp3_fname:` branch or runs for every call -- confirm against the
# real file before relying on either reading.
def _create_temp_files(
use_mpv: bool, mp3_fname: Optional[str], srt_fname: Optional[str], debug: bool
) -> Tuple[str, Optional[str]]:
media = subtitle = None
if not mp3_fname:
# delete=False keeps the file on disk after close(); only its name is used.
media = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
media.close()
mp3_fname = media.name
if debug:
print(f"Media file: {mp3_fname}")
# A subtitle temp file is only created when mpv is used.
if not srt_fname and use_mpv:
subtitle = tempfile.NamedTemporaryFile(suffix=".srt", delete=False)
subtitle.close()
srt_fname = subtitle.name
# srt_fname can still be unset here (no name supplied and use_mpv false).
if debug and srt_fname:
print(f"Subtitle file: {srt_fname}\n")
return mp3_fname, srt_fname
def _run_edge_tts(
    mp3_fname: str, srt_fname: Optional[str], tts_args: List[str]
) -> None:
    """Run the ``edge-tts`` command and wait for it to finish.

    Args:
        mp3_fname: Path passed to ``--write-media``.
        srt_fname: Path passed to ``--write-subtitles``; the option is
            omitted when this is None or empty.
        tts_args: Extra command-line arguments appended after the options
            built here.
    """
    subtitle_opts = [f"--write-subtitles={srt_fname}"] if srt_fname else []
    command = [
        "edge-tts",
        f"--write-media={mp3_fname}",
        *subtitle_opts,
        *tts_args,
    ]
    with subprocess.Popen(command) as child:
        child.communicate()
def _play_media(use_mpv: bool, mp3_fname: str, srt_fname: Optional[str]) -> None:
    """Play the media file, with subtitles when a subtitle file is given.

    On Windows without mpv the package's ``play_mp3_win32`` helper is used;
    in every other case ``mpv`` is launched and waited for.

    Args:
        use_mpv: Whether mpv should be used for playback.
        mp3_fname: Path of the media file to play.
        srt_fname: Path of the subtitle file; ``--sub-file`` is omitted when
            this is None or empty.
    """
    if not use_mpv and sys.platform == "win32":
        # pylint: disable-next=import-outside-toplevel
        from .win32_playback import play_mp3_win32

        play_mp3_win32(mp3_fname)
        return

    subtitle_opts = [f"--sub-file={srt_fname}"] if srt_fname else []
    player_cmd = [
        "mpv",
        "--msg-level=all=error,statusline=status",
        *subtitle_opts,
        mp3_fname,
    ]
    with subprocess.Popen(player_cmd) as player:
        player.communicate()
def _cleanup(mp3_fname: Optional[str], srt_fname: Optional[str], keep: bool) -> None:
    """Delete the media and subtitle files, or report them when they are kept.

    Args:
        mp3_fname: Path of the media file, or None when no path is known.
        srt_fname: Path of the subtitle file, or None when no path is known.
        keep: When true, nothing is deleted and the kept paths are printed.
    """
    if keep:
        # Honour `keep` even when the media path is unknown; otherwise the
        # subtitle file would be removed despite the request to keep files.
        kept = [name for name in (mp3_fname, srt_fname) if name]
        if kept:
            names = " and ".join(kept)
            print(f"\nKeeping temporary files: {names}")
        return

    for fname in (mp3_fname, srt_fname):
        if fname is None:
            continue
        try:
            os.unlink(fname)
        except FileNotFoundError:
            # Already gone (or never created): nothing left to clean up.
            pass
def _main() -> None:
use_mpv, tts_args = _parse_args()
_check_deps(use_mpv)
debug = os.environ.get("EDGE_PLAYBACK_DEBUG") is not None
keep = os.environ.get("EDGE_PLAYBACK_KEEP_TEMP") is not None
mp3_fname = os.environ.get("EDGE_PLAYBACK_MP3_FILE")
srt_fname = os.environ.get("EDGE_PLAYBACK_SRT_FILE")
media, subtitle = None, None
try:
if not mp3_fname:
media = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
media.close()
mp3_fname = media.name
if not srt_fname and use_mpv:
subtitle = tempfile.NamedTemporaryFile(suffix=".srt", delete=False)
subtitle.close()
srt_fname = subtitle.name
print(f"Media file: {mp3_fname}")
if srt_fname:
print(f"Subtitle file: {srt_fname}\n")
edge_tts_cmd = ["edge-tts", f"--write-media={mp3_fname}"]
if srt_fname:
edge_tts_cmd.append(f"--write-subtitles={srt_fname}")
edge_tts_cmd = edge_tts_cmd + tts_args
with subprocess.Popen(edge_tts_cmd) as process:
process.communicate()
if sys.platform == "win32" and not use_mpv:
# pylint: disable-next=import-outside-toplevel
from .win32_playback import play_mp3_win32
play_mp3_win32(mp3_fname)
else:
with subprocess.Popen(
[
"mpv",
f"--sub-file={srt_fname}",
mp3_fname,
]
) as process:
process.communicate()
mp3_fname, srt_fname = _create_temp_files(use_mpv, mp3_fname, srt_fname, debug)
_run_edge_tts(mp3_fname, srt_fname, tts_args)
_play_media(use_mpv, mp3_fname, srt_fname)
finally:
if keep:
print(f"\nKeeping temporary files: {mp3_fname} and {srt_fname}")
else:
if mp3_fname is not None and os.path.exists(mp3_fname):
os.unlink(mp3_fname)
if srt_fname is not None and os.path.exists(srt_fname):
os.unlink(srt_fname)
_cleanup(mp3_fname, srt_fname, keep)
if __name__ == "__main__":

View File

@@ -16,7 +16,6 @@ from typing import (
Dict,
Generator,
List,
Literal,
Optional,
Tuple,
Union,
@@ -25,6 +24,7 @@ from xml.sax.saxutils import escape, unescape
import aiohttp
import certifi
from typing_extensions import Literal
from .constants import DEFAULT_VOICE, SEC_MS_GEC_VERSION, WSS_HEADERS, WSS_URL
from .data_classes import TTSConfig
@@ -433,9 +433,9 @@ class Communicate:
trust_env=True,
timeout=self.session_timeout,
) as session, session.ws_connect(
f"{WSS_URL}&Sec-MS-GEC={DRM.generate_sec_ms_gec()}"
f"&Sec-MS-GEC-Version={SEC_MS_GEC_VERSION}"
f"&ConnectionId={connect_id()}",
f"{WSS_URL}&ConnectionId={connect_id()}"
f"&Sec-MS-GEC={DRM.generate_sec_ms_gec()}"
f"&Sec-MS-GEC-Version={SEC_MS_GEC_VERSION}",
compress=15,
proxy=self.proxy,
headers=WSS_HEADERS,

View File

@@ -1,14 +1,18 @@
"""Constants for the edge_tts package."""
BASE_URL = "speech.platform.bing.com/consumer/speech/synthesize/readaloud"
BASE_URL = "api.msedgeservices.com/tts/cognitiveservices"
TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"
WSS_URL = f"wss://{BASE_URL}/edge/v1?TrustedClientToken={TRUSTED_CLIENT_TOKEN}"
VOICE_LIST = f"https://{BASE_URL}/voices/list?trustedclienttoken={TRUSTED_CLIENT_TOKEN}"
WSS_URL = (
f"wss://{BASE_URL}/websocket/v1?Ocp-Apim-Subscription-Key={TRUSTED_CLIENT_TOKEN}"
)
VOICE_LIST = (
f"https://{BASE_URL}/voices/list?Ocp-Apim-Subscription-Key={TRUSTED_CLIENT_TOKEN}"
)
DEFAULT_VOICE = "en-US-EmmaMultilingualNeural"
CHROMIUM_FULL_VERSION = "130.0.2849.68"
CHROMIUM_FULL_VERSION = "140.0.3485.14"
CHROMIUM_MAJOR_VERSION = CHROMIUM_FULL_VERSION.split(".", maxsplit=1)[0]
SEC_MS_GEC_VERSION = f"1-{CHROMIUM_FULL_VERSION}"
BASE_HEADERS = {
@@ -22,6 +26,8 @@ WSS_HEADERS = {
"Pragma": "no-cache",
"Cache-Control": "no-cache",
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
"Sec-WebSocket-Protocol": "synthesize",
"Sec-WebSocket-Version": "13",
}
WSS_HEADERS.update(BASE_HEADERS)
VOICE_HEADERS = {

View File

@@ -5,7 +5,8 @@
import argparse
import re
from dataclasses import dataclass
from typing import Literal
from typing_extensions import Literal
@dataclass

View File

@@ -20,49 +20,8 @@ class TTSChunk(TypedDict):
class VoiceTag(TypedDict):
"""VoiceTag data."""
ContentCategories: List[
Literal[
"Cartoon",
"Conversation",
"Copilot",
"Dialect",
"General",
"News",
"Novel",
"Sports",
]
]
VoicePersonalities: List[
Literal[
"Approachable",
"Authentic",
"Authority",
"Bright",
"Caring",
"Casual",
"Cheerful",
"Clear",
"Comfort",
"Confident",
"Considerate",
"Conversational",
"Cute",
"Expressive",
"Friendly",
"Honest",
"Humorous",
"Lively",
"Passion",
"Pleasant",
"Positive",
"Professional",
"Rational",
"Reliable",
"Sincere",
"Sunshine",
"Warm",
]
]
ContentCategories: List[str]
VoicePersonalities: List[str]
class Voice(TypedDict):
@@ -70,11 +29,13 @@ class Voice(TypedDict):
Name: str
ShortName: str
Gender: Literal["Female", "Male"]
DisplayName: str
LocalName: str
LocaleName: str
Locale: str
SuggestedCodec: Literal["audio-24khz-48kbitrate-mono-mp3"]
FriendlyName: str
Status: Literal["GA"]
Gender: Literal["Female", "Male"]
WordsPerMinute: str
Status: Literal["Deprecated", "GA", "Preview"]
VoiceTag: VoiceTag

View File

@@ -1,4 +1,4 @@
"""Version information for the edge_tts package."""
__version__ = "7.2.0"
__version__ = "7.2.3"
__version_info__ = tuple(int(num) for num in __version__.split("."))

View File

@@ -3,7 +3,7 @@ correct voice based on their attributes."""
import json
import ssl
from typing import List, Optional
from typing import Any, List, Optional
import aiohttp
import certifi
@@ -38,20 +38,17 @@ async def __list_voices(
ssl=ssl_ctx,
raise_for_status=True,
) as url:
data: List[Voice] = json.loads(await url.text())
data: List[Any] = json.loads(await url.text())
for voice in data:
# Remove leading and trailing whitespace from categories and personalities.
# This has only happened in one case with the zh-CN-YunjianNeural voice
# where there was a leading space in one of the categories.
voice["VoiceTag"]["ContentCategories"] = [
category.strip() # type: ignore
for category in voice["VoiceTag"]["ContentCategories"]
]
voice["VoiceTag"]["VoicePersonalities"] = [
personality.strip() # type: ignore
for personality in voice["VoiceTag"]["VoicePersonalities"]
]
if "VoiceTag" not in voice:
voice["VoiceTag"] = {}
if "ContentCategories" not in voice["VoiceTag"]:
voice["VoiceTag"]["ContentCategories"] = []
if "VoicePersonalities" not in voice["VoiceTag"]:
voice["VoiceTag"]["VoicePersonalities"] = []
return data

53
web/.dockerignore Normal file
View File

@@ -0,0 +1,53 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
dist/
build/
# Virtual environments
venv/
env/
ENV/
.venv/
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Git
.git/
.gitignore
# Documentation
README.md
QUICKSTART.md
*.md
# Build scripts
build.sh
deploy.sh
create_icons.py
create_icons.sh
# Test files
test_*.py
*_test.py
# Logs
*.log
# Temporary files
*.tmp
*.mp3
*.wav
*.srt

26
web/.gitignore vendored Normal file
View File

@@ -0,0 +1,26 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/
# IDEs
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Temporary files
*.mp3
*.wav
*.srt
create_icons.py
create_icons.sh

435
web/DEPLOYMENT.md Normal file
View File

@@ -0,0 +1,435 @@
# Edge TTS Web UI - Docker Deployment Guide
Complete guide for deploying Edge TTS Web UI to a remote server using Docker.
## 📋 Prerequisites
### Local Machine
- Docker installed
- Docker Compose installed
- SSH access to remote server
- rsync (for deployment script)
### Remote Server
- Linux server (Ubuntu 20.04+ recommended)
- Docker installed
- Docker Compose installed
- Port 8000 open (or configure a different port)
- Minimum 512MB RAM
- SSH access configured
## 🚀 Quick Start
### Option 1: Local Testing
```bash
cd web
# Build the image
./build.sh
# Start with docker-compose
docker-compose up -d
# Check logs
docker-compose logs -f
# Access at http://localhost:8000
```
### Option 2: Remote Deployment
```bash
cd web
# Deploy to remote server
REMOTE_HOST=192.168.1.100 ./deploy.sh
# Or with custom user and path
REMOTE_HOST=myserver.com REMOTE_USER=deployer REMOTE_PATH=/opt/edge-tts ./deploy.sh
```
## 📦 Building the Docker Image
### Build Locally
```bash
# Build with default tag (latest)
./build.sh
# Build with custom tag
./build.sh v1.0.0
```
### Manual Build
```bash
docker build -t edge-tts-web:latest .
```
## 🏃 Running the Container
### Using Docker Compose (Recommended)
```bash
# Start in background
docker-compose up -d
# View logs
docker-compose logs -f
# Stop
docker-compose down
# Restart
docker-compose restart
# View status
docker-compose ps
```
### Using Docker CLI
```bash
# Run container
docker run -d \
--name edge-tts-web \
-p 8000:8000 \
--restart unless-stopped \
edge-tts-web:latest
# View logs
docker logs -f edge-tts-web
# Stop container
docker stop edge-tts-web
# Remove container
docker rm edge-tts-web
```
## 🌐 Remote Server Deployment
### Step-by-Step Manual Deployment
#### 1. Install Docker on Remote Server
```bash
# SSH into server
ssh user@your-server.com
# Install Docker
curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
# Install Docker Compose
sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
# Add user to docker group (optional)
sudo usermod -aG docker $USER
```
#### 2. Copy Files to Server
```bash
# Create deployment directory
ssh user@your-server.com 'mkdir -p /opt/edge-tts'
# Copy files (from local machine)
cd web
rsync -avz --exclude='venv' --exclude='.git' \
./ user@your-server.com:/opt/edge-tts/
```
#### 3. Build and Start on Server
```bash
# SSH into server
ssh user@your-server.com
# Navigate to deployment directory
cd /opt/edge-tts
# Build and start
docker-compose up -d
# Check status
docker-compose ps
docker-compose logs -f
```
### Automated Deployment Script
The `deploy.sh` script automates the entire deployment process:
```bash
# Basic usage
REMOTE_HOST=192.168.1.100 ./deploy.sh
# With custom configuration
REMOTE_HOST=myserver.com \
REMOTE_USER=deployer \
REMOTE_PATH=/home/deployer/edge-tts \
./deploy.sh
```
**What the script does:**
1. ✅ Checks SSH connectivity
2. ✅ Creates remote directory
3. ✅ Copies all files to server
4. ✅ Stops existing containers
5. ✅ Builds new Docker image
6. ✅ Starts containers
7. ✅ Shows deployment status
## 🔧 Configuration
### Environment Variables
Create a `.env` file for custom configuration:
```bash
# .env
PYTHONUNBUFFERED=1
# Add other environment variables as needed
```
### Custom Port
To use a different port, edit `docker-compose.yml`:
```yaml
ports:
- "3000:8000" # Host:Container
```
### Resource Limits
Add resource limits in `docker-compose.yml`:
```yaml
services:
edge-tts-web:
# ... other config
deploy:
resources:
limits:
cpus: '0.5'
memory: 512M
reservations:
cpus: '0.25'
memory: 256M
```
## 🔒 Security Best Practices
### 1. Use Non-Root User
The Dockerfile already creates a non-root user (`appuser`) and switches to it before starting the server.
### 2. Reverse Proxy with SSL
Use Nginx or Traefik as reverse proxy:
**Nginx example:**
```nginx
server {
listen 80;
server_name tts.yourdomain.com;
location / {
proxy_pass http://localhost:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
}
}
```
**With Let's Encrypt SSL:**
```bash
sudo apt install certbot python3-certbot-nginx
sudo certbot --nginx -d tts.yourdomain.com
```
### 3. Firewall Configuration
```bash
# Allow only necessary ports
sudo ufw allow 22/tcp # SSH
sudo ufw allow 80/tcp # HTTP
sudo ufw allow 443/tcp # HTTPS
sudo ufw enable
```
### 4. Docker Socket Security
Don't expose the Docker socket unnecessarily. The current setup doesn't require it.
## 📊 Monitoring
### Check Container Status
```bash
# Using docker-compose
docker-compose ps
# Using docker CLI
docker ps
```
### View Logs
```bash
# All logs
docker-compose logs
# Follow logs (real-time)
docker-compose logs -f
# Last 100 lines
docker-compose logs --tail=100
# Specific service
docker-compose logs edge-tts-web
```
### Health Checks
The container includes a health check that runs every 30 seconds:
```bash
# Check health status
docker inspect edge-tts-web --format='{{.State.Health.Status}}'
# View health check logs
docker inspect edge-tts-web --format='{{json .State.Health}}' | jq
```
## 🔄 Updates and Maintenance
### Update Application
```bash
# Pull latest changes
git pull
# Rebuild and restart
docker-compose up -d --build
# Or use deployment script
REMOTE_HOST=your-server.com ./deploy.sh
```
### Backup and Restore
```bash
# Backup (no persistent data currently)
# If you add persistent data, use Docker volumes
# Create volume backup
docker run --rm -v edge-tts-data:/data -v $(pwd):/backup \
alpine tar czf /backup/edge-tts-backup.tar.gz /data
# Restore volume
docker run --rm -v edge-tts-data:/data -v $(pwd):/backup \
alpine tar xzf /backup/edge-tts-backup.tar.gz -C /
```
### Clean Up
```bash
# Stop and remove containers
docker-compose down
# Remove images
docker rmi edge-tts-web:latest
# Clean up unused resources
docker system prune -a
```
## 🐛 Troubleshooting
### Container Won't Start
```bash
# Check logs
docker-compose logs
# Check container status
docker-compose ps
# Rebuild from scratch
docker-compose down
docker-compose build --no-cache
docker-compose up -d
```
### Port Already in Use
```bash
# Check what's using the port
sudo lsof -i :8000
# Kill the process or change port in docker-compose.yml
```
### Permission Denied Errors
```bash
# On remote server, add user to docker group
sudo usermod -aG docker $USER
newgrp docker
```
### Health Check Failing
```bash
# Check if app is responding
curl http://localhost:8000/api/health
# Check health status
docker inspect edge-tts-web --format='{{json .State.Health}}' | jq
```
## 📁 File Structure
```
web/
├── Dockerfile # Docker image definition
├── docker-compose.yml # Docker Compose configuration
├── .dockerignore # Files to exclude from image
├── build.sh # Build script
├── deploy.sh # Deployment script
├── server.py # FastAPI server
├── index.html # Web UI
├── app.js # Frontend logic
├── styles.css # Styling
├── manifest.json # PWA manifest
├── sw.js # Service worker
└── requirements.txt # Python dependencies
```
## 🌟 Production Recommendations
1. **Use a Reverse Proxy**: Nginx or Traefik with SSL/TLS
2. **Set Up Monitoring**: Prometheus + Grafana
3. **Configure Logging**: Centralized logging with ELK or Loki
4. **Auto-Restart**: Use `restart: unless-stopped` in docker-compose
5. **Resource Limits**: Set appropriate CPU and memory limits
6. **Regular Backups**: If you add persistent data
7. **Security Updates**: Keep Docker and base images updated
8. **Domain Name**: Use a proper domain with DNS
9. **CDN**: Consider using a CDN for static assets
10. **Rate Limiting**: Implement rate limiting for API endpoints
## 📞 Support
For issues or questions:
- Check logs: `docker-compose logs -f`
- GitHub Issues: https://github.com/rany2/edge-tts/issues
- Review health checks: `docker inspect edge-tts-web`
## 📝 License
This deployment configuration is part of the Edge TTS project.

195
web/DOCKER_QUICKSTART.md Normal file
View File

@@ -0,0 +1,195 @@
# Docker Quick Start Guide
## 🚀 Deploy in 3 Steps
### Step 1: Build
```bash
cd web
./build.sh
```
### Step 2: Deploy to Remote Server
```bash
# Replace with your server IP/hostname
REMOTE_HOST=192.168.1.100 ./deploy.sh
```
### Step 3: Access
```
http://YOUR_SERVER_IP:8000
```
## 📋 Common Commands
### Local Development
```bash
# Build and start
docker-compose up -d
# View logs
docker-compose logs -f
# Stop
docker-compose down
# Restart
docker-compose restart
```
### Remote Deployment
```bash
# Basic deployment
REMOTE_HOST=192.168.1.100 ./deploy.sh
# Custom user and path
REMOTE_HOST=myserver.com \
REMOTE_USER=deployer \
REMOTE_PATH=/opt/edge-tts \
./deploy.sh
```
### Monitoring
```bash
# Container status
docker-compose ps
# Health check
docker inspect edge-tts-web --format='{{.State.Health.Status}}'
# Resource usage
docker stats edge-tts-web
```
### Troubleshooting
```bash
# View logs
docker-compose logs --tail=100
# Restart container
docker-compose restart
# Rebuild from scratch
docker-compose down
docker-compose build --no-cache
docker-compose up -d
```
## 🔧 Configuration
### Change Port
Edit `docker-compose.yml`:
```yaml
ports:
- "3000:8000" # Change 3000 to your desired port
```
### Environment Variables
Create `.env` file:
```bash
PYTHONUNBUFFERED=1
# Add your variables here
```
## 🌐 Production Setup
### 1. Use Reverse Proxy (Recommended)
**Nginx:**
```nginx
server {
listen 80;
server_name tts.yourdomain.com;
location / {
proxy_pass http://localhost:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
}
}
```
**Install SSL:**
```bash
sudo certbot --nginx -d tts.yourdomain.com
```
### 2. Firewall Setup
```bash
sudo ufw allow 22/tcp # SSH
sudo ufw allow 80/tcp # HTTP
sudo ufw allow 443/tcp # HTTPS
sudo ufw enable
```
### 3. Auto-Updates (Optional)
```bash
# Add to crontab
0 2 * * * cd /opt/edge-tts && docker-compose pull && docker-compose up -d
```
## 📊 Monitoring
### Check Health
```bash
curl http://localhost:8000/api/health
```
### View Metrics
```bash
docker stats edge-tts-web
```
## 🆘 Quick Fixes
### Port Already in Use
```bash
# Find process using port
sudo lsof -i :8000
# Or change port in docker-compose.yml
```
### Permission Denied
```bash
sudo usermod -aG docker $USER
newgrp docker
```
### Container Won't Start
```bash
# Check logs
docker-compose logs
# Rebuild
docker-compose build --no-cache
docker-compose up -d
```
## 📁 File Structure
```
web/
├── Dockerfile # Container definition
├── docker-compose.yml # Orchestration
├── build.sh # Build script
├── deploy.sh # Deploy script
├── server.py # Backend
└── [web files] # Frontend
```
## 🔗 Useful Links
- Full Documentation: [DEPLOYMENT.md](DEPLOYMENT.md)
- Edge TTS Project: https://github.com/rany2/edge-tts
- Docker Docs: https://docs.docker.com
## 💡 Tips
1. **Always use reverse proxy in production**
2. **Enable SSL/TLS with Let's Encrypt**
3. **Set up monitoring and logging**
4. **Regular backups if you add persistent data**
5. **Keep Docker and images updated**
---
**Need Help?** Check [DEPLOYMENT.md](DEPLOYMENT.md) for detailed instructions!

42
web/Dockerfile Normal file
View File

@@ -0,0 +1,42 @@
# Edge TTS Web UI - Production Dockerfile
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements first for better caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy application files
COPY *.py .
COPY *.html .
COPY *.css .
COPY *.js .
COPY *.json .
COPY *.png .
COPY *.svg .
# Create non-root user for security
RUN useradd -m -u 1000 appuser && \
chown -R appuser:appuser /app
# Switch to non-root user
USER appuser
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health')"
# Run the application
CMD ["python", "server.py", "--host", "0.0.0.0", "--port", "8000"]

212
web/QUICKSTART.md Normal file
View File

@@ -0,0 +1,212 @@
# Quick Start Guide
## 🚀 Get Started in 3 Steps
### 1. Install Dependencies
```bash
cd web
pip install -r requirements.txt
```
### 2. Start the Server
```bash
./start.sh
```
Or manually:
```bash
python3 server.py
```
### 3. Open Your Browser
Visit: **http://localhost:8000**
---
## ✨ Features at a Glance
### Text to Speech
- Enter any text (up to 5000 characters)
- Select from 100+ voices in multiple languages
- Adjust speed, volume, and pitch
- Generate natural-sounding speech
### Voice Selection
- Filter by language and gender
- Preview voice names and locales
- Save your favorite settings
### Audio Controls
- Play audio directly in browser
- Download as MP3 files
- View generation history
- Quick reload from history
### PWA Features
- Install as standalone app
- Offline support with service worker
- Works on desktop and mobile
- Responsive design
---
## 📱 Install as App
### On Desktop (Chrome/Edge)
1. Click the install icon in the address bar
2. Or look for "Install App" button in the UI
3. App will be added to your applications
### On Mobile (Android)
1. Open in Chrome
2. Tap the menu (⋮)
3. Select "Add to Home screen"
4. App icon will appear on home screen
### On iOS (Safari)
1. Tap the share button
2. Select "Add to Home Screen"
3. Name the app and add to home screen
---
## 🎯 Quick Usage Tips
### Generate Speech
1. Enter or paste text
2. Select a voice (default: English)
3. Adjust speed/volume/pitch if needed
4. Click "Generate Speech"
5. Audio player appears with playback controls
### Download Audio
- Click "Download MP3" button
- File saves with timestamp and text snippet
### Use History
- Recent generations saved automatically
- Click "Load" to restore settings
- Click "Delete" to remove from history
### Filter Voices
- Use language dropdown for specific locales
- Use gender filter for Male/Female voices
- Voice list updates automatically
---
## 🔧 Configuration
### Change Port
```bash
python3 server.py --port 8080
```
### Enable Hot Reload (Development)
```bash
python3 server.py --reload
```
### Bind to Specific Host
```bash
python3 server.py --host 127.0.0.1
```
---
## ⚡ API Usage
### Test with cURL
Get voices:
```bash
curl http://localhost:8000/api/voices
```
Generate speech:
```bash
curl -X POST http://localhost:8000/api/synthesize \
-H "Content-Type: application/json" \
-d '{
"text": "Hello, world!",
"voice": "en-US-EmmaMultilingualNeural",
"rate": "+0%",
"volume": "+0%",
"pitch": "+0Hz"
}' \
--output speech.mp3
```
---
## 🎨 Customization
### Update Theme Color
Edit `styles.css`:
```css
:root {
--primary-color: #2563eb; /* Your color here */
}
```
Update `manifest.json`:
```json
{
"theme_color": "#2563eb"
}
```
### Replace Icons
Create PNG icons:
- `icon-192.png` - 192x192 pixels
- `icon-512.png` - 512x512 pixels
Use any image editing tool or online icon generator.
---
## 🐛 Troubleshooting
### Port Already in Use
```bash
python3 server.py --port 8080
```
### Dependencies Not Found
```bash
pip3 install -r requirements.txt
```
### Voices Not Loading
- Check internet connection
- Check server logs for errors
- Try refreshing the page
### Service Worker Issues
- Clear browser cache
- Hard refresh (Ctrl+Shift+R or Cmd+Shift+R)
- Check browser console for errors
---
## 📚 More Information
See [README.md](README.md) for detailed documentation including:
- Full API reference
- Deployment guide
- Docker setup
- Production considerations
- Contributing guidelines
---
## 🎉 You're All Set!
Enjoy using Edge TTS Web UI!
For issues or questions, visit: https://github.com/rany2/edge-tts

297
web/README.md Normal file
View File

@@ -0,0 +1,297 @@
# Edge TTS Web UI
A Progressive Web App (PWA) for converting text to speech using Microsoft Edge's online TTS service.
## Features
- 🎙️ **Text to Speech**: Convert any text to natural-sounding speech
- 🌍 **Multiple Languages**: Support for 100+ voices in various languages
- 🎛️ **Voice Customization**: Adjust speed, volume, and pitch
- 📱 **PWA Support**: Install as an app on any device
- 💾 **Offline Support**: Service worker caching for offline usage
- 📝 **History**: Keep track of recent generations
- ⬇️ **Download**: Save generated audio as MP3 files
## Installation
### Prerequisites
- Python 3.8 or higher
- pip (Python package manager)
### Setup
1. Navigate to the web directory:
```bash
cd web
```
2. Install dependencies:
```bash
pip install -r requirements.txt
```
## Usage
### Start the Server
```bash
python server.py
```
Or with custom options:
```bash
python server.py --host 0.0.0.0 --port 8000
```
Options:
- `--host`: Host to bind to (default: 0.0.0.0)
- `--port`: Port to bind to (default: 8000)
- `--reload`: Enable auto-reload for development
### Access the Web UI
Open your browser and navigate to:
```
http://localhost:8000
```
### Install as PWA
1. Open the web UI in a modern browser (Chrome, Edge, Safari, Firefox)
2. Look for the install prompt or click "Install App" button
3. The app will be added to your home screen/app drawer
## API Endpoints
The server provides the following REST API endpoints:
### GET /api/health
Health check endpoint
**Response:**
```json
{
"status": "healthy",
"service": "edge-tts-api"
}
```
### GET /api/voices
Get list of all available voices
**Response:**
```json
[
{
"Name": "en-US-EmmaMultilingualNeural",
"ShortName": "en-US-EmmaMultilingualNeural",
"Gender": "Female",
"Locale": "en-US",
"LocaleName": "English (United States)",
...
}
]
```
### POST /api/synthesize
Synthesize speech from text
**Request Body:**
```json
{
"text": "Hello, world!",
"voice": "en-US-EmmaMultilingualNeural",
"rate": "+0%",
"volume": "+0%",
"pitch": "+0Hz"
}
```
**Response:**
Returns MP3 audio file
**Parameters:**
- `text` (required): Text to convert (max 5000 characters)
- `voice` (optional): Voice name (default: "en-US-EmmaMultilingualNeural")
- `rate` (optional): Speech rate from -100% to +100% (default: "+0%")
- `volume` (optional): Volume from -100% to +100% (default: "+0%")
- `pitch` (optional): Pitch from -500Hz to +500Hz (default: "+0Hz")
### POST /api/synthesize-with-subtitles
Synthesize speech with subtitle generation
**Request Body:**
Same as /api/synthesize
**Response:**
```json
{
"audio": "base64_encoded_audio_data",
"subtitles": "SRT formatted subtitles",
"format": "mp3"
}
```
## File Structure
```
web/
├── index.html # Main HTML page
├── styles.css # Styles and theme
├── app.js # Client-side JavaScript
├── manifest.json # PWA manifest
├── sw.js # Service worker
├── server.py # FastAPI backend server
├── requirements.txt # Python dependencies
├── icon-192.png # App icon (192x192)
├── icon-512.png # App icon (512x512)
└── README.md # This file
```
## Development
### Running in Development Mode
```bash
python server.py --reload
```
This enables auto-reload when you modify the code.
### Testing
Test the API endpoints using curl:
```bash
# Get voices
curl http://localhost:8000/api/voices
# Synthesize speech
curl -X POST http://localhost:8000/api/synthesize \
-H "Content-Type: application/json" \
-d '{"text":"Hello world","voice":"en-US-EmmaMultilingualNeural"}' \
--output speech.mp3
```
### Customization
#### Update Icons
Replace `icon-192.png` and `icon-512.png` with your own icons.
For best results, create:
- 192x192 PNG for mobile devices
- 512x512 PNG for high-resolution displays
#### Update Theme Color
Edit the `--primary-color` variable in [styles.css](styles.css):
```css
:root {
--primary-color: #2563eb; /* Change this color */
}
```
Also update `theme_color` in [manifest.json](manifest.json).
## Browser Support
### PWA Features
- ✅ Chrome/Edge (Desktop & Mobile)
- ✅ Safari (iOS 11.3+)
- ✅ Firefox (Desktop & Android)
- ✅ Samsung Internet
### Service Worker
- ✅ All modern browsers
- ❌ IE11 (not supported)
## Troubleshooting
### Port Already in Use
If port 8000 is already in use:
```bash
python server.py --port 8080
```
### Icons Not Showing
Make sure `icon-192.png` and `icon-512.png` exist in the web directory.
### Voices Not Loading
Check the server logs for errors. The server needs an internet connection to fetch voices from Microsoft's API.
### CORS Issues
The server is configured to allow all origins for development. For production, update the CORS settings in [server.py](server.py):
```python
app.add_middleware(
CORSMiddleware,
allow_origins=["https://yourdomain.com"], # Update this
...
)
```
## Deployment
### Production Considerations
1. **Use a production ASGI server**: Uvicorn with multiple workers
```bash
uvicorn server:app --host 0.0.0.0 --port 8000 --workers 4
```
2. **Use a reverse proxy**: nginx or Apache for SSL/TLS
3. **Set environment variables**:
```bash
export EDGE_TTS_HOST=0.0.0.0
export EDGE_TTS_PORT=8000
```
4. **Update CORS settings**: Restrict to your domain
5. **Enable HTTPS**: Required for PWA installation and service workers (browsers only exempt `localhost`, for local testing)
### Docker Deployment
Create a `Dockerfile`:
```dockerfile
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["python", "server.py", "--host", "0.0.0.0", "--port", "8000"]
```
Build and run:
```bash
docker build -t edge-tts-web .
docker run -p 8000:8000 edge-tts-web
```
## License
This web UI is built on top of [edge-tts](https://github.com/rany2/edge-tts).
## Contributing
Contributions are welcome! Please feel free to submit issues or pull requests.
## Credits
- **edge-tts**: The underlying TTS library by [@rany2](https://github.com/rany2)
- **Microsoft Edge TTS**: The text-to-speech service

594
web/app.js Normal file
View File

@@ -0,0 +1,594 @@
// Configuration
// Base URL for every API request: an absolute URL when the page's hostname is
// exactly 'localhost', otherwise a path relative to the current origin.
// NOTE(review): the localhost branch hard-codes port 8000, so running the
// server on another local port would still send requests to port 8000 —
// confirm this is intended.
const API_BASE_URL = window.location.hostname === 'localhost'
    ? 'http://localhost:8000/api'
    : '/api';
// Sample sentences for different languages
// Maps a language code to an array of short sentences; getSampleSentence()
// picks one at random. Keys are looked up both by the full locale string and
// by the part of the locale before the first '-'.
const SAMPLE_SENTENCES = {
    'ar': ['مرحبا، كيف حالك اليوم؟', 'الطقس جميل اليوم.', 'أتمنى لك يوما سعيدا.'],
    'bg': ['Здравейте, как сте днес?', 'Времето е хубаво днес.', 'Желая ви приятен ден.'],
    'ca': ['Hola, com estàs avui?', 'El temps és agradable avui.', 'Que tinguis un bon dia.'],
    'cs': ['Ahoj, jak se máš dnes?', 'Počasí je dnes pěkné.', 'Přeji ti hezký den.'],
    'da': ['Hej, hvordan har du det i dag?', 'Vejret er dejligt i dag.', 'Hav en god dag.'],
    'de': ['Hallo, wie geht es dir heute?', 'Das Wetter ist heute schön.', 'Ich wünsche dir einen schönen Tag.'],
    'el': ['Γεια σου, πώς είσαι σήμερα;', 'Ο καιρός είναι ωραίος σήμερα.', 'Σου εύχομαι μια όμορφη μέρα.'],
    'en': ['Hello, how are you today?', 'The weather is nice today.', 'Have a wonderful day!'],
    'es': ['Hola, ¿cómo estás hoy?', 'El clima está agradable hoy.', '¡Que tengas un buen día!'],
    'fi': ['Hei, mitä kuuluu tänään?', 'Sää on kaunis tänään.', 'Mukavaa päivää!'],
    'fr': ['Bonjour, comment allez-vous aujourd\'hui?', 'Le temps est agréable aujourd\'hui.', 'Passez une bonne journée!'],
    'hi': ['नमस्ते, आज आप कैसे हैं?', 'आज मौसम अच्छा है।', 'आपका दिन शुभ हो।'],
    'hr': ['Bok, kako si danas?', 'Vrijeme je lijepo danas.', 'Želim ti lijep dan.'],
    'hu': ['Szia, hogy vagy ma?', 'Az idő szép ma.', 'Szép napot kívánok!'],
    'id': ['Halo, apa kabar hari ini?', 'Cuacanya bagus hari ini.', 'Semoga harimu menyenangkan!'],
    'it': ['Ciao, come stai oggi?', 'Il tempo è bello oggi.', 'Ti auguro una buona giornata!'],
    'ja': ['こんにちは、今日はお元気ですか?', '今日は天気がいいですね。', '良い一日をお過ごしください。'],
    'ko': ['안녕하세요, 오늘은 어떠세요?', '오늘 날씨가 좋네요.', '좋은 하루 보내세요!'],
    'nl': ['Hallo, hoe gaat het vandaag?', 'Het weer is mooi vandaag.', 'Fijne dag gewenst!'],
    'no': ['Hei, hvordan har du det i dag?', 'Været er fint i dag.', 'Ha en fin dag!'],
    'pl': ['Cześć, jak się masz dzisiaj?', 'Pogoda jest ładna dzisiaj.', 'Miłego dnia!'],
    'pt': ['Olá, como você está hoje?', 'O tempo está agradável hoje.', 'Tenha um ótimo dia!'],
    'ro': ['Bună, ce mai faci astăzi?', 'Vremea este frumoasă astăzi.', 'O zi bună!'],
    'ru': ['Привет, как дела сегодня?', 'Погода сегодня хорошая.', 'Хорошего дня!'],
    'sk': ['Ahoj, ako sa máš dnes?', 'Počasie je dnes pekné.', 'Prajem ti pekný deň.'],
    'sv': ['Hej, hur mår du idag?', 'Vädret är fint idag.', 'Ha en trevlig dag!'],
    'th': ['สวัสดี วันนี้เป็นอย่างไรบ้าง?', 'อากาศดีวันนี้.', 'ขอให้มีความสุขตลอดวัน!'],
    'tr': ['Merhaba, bugün nasılsın?', 'Hava bugün güzel.', 'İyi günler dilerim!'],
    'uk': ['Привіт, як справи сьогодні?', 'Погода сьогодні гарна.', 'Гарного дня!'],
    'vi': ['Xin chào, hôm nay bạn thế nào?', 'Thời tiết hôm nay đẹp.', 'Chúc bạn một ngày tốt lành!'],
    'zh': ['你好,今天过得怎么样?', '今天天气真好。', '祝你有美好的一天!'],
    // Cantonese (yue-CN)
    'yue': ['你好,今日點呀?', '今日天氣好好。', '祝你有美好嘅一天!'],
    // Wu Chinese (wuu-CN) - uses Simplified Chinese
    'wuu': ['侬好,今朝好伐?', '今朝天气老好额。', '祝侬开心!'],
};
// State
// Module-level mutable state shared by the functions below.
let voices = [];                 // full voice list as returned by the /voices endpoint
let filteredVoices = [];         // subset of `voices` matching the language/gender selects
let currentAudioUrl = null;      // object URL of the last generated audio (revoked on replace)
let currentTestAudioUrl = null;  // object URL of the last "Test Voice" audio (revoked on replace)
let history = [];                // recent generations, newest first; persisted to localStorage
let deferredPrompt = null;       // stashed `beforeinstallprompt` event, if one was received
// DOM Elements
// References are resolved once, when the script is evaluated; each id must
// exist in the page at that point, otherwise the constant is null.
const textInput = document.getElementById('textInput');
const charCount = document.getElementById('charCount');
const voiceSelect = document.getElementById('voiceSelect');
const languageSelect = document.getElementById('languageSelect');
const genderFilter = document.getElementById('genderFilter');
// Prosody sliders and the labels that mirror their current value
const rateSlider = document.getElementById('rateSlider');
const rateValue = document.getElementById('rateValue');
const volumeSlider = document.getElementById('volumeSlider');
const volumeValue = document.getElementById('volumeValue');
const pitchSlider = document.getElementById('pitchSlider');
const pitchValue = document.getElementById('pitchValue');
// Action buttons and feedback widgets
const generateBtn = document.getElementById('generateBtn');
const clearBtn = document.getElementById('clearBtn');
const testVoiceBtn = document.getElementById('testVoiceBtn');
const progressBar = document.getElementById('progressBar');
const statusMessage = document.getElementById('statusMessage');
// Audio output
const audioSection = document.getElementById('audioSection');
const audioPlayer = document.getElementById('audioPlayer');
const downloadBtn = document.getElementById('downloadBtn');
// History, connectivity indicator and PWA install controls
const historyList = document.getElementById('historyList');
const onlineStatus = document.getElementById('onlineStatus');
const installPrompt = document.getElementById('installPrompt');
const installBtn = document.getElementById('installBtn');
// Initialize
// Listeners are wired up before any data is loaded, so an exception thrown
// while restoring history or fetching voices cannot leave the page with
// dead buttons.
document.addEventListener('DOMContentLoaded', () => {
    setupEventListeners();
    setupPWA();
    updateOnlineStatus();
    loadHistory();
    loadVoices();
});
// Event Listeners
/**
 * Attach all UI event handlers: text input, voice filters, prosody sliders,
 * action buttons and online/offline notifications.
 */
function setupEventListeners() {
    // Mirror a slider's value into its label with an explicit sign,
    // e.g. "10" -> "+10%", "-50" -> "-50Hz".
    const bindSliderLabel = (slider, label, unit) => {
        slider.addEventListener('input', (e) => {
            const value = e.target.value;
            label.textContent = `${value >= 0 ? '+' : ''}${value}${unit}`;
        });
    };

    // Re-filtering can change (or empty) the voice list, so the Test Voice
    // button state must be refreshed after every filter change.
    const refreshVoices = () => {
        filterVoices();
        updateTestVoiceButton();
    };

    textInput.addEventListener('input', updateCharCount);
    languageSelect.addEventListener('change', refreshVoices);
    genderFilter.addEventListener('change', refreshVoices);
    voiceSelect.addEventListener('change', updateTestVoiceButton);

    bindSliderLabel(rateSlider, rateValue, '%');
    bindSliderLabel(volumeSlider, volumeValue, '%');
    bindSliderLabel(pitchSlider, pitchValue, 'Hz');

    generateBtn.addEventListener('click', generateSpeech);
    clearBtn.addEventListener('click', clearForm);
    testVoiceBtn.addEventListener('click', testVoice);
    downloadBtn.addEventListener('click', downloadAudio);

    window.addEventListener('online', updateOnlineStatus);
    window.addEventListener('offline', updateOnlineStatus);
}
// Character count
/**
 * Show the current text length and colour the counter as the text
 * approaches the limit (highlight above 4000, error colour above 4500).
 */
function updateCharCount() {
    const length = textInput.value.length;
    charCount.textContent = length;

    let color = '';
    if (length > 4500) {
        color = 'var(--error-color)';
    } else if (length > 4000) {
        color = 'var(--primary-color)';
    }
    charCount.style.color = color;
}
// Load voices from API
/**
 * Fetch the voice list from the API and populate the language dropdown.
 *
 * On any failure (network error, non-2xx status, payload that is not an
 * array) the previous `voices` value is left untouched, the dropdown's
 * "Loading languages..." placeholder is replaced, and an error status is
 * shown. Never throws.
 */
async function loadVoices() {
    try {
        // Add cache busting to ensure fresh data
        const response = await fetch(`${API_BASE_URL}/voices?_=${Date.now()}`);
        if (!response.ok) throw new Error('Failed to load voices');

        const payload = await response.json();
        // The rest of the UI calls array methods on `voices`; reject anything else.
        if (!Array.isArray(payload)) throw new Error('Unexpected voices response');

        voices = payload;
        populateLanguageSelect();
        showStatus('Voices loaded successfully', 'success');
        console.log(`Loaded ${voices.length} voices from API`);
    } catch (error) {
        console.error('Error loading voices:', error);
        // Do not leave the "Loading languages..." placeholder up forever.
        languageSelect.innerHTML = '<option value="">Failed to load languages</option>';
        showStatus('Failed to load voices. Please check the server connection.', 'error');
    }
}
// Populate language select
/**
 * Rebuild the language dropdown: a placeholder entry followed by one option
 * per distinct voice locale, in sorted order.
 */
function populateLanguageSelect() {
    const uniqueLocales = Array.from(new Set(voices.map(voice => voice.Locale)));
    uniqueLocales.sort();

    languageSelect.innerHTML = '<option value="">Select a language</option>';
    for (const locale of uniqueLocales) {
        const entry = document.createElement('option');
        entry.value = locale;
        entry.textContent = getLanguageName(locale);
        languageSelect.appendChild(entry);
    }
}
// Get language name from locale
/**
 * Return the LocaleName of the first voice with the given locale, or the
 * locale string itself when no voice matches.
 */
function getLanguageName(locale) {
    const firstMatch = voices.find(voice => voice.Locale === locale);
    if (firstMatch === undefined) {
        return locale;
    }
    return firstMatch.LocaleName;
}
// Filter voices based on selected language and gender
/**
 * Recompute `filteredVoices` from the language and gender selects and
 * refresh the voice dropdown. With no language chosen the dropdown is reset
 * to its "Select language first" placeholder.
 */
function filterVoices() {
    const language = languageSelect.value;
    const gender = genderFilter.value;

    if (!language) {
        voiceSelect.innerHTML = '<option value="">Select language first</option>';
        filteredVoices = [];
        return;
    }

    // An empty gender value means "All".
    filteredVoices = voices.filter(({ Locale, Gender }) =>
        Locale === language && (!gender || Gender === gender)
    );
    populateVoiceSelect();
}
// Populate voice select
/**
 * Fill the voice dropdown with `filteredVoices`, sorted by display label.
 *
 * The label is the voice's LocalName when present; because that field may
 * be absent, it falls back to DisplayName, ShortName and finally Name so
 * sorting never dereferences an undefined value.
 */
function populateVoiceSelect() {
    voiceSelect.innerHTML = '';
    if (filteredVoices.length === 0) {
        voiceSelect.innerHTML = '<option value="">No voices available for selected filters</option>';
        return;
    }

    const labelOf = (voice) =>
        voice.LocalName || voice.DisplayName || voice.ShortName || voice.Name || '';

    // Sort a copy so `filteredVoices` keeps its original order.
    const sortedVoices = [...filteredVoices].sort((a, b) =>
        labelOf(a).localeCompare(labelOf(b))
    );

    sortedVoices.forEach(voice => {
        const option = document.createElement('option');
        option.value = voice.Name;
        option.textContent = `${labelOf(voice)} (${voice.Gender})`;
        voiceSelect.appendChild(option);
    });
}
// Generate speech
/**
 * Validate the form, request synthesis from the API, load the resulting
 * audio into the player and record the request in history.
 *
 * All failures are reported through the status message; the Generate button
 * and progress bar are always restored.
 */
async function generateSpeech() {
    const text = textInput.value.trim();
    if (!text) {
        showStatus('Please enter some text', 'error');
        return;
    }
    if (text.length > 5000) {
        showStatus('Text exceeds maximum length of 5000 characters', 'error');
        return;
    }
    const voice = voiceSelect.value;
    if (!voice) {
        showStatus('Please select a voice', 'error');
        return;
    }

    const params = {
        text: text,
        voice: voice,
        rate: `${rateSlider.value >= 0 ? '+' : ''}${rateSlider.value}%`,
        volume: `${volumeSlider.value >= 0 ? '+' : ''}${volumeSlider.value}%`,
        pitch: `${pitchSlider.value >= 0 ? '+' : ''}${pitchSlider.value}Hz`
    };

    // The API's `detail` is a string for application errors but a list of
    // objects for request-validation errors; flatten both to readable text.
    const detailToMessage = (detail, fallback) => {
        if (!detail) return fallback;
        if (typeof detail === 'string') return detail;
        if (Array.isArray(detail)) {
            return detail.map(d => (d && d.msg) || JSON.stringify(d)).join('; ');
        }
        return JSON.stringify(detail);
    };

    try {
        generateBtn.disabled = true;
        generateBtn.innerHTML = '<span class="loading"></span> Generating...';
        progressBar.style.display = 'block';
        hideStatus();

        const response = await fetch(`${API_BASE_URL}/synthesize`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(params)
        });

        if (!response.ok) {
            const error = await response.json().catch(() => ({ detail: 'Failed to generate speech' }));
            const errorMsg = detailToMessage(error && error.detail, 'Failed to generate speech');
            // Check if it's an invalid voice error
            if (errorMsg.includes('No audio') || errorMsg.includes('voice')) {
                throw new Error(`Voice error: ${errorMsg}. Try selecting a different voice or refresh the page.`);
            }
            throw new Error(errorMsg);
        }

        const blob = await response.blob();
        // Clean up previous audio URL
        if (currentAudioUrl) {
            URL.revokeObjectURL(currentAudioUrl);
        }
        currentAudioUrl = URL.createObjectURL(blob);
        audioPlayer.src = currentAudioUrl;
        audioSection.style.display = 'block';

        // Add to history
        const selectedVoice = voices.find(v => v.Name === voice);
        addToHistory({
            text: text,
            voice: voice,
            voiceName: voiceSelect.options[voiceSelect.selectedIndex].text,
            locale: selectedVoice ? selectedVoice.Locale : '',
            localeName: selectedVoice ? selectedVoice.LocaleName : '',
            params: params,
            timestamp: new Date().toISOString()
        });
        showStatus('Speech generated successfully!', 'success');
    } catch (error) {
        console.error('Error generating speech:', error);
        showStatus(error.message, 'error');
    } finally {
        generateBtn.disabled = false;
        generateBtn.innerHTML = '<span class="btn-icon">🎵</span> Generate Speech';
        progressBar.style.display = 'none';
    }
}
// Download audio
/**
 * Download the most recently generated audio. The file name is derived from
 * the first 30 characters of the text box (non-alphanumerics become '_')
 * plus a timestamp. Does nothing when no audio has been generated.
 */
function downloadAudio() {
    if (!currentAudioUrl) {
        return;
    }

    const slug = textInput.value.substring(0, 30).replace(/[^a-z0-9]/gi, '_');
    const link = document.createElement('a');
    link.href = currentAudioUrl;
    link.download = `edge-tts-${slug}-${Date.now()}.mp3`;
    link.click();
}
// Update test voice button state
/**
 * Enable the Test Voice button only while the voice dropdown holds a
 * non-empty value.
 */
function updateTestVoiceButton() {
    const noVoiceChosen = !voiceSelect.value;
    testVoiceBtn.disabled = noVoiceChosen;
}
// Get sample sentence for language
/**
 * Pick a random sample sentence for a locale such as 'en-US'.
 *
 * Lookup order: exact locale key, language code (text before the first
 * '-'), a known alias of that code, then English as the default.
 *
 * @param {string} locale - Locale identifier of the selected voice.
 * @returns {string} One sentence from the matching list.
 */
function getSampleSentence(locale) {
    // Language codes whose sentences are stored under a different key:
    // Norwegian Bokmål voices use 'nb-*' locales, the table uses 'no'.
    const LANGUAGE_ALIASES = { nb: 'no' };

    // Extract language code (e.g., 'en' from 'en-US')
    const langCode = locale.split('-')[0];

    const sentences = SAMPLE_SENTENCES[locale]
        || SAMPLE_SENTENCES[langCode]
        || SAMPLE_SENTENCES[LANGUAGE_ALIASES[langCode]]
        || SAMPLE_SENTENCES['en'];

    return sentences[Math.floor(Math.random() * sentences.length)];
}
// Test voice with sample sentence
/**
 * Synthesize a short sample sentence with the selected voice and current
 * prosody settings, then play it immediately.
 *
 * Failures, including the browser refusing playback, are reported through
 * the status message; the Test Voice button is always restored.
 */
async function testVoice() {
    const voice = voiceSelect.value;
    const selectedLanguage = languageSelect.value;
    if (!voice || !selectedLanguage) {
        showStatus('Please select a voice first', 'error');
        return;
    }

    // Get sample sentence
    const sampleText = getSampleSentence(selectedLanguage);
    const params = {
        text: sampleText,
        voice: voice,
        rate: `${rateSlider.value >= 0 ? '+' : ''}${rateSlider.value}%`,
        volume: `${volumeSlider.value >= 0 ? '+' : ''}${volumeSlider.value}%`,
        pitch: `${pitchSlider.value >= 0 ? '+' : ''}${pitchSlider.value}Hz`
    };

    // The API's `detail` is a string for application errors but a list of
    // objects for request-validation errors; flatten both to readable text.
    const detailToMessage = (detail, fallback) => {
        if (!detail) return fallback;
        if (typeof detail === 'string') return detail;
        if (Array.isArray(detail)) {
            return detail.map(d => (d && d.msg) || JSON.stringify(d)).join('; ');
        }
        return JSON.stringify(detail);
    };

    try {
        testVoiceBtn.disabled = true;
        testVoiceBtn.innerHTML = '<span class="loading"></span> Testing...';

        const response = await fetch(`${API_BASE_URL}/synthesize`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(params)
        });

        if (!response.ok) {
            const error = await response.json().catch(() => ({ detail: 'Failed to generate test speech' }));
            const errorMsg = detailToMessage(error && error.detail, 'Failed to generate test speech');
            // Check if it's an invalid voice error
            if (errorMsg.includes('No audio') || errorMsg.includes('voice')) {
                throw new Error(`Voice error: ${errorMsg}. This voice may not be available. Try refreshing the page.`);
            }
            throw new Error(errorMsg);
        }

        const blob = await response.blob();
        // Clean up previous test audio URL
        if (currentTestAudioUrl) {
            URL.revokeObjectURL(currentTestAudioUrl);
        }
        currentTestAudioUrl = URL.createObjectURL(blob);

        // Play audio automatically. play() returns a promise that rejects
        // when playback is blocked; awaiting it routes that into the catch
        // below instead of an unhandled rejection.
        const testAudio = new Audio(currentTestAudioUrl);
        await testAudio.play();
        showStatus(`Testing voice: "${sampleText}"`, 'info');
    } catch (error) {
        console.error('Error testing voice:', error);
        showStatus(error.message, 'error');
    } finally {
        testVoiceBtn.disabled = false;
        testVoiceBtn.innerHTML = '<span class="btn-icon">🎧</span> Test Voice';
    }
}
// Clear form
/**
 * Reset the text box and all prosody sliders (with their labels) to their
 * defaults and hide the status message.
 */
function clearForm() {
    textInput.value = '';
    updateCharCount();

    const prosodyControls = [
        [rateSlider, rateValue, '+0%'],
        [volumeSlider, volumeValue, '+0%'],
        [pitchSlider, pitchValue, '+0Hz'],
    ];
    for (const [slider, label, defaultText] of prosodyControls) {
        slider.value = 0;
        label.textContent = defaultText;
    }

    hideStatus();
}
// History management
/**
 * Restore the generation history from localStorage and render it.
 *
 * Unreadable storage, malformed JSON or a non-array value is logged and
 * treated as an empty history instead of throwing during page start-up.
 */
function loadHistory() {
    try {
        const saved = localStorage.getItem('tts_history');
        if (!saved) return;

        const parsed = JSON.parse(saved);
        if (!Array.isArray(parsed)) {
            throw new TypeError('Saved history is not an array');
        }
        history = parsed;
    } catch (error) {
        console.warn('Ignoring unreadable saved history:', error);
        history = [];
    }
    renderHistory();
}
/**
 * Trim the history to its 10 newest entries and persist it.
 *
 * Persisting is best-effort: a storage failure (quota exceeded, storage
 * disabled) is logged and otherwise ignored, so it is not reported to the
 * user as a failed speech generation.
 */
function saveHistory() {
    // Keep only last 10 items
    history = history.slice(0, 10);
    try {
        localStorage.setItem('tts_history', JSON.stringify(history));
    } catch (error) {
        console.warn('Could not persist history:', error);
    }
}
/**
 * Put a new entry at the front of the history, then persist and re-render.
 */
function addToHistory(item) {
    history.splice(0, 0, item);  // newest first
    saveHistory();
    renderHistory();
}
/**
 * Render the history list, or an empty-state message when there is none.
 *
 * Entries are built with DOM APIs (textContent / properties) rather than an
 * HTML string, so stored text can never be interpreted as markup or break
 * out of an attribute. Buttons call loadHistoryItem / deleteHistoryItem
 * with the entry's index.
 */
function renderHistory() {
    if (history.length === 0) {
        historyList.innerHTML = '<p class="empty-state">No recent generations yet</p>';
        return;
    }
    historyList.innerHTML = '';

    // Create an element with a class and optional text content.
    const make = (tag, className, text) => {
        const node = document.createElement(tag);
        node.className = className;
        if (text !== undefined) node.textContent = text;
        return node;
    };

    history.forEach((item, index) => {
        const entry = make('div', 'history-item');
        const itemText = item.text || '';

        const header = make('div', 'history-item-header');
        const textEl = make('div', 'history-item-text', itemText);
        textEl.title = itemText;
        header.appendChild(textEl);
        header.appendChild(make('div', 'history-item-time', getTimeAgo(new Date(item.timestamp))));

        const languageInfo = item.localeName ? ` - ${item.localeName}` : '';
        const voiceEl = make('div', 'history-item-voice', `${item.voiceName || ''}${languageInfo}`);

        const actions = make('div', 'history-item-actions');
        const loadBtn = make('button', 'btn btn-primary', 'Load');
        loadBtn.type = 'button';
        loadBtn.addEventListener('click', () => loadHistoryItem(index));
        const deleteBtn = make('button', 'btn btn-secondary', 'Delete');
        deleteBtn.type = 'button';
        deleteBtn.addEventListener('click', () => deleteHistoryItem(index));
        actions.appendChild(loadBtn);
        actions.appendChild(deleteBtn);

        entry.appendChild(header);
        entry.appendChild(voiceEl);
        entry.appendChild(actions);
        historyList.appendChild(entry);
    });
}
/**
 * Load a history entry back into the form: text, language, voice and
 * prosody settings.
 *
 * @param {number} index - Position of the entry in `history`.
 */
function loadHistoryItem(index) {
    const item = history[index];
    if (!item) return;  // stale index, e.g. the entry was already removed

    textInput.value = item.text;
    updateCharCount();

    // Find the voice in the voices list to get its locale
    const voice = voices.find(v => v.Name === item.voice);
    if (voice) {
        // Set language first
        languageSelect.value = voice.Locale;
        // A gender filter that excludes this voice would keep it out of the
        // dropdown, so fall back to "All" in that case.
        if (genderFilter.value && genderFilter.value !== voice.Gender) {
            genderFilter.value = '';
        }
        // Trigger filter to populate voice dropdown
        filterVoices();
        // Then set the specific voice
        voiceSelect.value = item.voice;
    }
    // Setting a select's value from script fires no 'change' event, so the
    // Test Voice button has to be refreshed explicitly.
    updateTestVoiceButton();

    // Set parameters
    if (item.params) {
        rateSlider.value = parseInt(item.params.rate, 10);
        volumeSlider.value = parseInt(item.params.volume, 10);
        pitchSlider.value = parseInt(item.params.pitch, 10);
        rateValue.textContent = item.params.rate;
        volumeValue.textContent = item.params.volume;
        pitchValue.textContent = item.params.pitch;
    }

    window.scrollTo({ top: 0, behavior: 'smooth' });
    showStatus('History item loaded', 'info');
}
/**
 * Remove the history entry at `index`, then persist and re-render.
 */
function deleteHistoryItem(index) {
    const removeCount = 1;
    history.splice(index, removeCount);

    saveHistory();
    renderHistory();
}
// Utilities
/**
 * Display a status message; `type` is appended as a CSS class next to
 * 'status-message' (defaults to 'info').
 */
function showStatus(message, type = 'info') {
    const classes = ['status-message', type];
    statusMessage.textContent = message;
    statusMessage.className = classes.join(' ');
}
/**
 * Reset the status element to its base class, dropping whatever type class
 * showStatus() added. The message text itself is left in place.
 */
function hideStatus() {
    statusMessage.className = 'status-message';
}
/**
 * Format the time elapsed since `date` as 'Just now', 'Nm ago', 'Nh ago'
 * or 'Nd ago'.
 */
function getTimeAgo(date) {
    const elapsed = Math.floor((new Date() - date) / 1000);
    if (elapsed < 60) return 'Just now';

    // [exclusive upper bound in seconds, seconds per unit, suffix]
    const steps = [
        [3600, 60, 'm'],
        [86400, 3600, 'h'],
    ];
    for (const [limit, unitSeconds, suffix] of steps) {
        if (elapsed < limit) {
            return `${Math.floor(elapsed / unitSeconds)}${suffix} ago`;
        }
    }
    return `${Math.floor(elapsed / 86400)}d ago`;
}
/**
 * Escape text for safe inclusion in HTML, in both element content and
 * quoted attribute values.
 *
 * Escapes & < > as well as double and single quotes. null and undefined
 * yield an empty string.
 *
 * @param {*} text - Value to escape; converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
    if (text === null || text === undefined) return '';
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(text).replace(/[&<>"']/g, ch => entities[ch]);
}
/**
 * Reflect navigator.onLine in the footer indicator (text and CSS class).
 */
function updateOnlineStatus() {
    const isOnline = navigator.onLine;
    onlineStatus.textContent = isOnline ? '● Online' : '● Offline';
    onlineStatus.className = isOnline ? 'online' : 'offline';
}
// PWA Setup
/**
 * Register the service worker (when supported) and wire up the custom
 * "Install App" flow around the browser's beforeinstallprompt event.
 */
function setupPWA() {
    const hideInstallPrompt = () => {
        installPrompt.style.display = 'none';
    };

    // Register service worker
    if ('serviceWorker' in navigator) {
        navigator.serviceWorker
            .register('sw.js')
            .then((registration) => {
                console.log('Service Worker registered:', registration);
            })
            .catch((error) => {
                console.error('Service Worker registration failed:', error);
            });
    }

    // Stash the browser's install prompt and reveal our own button instead.
    window.addEventListener('beforeinstallprompt', (event) => {
        event.preventDefault();
        deferredPrompt = event;
        installPrompt.style.display = 'inline';
    });

    installBtn.addEventListener('click', async () => {
        if (!deferredPrompt) {
            return;
        }
        deferredPrompt.prompt();
        const choice = await deferredPrompt.userChoice;
        console.log(`User response to install prompt: ${choice.outcome}`);
        deferredPrompt = null;
        hideInstallPrompt();
    });

    window.addEventListener('appinstalled', () => {
        console.log('PWA installed');
        hideInstallPrompt();
    });
}

40
web/build.sh Executable file
View File

@@ -0,0 +1,40 @@
#!/bin/bash
# Build script for Edge TTS Web UI Docker image
#
# Usage: ./build.sh [TAG]
#   TAG  image tag to build (default: latest)
#
# Builds from the current directory, so run it from the directory that
# contains the Dockerfile.
set -e # Exit on error

echo "🏗️ Building Edge TTS Web UI Docker Image"
echo "=========================================="
echo ""

# Configuration
IMAGE_NAME="edge-tts-web"
IMAGE_TAG="${1:-latest}"
FULL_IMAGE_NAME="${IMAGE_NAME}:${IMAGE_TAG}"

# Check if Docker is installed
if ! command -v docker &> /dev/null; then
    echo "❌ Docker is not installed. Please install Docker first."
    exit 1
fi

# Build the Docker image.
# The build runs as the `if` condition: under `set -e` a separate `$?` check
# would never see a failure, because the script would already have exited.
echo "📦 Building Docker image: ${FULL_IMAGE_NAME}"
if docker build -t "${FULL_IMAGE_NAME}" .; then
    echo ""
    echo "✅ Build successful!"
    echo ""
    echo "Image details:"
    docker images | grep "${IMAGE_NAME}" | head -n 1
    echo ""
    echo "To run the container:"
    echo " docker run -d -p 8000:8000 --name edge-tts ${FULL_IMAGE_NAME}"
    echo ""
    echo "Or use docker-compose:"
    echo " docker-compose up -d"
else
    echo "❌ Build failed!"
    exit 1
fi

113
web/deploy.sh Executable file
View File

@@ -0,0 +1,113 @@
#!/bin/bash
# Deployment script for Edge TTS Web UI to remote server
#
# Usage: REMOTE_HOST=host [REMOTE_USER=user] [REMOTE_PATH=/path] ./deploy.sh [TAG]
#
# Copies the current directory to the remote host with rsync, then rebuilds
# and restarts the docker-compose stack there over SSH.
set -e # Exit on error

echo "🚀 Edge TTS Web UI - Remote Deployment Script"
echo "=============================================="
echo ""

# Configuration - Edit these values for your server
REMOTE_USER="${REMOTE_USER:-root}"
REMOTE_HOST="${REMOTE_HOST:-your-server.com}"
REMOTE_PATH="${REMOTE_PATH:-/opt/edge-tts}"
IMAGE_NAME="edge-tts-web"
IMAGE_TAG="${1:-latest}"

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Function to print colored output
print_info() {
    echo -e "${GREEN} $1${NC}"
}
print_warning() {
    echo -e "${YELLOW}⚠️ $1${NC}"
}
# Prints the message and aborts the script with status 1.
print_error() {
    echo -e "${RED}$1${NC}"
    exit 1
}

# Check if required variables are set
if [ "$REMOTE_HOST" = "your-server.com" ]; then
    print_warning "Please configure REMOTE_HOST before deployment"
    echo ""
    echo "Usage:"
    echo " REMOTE_HOST=192.168.1.100 ./deploy.sh"
    echo " REMOTE_HOST=myserver.com REMOTE_USER=deployer ./deploy.sh"
    echo ""
    exit 1
fi

# Check if SSH key is available
if ! ssh -o BatchMode=yes -o ConnectTimeout=5 "${REMOTE_USER}@${REMOTE_HOST}" exit 2>/dev/null; then
    print_warning "SSH key authentication not configured or server unreachable"
    print_info "You may be prompted for password multiple times"
fi

print_info "Deployment Configuration:"
echo " Remote Host: ${REMOTE_USER}@${REMOTE_HOST}"
echo " Remote Path: ${REMOTE_PATH}"
echo " Image: ${IMAGE_NAME}:${IMAGE_TAG}"
echo ""
read -p "Continue with deployment? (y/n) " -n 1 -r
echo ""
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Deployment cancelled"
    exit 0
fi

# Step 1: Create remote directory
print_info "Creating remote directory..."
ssh "${REMOTE_USER}@${REMOTE_HOST}" "mkdir -p ${REMOTE_PATH}"

# Step 2: Copy files to remote server
print_info "Copying files to remote server..."
rsync -avz --progress \
    --exclude='*.pyc' \
    --exclude='__pycache__' \
    --exclude='.git' \
    --exclude='venv' \
    --exclude='.venv' \
    --exclude='*.mp3' \
    --exclude='*.log' \
    ./ "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_PATH}/"

# Step 3: Build and start containers on remote server
# The ssh command is the `if` condition: under the local `set -e` a separate
# `$?` check could never reach the failure branch. The remote script sets
# `set -e` itself so a failed `cd` or build is not hidden by the exit status
# of the final `docker-compose ps`.
print_info "Building and starting containers on remote server..."
if ssh "${REMOTE_USER}@${REMOTE_HOST}" << EOF
set -e
cd ${REMOTE_PATH}
# Stop existing containers
docker-compose down 2>/dev/null || true
# Build new image
docker-compose build
# Start containers
docker-compose up -d
# Show status
echo ""
echo "Container status:"
docker-compose ps
EOF
then
    print_info "Deployment successful! ✅"
    echo ""
    echo "Access your application at:"
    echo " http://${REMOTE_HOST}:8000"
    echo ""
    echo "To check logs:"
    echo " ssh ${REMOTE_USER}@${REMOTE_HOST} 'cd ${REMOTE_PATH} && docker-compose logs -f'"
else
    print_error "Deployment failed!"
fi

25
web/docker-compose.yml Normal file
View File

@@ -0,0 +1,25 @@
# Compose definition for the Edge TTS web UI: a single service built from the
# local Dockerfile and published on port 8000.
# NOTE(review): recent Docker Compose releases treat the top-level `version`
# key as obsolete — confirm whether it can be dropped.
version: '3.8'
services:
  edge-tts-web:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: edge-tts-web
    ports:
      - "8000:8000"  # host:container
    environment:
      - PYTHONUNBUFFERED=1
    restart: unless-stopped
    # Polls the API's /api/health route from inside the container.
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 5s
    networks:
      - edge-tts-network
networks:
  edge-tts-network:
    driver: bridge

BIN
web/icon-192.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

BIN
web/icon-512.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

4
web/icon.svg Normal file
View File

@@ -0,0 +1,4 @@
<svg width="192" height="192" xmlns="http://www.w3.org/2000/svg">
<rect width="192" height="192" fill="#2563eb"/>
<text x="50%" y="50%" font-size="100" fill="white" text-anchor="middle" dominant-baseline="middle">🎙️</text>
</svg>

After

Width:  |  Height:  |  Size: 238 B

175
web/index.html Normal file
View File

@@ -0,0 +1,175 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="Edge TTS - Text to Speech PWA using Microsoft Edge's online TTS service">
<meta name="theme-color" content="#2563eb">
<title>Edge TTS - Text to Speech</title>
<link rel="manifest" href="manifest.json">
<link rel="stylesheet" href="styles.css">
<link rel="icon" type="image/png" href="icon-192.png">
<link rel="apple-touch-icon" href="icon-192.png">
</head>
<body>
<div class="container">
<header>
<h1>🎙️ Edge TTS</h1>
<p class="subtitle">Text to Speech Converter</p>
</header>
<main>
<!-- Text Input Section -->
<section class="card">
<h2>Enter Text</h2>
<textarea
id="textInput"
placeholder="Enter text to convert to speech..."
rows="6"
maxlength="5000"
></textarea>
<div class="char-count">
<span id="charCount">0</span> / 5000 characters
</div>
</section>
<!-- Voice Selection Section -->
<section class="card">
<h2>Voice Settings</h2>
<div class="form-group">
<label for="languageSelect">Language</label>
<select id="languageSelect">
<option value="">Loading languages...</option>
</select>
</div>
<div class="filters">
<div class="form-group">
<label for="genderFilter">Gender</label>
<select id="genderFilter">
<option value="">All</option>
<option value="Female">Female</option>
<option value="Male">Male</option>
</select>
</div>
<div class="form-group">
<label for="voiceSelect">Voice</label>
<select id="voiceSelect">
<option value="">Select language first</option>
</select>
</div>
</div>
<div class="test-voice-section">
<button id="testVoiceBtn" class="btn btn-secondary" disabled>
<span class="btn-icon">🎧</span>
Test Voice
</button>
<p class="test-voice-hint">Select a voice to test it with a sample sentence</p>
</div>
<!-- Prosody Controls -->
<div class="prosody-controls">
<div class="form-group">
<label for="rateSlider">
Speed: <span id="rateValue">+0%</span>
</label>
<input
type="range"
id="rateSlider"
min="-100"
max="100"
value="0"
step="10"
>
</div>
<div class="form-group">
<label for="volumeSlider">
Volume: <span id="volumeValue">+0%</span>
</label>
<input
type="range"
id="volumeSlider"
min="-100"
max="100"
value="0"
step="10"
>
</div>
<div class="form-group">
<label for="pitchSlider">
Pitch: <span id="pitchValue">+0Hz</span>
</label>
<input
type="range"
id="pitchSlider"
min="-500"
max="500"
value="0"
step="50"
>
</div>
</div>
</section>
<!-- Actions Section -->
<section class="card">
<div class="actions">
<button id="generateBtn" class="btn btn-primary">
<span class="btn-icon">🎵</span>
Generate Speech
</button>
<button id="clearBtn" class="btn btn-secondary">
<span class="btn-icon">🗑️</span>
Clear
</button>
</div>
<div id="progressBar" class="progress-bar" style="display: none;">
<div class="progress-fill"></div>
</div>
<div id="statusMessage" class="status-message"></div>
</section>
<!-- Audio Player Section -->
<section id="audioSection" class="card" style="display: none;">
<h2>Audio Output</h2>
<audio id="audioPlayer" controls></audio>
<div class="audio-actions">
<button id="downloadBtn" class="btn btn-primary">
<span class="btn-icon">⬇️</span>
Download MP3
</button>
</div>
</section>
<!-- History Section -->
<section class="card">
<h2>Recent Generations</h2>
<div id="historyList" class="history-list">
<p class="empty-state">No recent generations yet</p>
</div>
</section>
</main>
<footer>
    <p>
        <!-- rel="noopener noreferrer": the page opened in the new tab gets no handle on this window -->
        Powered by <a href="https://github.com/rany2/edge-tts" target="_blank" rel="noopener noreferrer">edge-tts</a>
        | <span id="installPrompt" style="display: none;">
            <button id="installBtn" class="btn-link">Install App</button>
        </span>
    </p>
    <p class="status-indicator">
        <span id="onlineStatus" class="online">● Online</span>
    </p>
</footer>
</div>
<script src="app.js"></script>
</body>
</html>

40
web/manifest.json Normal file
View File

@@ -0,0 +1,40 @@
{
"name": "Edge TTS",
"short_name": "Edge TTS",
"description": "Convert text to speech using Microsoft Edge's online TTS service",
"start_url": "/",
"display": "standalone",
"background_color": "#ffffff",
"theme_color": "#2563eb",
"orientation": "portrait-primary",
"icons": [
{
"src": "icon-192.png",
"sizes": "192x192",
"type": "image/png",
"purpose": "any maskable"
},
{
"src": "icon-512.png",
"sizes": "512x512",
"type": "image/png",
"purpose": "any maskable"
}
],
"categories": ["productivity", "utilities"],
"screenshots": [
{
"src": "screenshot.png",
"sizes": "1280x720",
"type": "image/png"
}
],
"share_target": {
"action": "/",
"method": "GET",
"params": {
"title": "title",
"text": "text"
}
}
}

4
web/requirements.txt Normal file
View File

@@ -0,0 +1,4 @@
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
pydantic>=2.5.0
edge-tts>=7.0.0

256
web/server.py Executable file
View File

@@ -0,0 +1,256 @@
#!/usr/bin/env python3
"""
Edge TTS Web API Server
This server provides a REST API for the edge-tts web UI.
"""
import asyncio
import io
import logging
from typing import Optional
from fastapi import FastAPI, HTTPException, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel, Field
import uvicorn
# Import edge_tts
import edge_tts
from edge_tts import VoicesManager
# Configure logging
# Root logging is set to INFO; `logger` is the module-level logger used by the
# route handlers below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Create FastAPI app
# The title/description/version are passed to FastAPI as application metadata.
app = FastAPI(
    title="Edge TTS API",
    description="REST API for Microsoft Edge Text-to-Speech service",
    version="1.0.0"
)
# CORS middleware
# Any origin may call the API (convenient for development), but credentialed
# requests are not enabled: pairing wildcard origins with
# allow_credentials=True would let any website make authenticated
# cross-origin calls, and this API relies on neither cookies nor auth headers.
# For production, replace the wildcard origin with the real site origin(s).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Global voices cache
# None until the first successful fetch in get_voices(); afterwards holds the
# voice list for the lifetime of the process (it is never refreshed here).
voices_cache: Optional[list] = None
# Models
class SynthesizeRequest(BaseModel):
    """Request body for the synthesis endpoints.

    The prosody fields are validated against the signed formats shown in
    their descriptions, so a malformed value is rejected at request
    validation time instead of failing later inside the TTS call.
    """

    text: str = Field(..., max_length=5000, description="Text to convert to speech")
    voice: str = Field(default="en-US-EmmaMultilingualNeural", description="Voice name")
    # Sign is mandatory: "+10%" / "-10%".
    rate: str = Field(
        default="+0%",
        pattern=r"^[+-]\d+%$",
        description="Speech rate (e.g., '+0%', '-50%', '+100%')",
    )
    volume: str = Field(
        default="+0%",
        pattern=r"^[+-]\d+%$",
        description="Volume (e.g., '+0%', '-50%', '+100%')",
    )
    # Sign is mandatory: "+50Hz" / "-50Hz".
    pitch: str = Field(
        default="+0Hz",
        pattern=r"^[+-]\d+Hz$",
        description="Pitch (e.g., '+0Hz', '-500Hz', '+500Hz')",
    )
class VoiceResponse(BaseModel):
    """Shape of a single voice entry.

    NOTE(review): no visible route declares this as its response model —
    confirm whether it is used elsewhere in the file.
    """

    # Required identification fields
    Name: str
    ShortName: str
    Gender: str
    Locale: str
    LocaleName: str
    # Optional descriptive fields; may be absent from a voice entry
    LocalName: Optional[str] = None
    DisplayName: Optional[str] = None
    Status: Optional[str] = None
# API Routes
@app.get("/")
async def root():
    """Serve the main web page.

    index.html is resolved relative to this file rather than the current
    working directory, so the page is found wherever the server is
    launched from.
    """
    from pathlib import Path  # function-local: the module imports are left untouched

    return FileResponse(Path(__file__).resolve().parent / "index.html")
@app.get("/api/health")
async def health_check():
    """Report that the API process is up and responding."""
    payload = {
        "status": "healthy",
        "service": "edge-tts-api",
    }
    return payload
@app.get("/api/voices")
async def get_voices():
    """
    Get list of all available voices.

    The list is fetched from the Edge TTS service on the first call and then
    served from the module-level cache. A failed fetch leaves the cache
    empty, so the next request retries.

    Returns:
        A list of voice objects with their properties.

    Raises:
        HTTPException: 500 when the voice list cannot be fetched.
    """
    global voices_cache
    # Use cached voices if available
    if voices_cache is not None:
        return voices_cache

    try:
        logger.info("Fetching voices from Edge TTS service...")
        fetched = await edge_tts.list_voices()
    except Exception as e:
        # logger.exception records the traceback; `from e` keeps the cause chained.
        logger.exception("Error fetching voices: %s", e)
        raise HTTPException(status_code=500, detail=f"Failed to fetch voices: {str(e)}") from e

    voices_cache = fetched
    logger.info("Loaded %d voices", len(voices_cache))
    return voices_cache
@app.post("/api/synthesize")
async def synthesize_speech(request: SynthesizeRequest):
    """
    Synthesize speech from text.

    Returns:
        An MP3 audio file (``audio/mpeg``) sent as an attachment.

    Raises:
        HTTPException:
            400 - empty or over-long text, parameters rejected by edge_tts,
                  or the service produced no audio;
            502 - the TTS service sent an unknown response;
            503 - the connection to the TTS service failed;
            500 - any other failure.
    """
    try:
        logger.info(f"Synthesizing speech: text_length={len(request.text)}, voice={request.voice}")
        # Validate text
        if not request.text.strip():
            raise HTTPException(status_code=400, detail="Text cannot be empty")
        if len(request.text) > 5000:
            raise HTTPException(status_code=400, detail="Text exceeds maximum length of 5000 characters")

        # Create Communicate instance.
        # Only the constructor is guarded here: a ValueError at this point
        # means the caller sent a malformed voice/rate/volume/pitch, which is
        # a client error rather than a server fault.
        try:
            communicate = edge_tts.Communicate(
                text=request.text,
                voice=request.voice,
                rate=request.rate,
                volume=request.volume,
                pitch=request.pitch
            )
        except ValueError as e:
            logger.warning(f"Invalid synthesis parameters: {e}")
            raise HTTPException(status_code=400, detail=f"Invalid parameters: {e}") from e

        # Generate audio: keep only the audio chunks from the stream.
        audio_data = io.BytesIO()
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_data.write(chunk["data"])

        # Check if audio was generated
        audio_bytes = audio_data.getvalue()
        if not audio_bytes:
            raise HTTPException(status_code=500, detail="No audio was generated")
        logger.info(f"Successfully generated {len(audio_bytes)} bytes of audio")

        # Return audio as MP3
        return Response(
            content=audio_bytes,
            media_type="audio/mpeg",
            headers={
                "Content-Disposition": "attachment; filename=speech.mp3"
            }
        )
    except HTTPException:
        # Already a well-formed API error; pass it through unchanged.
        raise
    except edge_tts.exceptions.NoAudioReceived as e:
        logger.error(f"No audio received: {e}")
        raise HTTPException(status_code=400, detail="No audio was generated. Check your parameters.")
    except edge_tts.exceptions.UnknownResponse as e:
        logger.error(f"Unknown response from TTS service: {e}")
        raise HTTPException(status_code=502, detail="Unknown response from TTS service")
    except edge_tts.exceptions.WebSocketError as e:
        logger.error(f"WebSocket error: {e}")
        raise HTTPException(status_code=503, detail="Failed to connect to TTS service")
    except Exception as e:
        logger.error(f"Error synthesizing speech: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to synthesize speech: {str(e)}")
@app.post("/api/synthesize-with-subtitles")
async def synthesize_with_subtitles(request: SynthesizeRequest):
    """
    Synthesize speech from text and generate matching subtitles.

    Returns a JSON object with three keys:
      * ``audio``     -- the MP3 bytes, base64-encoded
      * ``subtitles`` -- SRT text built from the word/sentence boundary events
      * ``format``    -- always ``"mp3"``

    Raises:
        HTTPException: 400 for blank or over-long text or when the TTS
            library reports that no audio was received; 502 for an unknown
            response from the TTS service; 503 for a WebSocket failure;
            500 for any other error.
    """
    import base64

    try:
        logger.info(f"Synthesizing with subtitles: text_length={len(request.text)}, voice={request.voice}")

        # Validate text (same rules as /api/synthesize)
        if not request.text.strip():
            raise HTTPException(status_code=400, detail="Text cannot be empty")
        if len(request.text) > 5000:
            raise HTTPException(status_code=400, detail="Text exceeds maximum length of 5000 characters")

        # Create Communicate instance
        communicate = edge_tts.Communicate(
            text=request.text,
            voice=request.voice,
            rate=request.rate,
            volume=request.volume,
            pitch=request.pitch
        )

        # Create subtitle maker
        submaker = edge_tts.SubMaker()

        # Generate audio and subtitles from the same stream
        audio_data = io.BytesIO()
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_data.write(chunk["data"])
            elif chunk["type"] in ("WordBoundary", "SentenceBoundary"):
                submaker.feed(chunk)

        # Get subtitles
        subtitles = submaker.get_srt()

        # Return both audio and subtitles
        audio_base64 = base64.b64encode(audio_data.getvalue()).decode('utf-8')
        return {
            "audio": audio_base64,
            "subtitles": subtitles,
            "format": "mp3"
        }
    except HTTPException:
        # Validation errors raised above must keep their own status code
        # instead of being rewrapped as a 500 by the generic handler below.
        raise
    except edge_tts.exceptions.NoAudioReceived as e:
        logger.error(f"No audio received: {e}")
        raise HTTPException(status_code=400, detail="No audio was generated. Check your parameters.")
    except edge_tts.exceptions.UnknownResponse as e:
        logger.error(f"Unknown response from TTS service: {e}")
        raise HTTPException(status_code=502, detail="Unknown response from TTS service")
    except edge_tts.exceptions.WebSocketError as e:
        logger.error(f"WebSocket error: {e}")
        raise HTTPException(status_code=503, detail="Failed to connect to TTS service")
    except Exception as e:
        logger.error(f"Error synthesizing with subtitles: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to synthesize: {str(e)}")
# Mount static files
# Declared after the API routes above. directory="." is a relative path, so it
# is resolved against the process working directory rather than this file's
# location.
# NOTE(review): mounting "." appears to make every file in that directory
# (including the server script itself) downloadable -- consider serving a
# dedicated static directory instead.
app.mount("/", StaticFiles(directory=".", html=True), name="static")
def main():
    """Parse the command-line options and start the uvicorn server."""
    import argparse

    cli = argparse.ArgumentParser(description="Edge TTS Web API Server")
    cli.add_argument("--host", default="0.0.0.0", help="Host to bind to")
    cli.add_argument("--port", type=int, default=8000, help="Port to bind to")
    cli.add_argument("--reload", action="store_true", help="Enable auto-reload")
    args = cli.parse_args()

    logger.info(f"Starting Edge TTS Web Server on {args.host}:{args.port}")
    logger.info(f"Visit http://localhost:{args.port} to use the web interface")

    # The app is passed as an import string ("module:attribute").
    server_options = {
        "host": args.host,
        "port": args.port,
        "reload": args.reload,
        "log_level": "info",
    }
    uvicorn.run("server:app", **server_options)


if __name__ == "__main__":
    main()

28
web/start.sh Executable file
View File

@@ -0,0 +1,28 @@
#!/bin/bash
# Quick start script for Edge TTS Web UI
#
# Activates the project virtualenv when one exists, installs the web
# dependencies if FastAPI cannot be imported, then launches server.py.

# Work from the directory that contains this script so the relative paths
# used below (../.venv, requirements.txt, server.py) resolve no matter where
# the script was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1

echo "🎙️ Starting Edge TTS Web UI..."
echo ""

# Use the project virtualenv if present; otherwise fall back to whatever
# python3 is on PATH instead of printing a "No such file" error.
if [ -f ../.venv/bin/activate ]; then
    source ../.venv/bin/activate
fi

# Check if Python is installed
if ! command -v python3 &> /dev/null; then
    echo "❌ Python 3 is not installed. Please install Python 3.8 or higher."
    exit 1
fi

# Check if dependencies are installed
if ! python3 -c "import fastapi" 2>/dev/null; then
    echo "📦 Installing dependencies..."
    # "python3 -m pip" installs into the same interpreter that runs the
    # server; a bare pip3 may belong to a different Python.
    if ! python3 -m pip install -r requirements.txt; then
        echo "❌ Failed to install dependencies."
        exit 1
    fi
    echo ""
fi

# Start the server
echo "✅ Starting server on http://localhost:8000"
echo ""
echo "Press Ctrl+C to stop the server"
echo ""
python3 server.py

488
web/styles.css Normal file
View File

@@ -0,0 +1,488 @@
:root {
--primary-color: #2563eb;
--primary-hover: #1d4ed8;
--secondary-color: #64748b;
--background: #f8fafc;
--card-background: #ffffff;
--text-primary: #1e293b;
--text-secondary: #64748b;
--border-color: #e2e8f0;
--success-color: #10b981;
--error-color: #ef4444;
--shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06);
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
background: var(--background);
color: var(--text-primary);
line-height: 1.6;
padding: 1rem;
}
.container {
max-width: 800px;
margin: 0 auto;
}
/* Header */
header {
text-align: center;
margin-bottom: 2rem;
padding: 2rem 0;
}
header h1 {
font-size: 2.5rem;
color: var(--primary-color);
margin-bottom: 0.5rem;
}
.subtitle {
color: var(--text-secondary);
font-size: 1.1rem;
}
/* Card */
.card {
background: var(--card-background);
border-radius: 0.75rem;
padding: 1.5rem;
margin-bottom: 1.5rem;
box-shadow: var(--shadow);
border: 1px solid var(--border-color);
}
.card h2 {
font-size: 1.25rem;
color: var(--text-primary);
margin-bottom: 1rem;
padding-bottom: 0.75rem;
border-bottom: 2px solid var(--border-color);
}
/* Form Elements */
.form-group {
margin-bottom: 1rem;
}
.form-group label {
display: block;
margin-bottom: 0.5rem;
font-weight: 600;
color: var(--text-primary);
font-size: 0.9rem;
}
textarea {
width: 100%;
padding: 0.75rem;
border: 2px solid var(--border-color);
border-radius: 0.5rem;
font-family: inherit;
font-size: 1rem;
resize: vertical;
transition: border-color 0.2s;
}
textarea:focus {
outline: none;
border-color: var(--primary-color);
}
select {
width: 100%;
padding: 0.75rem;
border: 2px solid var(--border-color);
border-radius: 0.5rem;
font-family: inherit;
font-size: 1rem;
background: white;
cursor: pointer;
transition: border-color 0.2s;
}
select:focus {
outline: none;
border-color: var(--primary-color);
}
.char-count {
text-align: right;
color: var(--text-secondary);
font-size: 0.875rem;
margin-top: 0.5rem;
}
/* Filters */
.filters {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 1rem;
}
/* Test Voice Section */
.test-voice-section {
margin-top: 1rem;
padding-top: 1rem;
border-top: 1px solid var(--border-color);
display: flex;
align-items: center;
gap: 1rem;
}
.test-voice-hint {
font-size: 0.875rem;
color: var(--text-secondary);
margin: 0;
}
/* Prosody Controls */
.prosody-controls {
margin-top: 1rem;
}
/* Range slider track. The native look is removed so the custom track and
   thumb styles apply; the prefixed property is kept for older WebKit/Blink
   and the standard one is declared after it. */
input[type="range"] {
    width: 100%;
    height: 6px;
    border-radius: 3px;
    background: var(--border-color);
    outline: none;
    -webkit-appearance: none;
    appearance: none;
}
input[type="range"]::-webkit-slider-thumb {
-webkit-appearance: none;
appearance: none;
width: 20px;
height: 20px;
border-radius: 50%;
background: var(--primary-color);
cursor: pointer;
transition: background 0.2s;
}
input[type="range"]::-webkit-slider-thumb:hover {
background: var(--primary-hover);
}
input[type="range"]::-moz-range-thumb {
width: 20px;
height: 20px;
border-radius: 50%;
background: var(--primary-color);
cursor: pointer;
border: none;
transition: background 0.2s;
}
input[type="range"]::-moz-range-thumb:hover {
background: var(--primary-hover);
}
/* Buttons */
.btn {
padding: 0.75rem 1.5rem;
border: none;
border-radius: 0.5rem;
font-family: inherit;
font-size: 1rem;
font-weight: 600;
cursor: pointer;
transition: all 0.2s;
display: inline-flex;
align-items: center;
gap: 0.5rem;
}
.btn-primary {
background: var(--primary-color);
color: white;
}
.btn-primary:hover:not(:disabled) {
background: var(--primary-hover);
transform: translateY(-1px);
box-shadow: var(--shadow-lg);
}
.btn-secondary {
background: var(--secondary-color);
color: white;
}
.btn-secondary:hover:not(:disabled) {
background: #475569;
transform: translateY(-1px);
}
.btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.btn-icon {
font-size: 1.2rem;
}
.btn-link {
background: none;
border: none;
color: var(--primary-color);
cursor: pointer;
text-decoration: underline;
font-size: inherit;
padding: 0;
}
.btn-link:hover {
color: var(--primary-hover);
}
/* Actions */
.actions {
display: flex;
gap: 1rem;
flex-wrap: wrap;
}
/* Progress Bar */
.progress-bar {
width: 100%;
height: 4px;
background: var(--border-color);
border-radius: 2px;
overflow: hidden;
margin: 1rem 0;
}
.progress-fill {
height: 100%;
background: var(--primary-color);
animation: progress 1.5s ease-in-out infinite;
}
@keyframes progress {
0% {
width: 0%;
margin-left: 0%;
}
50% {
width: 50%;
margin-left: 25%;
}
100% {
width: 0%;
margin-left: 100%;
}
}
/* Status Message */
.status-message {
padding: 0.75rem;
border-radius: 0.5rem;
margin-top: 1rem;
display: none;
}
.status-message.success {
background: #d1fae5;
color: #065f46;
display: block;
}
.status-message.error {
background: #fee2e2;
color: #991b1b;
display: block;
}
.status-message.info {
background: #dbeafe;
color: #1e40af;
display: block;
}
/* Audio Section */
#audioPlayer {
width: 100%;
margin-bottom: 1rem;
}
.audio-actions {
display: flex;
gap: 1rem;
}
/* History */
.history-list {
max-height: 300px;
overflow-y: auto;
}
.history-item {
padding: 1rem;
border: 1px solid var(--border-color);
border-radius: 0.5rem;
margin-bottom: 0.75rem;
cursor: pointer;
transition: all 0.2s;
}
.history-item:hover {
border-color: var(--primary-color);
background: #f1f5f9;
}
/* Header row of a history entry: text on the left, metadata on the right,
   both aligned to the top. flex-start is used rather than start because it
   has broader browser support inside flex containers. */
.history-item-header {
    display: flex;
    justify-content: space-between;
    align-items: flex-start;
    margin-bottom: 0.5rem;
}
.history-item-text {
font-size: 0.9rem;
color: var(--text-primary);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
flex: 1;
margin-right: 1rem;
}
.history-item-voice {
font-size: 0.8rem;
color: var(--text-secondary);
}
.history-item-time {
font-size: 0.75rem;
color: var(--text-secondary);
}
.history-item-actions {
display: flex;
gap: 0.5rem;
margin-top: 0.5rem;
}
.history-item-actions button {
padding: 0.25rem 0.75rem;
font-size: 0.875rem;
}
.empty-state {
text-align: center;
color: var(--text-secondary);
padding: 2rem;
}
/* Footer */
footer {
text-align: center;
padding: 2rem 0;
color: var(--text-secondary);
font-size: 0.875rem;
}
footer a {
color: var(--primary-color);
text-decoration: none;
}
footer a:hover {
text-decoration: underline;
}
.status-indicator {
margin-top: 0.5rem;
}
.online {
color: var(--success-color);
}
.offline {
color: var(--error-color);
}
/* Responsive */
@media (max-width: 640px) {
body {
padding: 0.5rem;
}
header h1 {
font-size: 2rem;
}
.card {
padding: 1rem;
}
.actions {
flex-direction: column;
}
.actions .btn {
width: 100%;
}
.filters {
grid-template-columns: 1fr;
}
.test-voice-section {
flex-direction: column;
align-items: stretch;
}
.test-voice-section .btn {
width: 100%;
}
.test-voice-hint {
text-align: center;
}
}
/* Install prompt animation */
#installPrompt {
animation: slideIn 0.3s ease-out;
}
@keyframes slideIn {
from {
opacity: 0;
transform: translateY(-10px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
/* Loading animation */
.loading {
display: inline-block;
width: 1rem;
height: 1rem;
border: 2px solid var(--border-color);
border-top-color: var(--primary-color);
border-radius: 50%;
animation: spin 0.6s linear infinite;
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}

142
web/sw.js Normal file
View File

@@ -0,0 +1,142 @@
// Service Worker for Edge TTS PWA

// Name of the cache used by this worker version. The activate handler in
// this file deletes every cache whose name differs from this value, so
// changing it discards previously cached assets.
const CACHE_NAME = 'edge-tts-v1';

// App-shell URLs that the install handler tries to pre-cache.
const urlsToCache = [
    '/',
    '/index.html',
    '/styles.css',
    '/app.js',
    '/manifest.json',
    '/icon-192.png',
    '/icon-512.png'
];
// Install event - cache resources
// Pre-caches the app shell, then activates this worker immediately.
self.addEventListener('install', (event) => {
    event.waitUntil(
        caches.open(CACHE_NAME)
            .then((cache) => {
                console.log('Opened cache');
                // Add each URL on its own and swallow individual failures so
                // a missing asset does not abort installation. The resulting
                // promises are combined with Promise.all; cache.addAll()
                // expects URLs/Requests, not promises, and would reject.
                return Promise.all(urlsToCache.map((url) => {
                    return cache.add(url).catch((err) => {
                        console.log('Failed to cache:', url, err);
                    });
                }));
            })
            .then(() => self.skipWaiting())
    );
});
// Activate event - clean up old caches
// Removes every cache not named CACHE_NAME, then takes control of open clients.
self.addEventListener('activate', (event) => {
    const purgeStaleCaches = async () => {
        const existingNames = await caches.keys();
        const staleNames = existingNames.filter((name) => name !== CACHE_NAME);
        await Promise.all(staleNames.map((name) => {
            console.log('Deleting old cache:', name);
            return caches.delete(name);
        }));
        return self.clients.claim();
    };

    event.waitUntil(purgeStaleCaches());
});
// Fetch event - serve from cache, fallback to network
//  * API requests always go to the network; a JSON 503 is returned when the
//    network is unavailable.
//  * Other GET requests are served cache-first and successful same-origin
//    responses are stored for later.
//  * Non-GET requests outside /api/ are left to the browser.
self.addEventListener('fetch', (event) => {
    const { request } = event;

    // Skip API requests - always go to network
    if (request.url.includes('/api/')) {
        event.respondWith(
            fetch(request)
                .catch(() => {
                    return new Response(
                        JSON.stringify({ error: 'Network unavailable' }),
                        {
                            status: 503,
                            headers: { 'Content-Type': 'application/json' }
                        }
                    );
                })
        );
        return;
    }

    // The Cache API only stores GET requests (cache.put rejects for any
    // other method), so do not intercept them.
    if (request.method !== 'GET') {
        return;
    }

    // Cache-first strategy for static assets
    event.respondWith(
        caches.match(request)
            .then((cached) => {
                // Cache hit - return response
                if (cached) {
                    return cached;
                }

                return fetch(request.clone()).then((response) => {
                    // Only cache complete, same-origin, successful responses
                    if (!response || response.status !== 200 || response.type !== 'basic') {
                        return response;
                    }

                    // A response body can be read once, so store a clone.
                    const responseToCache = response.clone();

                    // Keep the worker alive until the write finishes and
                    // report failures instead of leaving the promise
                    // unhandled.
                    event.waitUntil(
                        caches.open(CACHE_NAME)
                            .then((cache) => cache.put(request, responseToCache))
                            .catch((err) => {
                                console.log('Failed to cache:', request.url, err);
                            })
                    );

                    return response;
                });
            })
            .catch(() => {
                // Return offline page or error
                return new Response('Offline', {
                    status: 503,
                    statusText: 'Service Unavailable'
                });
            })
    );
});
// Background sync for offline TTS generation (future enhancement)
// Only the 'sync-tts' tag is handled; every other tag is ignored.
self.addEventListener('sync', (event) => {
    if (event.tag !== 'sync-tts') {
        return;
    }
    event.waitUntil(syncTTS());
});

// Placeholder for future offline TTS queue functionality; currently only logs.
async function syncTTS() {
    console.log('Syncing pending TTS requests...');
}
// Push notifications (future enhancement)
// Shows a notification built from the push payload. The payload is expected
// to be JSON with `title` and `body`; a push without payload or with a
// non-JSON payload still produces a notification instead of throwing.
self.addEventListener('push', (event) => {
    let data = {};
    if (event.data) {
        try {
            // json() throws on malformed payloads; `|| {}` covers a JSON null.
            data = event.data.json() || {};
        } catch (err) {
            // Not JSON - use the raw text as the notification body.
            data = { body: event.data.text() };
        }
    }

    const options = {
        body: data.body,
        icon: '/icon-192.png',
        badge: '/icon-192.png',
        vibrate: [200, 100, 200]
    };

    // showNotification needs a title, so fall back to a generic one.
    const title = data.title || 'Edge TTS';

    event.waitUntil(
        self.registration.showNotification(title, options)
    );
});
// Notification click handler
// Dismisses the clicked notification and opens the app's root page.
self.addEventListener('notificationclick', (event) => {
    const { notification } = event;
    notification.close();

    const openedWindow = self.clients.openWindow('/');
    event.waitUntil(openedWindow);
});