diff --git a/README.md b/README.md index d6b552d..8fa6022 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,17 @@ # learn-jp-with-python * Sample script for "Learn Japanese🇯🇵 with Python🐍" -* Set up your environment variable for AWS Access Key and DeepL API Key - * [Managing access keys for IAM users - AWS Identity and Access Management](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) - * [API Key for DeepL's API – DeepL Help Center | How Can We Help You?](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API) +* OpenAI works by default. Just set `OPENAI_API_KEY` in a `.env` file and run. +* Dependencies are provider-agnostic via IoC (Inversion of Control). You can switch providers without code changes. +* Optional: Set up credentials if you want DeepL/AWS Polly + * [Managing access keys for IAM users - AWS IAM](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) + * [API Key for DeepL's API – DeepL Help Center](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API) + +Use a `.env` file (recommended): ```bash -$ cp export.sh.sample export.sh -$ vi export.sh -$ source export.sh +$ cp .env.example .env +$ vi .env # fill in OPENAI_API_KEY (and others if desired) ``` * Run sample script @@ -19,3 +22,37 @@ $ . env/bin/activate (env) pip install -r requirements.txt (env) streamlit run learn_jp.py ``` + +## Providers + +OpenAI is the default provider. Configure via `.env` (defaults shown): + +```bash +# Translation: openai | deepl | libre | none +TRANSLATOR_PROVIDER=openai +# OpenAI Translation +OPENAI_API_KEY=... 
+OPENAI_TRANSLATION_MODEL=gpt-4o-mini +LIBRE_TRANSLATE_URL=https://libretranslate.com + +# Text-to-speech: openai | polly | noop +TTS_PROVIDER=openai +# OpenAI TTS +OPENAI_TTS_MODEL=gpt-4o-mini-tts +OPENAI_TTS_VOICE=alloy +# Optional Polly voice when using polly +TTS_VOICE=Mizuki +``` + +### Notes +- DeepL and Amazon Polly providers have not been fully tested in this branch. If they don't work for you, switch back to OpenAI or revert to a previous commit where DeepL and Polly were the default providers for the streamlit app. + + +If you configure DeepL and Polly, set: + +```bash +export DEEPL_API_KEY=... +export AWS_ACCESS_KEY_ID=... +export AWS_SECRET_ACCESS_KEY=... +export AWS_DEFAULT_REGION=ap-northeast-1 +``` diff --git a/export.sh.sample b/export.sh.sample index 9c0f3a6..fd6fb8c 100644 --- a/export.sh.sample +++ b/export.sh.sample @@ -2,4 +2,20 @@ export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= export AWS_DEFAULT_REGION= -export DEEPL_API_KEY= \ No newline at end of file +export DEEPL_API_KEY= + +# Provider selection (default: openai, which requires OPENAI_API_KEY) +# TRANSLATOR_PROVIDER: openai | deepl | libre | none +export TRANSLATOR_PROVIDER=openai +# OpenAI Translation +export OPENAI_API_KEY= +export OPENAI_TRANSLATION_MODEL=gpt-4o-mini +export LIBRE_TRANSLATE_URL=https://libretranslate.com + +# TTS_PROVIDER: openai | polly | noop +export TTS_PROVIDER=openai +# OpenAI TTS +export OPENAI_TTS_MODEL=gpt-4o-mini-tts +export OPENAI_TTS_VOICE=alloy +# Optional: Polly voice id (when TTS_PROVIDER=polly) +export TTS_VOICE=Mizuki \ No newline at end of file diff --git a/japanese.mp3 b/japanese.mp3 new file mode 100644 index 0000000..fef3105 Binary files /dev/null and b/japanese.mp3 differ diff --git a/learn_jp.py b/learn_jp.py index ee24aef..ba1347b 100644 --- a/learn_jp.py +++ b/learn_jp.py @@ -1,12 +1,15 @@ import os -from contextlib import closing -from pathlib import Path - -import boto3 -import deepl import jaconv import streamlit as st from sudachipy import Dictionary +import os 
+from pathlib import Path +from dotenv import load_dotenv, find_dotenv + +from dotenv import find_dotenv, load_dotenv + +load_dotenv(find_dotenv()) + PART_OF_SPEECH = { "名詞": "noun", @@ -23,27 +26,22 @@ "感動詞": "interjection", } -polly = boto3.client('polly') +from services.factory import get_providers -tokenizer = Dictionary().create() +translator_service, tts_service = get_providers() -translator = deepl.Translator(os.getenv("DEEPL_API_KEY")) +tokenizer = Dictionary().create() def do_polly(text: str) -> None: ssml_text = f'{text}' - result = polly.synthesize_speech( - Text=ssml_text, OutputFormat="mp3", TextType="ssml", VoiceId="Mizuki") - - with closing(result["AudioStream"]) as stream: - Path("japanese.mp3").write_bytes(stream.read()) + tts_service.synthesize_ssml(ssml_text, outfile="japanese.mp3", voice=os.getenv("TTS_VOICE")) return def translate(text: str) -> str: - result = translator.translate_text(text, target_lang="EN-US") - return f"{result.text}" + return translator_service.translate(text, target_lang="EN-US") st.title('Learn Japanese🇯🇵 with Python🐍') diff --git a/learn_jp_apac.py b/learn_jp_apac.py index dc18bf0..cd9ff0b 100644 --- a/learn_jp_apac.py +++ b/learn_jp_apac.py @@ -3,7 +3,9 @@ import streamlit as st from sudachipy import Dictionary -polly = boto3.client("polly") +from services.factory import get_tts + +tts_service = get_tts() tokenizer = Dictionary().create() @@ -81,10 +83,7 @@ def text_to_speech(): st.write(f"#### {romaji}", unsafe_allow_html=True) ssml_text = f'{text}' - result = polly.synthesize_speech( - Text=ssml_text, OutputFormat="mp3", TextType="ssml", VoiceId="Mizuki") - with open("japanese.mp3", "wb") as f: - f.write(result["AudioStream"].read()) + tts_service.synthesize_ssml(ssml_text, outfile="japanese.mp3") st.audio("japanese.mp3") diff --git a/requirements.txt b/requirements.txt index 7588426..b596179 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,5 @@ jaconv==0.4.0 streamlit==1.49.1 SudachiDict-core==20250825 
SudachiPy==0.6.10 +openai>=1.50.0 +python-dotenv>=1.0.1 diff --git a/services/deepl_translator.py b/services/deepl_translator.py new file mode 100644 index 0000000..1e92a24 --- /dev/null +++ b/services/deepl_translator.py @@ -0,0 +1,21 @@ +import os +import deepl +from .translation import Translator + + +class DeepLTranslator: + def __init__(self) -> None: + api_key = os.getenv("DEEPL_API_KEY") + if not api_key: + raise RuntimeError("DEEPL_API_KEY is not set") + self._translator = deepl.Translator(api_key) + + def translate(self, text: str, target_lang: str) -> str: + result = self._translator.translate_text(text, target_lang=target_lang) + return result.text + + +def build() -> Translator: + return DeepLTranslator() + + diff --git a/services/factory.py b/services/factory.py new file mode 100644 index 0000000..9398c6b --- /dev/null +++ b/services/factory.py @@ -0,0 +1,60 @@ +import os +from pathlib import Path +try: + from dotenv import load_dotenv # type: ignore + # Ensure we load .env from the project root (parent of services/) + project_root_env = Path(__file__).resolve().parent.parent / ".env" + print(f"Loading .env from: {project_root_env}") + load_dotenv(dotenv_path=project_root_env, override=False) +except Exception: + # If dotenv isn't installed, ignore; env vars may still be provided by shell + pass +from typing import Tuple + +from .translation import Translator +from .tts import TextToSpeech + + +def get_translator() -> Translator: + provider = os.getenv("TRANSLATOR_PROVIDER", "openai").lower() + if provider == "deepl": + from .deepl_translator import build as build_deepl + + return build_deepl() + if provider == "openai": + from .openai_translator import build as build_openai + + return build_openai() + if provider == "none": + return _NoOpTranslator() + # default to libre + from .libre_translator import build as build_libre + + return build_libre() + + +def get_tts() -> TextToSpeech: + provider = os.getenv("TTS_PROVIDER", "openai").lower() + if 
provider == "polly": + from .polly_tts import build as build_polly + + return build_polly() + if provider == "openai": + from .openai_tts import build as build_openai_tts + + return build_openai_tts() + # default to noop + from .noop_tts import build as build_noop + + return build_noop() + + +def get_providers() -> Tuple[Translator, TextToSpeech]: + return get_translator(), get_tts() + + +class _NoOpTranslator: + def translate(self, text: str, target_lang: str) -> str: + return text + + diff --git a/services/libre_translator.py b/services/libre_translator.py new file mode 100644 index 0000000..138e9fa --- /dev/null +++ b/services/libre_translator.py @@ -0,0 +1,27 @@ +import os +import requests +from .translation import Translator + + +class LibreTranslator: + def __init__(self) -> None: + self._url = os.getenv("LIBRE_TRANSLATE_URL", "https://libretranslate.com") + # LibreTranslate expects language codes like 'en', 'ja' + + def translate(self, text: str, target_lang: str) -> str: + target = target_lang.split("-")[0].lower() + # Auto-detect source; specify target + resp = requests.post( + f"{self._url.rstrip('/')}/translate", + json={"q": text, "source": "auto", "target": target, "format": "text"}, + timeout=15, + ) + resp.raise_for_status() + data = resp.json() + return data.get("translatedText", "") + + +def build() -> Translator: + return LibreTranslator() + + diff --git a/services/noop_tts.py b/services/noop_tts.py new file mode 100644 index 0000000..f414fd4 --- /dev/null +++ b/services/noop_tts.py @@ -0,0 +1,16 @@ +from .tts import TextToSpeech + + +class NoOpTTS: + def synthesize_ssml(self, ssml: str, outfile: str, voice: str | None = None) -> None: + # Write a tiny placeholder MP3 file so audio widgets don't break. + # This is only a minimal ID3 tag header, not actual audio frames. 
+ silent_bytes = bytes([0x49, 0x44, 0x33, 0x03, 0x00, 0x00]) # minimal ID3 header + with open(outfile, "wb") as f: + f.write(silent_bytes) + + +def build() -> TextToSpeech: + return NoOpTTS() + + diff --git a/services/openai_translator.py b/services/openai_translator.py new file mode 100644 index 0000000..e9e3d5c --- /dev/null +++ b/services/openai_translator.py @@ -0,0 +1,30 @@ +import os +from openai import OpenAI +from .translation import Translator + + +class OpenAITranslator: + def __init__(self) -> None: + # OPENAI_API_KEY must be set in env for OpenAI SDK + self._client = OpenAI() + self._model = os.getenv("OPENAI_TRANSLATION_MODEL", "gpt-4o-mini") + + def translate(self, text: str, target_lang: str) -> str: + # Normalize target like EN-US -> English (United States) simple prompt + lang = target_lang + prompt = ( + "You are a high-quality translator. Translate the following text " + f"into {lang}. Return only the translated text without quotes or extra commentary.\n\n" + f"Text: {text}" + ) + resp = self._client.responses.create( + model=self._model, + input=prompt, + ) + return (resp.output_text or "").strip() + + +def build() -> Translator: + return OpenAITranslator() + + diff --git a/services/openai_tts.py b/services/openai_tts.py new file mode 100644 index 0000000..3ad2a4b --- /dev/null +++ b/services/openai_tts.py @@ -0,0 +1,25 @@ +import os +from openai import OpenAI +from .tts import TextToSpeech + + +class OpenAITTS: + def __init__(self) -> None: + self._client = OpenAI() + self._model = os.getenv("OPENAI_TTS_MODEL", "gpt-4o-mini-tts") + + def synthesize_ssml(self, ssml: str, outfile: str, voice: str | None = None) -> None: + v = voice or os.getenv("OPENAI_TTS_VOICE", "alloy") + # Use streaming response helper to write MP3 directly + with self._client.audio.speech.with_streaming_response.create( + model=self._model, + voice=v, + input=ssml, + ) as response: + response.stream_to_file(outfile) + + +def build() -> TextToSpeech: + return OpenAITTS() + + 
diff --git a/services/polly_tts.py b/services/polly_tts.py new file mode 100644 index 0000000..e5c8f6b --- /dev/null +++ b/services/polly_tts.py @@ -0,0 +1,24 @@ +import os +from contextlib import closing +from pathlib import Path +import boto3 +from .tts import TextToSpeech + + +class PollyTTS: + def __init__(self) -> None: + self._client = boto3.client("polly") + + def synthesize_ssml(self, ssml: str, outfile: str, voice: str | None = None) -> None: + voice_id = voice or os.getenv("TTS_VOICE", "Mizuki") + result = self._client.synthesize_speech( + Text=ssml, OutputFormat="mp3", TextType="ssml", VoiceId=voice_id + ) + with closing(result["AudioStream"]) as stream: + Path(outfile).write_bytes(stream.read()) + + +def build() -> TextToSpeech: + return PollyTTS() + + diff --git a/services/translation.py b/services/translation.py new file mode 100644 index 0000000..f794b62 --- /dev/null +++ b/services/translation.py @@ -0,0 +1,8 @@ +from typing import Protocol + + +class Translator(Protocol): + def translate(self, text: str, target_lang: str) -> str: # pragma: no cover - protocol + ... + + diff --git a/services/tts.py b/services/tts.py new file mode 100644 index 0000000..33b657d --- /dev/null +++ b/services/tts.py @@ -0,0 +1,8 @@ +from typing import Protocol, Optional + + +class TextToSpeech(Protocol): + def synthesize_ssml(self, ssml: str, outfile: str, voice: Optional[str] = None) -> None: # pragma: no cover - protocol + ... + +