Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
# learn-jp-with-python

* Sample script for "Learn Japanese🇯🇵 with Python🐍"
* Set up your environment variable for AWS Access Key and DeepL API Key
* [Managing access keys for IAM users - AWS Identity and Access Management](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html)
* [API Key for DeepL's API – DeepL Help Center | How Can We Help You?](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API)
* OpenAI works by default. Just set `OPENAI_API_KEY` in a `.env` file and run.
* Dependencies are provider-agnostic via IoC (Inversion of Control). You can switch providers without code changes.
* Optional: Set up credentials if you want DeepL/AWS Polly
* [Managing access keys for IAM users - AWS IAM](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html)
* [API Key for DeepL's API – DeepL Help Center](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API)

Use a `.env` file (recommended):

```bash
$ cp export.sh.sample export.sh
$ vi export.sh
$ source export.sh
$ cp .env.example .env
$ vi .env # fill in OPENAI_API_KEY (and others if desired)
```

* Run sample script
Expand All @@ -19,3 +22,37 @@ $ . env/bin/activate
(env) pip install -r requirements.txt
(env) streamlit run learn_jp.py
```

## Providers

OpenAI is the default provider. Configure via `.env` (defaults shown):

```bash
# Translation: openai | deepl | libre | none
TRANSLATOR_PROVIDER=openai
# OpenAI Translation
OPENAI_API_KEY=...
OPENAI_TRANSLATION_MODEL=gpt-4o-mini
# LibreTranslate endpoint (used when TRANSLATOR_PROVIDER=libre)
LIBRE_TRANSLATE_URL=https://libretranslate.com

# Text-to-speech: openai | polly | noop
TTS_PROVIDER=openai
# OpenAI TTS
OPENAI_TTS_MODEL=gpt-4o-mini-tts
OPENAI_TTS_VOICE=alloy
# Polly voice id; only set this when TTS_PROVIDER=polly
# TTS_VOICE=Mizuki
```

### Notes
- DeepL and Amazon Polly providers have not been fully tested in this branch. If they don't work for you, switch back to OpenAI or revert to a previous commit where DeepL and Polly were the default providers for the streamlit app.


If you configure DeepL and Polly, set:

```bash
export DEEPL_API_KEY=...
export AWS_ACCESS_KEY_ID=...
export AWS_SECRET_ACCESS_KEY=...
export AWS_DEFAULT_REGION=ap-northeast-1
```
18 changes: 17 additions & 1 deletion export.sh.sample
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,20 @@ export AWS_ACCESS_KEY_ID=<your-access-key>
export AWS_SECRET_ACCESS_KEY=<your-secret-key>
export AWS_DEFAULT_REGION=<your-region>

export DEEPL_API_KEY=<your-api-key>
export DEEPL_API_KEY=<your-api-key>

# Provider selection (OpenAI is the default and needs OPENAI_API_KEY)
# TRANSLATOR_PROVIDER: openai | deepl | libre | none
export TRANSLATOR_PROVIDER=openai
# OpenAI Translation
export OPENAI_API_KEY=<your-openai-api-key>
export OPENAI_TRANSLATION_MODEL=gpt-4o-mini
export LIBRE_TRANSLATE_URL=https://libretranslate.com

# TTS_PROVIDER: openai | polly | noop
export TTS_PROVIDER=openai
# OpenAI TTS
export OPENAI_TTS_MODEL=gpt-4o-mini-tts
export OPENAI_TTS_VOICE=alloy
# Optional: Polly voice id (set only when TTS_PROVIDER=polly)
# export TTS_VOICE=Mizuki
Binary file added japanese.mp3
Binary file not shown.
28 changes: 13 additions & 15 deletions learn_jp.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import os
from contextlib import closing
from pathlib import Path

import boto3
import deepl
import jaconv
import streamlit as st
from sudachipy import Dictionary
import os
from pathlib import Path
from dotenv import load_dotenv, find_dotenv

from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv())


PART_OF_SPEECH = {
"名詞": "noun",
Expand All @@ -23,27 +26,22 @@
"感動詞": "interjection",
}

polly = boto3.client('polly')
from services.factory import get_providers

tokenizer = Dictionary().create()
translator_service, tts_service = get_providers()

translator = deepl.Translator(os.getenv("DEEPL_API_KEY"))
tokenizer = Dictionary().create()


def do_polly(text: str) -> None:
    """Synthesize *text* as slow speech and save it to ``japanese.mp3``.

    The text is wrapped in an SSML ``<prosody rate="slow">`` element and
    handed to the configured text-to-speech provider.

    Args:
        text: Plain text to speak. XML special characters are escaped so
            input such as ``&`` or ``<`` cannot produce malformed SSML.
    """
    from xml.sax.saxutils import escape

    ssml_text = f'<speak><prosody rate="slow">{escape(text)}</prosody></speak>'
    # No voice override is passed: TTS_VOICE holds a Polly voice id, and
    # forwarding it would hand that name to whichever backend is active.
    # Each provider falls back to its own voice setting instead.
    tts_service.synthesize_ssml(ssml_text, outfile="japanese.mp3")


def translate(text: str) -> str:
    """Translate *text* into American English using the configured provider.

    Args:
        text: Source text to translate.

    Returns:
        The translated text as returned by the active translator service.
    """
    # Delegate to the provider chosen at start-up rather than calling a
    # specific vendor SDK directly.
    return translator_service.translate(text, target_lang="EN-US")

st.title('Learn Japanese🇯🇵 with Python🐍')

Expand Down
9 changes: 4 additions & 5 deletions learn_jp_apac.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import streamlit as st
from sudachipy import Dictionary

polly = boto3.client("polly")
from services.factory import get_tts

tts_service = get_tts()
tokenizer = Dictionary().create()


Expand Down Expand Up @@ -81,10 +83,7 @@ def text_to_speech():
st.write(f"#### {romaji}", unsafe_allow_html=True)

ssml_text = f'<speak><prosody rate="slow">{text}</prosody></speak>'
result = polly.synthesize_speech(
Text=ssml_text, OutputFormat="mp3", TextType="ssml", VoiceId="Mizuki")
with open("japanese.mp3", "wb") as f:
f.write(result["AudioStream"].read())
tts_service.synthesize_ssml(ssml_text, outfile="japanese.mp3")
st.audio("japanese.mp3")


Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ jaconv==0.4.0
streamlit==1.49.1
SudachiDict-core==20250825
SudachiPy==0.6.10
openai>=1.50.0
python-dotenv>=1.0.1
21 changes: 21 additions & 0 deletions services/deepl_translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
import deepl
from .translation import Translator


class DeepLTranslator:
    """Translator implementation that delegates to the DeepL client."""

    def __init__(self) -> None:
        """Create the DeepL client from the ``DEEPL_API_KEY`` env var.

        Raises:
            RuntimeError: If ``DEEPL_API_KEY`` is unset or empty.
        """
        key = os.environ.get("DEEPL_API_KEY")
        if key is None or key == "":
            raise RuntimeError("DEEPL_API_KEY is not set")
        self._translator = deepl.Translator(key)

    def translate(self, text: str, target_lang: str) -> str:
        """Return *text* translated into *target_lang* by DeepL."""
        return self._translator.translate_text(text, target_lang=target_lang).text


def build() -> Translator:
    """Factory hook: return a ready-to-use DeepL translator."""
    translator = DeepLTranslator()
    return translator


60 changes: 60 additions & 0 deletions services/factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
from pathlib import Path
try:
    from dotenv import load_dotenv  # type: ignore
except ImportError:
    # python-dotenv is optional: without it, settings must come from the
    # process environment (e.g. exported by the shell).
    pass
else:
    # Load .env from the project root (the parent of services/).
    # override=False keeps variables that are already set in the environment.
    project_root_env = Path(__file__).resolve().parent.parent / ".env"
    print(f"Loading .env from: {project_root_env}")
    load_dotenv(dotenv_path=project_root_env, override=False)
from typing import Tuple

from .translation import Translator
from .tts import TextToSpeech


def get_translator() -> Translator:
    """Build the translator selected by the ``TRANSLATOR_PROVIDER`` env var.

    The value is compared case-insensitively and defaults to ``"openai"``
    when the variable is unset. ``"deepl"``, ``"openai"`` and ``"none"`` are
    recognised explicitly; every other value falls back to LibreTranslate.

    Returns:
        A translator instance for the selected backend.
    """
    choice = os.getenv("TRANSLATOR_PROVIDER", "openai").lower()

    if choice == "none":
        return _NoOpTranslator()

    # Backend modules are imported lazily so that only the selected one is
    # loaded.
    if choice == "deepl":
        from .deepl_translator import build
    elif choice == "openai":
        from .openai_translator import build
    else:
        # Fallback for "libre" and for any unrecognised value.
        from .libre_translator import build

    return build()


def get_tts() -> TextToSpeech:
    """Build the text-to-speech backend selected by ``TTS_PROVIDER``.

    The value is compared case-insensitively and defaults to ``"openai"``
    when the variable is unset. ``"polly"`` and ``"openai"`` are recognised
    explicitly; every other value falls back to the no-op backend.

    Returns:
        A text-to-speech instance for the selected backend.
    """
    choice = os.getenv("TTS_PROVIDER", "openai").lower()

    # Backend modules are imported lazily so that only the selected one is
    # loaded.
    if choice == "polly":
        from .polly_tts import build
    elif choice == "openai":
        from .openai_tts import build
    else:
        # Fallback for "noop" and for any unrecognised value.
        from .noop_tts import build

    return build()


def get_providers() -> Tuple[Translator, TextToSpeech]:
    """Return the configured ``(translator, text-to-speech)`` pair.

    The translator is built first, then the text-to-speech backend.
    """
    translator = get_translator()
    speech = get_tts()
    return translator, speech


class _NoOpTranslator:
def translate(self, text: str, target_lang: str) -> str:
return text


27 changes: 27 additions & 0 deletions services/libre_translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import os
import requests
from .translation import Translator


class LibreTranslator:
    """Translator that posts requests to a LibreTranslate server."""

    def __init__(self) -> None:
        """Read the server base URL from ``LIBRE_TRANSLATE_URL``."""
        # Uses https://libretranslate.com when the variable is unset.
        self._url = os.environ.get("LIBRE_TRANSLATE_URL", "https://libretranslate.com")

    def translate(self, text: str, target_lang: str) -> str:
        """Translate *text* into *target_lang* with source auto-detection.

        Args:
            text: Text to translate.
            target_lang: Target language code; a region suffix such as the
                ``-US`` in ``EN-US`` is dropped.

        Returns:
            The ``translatedText`` field of the JSON response, or ``""`` if
            the field is absent.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # LibreTranslate expects language codes like 'en', 'ja'.
        language, _, _ = target_lang.partition("-")
        endpoint = self._url.rstrip("/") + "/translate"
        payload = {
            "q": text,
            "source": "auto",
            "target": language.lower(),
            "format": "text",
        }
        response = requests.post(endpoint, json=payload, timeout=15)
        response.raise_for_status()
        return response.json().get("translatedText", "")


def build() -> Translator:
    """Factory hook: return a ready-to-use LibreTranslate translator."""
    translator = LibreTranslator()
    return translator


16 changes: 16 additions & 0 deletions services/noop_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from .tts import TextToSpeech


class NoOpTTS:
def synthesize_ssml(self, ssml: str, outfile: str, voice: str | None = None) -> None:
# Write a tiny silent MP3 header so audio widgets don't break.
# This is a 1-second silent MP3 frame sequence (very small placeholder).
silent_bytes = bytes([0x49, 0x44, 0x33, 0x03, 0x00, 0x00]) # minimal ID3 header
with open(outfile, "wb") as f:
f.write(silent_bytes)


def build() -> TextToSpeech:
    """Factory hook: return the no-op text-to-speech backend."""
    speech = NoOpTTS()
    return speech


30 changes: 30 additions & 0 deletions services/openai_translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
from openai import OpenAI
from .translation import Translator


class OpenAITranslator:
    """Translator that prompts an OpenAI model through the Responses API."""

    def __init__(self) -> None:
        """Create the OpenAI client and read the model name.

        The model comes from ``OPENAI_TRANSLATION_MODEL`` and defaults to
        ``gpt-4o-mini``.
        """
        # OPENAI_API_KEY must be set in env for OpenAI SDK
        self._client = OpenAI()
        self._model = os.getenv("OPENAI_TRANSLATION_MODEL", "gpt-4o-mini")

    def translate(self, text: str, target_lang: str) -> str:
        """Translate *text* into *target_lang*.

        Args:
            text: Text to translate.
            target_lang: Target language code (e.g. ``EN-US``). It is placed
                in the prompt verbatim; no normalisation is applied.

        Returns:
            The model's output with surrounding whitespace removed, or ``""``
            when *text* is empty or whitespace-only.
        """
        # Nothing to translate: skip the API call instead of asking the model
        # to translate an empty string and returning whatever it replies.
        if not text.strip():
            return ""

        prompt = (
            "You are a high-quality translator. Translate the following text "
            f"into {target_lang}. Return only the translated text without quotes or extra commentary.\n\n"
            f"Text: {text}"
        )
        resp = self._client.responses.create(
            model=self._model,
            input=prompt,
        )
        return (resp.output_text or "").strip()


def build() -> Translator:
    """Factory hook: return a ready-to-use OpenAI translator."""
    translator = OpenAITranslator()
    return translator


25 changes: 25 additions & 0 deletions services/openai_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
from openai import OpenAI
from .tts import TextToSpeech


class OpenAITTS:
def __init__(self) -> None:
self._client = OpenAI()
self._model = os.getenv("OPENAI_TTS_MODEL", "gpt-4o-mini-tts")

def synthesize_ssml(self, ssml: str, outfile: str, voice: str | None = None) -> None:
v = voice or os.getenv("OPENAI_TTS_VOICE", "alloy")
# Use streaming response helper to write MP3 directly
with self._client.audio.speech.with_streaming_response.create(
model=self._model,
voice=v,
input=ssml,
) as response:
response.stream_to_file(outfile)


def build() -> TextToSpeech:
    """Factory hook: return a ready-to-use OpenAI text-to-speech backend."""
    speech = OpenAITTS()
    return speech


24 changes: 24 additions & 0 deletions services/polly_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os
from contextlib import closing
from pathlib import Path
import boto3
from .tts import TextToSpeech


class PollyTTS:
def __init__(self) -> None:
self._client = boto3.client("polly")

def synthesize_ssml(self, ssml: str, outfile: str, voice: str | None = None) -> None:
voice_id = voice or os.getenv("TTS_VOICE", "Mizuki")
result = self._client.synthesize_speech(
Text=ssml, OutputFormat="mp3", TextType="ssml", VoiceId=voice_id
)
with closing(result["AudioStream"]) as stream:
Path(outfile).write_bytes(stream.read())


def build() -> TextToSpeech:
    """Factory hook: return a ready-to-use Amazon Polly backend."""
    speech = PollyTTS()
    return speech


8 changes: 8 additions & 0 deletions services/translation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import Protocol


class Translator(Protocol):
    """Structural interface that every translation backend satisfies."""

    def translate(  # pragma: no cover - protocol
        self,
        text: str,
        target_lang: str,
    ) -> str:
        """Return *text* translated into the language named by *target_lang*."""
        ...


8 changes: 8 additions & 0 deletions services/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import Protocol, Optional


class TextToSpeech(Protocol):
    """Structural interface that every text-to-speech backend satisfies."""

    def synthesize_ssml(  # pragma: no cover - protocol
        self,
        ssml: str,
        outfile: str,
        voice: Optional[str] = None,
    ) -> None:
        """Synthesize *ssml* and write the audio to *outfile*.

        *voice* optionally names the voice to use.
        """
        ...