Skip to content

Optionally persist to a dataset #29

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

## Features
- **API compatible** with `wandb.init`, `wandb.log`, and `wandb.finish` (drop-in replacement: just use `trackio` instead of `wandb`)
- Store logs in a Hugging Face Datasets-compatible format (Parquet)
- Persists logs in a private Hugging Face Dataset
- Visualize experiments with a Gradio dashboard
- *Local-first* design: dashboard runs locally by default. You can also host it on Spaces by specifying a `space_id` parameter in `init`.
- Everything here, including hosting on Spaces, is **free**!

Trackio is designed to be lightweight (<500 lines of code total), not fully-featured. It is designed in a modular way so that developers can easily fork the repository and add functionality that they care about.
Trackio is designed to be lightweight (<1000 lines of Python code total), not fully-featured. It is designed in an extensible way and written entirely in Python so that developers can easily fork the repository and add functionality that they care about.


## Installation
Expand Down
56 changes: 56 additions & 0 deletions examples/persist-dataset-on-spaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Fake training loop demonstrating trackio with dataset persistence on Spaces.

Logs randomly generated train/val metrics to a trackio dashboard hosted on a
Hugging Face Space, with metrics synced to a persistent Hugging Face Dataset.
"""

import random

from tqdm import tqdm

import trackio as wandb

# Single source of truth for hyperparameters. Previously the epoch count was
# hard-coded twice (once in ``config`` and once as ``EPOCHS``), which could
# silently drift apart.
EPOCHS = 5
LEARNING_RATE = 0.001
BATCH_SIZE = 32
NUM_TRAIN_BATCHES = 100
NUM_VAL_BATCHES = 20

wandb.init(
    project="fake-training",
    name="test-run",
    config=dict(
        epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        batch_size=BATCH_SIZE,
    ),
    dataset_id="abidlabs/metrics-123",
    space_id="abidlabs/trackio-123",
)

for epoch in range(EPOCHS):
    # Accumulate per-batch metrics, then log the per-epoch averages.
    train_loss = 0.0
    train_accuracy = 0.0
    val_loss = 0.0
    val_accuracy = 0.0

    for _ in tqdm(range(NUM_TRAIN_BATCHES), desc=f"Epoch {epoch + 1} - Training"):
        train_loss += random.uniform(0.2, 1.0)
        train_accuracy += random.uniform(0.6, 0.95)

    for _ in tqdm(range(NUM_VAL_BATCHES), desc=f"Epoch {epoch + 1} - Validation"):
        val_loss += random.uniform(0.2, 0.9)
        val_accuracy += random.uniform(0.65, 0.98)

    wandb.log(
        {
            "epoch": epoch + 1,
            "train_loss": train_loss / NUM_TRAIN_BATCHES,
            "train_accuracy": train_accuracy / NUM_TRAIN_BATCHES,
            "val_loss": val_loss / NUM_VAL_BATCHES,
            "val_accuracy": val_accuracy / NUM_VAL_BATCHES,
        }
    )

wandb.finish()
55 changes: 55 additions & 0 deletions examples/persist-dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Fake training loop demonstrating trackio with local dataset persistence.

Logs randomly generated train/val metrics to a local trackio dashboard while
syncing metrics to a persistent Hugging Face Dataset (``dataset_id``).
"""

import random

from tqdm import tqdm

import trackio as wandb

# Single source of truth for hyperparameters. Previously the epoch count was
# hard-coded twice (once in ``config`` and once as ``EPOCHS``), which could
# silently drift apart.
EPOCHS = 5
LEARNING_RATE = 0.001
BATCH_SIZE = 32
NUM_TRAIN_BATCHES = 100
NUM_VAL_BATCHES = 20

wandb.init(
    project="fake-training",
    name="test-run",
    config=dict(
        epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        batch_size=BATCH_SIZE,
    ),
    dataset_id="abidlabs/metrics",
)

for epoch in range(EPOCHS):
    # Accumulate per-batch metrics, then log the per-epoch averages.
    train_loss = 0.0
    train_accuracy = 0.0
    val_loss = 0.0
    val_accuracy = 0.0

    for _ in tqdm(range(NUM_TRAIN_BATCHES), desc=f"Epoch {epoch + 1} - Training"):
        train_loss += random.uniform(0.2, 1.0)
        train_accuracy += random.uniform(0.6, 0.95)

    for _ in tqdm(range(NUM_VAL_BATCHES), desc=f"Epoch {epoch + 1} - Validation"):
        val_loss += random.uniform(0.2, 0.9)
        val_accuracy += random.uniform(0.65, 0.98)

    wandb.log(
        {
            "epoch": epoch + 1,
            "train_loss": train_loss / NUM_TRAIN_BATCHES,
            "train_accuracy": train_accuracy / NUM_TRAIN_BATCHES,
            "val_loss": val_loss / NUM_VAL_BATCHES,
            "val_accuracy": val_accuracy / NUM_VAL_BATCHES,
        }
    )

wandb.finish()
Binary file modified tests/__pycache__/test_run.cpython-312-pytest-8.3.4.pyc
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ def test_run_log_calls_client():
metrics = {"x": 1}
run.log(metrics)
client.predict.assert_called_once_with(
api_name="/log", project="proj", run="run1", metrics=metrics
api_name="/log", project="proj", run="run1", metrics=metrics, dataset_id=None
)
20 changes: 16 additions & 4 deletions trackio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def init(
project: str,
name: str | None = None,
space_id: str | None = None,
dataset_id: str | None = None,
config: dict | None = None,
) -> Run:
"""
Expand All @@ -43,6 +44,7 @@ def init(
project: The name of the project (can be an existing project to continue tracking or a new project to start tracking from scratch).
name: The name of the run (if not provided, a default name will be generated).
space_id: If provided, the project will be logged to a Hugging Face Space instead of a local directory. Should be a complete Space name like "username/reponame". If the Space does not exist, it will be created. If the Space already exists, the project will be logged to it.
dataset_id: If provided, a persistent Hugging Face Dataset will be created and the metrics will be synced to it every 5 minutes. Should be a complete Dataset name like "username/datasetname". If the Dataset does not exist, it will be created. If the Dataset already exists, the project will be appended to it.
config: A dictionary of configuration options. Provided for compatibility with wandb.init()
"""
if not current_server.get() and space_id is None:
Expand All @@ -63,31 +65,41 @@ def init(
)
print(f'* or by running in Python: trackio.show(project="{project}")')
else:
create_space_if_not_exists(space_id)
create_space_if_not_exists(space_id, dataset_id)
print(
f"* View dashboard by going to: {SPACE_URL.format(space_id=space_id)}"
)
current_project.set(project)

space_or_url = space_id if space_id else url
client = Client(space_or_url, verbose=False)
run = Run(project=project, client=client, name=name, config=config)
run = Run(
project=project, client=client, name=name, config=config, dataset_id=dataset_id
)
current_run.set(run)
globals()["config"] = run.config
return run


def create_space_if_not_exists(space_id: str) -> None:
def create_space_if_not_exists(
space_id: str,
dataset_id: str | None = None,
) -> None:
"""
Creates a new Hugging Face Space if it does not exist.

Args:
space_id: The ID of the Space to create.
dataset_id: The ID of the Dataset to create.
"""
if "/" not in space_id:
raise ValueError(
f"Invalid space ID: {space_id}. Must be in the format: username/reponame."
)
if dataset_id is not None and "/" not in dataset_id:
raise ValueError(
f"Invalid dataset ID: {dataset_id}. Must be in the format: username/datasetname."
)
try:
huggingface_hub.repo_info(space_id, repo_type="space")
print(f"* Found existing space: {SPACE_URL.format(space_id=space_id)}")
Expand All @@ -96,7 +108,7 @@ def create_space_if_not_exists(space_id: str) -> None:
pass

print(f"* Creating new space: {SPACE_URL.format(space_id=space_id)}")
deploy_as_space(space_id)
deploy_as_space(space_id, dataset_id)

client = None
for _ in range(30):
Expand Down
14 changes: 13 additions & 1 deletion trackio/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import huggingface_hub


def deploy_as_space(title: str):
def deploy_as_space(
title: str,
dataset_id: str | None = None,
):
if (
os.getenv("SYSTEM") == "spaces"
): # in case a repo with this function is uploaded to spaces
Expand Down Expand Up @@ -55,3 +58,12 @@ def deploy_as_space(title: str):
folder_path=trackio_path,
ignore_patterns=["README.md"],
)

hf_token = huggingface_hub.utils.get_token()
if hf_token is not None:
huggingface_hub.add_space_secret(space_id, "HF_TOKEN", hf_token)
if dataset_id is not None:
huggingface_hub.add_space_variable(space_id, "TRACKIO_DATASET_ID", dataset_id)
# So that the dataset id is available to the sqlite_storage.py file
# if running locally as well.
os.environ["TRACKIO_DATASET_ID"] = dataset_id
12 changes: 12 additions & 0 deletions trackio/dummy_commit_scheduler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Stand-ins that mimic huggingface_hub's CommitScheduler interface when no
# dataset persistence is configured, so callers can use the same code path.
class DummyCommitSchedulerLock:
    """No-op context manager matching the ``lock`` attribute of a real
    CommitScheduler."""

    def __enter__(self):
        # Nothing to acquire; callers only need a usable ``with`` target.
        return None

    def __exit__(self, exception_type, exception_value, exception_traceback):
        # Implicitly returns None (falsy), so exceptions propagate as with a
        # real lock.
        pass


class DummyCommitScheduler:
    """Drop-in replacement for CommitScheduler that never commits anything."""

    def __init__(self):
        # Expose the same ``lock`` attribute the real scheduler provides.
        self.lock = DummyCommitSchedulerLock()
8 changes: 7 additions & 1 deletion trackio/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,21 @@ def __init__(
client: Client,
name: str | None = None,
config: dict | None = None,
dataset_id: str | None = None,
):
self.project = project
self.client = client
self.name = name or generate_readable_name()
self.config = config or {}
self.dataset_id = dataset_id

def log(self, metrics: dict):
    """Send one batch of metrics to the dashboard's ``/log`` endpoint.

    The run's ``dataset_id`` (possibly None) is forwarded so the server can
    sync metrics to a Hugging Face Dataset when one is configured.
    """
    payload = dict(
        api_name="/log",
        project=self.project,
        run=self.name,
        metrics=metrics,
        dataset_id=self.dataset_id,
    )
    self.client.predict(**payload)

def finish(self):
Expand Down
Loading