Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@ AZURE_WORKSPACE=
# Docker parameters
DOCKER_REGISTRY=
DOCKER_TAG=
## Gitlab tokens
DOCKER_USERNAME=
DOCKER_PASSWORD=
DOCKER_PASSWORD=
2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ jobs:
- uses: pdm-project/[email protected]

- name: Publish package distributions to PyPI
run:
run:
pdm publish
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,21 @@ It aims to:

Simply run
```bash
pip install ezazml
pip install ez-azml
```
# Quickstart
The main entrypoint is the CLI command. The following command will show you the help
```
ez-azml -h
```

You can run an example with
If you have cloned the repo, you can run one of the following examples:
## Command
```
ez-azml --config configs/pytorch/main.yaml run
ez-azml --config configs/command/main.yaml run
```
## Pipeline
## PipelineRun
```
ez-azml --config configs/pipeline/main.yaml run
```
PYTHONPATH=$PYTHONPATH:. ez-azml --config configs/pipeline/main.yaml run
```
(`PYTHONPATH` is extended with the current directory so that the `examples` package can be imported. This is not needed if your pipeline is available as an importable function in an installed package.)
6 changes: 3 additions & 3 deletions configs/pytorch/main.yaml → configs/command/main.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cloud_run:
class_path: ez_azml.cloud_runs.DockerCommand
class_path: ez_azml.cloud_runs.DockerCommandRun
init_args:
docker: docker.yaml
docker: ../dockers/pytorch.yaml
code: examples
compute: ../clusters/cpu.yaml
inputs:
Expand All @@ -20,7 +20,7 @@ cloud_run:
mode: rw_mount
commands: [
echo 'This is an example',
python pytorch.py
python pytorch_script.py
]
flags: [
"--learning_rate 1e-6",
Expand Down
File renamed without changes.
7 changes: 1 addition & 6 deletions configs/environments/conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,5 @@ name: default_environment
channels:
- defaults
dependencies:
- python=3.8.12
- python=3.9.12
- pip=21.2.2
- pip:
- mldesigner==0.1.0b17
- azure-ai-ml==1.18.0
- azureml-mlflow==1.56.0
- ez-azml==0.1.0
55 changes: 51 additions & 4 deletions configs/pipeline/main.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,59 @@
cloud_run:
class_path: ez_azml.cloud_runs.Pipeline
class_path: ez_azml.cloud_runs.PipelineRun
init_args:
experiment_name: "pipelines example"
commands:
- function: examples.pipelines.test_fn
environment: ../environments/pipeline.yaml
torch:
class_path: ez_azml.cloud_runs.DockerCommandRun
init_args:
docker: ../dockers/pytorch.yaml
code: examples/pytorch_script.py
compute: ../clusters/cpu.yaml
inputs:
data_path:
class_path: azure.ai.ml.Input
init_args:
type: uri_folder
path: azureml://datastores/workspaceblobstore/paths/data
mode: ro_mount
outputs:
output_path:
class_path: azure.ai.ml.Output
init_args:
type: uri_folder
path: azureml://datastores/workspaceblobstore/paths/outputs
mode: rw_mount
commands: [
echo 'This is an example',
python pytorch_script.py
]
flags: [
"--learning_rate 1e-6",
"--output_path ${{outputs.output_path}}"
#"The input is ignored, could be used as ${{inputs.data_path}} through flags"
]
register_kwargs:
version: 2
command:
class_path: ez_azml.cloud_runs.CommandRun
init_args:
name: print_output
compute: ../clusters/cpu.yaml
code: examples/print_output.py
environment: ../environments/pipeline.yaml
inputs:
prev_output:
class_path: azure.ai.ml.Input
init_args:
type: uri_folder
commands: [
echo 'This is an print example',
"python print_output.py --path ${{inputs.prev_output}}"
]
register_kwargs:
version: 2

pipeline: examples.pipelines.test_pipeline
pipeline: examples.pipeline.ez_azml_pipeline
compute: ../clusters/cpu_raw.yaml
inputs:
test_input:
Expand Down
6 changes: 6 additions & 0 deletions examples/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def ez_azml_pipeline(test_input):
    """Example pipeline: run the training component, then print its output.

    Args:
        test_input: pipeline input, forwarded to the first step as ``data_path``.

    Returns:
        A dict exposing the first step's ``output_path`` as ``test_output``.
    """
    # `pytorch_script` and `print_output` are the names of registered
    # components. They are neither defined nor imported in this module, hence
    # the type/lint suppressions (presumably made available by the pipeline
    # runner before this function is called -- confirm).
    training_step = pytorch_script(data_path=test_input)  # type: ignore # noqa F821
    print_output(prev_output=training_step.outputs.output_path)  # type: ignore # noqa F821
    return {"test_output": training_step.outputs.output_path}
16 changes: 0 additions & 16 deletions examples/pipelines.py

This file was deleted.

11 changes: 11 additions & 0 deletions examples/print_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import argparse
from pathlib import Path


def print_text_files(path: Path) -> None:
    """Print the content of every ``*.txt`` file located directly in ``path``.

    Args:
        path: directory to scan (not recursive).
    """
    # `Path.glob` already yields paths that include `path` itself, so each
    # match is read as-is. Joining it onto `path` again would duplicate the
    # directory prefix whenever `path` is relative (e.g. the default).
    for file in path.glob("*.txt"):
        print(file.read_text())


def main(argv=None) -> None:
    """Parse the command line and print the text files found in ``--path``.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    # Argument parsing
    parser = argparse.ArgumentParser(description="Print input")
    parser.add_argument("--path", type=Path, default="outputs", help="output to read")
    args = parser.parse_args(argv)
    print_text_files(args.path)


if __name__ == "__main__":
    main()
File renamed without changes.
7 changes: 7 additions & 0 deletions src/ez_azml/cli/ez_azml.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,10 @@ def run(self):
output = self.cloud_run.run()
logger.info(f"Run available at {output.url}")
self.cloud_run.on_run_end(output)

def register(self):
    """Register the configured cloud run as a reusable component.

    Calls the run's ``on_register_start`` hook, its ``register`` method and
    its ``on_register_end`` hook, in that order, then logs a confirmation.
    """
    cloud_run = self.cloud_run
    cloud_run.on_register_start()
    cloud_run.register()
    cloud_run.on_register_end()
    logger.info("Cloud run registered!")
6 changes: 3 additions & 3 deletions src/ez_azml/cloud_runs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .commands import Command, DockerCommand
from .pipelines import Pipeline
from .commands import CommandRun, DockerCommandRun
from .pipelines import PipelineRun

__all__ = ["Command", "DockerCommand", "Pipeline"]
__all__ = ["CommandRun", "DockerCommandRun", "PipelineRun"]
12 changes: 12 additions & 0 deletions src/ez_azml/cloud_runs/cloud_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,15 @@ def run(self) -> RunOutput:
def on_run_end(self, output: RunOutput):
    """Hook invoked once the run has been submitted; does nothing here.

    Args:
        output: output of the submitted run.
    """
    return None

def on_register_start(self):
    """Hook invoked before the component is registered; does nothing here."""
    return None

@abstractmethod
def register(self):
    """Registers the run as a reusable component.

    Marked abstract and without a body here: subclasses are expected to
    provide the actual registration logic.
    """

def on_register_end(self):
    """Hook invoked after the component has been registered; does nothing here."""
    return None
6 changes: 3 additions & 3 deletions src/ez_azml/cloud_runs/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .command import Command
from .docker_command import DockerCommand
from .command import CommandRun
from .docker_command import DockerCommandRun

__all__ = ["DockerCommand", "Command"]
__all__ = ["DockerCommandRun", "CommandRun"]
97 changes: 80 additions & 17 deletions src/ez_azml/cloud_runs/commands/command.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,117 @@
from typing import Optional
import io
from pathlib import Path
from typing import Any, Optional, Union

from azure.ai.ml import command
from azure.ai.ml.entities import (
UserIdentityConfiguration,
WorkspaceConnection,
)
import yaml
from azure.ai.ml import Input, MLClient, Output, command, load_component
from azure.ai.ml.entities import Command, UserIdentityConfiguration, WorkspaceConnection
from typing_extensions import override

from ez_azml.cloud_runs.cloud_run import CloudRun, RunOutput


class Command(CloudRun):
class CommandRun(CloudRun):
"""Cloud run that is based on AzureML Commands.

Args:
code: location of the scripts to use
code: location of the python scripts to use
commands: commands to run on the cloud (e.g. `python my_script.py`)
flags: flags to use with the last command.
identity: credentials to use.
name: command's name.
register_kwargs: kwargs to use when registering component.
"""

def __init__(
self,
code: str,
commands: list[str],
code: Union[str, Path],
commands: Union[str, list[str]],
ws_connection: Optional[WorkspaceConnection] = None,
flags: Optional[list[str]] = None,
identity: Optional[UserIdentityConfiguration] = None,
name: Optional[str] = None,
register_kwargs: Optional[dict[str, Any]] = None,
**kwargs,
) -> None:
super().__init__(**kwargs)
self.ws_connection = ws_connection
identity = identity or UserIdentityConfiguration()
self.identity = identity or UserIdentityConfiguration()
if isinstance(commands, str):
commands = [commands]
if flags:
commands[-1] += " " + " ".join(flags)
self.job = command(
command=";".join(commands),
code=code,
self.commands = commands
self.code = Path(code)
self.name = name or self.code.stem
self.register_kwargs = register_kwargs or {}

@property
def cli_command(self) -> str:
    """Single command line run on AzureML: every command joined with ``;``."""
    separator = ";"
    return separator.join(self.commands)

@property
def command(self) -> Command:
"""Runnable command."""
return command(
command=self.cli_command,
code=self.code,
environment=self.environment,
compute=self.compute.name,
inputs=self.inputs,
outputs=self.outputs,
identity=identity,
identity=self.identity,
)

def _get_io_dict(
self, ios: dict[str, Union[Input, Output]], keys: Optional[list[str]] = None
):
ios_as_dict = {}
keys = keys or ["type"]
for key, io_obj in ios.items():
io_d = dict(io_obj)
ios_as_dict[key] = {k: io_d[k] for k in keys if io_d[k] is not None}
return ios_as_dict

def _get_component_yaml_stream(
    self, name: Optional[str] = None, environment: Optional[str] = None, **kwargs
) -> io.StringIO:
    """Build the component definition as an in-memory YAML document.

    Args:
        name: component name; defaults to ``self.name``.
        environment: environment reference to write in the YAML. When
            omitted, ``self.environment`` is created/updated in the workspace
            and referenced as ``azureml:<name>:<version>``.
        **kwargs: extra top-level YAML entries; on key collision they
            override the generated ones.

    Returns:
        A ``StringIO`` positioned at the start of the dumped YAML.
    """
    if not environment:
        # Reference the environment version that was actually registered:
        # when no version is set locally the service may assign one that is
        # not 1. Fall back to the previous behaviour if none is reported.
        registered = self.ml_client.environments.create_or_update(self.environment)
        version = (
            getattr(registered, "version", None) or self.environment.version or 1
        )
        environment = f"azureml:{self.environment.name}:{version}"
    yaml_dict = {
        "name": name or self.name,
        "inputs": self._get_io_dict(self.inputs),
        "outputs": self._get_io_dict(self.outputs),
        "code": str(self.code),
        "command": self.cli_command,
        "environment": environment,
        **kwargs,
    }
    yaml_stream = io.StringIO()
    yaml.dump(yaml_dict, yaml_stream)
    yaml_stream.seek(0)  # Rewind so that readers start from the beginning
    return yaml_stream

def get_component(self, **kwargs):
    """Return the component loaded from the generated YAML definition.

    Args:
        **kwargs: options forwarded to ``_get_component_yaml_stream``; on
            duplicate keys they take precedence over ``self.register_kwargs``.
    """
    # Merge first: unpacking both mappings directly in the call raises
    # TypeError as soon as the same key is present in both.
    options = {**self.register_kwargs, **kwargs}
    yaml_file = self._get_component_yaml_stream(**options)
    return load_component(yaml_file)

@override
def register(self, ml_client: Optional[MLClient] = None, **kwargs):
    """Create or update this run's component in the workspace.

    Args:
        ml_client: client to register with; falls back to ``self.ml_client``.
        **kwargs: forwarded to ``get_component``.

    Returns:
        The component built by ``get_component``.
    """
    client = ml_client or self.ml_client
    built_component = self.get_component(**kwargs)
    client.components.create_or_update(built_component)
    return built_component

@override
def run(self) -> RunOutput:
    """Submit the command to AzureML.

    Creates or updates the workspace connection (only when one is set), the
    environment and the compute target, then submits ``self.command``.

    Returns:
        A ``RunOutput`` holding the submitted job's studio URL.
    """
    if self.ws_connection:
        self.ml_client.connections.create_or_update(self.ws_connection)
    self.ml_client.environments.create_or_update(self.environment)
    # `.result()` waits for the compute create/update operation to finish
    # before the job is submitted.
    self.ml_client.begin_create_or_update(self.compute).result()
    cloud_job = self.ml_client.create_or_update(self.command)
    return RunOutput(url=cloud_job.studio_url)
4 changes: 2 additions & 2 deletions src/ez_azml/cloud_runs/commands/docker_command.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from ez_azml.entities import DockerEnvironment, DockerWorkspaceConnection
from ez_azml.params import DockerParams

from .command import Command
from .command import CommandRun


class DockerCommand(Command):
class DockerCommandRun(CommandRun):
"""A Command that uses a docker image.

Args:
Expand Down
4 changes: 2 additions & 2 deletions src/ez_azml/cloud_runs/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .pipeline import Pipeline, PipelineCommand
from .pipeline import PipelineRun

__all__ = ["Pipeline", "PipelineCommand"]
__all__ = ["PipelineRun"]
Loading