google-deepmind · copybara-service · May 7, 2025 · May 7, 2025
diff --git a/.github/workflows/core_test.yml b/.github/workflows/core_test.yml
@@ -35,4 +35,4 @@ jobs:
           pip freeze
       - name: Test with pytest  # TODO(team): Fix tensorflow version conflict.
         run: |
-          # pytest -n auto iris
+          # pytest -n auto iris/coordinator_test.py
diff --git a/README.md b/README.md
@@ -1,5 +1,5 @@
 # Iris: Synchronous and Distributed Blackbox Optimization at Scale
-[![Continuous Integration](https://github.com/google-deepmind/iris/actions/workflows/core_test.yml/badge.svg)](https://github.com/google/vizier/actions/workflows/ci.yml?query=branch%3Amain)
+[![Continuous Integration](https://github.com/google-deepmind/iris/actions/workflows/core_test.yml/badge.svg)](https://github.com/google-deepmind/iris/actions?query=branch%3Amain)
 
 ## Overview
 Iris is a library for performing synchronous and distributed zeroth-order
@@ -12,17 +12,21 @@ blackbox function.
 To launch a local optimization, run:
 
 ```bash
+cd iris
+
 python3 -m launch \
 --lp_launch_type=local_mp \
 --experiment_name=iris_example \
---config=iris/configs/simple_example_config.py \
+--config=configs/simple_example_config.py \
 --logdir=/tmp/bblog \
 --num_workers=16 \
 --num_eval_workers=10 \
 --alsologtostderr
 ```
 
 ## Associated Publications
+
+* [Achieving Human Level Competitive Robot Table Tennis](https://arxiv.org/abs/2408.03906) (ICRA 2025 - Best Paper Award Finalist)
 * [SARA-RT: Scaling up Robotics Transformers with Self-Adaptive Robust Attention](https://arxiv.org/abs/2312.01990) (ICRA 2024 - Best Robotic Manipulation Award)
 * [Embodied AI with Two Arms: Zero-shot Learning, Safety and Modularity](https://arxiv.org/abs/2404.03570) (IROS 2024 - Robocup Best Paper Award)
 * [Agile Catching with Whole-Body MPC and Blackbox Policy Learning](https://arxiv.org/abs/2306.08205) (L4DC 2023)

diff --git a/iris/algorithms/ars_algorithm.py b/iris/algorithms/ars_algorithm.py
@@ -17,7 +17,7 @@
 import math
 from typing import Any, Callable, Dict, Optional, Sequence
 
-from iris import normalizer
+from iris import buffer
 from iris.algorithms import algorithm
 from iris.algorithms import stateless_perturbation_generators
 from iris.workers import worker_util
@@ -38,7 +38,7 @@ def __init__(
       orthogonal_suggestions: bool = False,
       quasirandom_suggestions: bool = False,
       top_sort_type: str = "max",
-      obs_norm_data_buffer: Optional[normalizer.MeanStdBuffer] = None,
+      obs_norm_data_buffer: Optional[buffer.MeanStdBuffer] = None,
       **kwargs,
   ) -> None:
     """Initializes the augmented random search algorithm.

diff --git a/iris/algorithms/ars_algorithm_test.py b/iris/algorithms/ars_algorithm_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from iris import normalizer
+from iris import buffer
 from iris.algorithms import ars_algorithm
 from iris.workers import worker_util
 import numpy as np
@@ -85,7 +85,7 @@ def test_restore_state_from_checkpoint(self, expected_obs_norm_state):
         top_percentage=1,
         orthogonal_suggestions=True,
         quasirandom_suggestions=True,
-        obs_norm_data_buffer=normalizer.MeanStdBuffer()
+        obs_norm_data_buffer=buffer.MeanStdBuffer()
         if expected_obs_norm_state is not None
         else None,
         random_seed=7,

diff --git a/iris/algorithms/cma_algorithm.py b/iris/algorithms/cma_algorithm.py
@@ -17,7 +17,7 @@
 from typing import Any, Dict, Optional, Sequence
 
 import cma
-from iris import normalizer
+from iris import buffer
 from iris.algorithms import algorithm
 from iris.workers import worker_util
 import numpy as np
@@ -34,11 +34,13 @@ class CMAES(algorithm.BlackboxAlgorithm):
      to implement the algorithm.
   """
 
-  def __init__(self,
-               std: float = 0.3,
-               bounds: Sequence[float] = (-1, 1),
-               obs_norm_data_buffer: Optional[normalizer.MeanStdBuffer] = None,
-               **kwargs) -> None:
+  def __init__(
+      self,
+      std: float = 0.3,
+      bounds: Sequence[float] = (-1, 1),
+      obs_norm_data_buffer: Optional[buffer.MeanStdBuffer] = None,
+      **kwargs
+  ) -> None:
     """Initializes the augmented random search algorithm.
 
     Args:

diff --git a/iris/algorithms/learnable_ars_algorithm.py b/iris/algorithms/learnable_ars_algorithm.py
@@ -20,8 +20,8 @@
 
 from absl import logging
 from flax import linen as nn
+from iris import buffer
 from iris import checkpoint_util
-from iris import normalizer
 from iris.algorithms import ars_algorithm
 from iris.algorithms import stateless_perturbation_generators
 from iris.workers import worker_util
@@ -63,7 +63,7 @@ def __init__(
       orthogonal_suggestions: bool = False,
       quasirandom_suggestions: bool = False,
       top_sort_type: str = "max",
-      obs_norm_data_buffer: Optional[normalizer.MeanStdBuffer] = None,
+      obs_norm_data_buffer: Optional[buffer.MeanStdBuffer] = None,
       seed: int = 42,
       reward_buffer_size: int = 10,
       **kwargs,

diff --git a/iris/algorithms/multi_agent_ars_algorithm.py b/iris/algorithms/multi_agent_ars_algorithm.py
@@ -18,8 +18,8 @@
 from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
 from absl import logging
+from iris import buffer
 from iris import checkpoint_util
-from iris import normalizer
 from iris.algorithms import ars_algorithm
 from iris.workers import worker_util
 import numpy as np
@@ -36,7 +36,7 @@ def __init__(
       orthogonal_suggestions: bool = False,
       quasirandom_suggestions: bool = False,
       top_sort_type: str = "max",
-      obs_norm_data_buffer: Optional[normalizer.MeanStdBuffer] = None,
+      obs_norm_data_buffer: Optional[buffer.MeanStdBuffer] = None,
       agent_keys: Optional[List[str]] = None,
       restore_state_from_single_agent: bool = False,
       **kwargs,

diff --git a/iris/algorithms/multi_agent_ars_algorithm_test.py b/iris/algorithms/multi_agent_ars_algorithm_test.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 
 import os
+from iris import buffer
 from iris import checkpoint_util
-from iris import normalizer
 from iris.algorithms import multi_agent_ars_algorithm
 from iris.workers import worker_util
 import numpy as np
@@ -214,7 +214,7 @@ def test_restore_state_from_checkpoint(
         top_percentage=1,
         orthogonal_suggestions=True,
         quasirandom_suggestions=True,
-        obs_norm_data_buffer=normalizer.MeanStdBuffer()
+        obs_norm_data_buffer=buffer.MeanStdBuffer()
         if state['obs_norm_state'] is not None
         else None,
         agent_keys=[str(i) for i in range(num_agents)],
@@ -430,7 +430,7 @@ def test_maybe_save_custom_checkpoint(
         top_percentage=1,
         orthogonal_suggestions=True,
         quasirandom_suggestions=True,
-        obs_norm_data_buffer=normalizer.MeanStdBuffer()
+        obs_norm_data_buffer=buffer.MeanStdBuffer()
         if state['obs_norm_state'] is not None
         else None,
         agent_keys=[str(i) for i in range(num_agents)],
@@ -468,7 +468,7 @@ def test_split_checkpoint(self):
         top_percentage=1,
         orthogonal_suggestions=True,
         quasirandom_suggestions=True,
-        obs_norm_data_buffer=normalizer.MeanStdBuffer(),
+        obs_norm_data_buffer=buffer.MeanStdBuffer(),
         agent_keys=[str(i) for i in range(3)],
         random_seed=7,
     )

diff --git a/iris/algorithms/pes_algorithm.py b/iris/algorithms/pes_algorithm.py
@@ -16,7 +16,7 @@
 
 from typing import Any, Dict, Optional, Sequence
 
-from iris import normalizer
+from iris import buffer
 from iris.algorithms import algorithm
 from iris.algorithms import stateless_perturbation_generators
 from iris.workers import worker_util
@@ -26,16 +26,18 @@
 class PersistentES(algorithm.BlackboxAlgorithm):
   """Augmented random search algorithm for blackbox optimization."""
 
-  def __init__(self,
-               std: float,
-               step_size: float,
-               top_percentage: float = 1.0,
-               orthogonal_suggestions: bool = False,
-               quasirandom_suggestions: bool = False,
-               top_sort_type: str = "max",
-               obs_norm_data_buffer: Optional[normalizer.MeanStdBuffer] = None,
-               partial_rollout_length: Optional[int] = 5,
-               **kwargs) -> None:
+  def __init__(
+      self,
+      std: float,
+      step_size: float,
+      top_percentage: float = 1.0,
+      orthogonal_suggestions: bool = False,
+      quasirandom_suggestions: bool = False,
+      top_sort_type: str = "max",
+      obs_norm_data_buffer: Optional[buffer.MeanStdBuffer] = None,
+      partial_rollout_length: Optional[int] = 5,
+      **kwargs
+  ) -> None:
     """Initializes the augmented random search algorithm.
 
     Args:

diff --git a/iris/algorithms/pyribs_algorithm.py b/iris/algorithms/pyribs_algorithm.py
@@ -25,7 +25,7 @@
 import dataclasses
 from typing import Any, Dict, Sequence
 
-from iris import normalizer
+from iris import buffer as buffer_lib
 from iris.algorithms import algorithm
 from iris.workers import worker_util
 import numpy as np
@@ -41,9 +41,9 @@
 _SOLUTION = "solution"
 # Extra column names for storing normalizer data with solutions.
 _OBS_NORM_PREFIX = "obs_norm_"
-_OBS_NORM_MEAN = _OBS_NORM_PREFIX + normalizer.MEAN
-_OBS_NORM_STD = _OBS_NORM_PREFIX + normalizer.STD
-_OBS_NORM_N = _OBS_NORM_PREFIX + normalizer.N
+_OBS_NORM_MEAN = _OBS_NORM_PREFIX + buffer_lib.MEAN
+_OBS_NORM_STD = _OBS_NORM_PREFIX + buffer_lib.STD
+_OBS_NORM_N = _OBS_NORM_PREFIX + buffer_lib.N
 
 
 @dataclasses.dataclass(frozen=True)
@@ -69,7 +69,7 @@ class PyRibsAlgorithm(algorithm.BlackboxAlgorithm):
   def __init__(
       self,
       measure_specs: Sequence[MeasureSpec],
-      obs_norm_data_buffer: normalizer.MeanStdBuffer,
+      obs_norm_data_buffer: buffer_lib.MeanStdBuffer,
       initial_step_size: float,
       num_suggestions_per_emitter: int,
       num_emitters: int,
@@ -136,8 +136,8 @@ def _init_scheduler(
         ranges=self._archive_ranges,
         qd_score_offset=self._qd_score_offset,
         extra_fields={
-            _OBS_NORM_MEAN: (buffer_state[normalizer.MEAN].size, np.float32),
-            _OBS_NORM_STD: (buffer_state[normalizer.STD].size, np.float32),
+            _OBS_NORM_MEAN: (buffer_state[buffer_lib.MEAN].size, np.float32),
+            _OBS_NORM_STD: (buffer_state[buffer_lib.STD].size, np.float32),
             _OBS_NORM_N: ((), np.int32),
         },
     )
@@ -176,9 +176,9 @@ def get_param_suggestions(
       elite = self._archive.best_elite
       param_suggestions = [elite[_SOLUTION]] * self._num_evals
       buffer = {
-          normalizer.N: elite[_OBS_NORM_N],
-          normalizer.MEAN: elite[_OBS_NORM_MEAN],
-          normalizer.STD: elite[_OBS_NORM_STD],
+          buffer_lib.N: elite[_OBS_NORM_N],
+          buffer_lib.MEAN: elite[_OBS_NORM_MEAN],
+          buffer_lib.STD: elite[_OBS_NORM_STD],
       }
     else:
       param_suggestions = self._scheduler.ask()
@@ -205,9 +205,9 @@ def process_evaluations(
       self._obs_norm_data_buffer.merge(result.obs_norm_buffer_data)
       objective.append(result.value)
       measures.append([result.metrics[name] for name in self._measure_names])
-      obs_norm_n.append(result.obs_norm_buffer_data[normalizer.N])
-      obs_norm_std.append(result.obs_norm_buffer_data[normalizer.STD])
-      obs_norm_mean.append(result.obs_norm_buffer_data[normalizer.MEAN])
+      obs_norm_n.append(result.obs_norm_buffer_data[buffer_lib.N])
+      obs_norm_std.append(result.obs_norm_buffer_data[buffer_lib.STD])
+      obs_norm_mean.append(result.obs_norm_buffer_data[buffer_lib.MEAN])
 
     # Store the state of the obs_norm_buffer for each solution so that it can be
     # reproduced later when evaluating the policy, similar to other algorithms

diff --git a/iris/algorithms/pyribs_algorithm_test.py b/iris/algorithms/pyribs_algorithm_test.py
@@ -14,7 +14,7 @@
 
 from unittest import mock
 
-from iris import normalizer
+from iris import buffer
 from iris.algorithms import algorithm
 from iris.algorithms import pyribs_algorithm
 from iris.workers import worker_util
@@ -35,7 +35,7 @@ def setUp(self):
     # Basic parameters chosen to be simple enough to not distract from the
     # algorithm logic but with enough complexity to test functionality e.g.
     # using multiple measure specs.
-    self.buffer = normalizer.MeanStdBuffer(shape=(8,))
+    self.buffer = buffer.MeanStdBuffer(shape=(8,))
     self.num_suggestions_per_emitter = 10
     self.num_emitters = 20
     self.initial_step_size = 1.0
@@ -93,17 +93,16 @@ def test_get_param_suggestions_for_eval(self):
         worker_util.EvaluationResult(
             params_evaluated=suggestion[algorithm.PARAMS_TO_EVAL],
             value=1,
-            obs_norm_buffer_data=suggestion[
-                algorithm.OBS_NORM_BUFFER_STATE
-            ] | {normalizer.N: 1, normalizer.UNNORM_VAR: np.ones((8,))},
+            obs_norm_buffer_data=suggestion[algorithm.OBS_NORM_BUFFER_STATE]
+            | {buffer.N: 1, buffer.UNNORM_VAR: np.ones((8,))},
             metrics={'x': 1, 'y': 10},
         )
         for suggestion in suggestions
     ]
     # Give the first evaluation a high score so it is the elite.
     evaluations[0].value = 1000
     if evaluations[0].obs_norm_buffer_data is not None:
-      evaluations[0].obs_norm_buffer_data[normalizer.N] = 1000
+      evaluations[0].obs_norm_buffer_data[buffer.N] = 1000
     self.test_algorithm.process_evaluations(evaluations)
 
     eval_suggestions = self.test_algorithm.get_param_suggestions(evaluate=True)
@@ -115,13 +114,13 @@ def test_get_param_suggestions_for_eval(self):
           evaluations[0].params_evaluated
       )
       np.testing.assert_equal(
-          eval_suggestion[algorithm.OBS_NORM_BUFFER_STATE][normalizer.N],
-          evaluations[0].obs_norm_buffer_data[normalizer.N],
+          eval_suggestion[algorithm.OBS_NORM_BUFFER_STATE][buffer.N],
+          evaluations[0].obs_norm_buffer_data[buffer.N],
       )
       self.assertFalse(eval_suggestion[algorithm.UPDATE_OBS_NORM_BUFFER])
 
   def test_restore_state_from_checkpoint_without_archive(self):
-    checkpoint_buffer = normalizer.MeanStdBuffer(shape=(8,))
+    checkpoint_buffer = buffer.MeanStdBuffer(shape=(8,))
     checkpoint_buffer.push(np.ones(8,))
     checkpoint_state = {
         algorithm.PARAMS_TO_EVAL: np.zeros((13,)),
@@ -146,7 +145,7 @@ def test_restore_state_from_checkpoint_without_archive(self):
     )
 
   def test_restore_state_from_checkpoint_with_archive(self):
-    checkpoint_buffer = normalizer.MeanStdBuffer(shape=(8,))
+    checkpoint_buffer = buffer.MeanStdBuffer(shape=(8,))
     checkpoint_buffer.push(
         np.ones(
             8,
@@ -160,11 +159,11 @@ def test_restore_state_from_checkpoint_with_archive(self):
         qd_score_offset=0,
         extra_fields={
             pyribs_algorithm._OBS_NORM_MEAN: (
-                buffer_state[normalizer.MEAN].size,
+                buffer_state[buffer.MEAN].size,
                 np.float32,
             ),
             pyribs_algorithm._OBS_NORM_STD: (
-                buffer_state[normalizer.STD].size,
+                buffer_state[buffer.STD].size,
                 np.float32,
             ),
             pyribs_algorithm._OBS_NORM_N: ((), np.int32),
@@ -210,21 +209,21 @@ def test_process_evaluations(self):
             params_evaluated=np.ones((13,)),
             value=1,
             obs_norm_buffer_data={
-                normalizer.N: 1,
-                normalizer.STD: np.ones((8,)),
-                normalizer.MEAN: np.ones((8,)),
-                normalizer.UNNORM_VAR: np.ones((8,)),
+                buffer.N: 1,
+                buffer.STD: np.ones((8,)),
+                buffer.MEAN: np.ones((8,)),
+                buffer.UNNORM_VAR: np.ones((8,)),
             },
             metrics={'x': 1, 'y': 10},
         ),
         worker_util.EvaluationResult(
             params_evaluated=np.ones((13,) * 2),
             value=2,
             obs_norm_buffer_data={
-                normalizer.N: 2,
-                normalizer.STD: np.ones((8,)) * 2,
-                normalizer.MEAN: np.ones((8,)) * 2,
-                normalizer.UNNORM_VAR: np.ones((8,)) * 2,
+                buffer.N: 2,
+                buffer.STD: np.ones((8,)) * 2,
+                buffer.MEAN: np.ones((8,)) * 2,
+                buffer.UNNORM_VAR: np.ones((8,)) * 2,
             },
             metrics={'x': 2, 'y': 20},
         ),