DeepMind Control Examples

  1import gymnasium as gym
  2import fancy_gym
  3
  4
  5def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=True):
  6    """
  7    Example for running a DMC based env in the step based setting.
  8    The env_id has to be specified as `domain_name:task_name` or
  9    for manipulation tasks as `domain_name:manipulation-environment_name`
 10
 11    Args:
 12        env_id: Either `domain_name-task_name` or `manipulation-environment_name`
 13        seed: seed for deterministic behaviour
 14        iterations: Number of rollout steps to run
 15        render: Render the episode
 16
 17    Returns:
 18
 19    """
 20    env = gym.make(env_id, render_mode='human' if render else None)
 21    rewards = 0
 22    obs = env.reset(seed=seed)
 23    print("observation shape:", env.observation_space.shape)
 24    print("action shape:", env.action_space.shape)
 25
 26    for i in range(iterations):
 27        ac = env.action_space.sample()
 28        if render:
 29            env.render()
 30        obs, reward, terminated, truncated, info = env.step(ac)
 31        rewards += reward
 32
 33        if terminated or truncated:
 34            print(env_id, rewards)
 35            rewards = 0
 36            obs = env.reset()
 37
 38    env.close()
 39    del env
 40
 41
 42def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
 43    """
 44    Example for running a custom movement primitive based environments.
 45    Our already registered environments follow the same structure.
 46    Hence, this also allows to adjust hyperparameters of the movement primitives.
 47    Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.
 48    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
 49    for our repo: https://github.com/ALRhub/fancy_gym/
 50    Args:
 51        seed: seed for deterministic behaviour
 52        iterations: Number of rollout steps to run
 53        render: Render the episode
 54
 55    Returns:
 56
 57    """
 58
 59    # Base DMC name, according to structure of above example
 60    base_env_id = "dm_control/ball_in_cup-catch"
 61
 62    # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
 63    # You can also add other gym.Wrappers in case they are needed.
 64    wrappers = [fancy_gym.dmc.suite.ball_in_cup.MPWrapper]
 65    # # For a ProMP
 66    trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
 67    phase_generator_kwargs = {'phase_generator_type': 'linear'}
 68    controller_kwargs = {'controller_type': 'motor',
 69                         "p_gains": 1.0,
 70                         "d_gains": 0.1, }
 71    basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
 72                              'num_basis': 5,
 73                              'num_basis_zero_start': 1
 74                              }
 75
 76    # For a DMP
 77    # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
 78    # phase_generator_kwargs = {'phase_generator_type': 'exp',
 79    #                           'alpha_phase': 2}
 80    # controller_kwargs = {'controller_type': 'motor',
 81    #                      "p_gains": 1.0,
 82    #                      "d_gains": 0.1,
 83    #                      }
 84    # basis_generator_kwargs = {'basis_generator_type': 'rbf',
 85    #                           'num_basis': 5
 86    #                           }
 87    base_env = gym.make(base_env_id, render_mode='human' if render else None)
 88    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
 89                            traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
 90                            phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
 91                            seed=seed)
 92
 93    # This renders the full MP trajectory
 94    # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
 95    # Resetting to no rendering, can be achieved by render(mode=None).
 96    # It is also possible to change them mode multiple times when
 97    # e.g. only every nth trajectory should be displayed.
 98    if render:
 99        env.render()
100
101    rewards = 0
102    obs = env.reset()
103
104    # number of samples/full trajectories (multiple environment steps)
105    for i in range(iterations):
106        ac = env.action_space.sample()
107        obs, reward, terminated, truncated, info = env.step(ac)
108        rewards += reward
109
110        if terminated or truncated:
111            print(base_env_id, rewards)
112            rewards = 0
113            obs = env.reset()
114
115    env.close()
116    del env
117
def main(render=False):
    """
    Run every DMC example of this file once.

    Args:
        render: Render the episodes of all examples

    Returns:
        None
    """
    # Standard DMC Suite tasks
    example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)

    # Manipulation tasks
    # Disclaimer: The vision versions are currently not integrated and yield an error
    example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render)

    # Gym + DMC hybrid task provided in the MP framework
    example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render)

    # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
    # already registered above
    example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
146
# Only run the examples when this file is executed as a script.
if __name__ == '__main__':
    main()