Metaworld Examples

  1import gymnasium as gym
  2import fancy_gym
  3
  4
  5def example_meta(env_id="metaworld/button-press-v2", seed=1, iterations=1000, render=True):
  6    """
  7    Example for running a MetaWorld based env in the step based setting.
  8    The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
  9    return the observable goal version.
 10    All tasks can be found here: https://arxiv.org/pdf/1910.10897.pdf or https://meta-world.github.io/
 11
 12    Args:
 13        env_id: `task_name-v2`
 14        seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
 15        iterations: Number of rollout steps to run
 16        render: Render the episode
 17
 18    Returns:
 19
 20    """
 21    env = gym.make(env_id, render_mode='human' if render else None)
 22    rewards = 0
 23    obs = env.reset(seed=seed)
 24    print("observation shape:", env.observation_space.shape)
 25    print("action shape:", env.action_space.shape)
 26
 27    for i in range(iterations):
 28        ac = env.action_space.sample()
 29        if render:
 30            env.render()
 31        obs, reward, terminated, truncated, info = env.step(ac)
 32        rewards += reward
 33        if terminated or truncated:
 34            print(env_id, rewards)
 35            rewards = 0
 36            obs = env.reset(seed=seed+i+1)
 37
 38    env.close()
 39    del env
 40
 41
 42def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
 43    """
 44    Example for running a custom movement primitive based environments.
 45    Our already registered environments follow the same structure.
 46    Hence, this also allows to adjust hyperparameters of the movement primitives.
 47    Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.
 48    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
 49    for our repo: https://github.com/ALRhub/fancy_gym/
 50    Args:
 51        seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
 52        iterations: Number of rollout steps to run
 53        render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)
 54
 55    Returns:
 56
 57    """
 58
 59    # Base MetaWorld name, according to structure of above example
 60    base_env_id = "metaworld/button-press-v2"
 61
 62    # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
 63    # You can also add other gym.Wrappers in case they are needed.
 64    wrappers = [fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper]
 65    # # For a ProMP
 66    # trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
 67    # phase_generator_kwargs = {'phase_generator_type': 'linear'}
 68    # controller_kwargs = {'controller_type': 'metaworld'}
 69    # basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
 70    #                           'num_basis': 5,
 71    #                           'num_basis_zero_start': 1
 72    #                           }
 73
 74    # For a DMP
 75    trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
 76    phase_generator_kwargs = {'phase_generator_type': 'exp',
 77                              'alpha_phase': 2}
 78    controller_kwargs = {'controller_type': 'metaworld'}
 79    basis_generator_kwargs = {'basis_generator_type': 'rbf',
 80                              'num_basis': 5
 81                              }
 82    base_env = gym.make(base_env_id, render_mode='human' if render else None)
 83    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
 84                            traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
 85                            phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
 86                            seed=seed)
 87
 88    # This renders the full MP trajectory
 89    # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
 90    # Resetting to no rendering, can be achieved by render(mode=None).
 91    # It is also possible to change them mode multiple times when
 92    # e.g. only every nth trajectory should be displayed.
 93    if render:
 94        env.render()
 95
 96    rewards = 0
 97    obs = env.reset(seed=seed)
 98
 99    # number of samples/full trajectories (multiple environment steps)
100    for i in range(iterations):
101        ac = env.action_space.sample()
102        obs, reward, terminated, truncated, info = env.step(ac)
103        rewards += reward
104
105        if terminated or truncated:
106            print(base_env_id, rewards)
107            rewards = 0
108            obs = env.reset(seed=seed+i+1)
109
110    env.close()
111    del env
112
def main(render=False):
    """
    Run all MetaWorld examples of this file one after another.

    Args:
        render: forwarded as `render` to every example
    """
    # For rendering it might be necessary to specify your OpenGL installation
    # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
    example_seed = 10

    # First the standard MetaWorld task, then the MP + MetaWorld hybrid task provided in our framework.
    step_based_runs = (
        ("metaworld/button-press-v2", 500),
        ("metaworld_ProMP/button-press-v2", 1),
    )
    for task_id, n_iterations in step_based_runs:
        example_meta(task_id, seed=example_seed, iterations=n_iterations, render=render)

    # Custom MetaWorld task
    example_custom_meta_and_mp(seed=example_seed, iterations=1, render=render)
125
# Only run the examples when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()