robopal.envs.manipulation_tasks.demo_cabinet 源代码

import numpy as np

from robopal.envs.manipulation_tasks.robot_manipulate import ManipulateEnv
import robopal.commons.transform as trans
from robopal.robots.diana_med import DianaCabinet
from robopal.wrappers import GoalEnvWrapper



[文档]
class LockedCabinetEnv(ManipulateEnv):

    name = 'LockedCabinet-v1'
    
    def __init__(self,
                 robot=DianaCabinet,
                 render_mode='human',
                 control_freq=20,
                 is_show_camera_in_cv=False,
                 controller='CARTIK',
                 ):
        super().__init__(
            robot=robot,
            render_mode=render_mode,
            control_freq=control_freq,
            is_show_camera_in_cv=is_show_camera_in_cv,
            controller=controller,
        )

        self.obs_dim = (38,)
        self.goal_dim = (9,)
        self.action_dim = (4,)

        self.max_action = 1.0
        self.min_action = -1.0

        self.max_episode_steps = 50

        self.TASK_FLAG = 0

        self.pos_max_bound = np.array([0.6, 0.25, 0.4])
        self.pos_min_bound = np.array([0.4, -0.25, 0.2])

    def _get_obs(self) -> dict:
        """ The observation space is 16-dimensional, with the first 3 dimensions corresponding to the position
        of the block, the next 3 dimensions corresponding to the position of the goal, the next 3 dimensions
        corresponding to the position of the gripper, the next 3 dimensions corresponding to the vector
        between the block and the gripper, and the last dimension corresponding to the current gripper opening.
        """
        obs = np.zeros(self.obs_dim)

        obs[0:3] = (  # gripper position in global coordinates
            end_pos := self.get_site_pos('0_grip_site')
        )
        obs[3:6] = (  # handle position in global coordinates
            handle_pos := self.get_site_pos('left_handle')
        )
        obs[6:9] = (  # beam position in global coordinates
            beam_pos := self.get_site_pos('beam_left')
        )
        obs[9:12] = end_pos - handle_pos  # distance between the handle and the end
        obs[12:15] = end_pos - beam_pos  # distance between the beam and the end
        obs[15:18] = trans.mat_2_euler(self.get_site_rotm('left_handle'))
        obs[18:21] = trans.mat_2_euler(self.get_site_rotm('beam_left'))
        obs[21:24] = (  # gripper linear velocity
            end_vel := self.get_site_xvelp('0_grip_site') * self.dt
        )
        # velocity with respect to the gripper
        handle_velp = self.get_site_xvelp('left_handle') * self.dt
        obs[24:27] = (  # velocity with respect to the gripper
            handle_velp - end_vel
        )
        beam_velp = self.get_site_xvelp('beam_left') * self.dt
        obs[27:30] = (  # velocity with respect to the gripper
            beam_velp - end_vel
        )
        obs[30:33] = self.get_site_xvelr('left_handle') * self.dt
        obs[33:36] = self.get_site_xvelr('beam_left') * self.dt
        obs[36] = self.mj_data.joint('0_r_finger_joint').qpos[0]
        obs[37] = self.mj_data.joint('0_r_finger_joint').qvel[0] * self.dt

        return obs.copy()

    def _get_achieved_goal(self):
        achieved_goal = np.concatenate([
            self.get_site_pos('0_grip_site'),
            self.get_site_pos('beam_left'),
            self.get_site_pos('left_handle')
        ], axis=0)
        return achieved_goal.copy()

    def _get_desired_goal(self):
        desired_goal = np.concatenate([
            self.get_site_pos('beam_right') if self._is_success(
                self.get_site_pos('beam_left'), self.get_site_pos('cabinet_mid'), th=0.03
            ) == 0 else self.get_site_pos('left_handle'),
            self.get_site_pos('cabinet_mid'),
            self.get_site_pos('cabinet_left_opened'),
        ], axis=0)
        return desired_goal.copy()

    def _get_info(self) -> dict:
        return {
            'is_unlock_success': self._is_success(self.get_site_pos('beam_left'), self.get_site_pos('cabinet_mid'), th=0.03),
            'is_door_success': self._is_success(self.get_site_pos('left_handle'), self.get_site_pos('cabinet_left_opened'), th=0.03)
        }


[文档]
    def reset_object(self):
        if self.TASK_FLAG == 0:
            pass
        elif self.TASK_FLAG == 1:
            self.mj_data.joint('OBJTy').qpos[0] = -0.12

        return super().reset_object()




if __name__ == "__main__":
    env = LockedCabinetEnv()
    env = GoalEnvWrapper(env)
    env.reset()
    for t in range(int(1e5)):
        action = np.random.uniform(env.min_action, env.max_action, env.action_dim)
        s_, r, terminated, truncated, info = env.step(action)
        if truncated:
            env.reset()
    env.close()