# Source code for robopal.envs.manipulation_tasks.robot_manipulate

import mujoco
import numpy as np
import logging
from typing import Dict, Union, Tuple, Any
from robopal.envs import RobotEnv
import robopal.commons.transform as T

# Configure the root logger at INFO level as a side effect of importing this module
# (``logging.basicConfig`` does nothing if the root logger already has handlers).
# NOTE(review): libraries normally leave logging configuration to the application;
# confirm that configuring it here is intended.
logging.basicConfig(level=logging.INFO)



# [docs]
class ManipulateEnv(RobotEnv):
    """
    Base environment for end-effector manipulation tasks, built on ``RobotEnv``.

    The control frequency of the robot is f = 20 Hz (the default ``control_freq``). This is achieved by
    applying the same action in 50 subsequent simulator steps (with a time step of dt = 0.0005 s) before
    returning control to the agent.

    NOTE(review): 50 steps at dt = 0.0005 s span 0.025 s (40 Hz), not the 0.05 s that 20 Hz implies;
    confirm the actual step count / time step against the simulator configuration.
    """

    def __init__(self,
                 robot=None,
                 render_mode='human',
                 control_freq=20,
                 controller='CARTIK',
                 is_interpolate=False,
                 action_type="velocity",
                 is_randomize_end=True,
                 is_randomize_object=True,
                 is_show_camera_in_cv=False,
                 is_render_camera_offscreen=False,
                 camera_in_render="frontview",
                 camera_in_window="free",
                 ):
        """Set up the manipulation environment on top of ``RobotEnv``.

        The robot, rendering, control-frequency, controller, interpolation and camera
        arguments are forwarded unchanged to ``RobotEnv.__init__``. The remaining ones
        are stored on the instance:

        :param action_type: ``"velocity"`` or ``"position"``; selects how
            ``compute_end_position`` interprets the positional part of an action.
        :param is_randomize_end: if true, ``reset`` moves the end effector to a random
            start position.
        :param is_randomize_object: flag read by ``reset_object``.
        """
        base_kwargs = dict(
            robot=robot,
            render_mode=render_mode,
            control_freq=control_freq,
            controller=controller,
            is_interpolate=is_interpolate,
            is_show_camera_in_cv=is_show_camera_in_cv,
            is_render_camera_offscreen=is_render_camera_offscreen,
            camera_in_render=camera_in_render,
            camera_in_window=camera_in_window,
        )
        super().__init__(**base_kwargs)

        # Task-level options.
        self.action_type = action_type
        self.is_randomize_end = is_randomize_end
        self.is_randomize_object = is_randomize_object

        # `step` reports truncation once this many steps have elapsed in an episode.
        self.max_episode_steps = 50

        # Observation / action sizes are left unset here.
        self.obs_dim: np.ndarray = None
        self.action_dim: np.ndarray = None

        self._timestep = 0
        self.goal_pos = None

        # Everything below is keyed on the first agent only.
        primary_agent = self.agents[0]

        # Running position target; velocity-type actions are integrated onto it.
        self.desired_position = self.init_pos[primary_agent]
        self.action_scale = 0.1

        # Gripper control limits, used to un-normalize the gripper command in `step`.
        gripper_ctrl_range = self.robot.end[primary_agent]._ctrl_range
        self.grip_max_bound = gripper_ctrl_range[1]
        self.grip_min_bound = gripper_ctrl_range[0]


# [docs]
    def compute_end_position(self, input) -> Tuple[np.ndarray, Any]:
        """ Map to target action space bounds
        """
        if self.action_type == "velocity":
            actual_pos = self.desired_position + self.action_scale * input
        elif self.action_type == "position":
            actual_pos = input
        else:
            raise ValueError(f"Invalid action type: {self.action_type}")
        
        actual_pos = actual_pos.clip(self.robot.pos_min_bound, self.robot.pos_max_bound)
        return actual_pos

    

# [docs]
    def step(self, action) -> Tuple:
        """Advance the environment by one control step.

        :param action: 4-element action. ``action[:3]`` is the end-effector command handed to
            ``compute_end_position`` (interpreted according to ``action_type``). ``action[3]`` is
            the normalized gripper command, mapped linearly so that -1 yields ``grip_min_bound``
            and +1 yields ``grip_max_bound``.
        :return: obs, reward, terminated, truncated, info. ``terminated`` is always ``False``;
            ``truncated`` becomes ``True`` once ``max_episode_steps`` steps have been taken.
        """
        self._timestep += 1

        # Turn the positional part of the action into a bounded Cartesian target.
        target_pos = self.compute_end_position(action[:3])

        # Un-normalize the gripper command from [-1, 1] to the gripper control range.
        grip_command = action[3]
        grip_ctrl = (grip_command + 1) * (self.grip_max_bound - self.grip_min_bound) / 2 + self.grip_min_bound
        gripper = self.robot.end[self.agents[0]]
        gripper.apply_action(grip_ctrl)

        # Let the base environment drive the arm towards the target.
        super().step(target_pos)

        # Remember the target so the next velocity-type action is integrated from it.
        self.desired_position = target_pos

        obs = self._get_obs()
        # Called without arguments, so the reward uses compute_rewards' default goals.
        reward = self.compute_rewards()
        truncated = bool(self._timestep >= self.max_episode_steps)
        info = self._get_info()

        return obs, reward, False, truncated, info



# [docs]
    @staticmethod
    def goal_distance(goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        return np.linalg.norm(goal_a - goal_b, axis=-1)



# [docs]
    def compute_rewards(self, achieved_goal: np.ndarray = np.zeros(3), desired_goal: np.ndarray = np.zeros(3), info: dict = None, **kwargs):
        """ Sparse Reward: the returned reward can have two values: -1 if the block hasn’t reached its final
        target position, and 0 if the block is in the final target position (the block is considered to have
        reached the goal if the Euclidean distance between both is lower than 0.05 m).
        """
        d = self.goal_distance(achieved_goal, desired_goal)
        if kwargs:
            return -(d >= kwargs['th']).astype(np.float64)
        return -(d >= 0.02).astype(np.float64)


    def _is_success(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, th=0.02) -> np.ndarray:
        """ Compute whether the achieved goal successfully achieved the desired goal.
        """
        d = self.goal_distance(achieved_goal, desired_goal)
        return (d < th).astype(np.float32)

    def _get_obs(self, agent: str = None) -> Union[Dict, np.ndarray]:
        """ The observation space is 16-dimensional, with the first 3 dimensions corresponding to the position
        of the block, the next 3 dimensions corresponding to the position of the goal, the next 3 dimensions
        corresponding to the position of the gripper, the next 3 dimensions corresponding to the vector
        between the block and the gripper, and the last dimension corresponding to the current gripper opening.
        """
        raise NotImplementedError
    
    def _get_achieved_goal(self) -> np.ndarray:
        """ get achieved goal, required for goal-based env.
        """
        pass

    def _get_desired_goal(self) -> np.ndarray:
        """ get desired goal, required for goal-based env.
        """
        pass

    def _get_info(self, agent: str = None) -> dict:
        return {}


# [docs]
    def reset(self, seed=None, options=None):
        """Reset the environment and return the first observation of the new episode.

        :param seed: forwarded to ``RobotEnv.reset``.
        :param options: optional mapping of reset options. A copy of it, with
            ``'disable_reset_render'`` forced to ``True``, is forwarded to ``RobotEnv.reset``;
            the caller's mapping itself is left untouched.
        :return: ``(obs, info)``
        """
        # Copy before adding the forced flag so it does not leak into the caller's dict.
        options = dict(options) if options else {}
        options['disable_reset_render'] = True

        super().reset(seed, options)

        self._timestep = 0

        if self.is_randomize_end:
            self.set_random_init_position()
        # Record the pose reached after the (optional) randomization as the new initial pose.
        self.update_init_pose_to_current()

        obs = self._get_obs()
        info = self._get_info()

        return obs, info

    

# [docs]
    def update_init_pose_to_current(self):
        """Run the parent's update, then re-anchor ``desired_position`` on the first agent's initial position."""
        super().update_init_pose_to_current()

        # The running position target restarts from the (possibly updated) initial position.
        primary_agent = self.agents[0]
        self.desired_position = self.init_pos[primary_agent]



# [docs]
    def reset_object(self):
        """Reset the object by delegating to ``RobotEnv.reset_object``.

        ``is_randomize_object`` is consulted, but no randomization is performed at this level.
        """
        if self.is_randomize_object:
            # Placeholder branch: nothing is randomized here.
            pass
        parent_result = super().reset_object()
        return parent_result



# [docs]
    def set_random_init_position(self):
        """Move every agent's end effector to a random position inside the workspace bounds.

        For each agent a position is drawn uniformly between ``robot.pos_min_bound`` and
        ``robot.pos_max_bound``, solved to joint positions with the controller's IK (seeded with
        the agent's current arm joint positions), written to the joints, and followed by a
        ``forward()`` call.
        """
        lower = self.robot.pos_min_bound
        upper = self.robot.pos_max_bound
        for agent in self.agents:
            target_pos = np.random.uniform(lower, upper)
            # Fixed orientation target passed to the IK solver alongside the sampled position.
            target_quat = np.array([1, 0, 0, 0])
            current_q = self.robot.get_arm_qpos(agent)
            solved_q = self.controller.ik(target_pos, target_quat, q_init=current_q)
            self.set_joint_qpos(solved_q, agent)
            self.forward()