robopal.envs.bimanual_tasks.bimanual_manipulate 源代码

import logging
import numpy as np
from typing import Dict, Union, Tuple, Any
from robopal.envs import RobotEnv
import robopal.commons.transform as T

logging.basicConfig(level=logging.INFO)


[文档] class BimanualManipulate(RobotEnv): """ The control frequency of the robot is of f=20 Hz. This is achieved by applying the same action in 50 subsequent simulator step (with a time step of dt = 0.001 s) before returning the control to the robot. :parameter robot: The robot model to be used in the environment. :parameter render_mode: The mode in which the environment is rendered. Possible values are 'human' and 'rgb_array'. :parameter control_freq: The control frequency of the robot. :parameter is_show_camera_in_cv: Whether to show the camera feed in a window. :parameter controller: The controller to be used in the environment. Possible values are 'CARTIK' and 'JNTIMP'. :parameter is_interpolate: Whether to interpolate the actions. :parameter is_shared_obs: Whether to share the observations between the agents. :parameter gripper_ctrl_mode: The mode in which the gripper is controlled. Possible values are 'abs' and 'rel'. """ def __init__(self, robot=None, render_mode='human', control_freq=20, is_show_camera_in_cv=False, controller='CARTIK', is_interpolate=False, is_shared_obs=False, gripper_ctrl_mode='abs', ): super().__init__( robot=robot, render_mode=render_mode, control_freq=control_freq, controller=controller, is_show_camera_in_cv=is_show_camera_in_cv, is_interpolate=is_interpolate, ) self.is_shared_obs = is_shared_obs # TODO: check if this is necessary self.max_episode_steps = 50 self._timestep = 0 self.goal_pos = None self.desired_positions = self.init_pos self.desired_gripper_actions = {agent: 0 for agent in self.agents} self.action_scale = 0.1 self.gripper_action_scale = 0.1 self.pos_max_bound = {self.agents[0]: np.array([0.65, 0.2, 0.4]), self.agents[1]: np.array([0.65, 0.2, 0.4])} self.pos_min_bound = {self.agents[0]: np.array([0.3, -0.2, 0.14]), self.agents[1]: np.array([0.3, -0.2, 0.14])} self.gripper_ctrl_mode = gripper_ctrl_mode
[文档] def compute_manipulator_action(self, action, agent) -> np.ndarray: """ Map to target action space bounds """ self.desired_positions[agent] = self.desired_positions[agent] + self.action_scale * action[:3] self.desired_positions[agent] = self.desired_positions[agent].clip(self.pos_min_bound[agent], self.pos_max_bound[agent]) return self.desired_positions[agent]
[文档] def compute_gripper_action(self, action, agent) -> np.ndarray: """ Map to target action space bounds """ self.desired_gripper_actions[agent] = self.desired_gripper_actions[agent] + self.gripper_action_scale * action[3] self.desired_gripper_actions[agent] = self.desired_gripper_actions[agent].clip(-1, 1) ret = self.normalize_gripper_ctrl(self.desired_gripper_actions[agent], agent) return ret
[文档] def normalize_gripper_ctrl(self, action, agent): gripper_ctrl = ( (action + 1) * (self.robot.end[agent]._ctrl_range[1] - self.robot.end[agent]._ctrl_range[0]) / 2 + self.robot.end[agent]._ctrl_range[0] ) return gripper_ctrl
[文档] def step( self, actions: Dict[str, np.ndarray] ) -> tuple[ Dict[str, np.ndarray], Dict[str, float], Dict[str, bool], Dict[str, bool], Dict[str, dict], ]: """ Take one step in the environment. :param action: The action space is 4-dimensional, with the first 3 dimensions corresponding to the desired position of the block in Cartesian coordinates, and the last dimension corresponding to the desired gripper opening (0 for closed, 1 for open). :return: obs, reward, terminated, truncated, info """ self._timestep += 1 manipulator_actions = {agent: None for agent in self.agents} gripper_actions = {agent: None for agent in self.agents} for agent in self.agents: manipulator_actions[agent] = self.compute_manipulator_action(actions[agent], agent) gripper_actions[agent] = self.compute_gripper_action(actions[agent], agent) # take one step for agent in self.agents: self.robot.end[agent].apply_action(gripper_actions[agent]) super().step(manipulator_actions) observations = {agent: self._get_obs(agent) for agent in self.agents} rewards = {agent: self._get_rewards(agent) for agent in self.agents} # Check termination conditions terminations = {agent: False for agent in self.agents} # Check truncation conditions (overwrites termination conditions) truncations = {agent: False for agent in self.agents} if self._timestep > self.max_episode_steps: truncations = {agent: True for agent in self.agents} infos = {agent: self._get_info(agent) for agent in self.agents} # if any(terminations.values()) or all(truncations.values()): # self.agents = [] return observations, rewards, terminations, truncations, infos
def _get_rewards(self, agent: str): """ Sparse Reward: the returned reward can have two values: -1 if the block hasn’t reached its final target position, and 0 if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m). """ return 0 def _is_success(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, th=0.02) -> np.ndarray: """ Compute whether the achieved goal successfully achieved the desired goal. """ d = self.goal_distance(achieved_goal, desired_goal) return (d < th).astype(np.float32)
[文档] @staticmethod def goal_distance(goal_a, goal_b): assert goal_a.shape == goal_b.shape return np.linalg.norm(goal_a - goal_b, axis=-1)
def _get_obs(self, agent: str = None) -> Union[Dict, np.ndarray]: """ The observation space is 16-dimensional, with the first 3 dimensions corresponding to the position of the block, the next 3 dimensions corresponding to the position of the goal, the next 3 dimensions corresponding to the position of the gripper, the next 3 dimensions corresponding to the vector between the block and the gripper, and the last dimension corresponding to the current gripper opening. """ raise NotImplementedError def _get_info(self, agent: str = None) -> dict: return {}
[文档] def reset(self, seed=None, options=None): options = options or {} options['disable_reset_render'] = True super().reset(seed, options) self._timestep = 0 # self.set_random_init_position() self.update_init_pose_to_current() observations = { agent: self._get_obs(agent) for agent in self.agents } # Get dummy infos. Necessary for proper parallel_to_aec conversion infos = {agent: self._get_info(agent) for agent in self.agents} return observations, infos
[文档] def update_init_pose_to_current(self): super().update_init_pose_to_current() # reset the desired position to the initial position self.desired_position = self.init_pos
[文档] def reset_object(self): pass
[文档] def set_random_init_position(self): """ Set the initial position of the end effector to a random position within the workspace. """ for agent in self.agents: random_pos = np.random.uniform(self.pos_min_bound[agent], self.pos_max_bound[agent]) qpos = self.controller.ik(random_pos, self.init_quat[agent], q_init=self.robot.get_arm_qpos(agent)) self.set_joint_qpos(qpos, agent) self.forward()