# Source code for robopal.envs.manipulation_tasks.demo_pick_place

import numpy as np

from robopal.envs.manipulation_tasks.robot_manipulate import ManipulateEnv
import robopal.commons.transform as trans
from robopal.robots.diana_med import DianaPickAndPlace
from robopal.wrappers import GoalEnvWrapper

class PickAndPlaceEnv(ManipulateEnv):
    """Pick-and-place task: bring the body ``green_block`` to the site ``goal_site``.

    The achieved goal is the block position and the desired goal is the
    position of ``goal_site``. Rewards are sparse (``-1`` / ``0``), see
    :meth:`compute_rewards`.
    """

    name = 'PickAndPlace-v1'

    # Block-to-goal distance below which the task counts as solved. Shared by
    # the reward and the ``is_success`` flag so that the two cannot drift apart.
    _GOAL_THRESHOLD = 0.02

    def __init__(self,
                 robot="PandaPickAndPlace",
                 render_mode='human',
                 control_freq=20,
                 is_show_camera_in_cv=False,
                 controller='CARTIK',
                 action_type="velocity",
                 is_render_camera_offscreen=False,
                 camera_in_render="frontview",
                 camera_in_window="free",
                 is_randomize_end=False,
                 is_randomize_object=True,
                 is_randomize_goal=False,
                 ):
        """Build the environment.

        All arguments except ``is_randomize_goal`` are forwarded unchanged to
        ``ManipulateEnv.__init__``.

        Args:
            is_randomize_goal: When true, :meth:`reset_object` samples a new
                position for ``goal_site`` on every reset.
        """
        super().__init__(
            robot=robot,
            render_mode=render_mode,
            control_freq=control_freq,
            is_show_camera_in_cv=is_show_camera_in_cv,
            controller=controller,
            action_type=action_type,
            is_randomize_end=is_randomize_end,
            is_randomize_object=is_randomize_object,
            is_render_camera_offscreen=is_render_camera_offscreen,
            camera_in_render=camera_in_render,
            camera_in_window=camera_in_window,
        )
        self.obs_dim = (22,)
        self.goal_dim = (3,)
        self.action_dim = (4,)

        self.max_action = 1.0
        self.min_action = -1.0

        self.max_episode_steps = 50

        self.is_randomize_goal = is_randomize_goal

    def _get_obs(self) -> np.ndarray:
        """Assemble the flat observation vector of shape ``self.obs_dim`` (22,).

        Layout:

        * ``obs[0:15]``  -- concatenation of the arm joint positions, the
          position of the site ``0_grip_site``, the linear velocity of that
          site multiplied by ``self.dt``, and the finger observations of
          ``agent0``.
        * ``obs[15:18]`` -- position of the body ``green_block``.
        * ``obs[18:22]`` -- quaternion of the body ``green_block``.

        Returns:
            A freshly allocated array; callers may modify it freely.
        """
        obs = np.zeros(self.obs_dim)

        obs[0:15] = np.concatenate([
            self.robot.get_arm_qpos(),
            # gripper position in global coordinates
            self.get_site_pos('0_grip_site'),
            # gripper linear velocity
            self.get_site_xvelp('0_grip_site') * self.dt,
            self.robot.end['agent0'].get_finger_observations(),
        ])
        # block position in global coordinates
        obs[15:18] = self.get_body_pos('green_block')
        # block rotation
        obs[18:22] = self.get_body_quat('green_block')

        return obs

    def _get_achieved_goal(self) -> np.ndarray:
        """Return the current position of the body ``green_block``."""
        return self.get_body_pos('green_block')

    def _get_desired_goal(self) -> np.ndarray:
        """Return the current position of the site ``goal_site``."""
        return self.get_site_pos('goal_site')

    def compute_rewards(self, achieved_goal: np.ndarray = None, desired_goal: np.ndarray = None, info: dict = None, **kwargs):
        """Sparse reward: ``-1`` while the goal is not reached, ``0`` once it is.

        The goal counts as reached when ``self.goal_distance(achieved_goal,
        desired_goal)`` is strictly below the threshold.

        Args:
            achieved_goal: Goal(s) actually achieved. When omitted, the current
                block position is used.
            desired_goal: Goal(s) to reach. When omitted, the current position
                of ``goal_site`` is used.
            info: Unused; accepted for interface compatibility.
            **kwargs: ``th`` overrides the default distance threshold (0.02).
                Other keys are ignored.

        Returns:
            ``-1.0`` where the distance is at or above the threshold, else a
            zero value, as ``np.float64``.
        """
        # Score the goals the caller supplied (needed when goals are relabelled
        # after the fact); fall back to the live simulator state otherwise.
        if achieved_goal is None:
            achieved_goal = self._get_achieved_goal()
        if desired_goal is None:
            desired_goal = self._get_desired_goal()

        d = self.goal_distance(achieved_goal, desired_goal)
        threshold = kwargs.get('th', self._GOAL_THRESHOLD)
        return -(d >= threshold).astype(np.float64)

    def _get_info(self) -> dict:
        """Return ``{'is_success': ...}`` for the current block and goal positions."""
        return {
            'is_success': self._is_success(
                self.get_body_pos('green_block'),
                self.get_site_pos('goal_site'),
                th=self._GOAL_THRESHOLD,
            )
        }

    def reset_object(self):
        """Place the block (and optionally the goal site) for a new episode.

        With ``is_randomize_object`` the block's x/y are drawn uniformly from
        ``[0.35, 0.55] x [-0.15, 0.15]``; otherwise a fixed pose is used. With
        ``is_randomize_goal`` the goal is drawn uniformly from
        ``[0.35, 0.55] x [-0.15, 0.15] x [0.46, 0.65]`` and re-drawn until it
        lies more than 0.05 away from the block.

        Returns:
            Whatever ``ManipulateEnv.reset_object`` returns.
        """
        # The pose is a 7-vector: the first three entries are the position
        # (used below); the rest are passed through to ``set_object_pose``.
        if self.is_randomize_object:
            random_x_pos, random_y_pos = np.random.uniform([0.35, -0.15], [0.55, 0.15])
            block_pose = np.array([random_x_pos, random_y_pos, 0.46, 1.0, 0.0, 0.0, 0.0])
        else:
            block_pose = np.array([0.5, 0.1, 0.46, 1.0, 0.0, 0.0, 0.0])
        self.set_object_pose('green_block:joint', block_pose)

        if self.is_randomize_goal:
            goal_low, goal_high = [0.35, -0.15, 0.46], [0.55, 0.15, 0.65]
            goal_pos = np.random.uniform(goal_low, goal_high)
            # Reject goals that start (almost) on top of the block.
            while np.linalg.norm(block_pose[:3] - goal_pos) <= 0.05:
                goal_pos = np.random.uniform(goal_low, goal_high)
            self.set_site_pos('goal_site', goal_pos)

        return super().reset_object()
if __name__ == "__main__":
    # Smoke test: drive the environment with uniformly random actions.
    env = PickAndPlaceEnv(
        is_render_camera_offscreen=True,
        is_randomize_end=False,
        is_randomize_object=True,
    )
    env = GoalEnvWrapper(env)
    try:
        env.reset()
        for t in range(int(1e5)):
            action = np.random.uniform(env.min_action, env.max_action, env.action_dim)
            s_, r, terminated, truncated, info = env.step(action)
            # An episode is over on either flag; stepping a finished episode
            # without a reset is not supported by the step/reset protocol.
            if terminated or truncated:
                env.reset()
    finally:
        # Release the simulator/renderer even if the loop raises or is interrupted.
        env.close()