droneProject
Loading...
Searching...
No Matches
droneRobot.py
Go to the documentation of this file.
import math
import os
import random
import sys
import threading
from math import sin, cos
from warnings import warn

# Machine-specific fallback path to the Webots Python controller bindings.
sys.path.append(r"E:\Webots\lib\controller\python")
if os.environ.get('WEBOTS_HOME') is not None:
    print("WEBOTS_HOME found")
    # Prefer the controller bindings of the installed Webots distribution.
    # (The original concatenated '//'-separated segments; os.path.join builds
    # an equivalent, correctly separated path.)
    sys.path.append(os.path.join(os.environ['WEBOTS_HOME'], 'lib', 'controller', 'python'))
else:
    print("WEBOTS_HOME is not defined, please set your environment variable correctly.")


import numpy as np
from controller import GPS, Gyro, InertialUnit, Motor, Supervisor
from gym.spaces import Box

from deepbots.supervisor.controllers.robot_supervisor_env import RobotSupervisorEnv
27
class DroneRobot(RobotSupervisorEnv):
    """Webots drone environment whose actions are PID gains for position control."""

    def __init__(self):
        """Initialize the DroneRobot environment and set default parameters.

        Sets up the observation and action spaces, episode bookkeeping, the
        Webots timestep, and the PID controller state (gains start at random
        values and are overwritten by ``apply_action``).  The random seed is
        fixed for reproducibility.

        @note Assumes the superclass ``RobotSupervisorEnv`` is correctly
              implemented and provides the Webots supervisor API
              (``getBasicTimeStep``, ``getSelf``, ...).
        """
        super().__init__()
        self.debugMode = False          # True -> ignore agent actions, use fixed PID gains
        self.randomSeed = 42
        random.seed(self.randomSeed)    # Fixed seed for reproducibility
        self.saveOK = False             # Set to True once the task is solved

        self.angle_bound = 90           # Max |roll|/|pitch|/|yaw| (degrees) before episode ends
        self.max_velocity = 150         # Max motor velocity
        # NOTE(review): location_bound was read by is_done() but never
        # initialized anywhere in the original (AttributeError at runtime);
        # 10 m per axis is a guess — confirm the intended bound.
        self.location_bound = 10

        self.avg_target_score = -20     # Mean of last 100 episode scores to consider task solved

        self.target_location = [0, 0, 2]

        # PID controller state (per axis), used by update_motor_power().
        self.past_errors = {"x": 0, "y": 0, "z": 0}
        self.integral_errors = {"x": 0, "y": 0, "z": 0}

        self.didOnce = False            # True once the action has been applied this episode
        self.actionCount = 0
        # Get the Webots simulation timestep (milliseconds).
        self.timestep = self.getBasicTimeStep()

        self.dt = 1 / 200               # Controller period: run at 200 Hz
        self.steps_per_episode = 3000   # Max number of steps per episode
        self.episode_score = 0          # Score accumulated during an episode
        self.episode_score_list = []    # All episode scores; used to check if task is solved

        # Attributes read by other methods before reset()/get_reward() run;
        # initializing them here avoids first-use AttributeErrors.
        self.cur_step_count = 0         # Also reset in reset()
        self.stepFlag = False           # Set when the step budget ends an episode
        self.previous_velocity = np.zeros(3)   # For the stability term in get_reward()
        self.last_ep_score = 0          # Logged by apply_action()

        self.yaw_gain = 0               # Yaw P gain; 0 disables yaw control

        # PID gains start randomized; random.uniform is called in the same
        # X_P..Z_D order as the original, so the seeded sequence is unchanged.
        self.pid_gains = {
            f'{axis}_{term}': random.uniform(0, 100)
            for axis in ('X', 'Y', 'Z')
            for term in ('P', 'I', 'D')
        }
        self.motor_power = {'m1': 0, 'm2': 0, 'm3': 0, 'm4': 0}

        # Observation layout (12 values), matching get_observations():
        #   0-2   position (x, y, z)
        #   3-5   rotation as Euler angles (roll, pitch, yaw), degrees
        #   6-8   linear velocity (x, y, z)
        #   9-11  angular velocity (x, y, z)
        # All bounds are infinite.
        self.observation_space = Box(low=np.full(12, -np.inf),
                                     high=np.full(12, np.inf),
                                     dtype=np.float64)

        # Actions are the nine PID gains, each in [0, 100]:
        # [X_P, X_I, X_D, Y_P, Y_I, Y_D, Z_P, Z_I, Z_D].
        self.action_space = Box(low=np.zeros(9),
                                high=np.full(9, 100),
                                dtype=np.float16)

        # Grab the robot reference from the supervisor to access robot methods.
        self.robot = self.getSelf()
132
133 def update_motor_power(self, current_location):
134 """
135
136 @brief Updates the motor power for the drone based on the PID controller and the current location.
137
138 This method calculates the error between the drone's current location and the target location for
139 each axis (x, y, z) and adjusts the motor power accordingly using PID control. It also applies yaw
140 control to align the drone towards the target direction. For testing purposes, yaw gain can be set to
141 zero.
142
143 @param current_location A list containing the drone's current position (x, y, z) and yaw angle.
144
145 @details
146 - Computes proportional (P), integral (I), and derivative (D) terms for PID control on each axis.
147 - Normalizes yaw angle errors to the range [-π, π].
148 - Updates motor power values for maintaining stability and reaching the target location.
149 - Clips motor power values to ensure they remain within the allowed range.
150
151 @note Assumes the `current_location` input includes yaw as the third element.
152 @see self.pid_gains, self.motor_power
153
154
155 """
156 # Compute error between target and current position
157 error = {
158 "x": self.target_location[0] - current_location[0],
159 "y": self.target_location[1] - current_location[1],
160 "z": self.target_location[2] - current_location[2],
161 }
162
163 # Compute the angle (yaw) to target location in the xy-plane (in radians)
164 delta_x = self.target_location[0] - current_location[0]
165 delta_y = self.target_location[1] - current_location[1]
166 desired_yaw = math.atan2(delta_y, delta_x)
167
168 # Calculate the drone's current yaw angle (orientation in the xy-plane)
169 current_yaw = math.atan2(current_location[1], current_location[0])
170
171 # Calculate the error in yaw (angle difference), normalized to [-pi, pi]
172 error_yaw = desired_yaw - current_yaw
173 error_yaw = (error_yaw + math.pi) % (2 * math.pi) - math.pi
174
175 # Initialize motor power dictionary
176 motor_power = {
177 'm1': 0,
178 'm2': 0,
179 'm3': 0,
180 'm4': 0
181 }
182
183 # PID calculations for each axis (x, y, z)
184 for axis in ['x', 'y', 'z']:
185 # Proportional term (P)
186 P = self.pid_gains[f'{axis.upper()}_P'] * error[axis]
187
188 # Integral term (I)
189 self.integral_errors[axis] += error[axis] * self.dt
190 I = self.pid_gains[f'{axis.upper()}_I'] * self.integral_errors[axis]
191
192 # Derivative term (D)
193 D = self.pid_gains[f'{axis.upper()}_D'] * (error[axis] - self.past_errors[axis]) / self.dt
194
195 # Total PID output for this axis
196 PID_output = P + I + D
197
198 # Update motor power based on the axis
199 if axis == "x":
200 motor_power['m1'] -= PID_output # Adjust for roll (x-axis)
201 motor_power['m2'] += PID_output
202 motor_power['m3'] += PID_output
203 motor_power['m4'] -= PID_output
204 elif axis == "y":
205 motor_power['m1'] -= PID_output # Adjust for pitch (y-axis)
206 motor_power['m2'] -= PID_output
207 motor_power['m3'] += PID_output
208 motor_power['m4'] += PID_output
209 elif axis == "z":
210 motor_power['m1'] += PID_output # Adjust for vertical control
211 motor_power['m2'] += PID_output
212 motor_power['m3'] += PID_output
213 motor_power['m4'] += PID_output
214
215 # Yaw control using Proportional control (P only)
216 yaw_P = self.yaw_gain * error_yaw
217 motor_power['m1'] += yaw_P # Adjust for yaw
218 motor_power['m2'] -= yaw_P
219 motor_power['m3'] += yaw_P
220 motor_power['m4'] -= yaw_P
221 #print("Pre-clipped motor power:", motor_power)
222 # Clip the values to ensure motors' power stays within reasonable limits
223 motor_power = {key: np.clip(value, 0, self.max_velocity) for key, value in motor_power.items()}
224
225 # Set motor power directly
226 self.motor_power = motor_power
227
228 # Update past errors for the next iteration
229 for axis in ['x', 'y', 'z']:
230 self.past_errors[axis] = error[axis]
231 self.past_errors['yaw'] = error_yaw
232 #print(self.motor_power)
233
234 def axis_angle_to_euler(self,axis_angle):
235 """
236
237 @brief Converts an axis-angle representation to Euler angles in degrees.
238
239 This method takes a 4D vector representing an axis-angle (x, y, z, angle)
240 and converts it to the corresponding Euler angles (roll, pitch, yaw) in degrees.
241
242 @param axis_angle A list or tuple containing the axis components (x, y, z) and the rotation angle.
243 @return A tuple (roll, pitch, yaw) representing the Euler angles in degrees.
244
245 @details
246 - Normalizes the axis vector if its magnitude is non-zero.
247 - Converts the axis-angle to a quaternion and then to Euler angles.
248 - Handles edge cases to ensure angles are properly constrained.
249
250 @note The input angle is expected in radians.
251
252
253 """
254 x, y, z, angle = axis_angle
255
256 # Normalize the axis vector
257 axis_length = math.sqrt(x*x + y*y + z*z)
258 if axis_length != 0:
259 x /= axis_length
260 y /= axis_length
261 z /= axis_length
262
263 # Convert angle to radians
264 angle_rad = angle
265
266 # Convert axis-angle to quaternion
267 w = math.cos(angle_rad / 2)
268 x = x * math.sin(angle_rad / 2)
269 y = y * math.sin(angle_rad / 2)
270 z = z * math.sin(angle_rad / 2)
271
272 # Convert quaternion to Euler angles
273 ysqr = y * y
274
275 t0 = +2.0 * (w * x + y * z)
276 t1 = +1.0 - 2.0 * (x * x + ysqr)
277 roll = math.degrees(math.atan2(t0, t1))
278
279 t2 = +2.0 * (w * y - z * x)
280 t2 = +1.0 if t2 > +1.0 else t2
281 t2 = -1.0 if t2 < -1.0 else t2
282 pitch = math.degrees(math.asin(t2))
283
284 t3 = +2.0 * (w * z + x * y)
285 t4 = +1.0 - 2.0 * (ysqr + z * z)
286 yaw = math.degrees(math.atan2(t3, t4))
287
288 return roll, pitch, yaw
289
291 """
292
293 @brief Retrieves the current state of the drone as observations.
294
295 This method gathers the drone's current position, orientation, and velocity, and combines them
296 into a single observation vector.
297
298 @return A numpy array containing the concatenated observations:
299 [position (x, y, z), rotation (roll, pitch, yaw), velocity (linear and angular)].
300
301 @details
302 - The position is obtained from the `translation` field of the robot.
303 - The rotation is converted from axis-angle representation to Euler angles using `axis_angle_to_euler`.
304 - The velocity includes both linear and angular components.
305
306 @note Assumes that the Webots simulation provides the required fields and velocity information.
307 @see axis_angle_to_euler
308
309
310 """
311
312 posField = self.robot.getField("translation")
313 rotField = self.robot.getField("rotation")
314 drone_position = posField.getSFVec3f()
315
316 drone_rotation = rotField.getSFRotation()
317 drone_rotation = self.axis_angle_to_euler(drone_rotation)
318 speed = self.robot.getVelocity()
319 observation = np.concatenate([drone_position, drone_rotation, speed])
320
321 return observation
322
324 """
325
326 @brief Provides a default observation vector.
327
328 This method returns a default observation consisting of zero values, which matches the shape
329 of the observation space.
330
331 @return A list of zeros with a length equal to the number of dimensions in the observation space.
332
333 @details
334 - The default observation is used as a placeholder or initialization value when no meaningful
335 observation data is available.
336
337
338 """
339 # This method just returns a zero vector as a default observation
340 return [0.0 for _ in range(self.observation_space.shape[0])]
341
342 def get_reward(self, action=None):
343 """
344
345 @brief Computes the reward based on the drone's current state and actions.
346
347 This method evaluates the drone's performance in the environment by calculating a composite reward
348 that combines various factors such as distance to the target, stability, efficiency, and task completion.
349
350 @param action (Optional) The action taken by the drone, though not directly used in the current implementation.
351 @return A float value representing the computed reward for the current timestep.
352
353 @details
354 - **Distance to Target:** A quadratic penalty based on the squared distance to the target location.
355 - **Stability Reward:** Penalizes abrupt changes in velocity.
356 - **Efficiency Reward:** Adds a small time penalty to encourage faster task completion.
357 - **Stay Penalty:** Penalizes the drone for staying near the origin after a certain number of steps.
358 - **Direction Reward:** Rewards the drone for moving in the direction of the target.
359 - **Goal Bonus:** Provides a significant reward for reaching the target location.
360 - **Time Penalty:** Encourages efficiency by applying a small penalty for each timestep.
361
362 @note This function assumes observations include position and velocity as described in `get_observations`.
363
364
365 """
366 observations = self.get_observations()
367
368 # Distance to target
369 distance_to_target = np.linalg.norm(observations[:3] - self.target_location)
370 reward_distance = -distance_to_target**2 # Quadratic penalty
371
372 # Stability reward (encourage smooth movement)
373 velocity = observations[3:6]
374 velocity_change = np.linalg.norm(velocity - self.previous_velocity)
375 reward_stability = -0.1 * velocity_change
376
377 # Efficiency reward (smaller time penalty)
378 reward_efficiency = -0.01 * self.cur_step_count
379
380 # Penalty for staying near the origin
381 start_location = np.array([0, 0, 0])
382 distance_from_start = np.linalg.norm(observations[:3] - start_location)
383 reward_stay_penalty = -50 if distance_from_start < 0.1 and self.cur_step_count > 100 else 0
384
385 # Direction reward (encourage moving toward the target)
386 direction = (self.target_location - observations[:3]) / np.linalg.norm(self.target_location - observations[:3])
387 reward_direction = max(0, np.dot(direction, velocity)) # Positive reward
388
389 # Goal bonus (larger incentive for success)
390 goal_bonus = 50 if distance_to_target < 0.1 else 0
391
392 # Time penalty (encourage faster task completion)
393 time_penalty = -0.01 * self.cur_step_count
394
395 # Combine rewards
396 reward = (
397 reward_distance
398 + 0.1 * reward_stability
399 + reward_efficiency
400 + reward_stay_penalty
401 + reward_direction
402 + goal_bonus
403 + time_penalty
404 )
405
406 self.previous_velocity = velocity
407 self.last_ep_score = reward
408
409 return reward
410
411 def is_done(self):
412 """
413
414 @brief Checks whether the current episode is complete.
415
416 This method evaluates conditions to determine if the episode should terminate, such as exceeding
417 step limits, going out of bounds, or exceeding angular constraints.
418
419 @return `True` if the episode is complete, `False` otherwise.
420
421 @details
422 - **Step Count Trigger:** The episode ends if the current step count exceeds the maximum allowed steps.
423 - **Location Bound Trigger:** The episode terminates if the drone's position deviates beyond a predefined
424 bound from the target location on any axis.
425 - **Angle Bound Trigger:** The episode terminates if any rotational angle (roll, pitch, yaw) exceeds
426 the specified angular limit.
427
428 @note Observations are fetched from `get_observations`, and positional and angular bounds are defined
429 within the function.
430
431
432 """
433 if self.cur_step_count >= self.steps_per_episode:
434 print("\033[93mStep count triggered\033[0m")
435 #warn("Step count triggered")
436 #print("Step count triggered")
437 self.stepFlag = True
438 return True
439
440 x_fault = np.abs(self.get_observations()[0] - self.target_location[0])
441 y_fault = np.abs(self.get_observations()[1] - self.target_location[1])
442 z_fault = np.abs(self.get_observations()[2] - self.target_location[2])
443
444 #print(x_fault,y_fault,z_fault)
445
446 if (x_fault > self.location_bound) or (y_fault > self.location_bound) or (z_fault > self.location_bound):
447 print("\033[93mLocation bound triggered\033[0m")
448 #warn("Location bound triggered")
449 #print("Location bound triggered")
450 return True
451
452 angles = np.abs(self.get_observations()[3:6])
453 #print(f"Angles are {angles}")
454 if (angles[0] > self.angle_bound) or (angles[1] > self.angle_bound) or (angles[2] > self.angle_bound):
455 #warn("Angle bound triggered")
456 print("\033[93mAngle bound triggered\033[0m")
457 #print("Angle bound triggered")
458 return True
459 else:
460 return False
461
462
463 def solved(self):
464 """
465
466 @brief Determines if the task is considered "solved."
467
468 The task is deemed solved if the mean score of the last 100 episodes exceeds
469 a predefined threshold (`avg_target_score`), indicating consistent performance.
470
471 @return `True` if the task is solved, `False` otherwise.
472
473 @details
474 - **Threshold Check:** The task is solved if the mean score of the last 100 episodes is greater than `avg_target_score`.
475 - If the condition is met, the `saveOK` flag is set to `True`.
476
477 @note Requires the `episode_score_list` to contain at least 100 episodes.
478
479
480 """
481 if len(self.episode_score_list) > 100: # Last 100 trials
482 mean_score = np.mean(self.episode_score_list[-100:])
483 if mean_score > self.avg_target_score: # Target a low average penalty
484 self.saveOK = True
485 return True
486 return False
487
488 def get_info(self):
489 """
490
491 @brief Provides additional information about the environment.
492
493 This method returns a dictionary containing environment-specific details.
494
495 @return A dictionary with key-value pairs representing additional environment information.
496
497 @details
498 - Currently, this method returns a placeholder dictionary `{"Dummy": "dummy"}`.
499 - Can be extended to include meaningful diagnostic or metadata information.
500
501
502 """
503
504 return {"Dummy": "dummy"}
    def launchTensorBoard(self,log_path):
        """Launch TensorBoard (blocking) on the given log directory.

        Runs ``tensorboard --logdir=<log_path>`` via ``os.system`` and blocks
        the calling thread until TensorBoard exits; use ``startTensorBoard``
        for an asynchronous launch.

        @param log_path Path to the directory containing TensorBoard log files.

        @note Requires TensorBoard to be installed and on the system PATH.
        """
        # NOTE(review): log_path is interpolated into a shell command without
        # quoting — a path with spaces or shell metacharacters will misbehave;
        # subprocess.run([...], shell=False) would be safer.
        os.system('tensorboard --logdir=' + log_path)
        return
523 def startTensorBoard(self,log_path):
524 """
525
526 @brief Starts TensorBoard in a separate thread.
527
528 This method creates and starts a new thread to run TensorBoard, ensuring that it does not block the main process.
529
530 @param log_path The path to the directory containing TensorBoard log files.
531
532 @details
533 - Internally calls `launchTensorBoard` in a separate thread.
534 - Useful for starting TensorBoard asynchronously during training or simulation.
535
536 @note Ensure the `log_path` is valid and TensorBoard is installed.
537
538 """
539 t = threading.Thread(target=self.launchTensorBoard, args=([log_path]))
540 t.start()
541 return
542
    def render(self, mode='human'):
        """No-op rendering hook kept for the Gym environment API.

        @param mode Rendering mode (default ``"human"``); currently ignored.

        @note Not implemented; can be extended to provide visualizations.
        """
        pass
558
    def step(self, action):
        """Advance the simulation one timestep and apply ``action``.

        @param action PID-gain vector forwarded to ``apply_action`` — applied
               only on the first step of each episode (guarded by
               ``self.didOnce``).
        @return Tuple ``(observation, reward, done, info)`` from
                ``get_observations``, ``get_reward``, ``is_done``, and
                ``get_info`` respectively.
        """
        self.cur_step_count += 1

        # Device handles are (re-)acquired on every step.  NOTE(review): the
        # accompanying docs state the motors "need to be defined in step to
        # work properly" — presumably a Webots reset quirk; confirm before
        # hoisting this setup out of the step loop.
        self.m1_motor = self.getDevice('m1_motor')  # type: Motor
        self.m1_motor.setPosition(float('inf'))     # infinite position -> velocity-control mode

        self.m2_motor = self.getDevice('m2_motor')  # type: Motor
        self.m2_motor.setPosition(float('inf'))

        self.m3_motor = self.getDevice('m3_motor')  # type: Motor
        self.m3_motor.setPosition(float('inf'))

        self.m4_motor = self.getDevice('m4_motor')  # type: Motor
        self.m4_motor.setPosition(float('inf'))

        self.imu = self.getDevice('inertial_unit')  # type: InertialUnit
        self.imu.enable(self.timestep)

        self.gps = self.getDevice('gps')  # type: GPS
        self.gps.enable(self.timestep)

        self.gyro = self.getDevice('gyro')  # type: Gyro
        self.gyro.enable(self.timestep)

        # Apply the agent's PID gains once per episode.
        if not self.didOnce:
            self.apply_action(action)
            self.didOnce = True
        # Step the underlying Webots Robot (bypassing the Supervisor override
        # in the MRO); -1 signals the simulation was terminated.
        if super(Supervisor, self).step(self.timestep) == -1:
            exit()
        # Get measurements and run the PID controller on the new position.
        observation = self.get_observations()
        current_location = [
            observation[0],
            observation[1],
            observation[2],
        ]
        self.update_motor_power(current_location)
        # m1/m3 are driven with negated velocities — presumably
        # counter-rotating propellers; confirm against the drone model.
        self.m1_motor.setVelocity(-self.motor_power['m1'])
        self.m2_motor.setVelocity(self.motor_power['m2'])
        self.m3_motor.setVelocity(-self.motor_power['m3'])
        self.m4_motor.setVelocity(self.motor_power['m4'])

        return (
            observation,
            self.get_reward(action),
            self.is_done(),
            self.get_info(),
        )
632
633
    def reset(self):
        """Reset the environment and controller state for a new episode.

        Clears the motor power, step counter, once-per-episode action flag,
        and the PID error accumulators, then resets the Webots world and its
        physics and steps once so the reset takes effect.

        @return Zero-filled numpy array matching the observation-space shape.

        @note ``simulationReset``/``simulationResetPhysics`` require
              Webots > R2020b; older supervisors would need an override.
        """
        self.motor_power = {
            'm1': 0,
            'm2': 0,
            'm3': 0,
            'm4': 0
        }
        self.cur_step_count = 0
        self.didOnce = False
        # Clear PID state so stale errors don't leak into the next episode.
        self.past_errors = {"x": 0, "y": 0, "z": 0}
        self.integral_errors = {"x": 0, "y": 0, "z": 0}
        self.simulationReset()
        self.simulationResetPhysics()
        # Step the underlying Robot once (bypassing the Supervisor override)
        # so the reset propagates into the simulation.
        super(Supervisor, self).step(self.timestep)
        #return self.get_default_observation()
        return np.zeros(self.observation_space.shape)
669
670
671 def apply_action(self, action):
672 """
673
674 @brief Applies the specified action to the drone by updating PID controller gains.
675
676 This method sets the PID gains based on the provided action. If debug mode is active,
677 the action is ignored, and default PID gains are used instead.
678
679 @param action A list or array containing PID gain values for X, Y, and Z axes in the order:
680 [X_P, X_I, X_D, Y_P, Y_I, Y_D, Z_P, Z_I, Z_D].
681
682 @details
683 - In debug mode, fixed PID gains are applied, and the action parameter is ignored.
684 - Outside debug mode, the PID gains are updated directly from the action input.
685 - Resets the last episode score to zero after applying the action.
686
687 @note Ensure that the action parameter contains exactly 9 elements representing the PID gains.
688
689
690 """
691
692 if self.debugMode == True:
693 print("Debug mode is active, ignoring model actions")
694 self.pid_gains = {
695 'X_P': 10,
696 'X_I': 10,
697 'X_D': 0.5,
698
699 'Y_P': 10,
700 'Y_I': 10,
701 'Y_D': 0.5,
702
703 'Z_P': 10,
704 'Z_I': 10,
705 'Z_D': 0.5,
706 }
707
708 else:
709 print(f"Action is {action}")
710 self.pid_gains = {
711 'X_P': action[0],
712 'X_I': action[1],
713 'X_D': action[2],
714
715 'Y_P': action[3],
716 'Y_I': action[4],
717 'Y_D': action[5],
718
719 'Z_P': action[6],
720 'Z_I': action[7],
721 'Z_D': action[8],
722 }
723 print(f"last episodes score {self.last_ep_score}")
724 self.last_ep_score = 0
render(self, mode='human')
m1_motor
Needs to be defined in step to work properly.
startTensorBoard(self, log_path)
reset(self)
Overrides the base environment's reset behavior.
axis_angle_to_euler(self, axis_angle)
update_motor_power(self, current_location)
get_reward(self, action=None)
apply_action(self, action)
launchTensorBoard(self, log_path)
step(self, action)