droneProject
Loading...
Searching...
No Matches
Train_TDP.py
Go to the documentation of this file.
"""Train a TD3 agent on the DroneRobot environment and save the result.

Running this script:
  1. builds the drone environment and sets its target location,
  2. configures Ornstein-Uhlenbeck exploration noise,
  3. trains a TD3 ("MlpPolicy") model while logging to TensorBoard,
  4. saves the trained model under ``Training/Saved Models`` with a
     timestamped file name.
"""
import datetime  # Timestamp used to make the saved-model file name unique
import os  # File path handling

import numpy as np  # Numerical operations, used to build the noise vectors
from stable_baselines3 import TD3  # TD3 (Twin Delayed DDPG) algorithm
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise  # Exploration noise

from droneRobot import DroneRobot  # Custom class that defines the drone environment

# Path for saving the trained model; the timestamp keeps each run's file unique
model_path = os.path.join('Training', 'Saved Models', f'td3_drone_model{datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}')
# Path for storing TensorBoard logs
log_path = os.path.join('Training', 'Logs', 'td3_drone_logs')

# Initialize the environment (the drone control simulation).
# NOTE(review): this statement was missing from the listing although `env` is
# used everywhere below; a no-argument constructor is assumed -- confirm
# against droneRobot.DroneRobot.
env = DroneRobot()
env.target_location = [0, 0, 2]  # X Y Z target location; all reward is calculated based on this
env.debugMode = False  # Set to True to test your PID constants, defined in take_action()

# Number of actions, taken from the last dimension of the action space shape
n_actions = env.action_space.shape[-1]

# Ornstein-Uhlenbeck action noise (zero mean, sigma 0.1 per action) adds
# randomness to the actions, promoting exploration during training
action_noise = OrnsteinUhlenbeckActionNoise(
    mean=np.zeros(n_actions),
    sigma=0.1 * np.ones(n_actions),
)

# Start TensorBoard so training progress can be visualized (project helper)
env.startTensorBoard(log_path)

# Training length: total timesteps = steps per episode * number of episodes
episodes = 1000
timesteps = env.steps_per_episode * episodes

# Create the TD3 model:
#   "MlpPolicy"     - Multi-Layer Perceptron policy for continuous action spaces
#   action_noise    - noise added to the actions for better exploration
#   verbose=1       - print some logging details during training
#   device="auto"   - use the GPU when one is available, otherwise fall back
#                     to the CPU (a hard-coded 'cuda' fails on GPU-less machines)
#   tensorboard_log - where TensorBoard logs are written
model = TD3(
    "MlpPolicy",
    env,
    action_noise=action_noise,
    verbose=1,
    device="auto",
    tensorboard_log=log_path,
)

# Train for the requested number of timesteps. For off-policy algorithms such
# as TD3, log_interval is counted in episodes: log every 10 episodes.
model.learn(total_timesteps=timesteps, log_interval=10)

# Save the trained model. `save` takes the destination path as its single
# path argument; passing a file name positionally *and* `path=` as a keyword
# raises TypeError ("got multiple values for argument 'path'") and the
# trained model would be lost after the whole run.
model.save(model_path)