[WIP: June2023] Deep Q-Learning using TorchSharp #710
Replies: 3 comments 6 replies
-
using Torch;
using System;
namespace DeepQLearning
{
    /// <summary>
    /// Minimal Deep Q-Learning loop for the CartPole-v0 environment.
    /// Trains a small MLP Q-network with a one-step TD target and
    /// epsilon-greedy exploration.
    /// NOTE(review): the Torch/Gym API used here is pseudocode — it does not
    /// match the real TorchSharp surface; verify against the actual library.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Q-network: 4 observation features (CartPole state) -> 2 Q-values
            // (one per discrete action: push left / push right).
            var model = new Sequential();
            model.Add(new Linear(4, 128));
            model.Add(new ReLU());
            model.Add(new Linear(128, 2));

            // Adam optimizer with learning rate 1e-3; squared-error TD loss.
            var optimizer = new Adam(model.Parameters(), 0.001);
            var loss = new MSELoss();

            // The environment to train against.
            var env = Gym.Make("CartPole-v0");

            // Training budget: total steps across all episodes.
            int episodeCount = 1000;
            int stepsPerEpisode = 200;
            int maxSteps = episodeCount * stepsPerEpisode;
            int stepCount = 0;
            int episode = 0;

            // Discount factor for future rewards (was an inline magic number).
            const double gamma = 0.99;

            // Epsilon-greedy exploration schedule. The original snippet always
            // acted greedily, which prevents a DQN from ever discovering better
            // actions; start fully random and decay toward a small floor.
            const double epsilonMin = 0.05;
            const double epsilonDecay = 0.995;
            double epsilon = 1.0;
            var rng = new Random();

            while (stepCount < maxSteps)
            {
                env.Reset();
                for (int step = 0; step < stepsPerEpisode; step++)
                {
                    // Current state as a batch-of-one input tensor.
                    var state = env.Observation;
                    var tensor = new Tensor(state, new[] { 1, state.Length });
                    var qValues = model.Forward(tensor);

                    // Greedy action from the Q-estimates; with probability
                    // epsilon take a uniformly random exploratory action.
                    var action = qValues.Max().Item2;
                    if (rng.NextDouble() < epsilon)
                    {
                        action = rng.Next(2);
                    }

                    // Take the action and observe the transition.
                    var result = env.Step(action);
                    var nextState = result.Observation;
                    var reward = result.Reward;
                    var done = result.Done;

                    // One-step TD target: only the chosen action's Q-value is
                    // updated; the other entries keep the network's own output
                    // so they contribute zero loss.
                    var target = qValues.Clone();
                    if (done)
                    {
                        // Terminal transition: no bootstrapped future value.
                        target[0, action] = reward;
                    }
                    else
                    {
                        var nextTensor = new Tensor(nextState, new[] { 1, nextState.Length });
                        var nextQValues = model.Forward(nextTensor);
                        var maxNextQ = nextQValues.Max().Item1;
                        target[0, action] = reward + gamma * maxNextQ;
                    }

                    // Train on this transition. The original code called
                    // tensor.Reshape(new[] { 1, 4 }) and discarded the result;
                    // Reshape returns a new tensor, so that line was a no-op —
                    // and the input is already shaped { 1, 4 } anyway.
                    optimizer.ZeroGrad();
                    var output = model.Forward(tensor);
                    var l = loss.Forward(output, target);
                    l.Backward();
                    optimizer.Step();

                    stepCount++;

                    // Episode ended early (pole fell / cart out of bounds).
                    if (done)
                    {
                        break;
                    }
                }

                // Decay exploration once per episode, clamped at the floor.
                epsilon = Math.Max(epsilonMin, epsilon * epsilonDecay);

                Console.WriteLine("Episode: " + episode);
                episode++;
            }

            // Release the environment's resources.
            env.Close();
        }
    }
}
|
Beta Was this translation helpful? Give feedback.
-
I think I reproduced the solution from the lecture in my repo here. |
Beta Was this translation helpful? Give feedback.
-
This is cool. I still hope we can build out a gym in .NET and maybe some shareable components for Q-learning. I don't have the expertise or experience to do that, but it'd be very cool. |
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
June 2023
#981 (comment)
Feb 2023
https://www.youtube.com/watch?v=217tCMsZu0I

Beta Was this translation helpful? Give feedback.
All reactions