diff --git a/src/Gym.Environments/Envs/Aether/CarDynamics.cs b/src/Gym.Environments/Envs/Aether/CarDynamics.cs new file mode 100644 index 0000000..6778cba --- /dev/null +++ b/src/Gym.Environments/Envs/Aether/CarDynamics.cs @@ -0,0 +1,464 @@ +using Gym.Collections; +using Gym.Envs; +using Gym.Observations; +using Gym.Spaces; +using NumSharp; +using SixLabors.ImageSharp; +using System; +using System.Collections.Generic; +using System.ComponentModel; +using tainicom.Aether.Physics2D.Common; +using tainicom.Aether.Physics2D.Collision.Shapes; +using tainicom.Aether.Physics2D.Dynamics; +using tainicom.Aether.Physics2D.Dynamics.Contacts; +using tainicom.Aether.Physics2D.Dynamics.Joints; +using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Drawing; +using SixLabors.ImageSharp.Drawing.Processing; +using System.Linq; +using Gym.Threading; +using SixLabors.ImageSharp.ColorSpaces; +using Gym.Exceptions; +using System.Diagnostics; + +namespace Gym.Environments.Envs.Aether +{ + #region Road Tiles + /// + /// Not used in the python Gym environment, added here for completeness. + /// + public class RoadTile + { + public float Friction { get; set; } = 0f; + public bool RoadVisited { get; set; } = false; + public Rgba32 Color { get; set; } = new Rgba32(102,102,102); // 0.4,0.4,0.4 + public int Index { get; set; } = 0; + public Fixture PhysicsFixture { get; set; } + } + #endregion + /// + /// Top-down car dynamics simulation. + /// Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell. + /// This simulation is a bit more detailed, with wheels rotation. + /// Created by Oleg Klimov, converted to C# by Jacob Anderson + /// + public class CarDynamics + { + + #region CONSTANTS + private const float SIZE = 0.02f; + private const float ENGINE_POWER = 100000000 * SIZE * SIZE; + private const float WHEEL_MOMENT_OF_INERTIA = 4000 * SIZE * SIZE; + private const float FRICTION_LIMIT = ( + 1000000 * SIZE * SIZE + ); // friction ~= mass ~= size^2 (calculated implicitly using density) + private const float WHEEL_R = 27; + private const float WHEEL_W = 14; + #endregion + + #region Polygon Presets + private static readonly Vertices WHEELPOS = new Vertices(new Vector2[] { + new Vector2(-55f,80f), new Vector2(55f,80f), new Vector2(-55f,-82f), new Vector2(55f,-82f) + }); + private static readonly Vertices HULL_POLY1 = new Vertices(new Vector2[] { + new Vector2(-60f, +130f), new Vector2(+60f, +130f), new Vector2(+60f, +110f), new Vector2(-60f, +110f) + }); + private static readonly Vertices HULL_POLY2 = new Vertices(new Vector2[] { + new Vector2(-15f, +120f), new Vector2(+15f, +120f), new Vector2(+20f, +20f), new Vector2(-20f, 20f) + }); + private static readonly Vertices HULL_POLY3 = new Vertices(new Vector2[] { + new Vector2(+25f, +20f), + new Vector2(+50f, -10f), + new Vector2(+50f, -40f), + new Vector2(+20f, -90f), + new Vector2(-20f, -90f), + new Vector2(-50f, -40f), + new Vector2(-50f, -10f), + new Vector2(-25f, +20f) + }); + private static readonly Vertices HULL_POLY4 = new Vertices(new Vector2[] { + new Vector2(-50f, -120f), new Vector2(+50f, -120f), new Vector2(+50f, -90f), new Vector2(-50f, -90f) + }); + #endregion + + #region Visualization Customization + public Rgba32 WheelColor { get; set; } = new Rgba32(0, 0, 0); + public Rgba32 WheelWhite { get; set; } = new Rgba32(77, 77, 77); + public Rgba32 MudColor { get; set; } = new Rgba32(102, 102, 0); + public Rgba32 BodyColor { get; set; } = new Rgba32(204, 0, 0); + #endregion + + private World World { get; set; } + public Body Hull { get; private set; } + public List Wheels { get; private set; } + public float FuelSpent { get; set; } = 0f; + + #region Telemetry + public float SteerAngle { get; private set; } + public float Speed { get; private set; } + #endregion + + #region Particles + internal class CarParticle + { + internal Rgba32 RenderColor { get; set; } + internal List Poly { get; set; } + internal bool IsGrass { get; set; } = false; + + internal CarParticle(Vector2 point1, Vector2 point2, bool grass) + { + Poly = new List(); + Poly.Add(point1); + Poly.Add(point2); + IsGrass = grass; + } + } + private List Particles { get; set; } = new List(); + #endregion + + #region Wheels + public class Wheel + { + internal Body Unit { get; set; } + internal float WheelRad { get; set; } + internal float Gas { get; set; } + internal float Brake { get; set; } + internal float Steer { get; set; } + /// + /// Wheel angle + /// + internal float Phase { get; set; } + /// + /// Angular velocity + /// + internal float Omega { get; set; } + internal Rgba32 Color { get; set; } + internal Vector2? SkidStart { get; set; } + internal CarParticle SkidParticle { get; set; } + internal RevoluteJoint Joint { get; set; } + public List Tiles { get; set; } + + internal Wheel() + { + } + internal Wheel CreateBody(Body car, Vector2 pos) + { + float front_k = (pos.Y > 0f ? 1f : 1f); // ?? + Vertices wheel_poly = new Vertices(new Vector2[] { + new Vector2(-WHEEL_W*front_k*SIZE,WHEEL_R*front_k*SIZE), + new Vector2(WHEEL_W*front_k*SIZE,WHEEL_R*front_k*SIZE), + new Vector2(WHEEL_W*front_k*SIZE,-WHEEL_R*front_k*SIZE), + new Vector2(-WHEEL_W*front_k*SIZE,-WHEEL_R*front_k*SIZE) + }); + Vector2 p = car.Position + pos * SIZE; + Unit = car.World.CreateBody(position: p, rotation: car.Rotation, bodyType: BodyType.Dynamic); + Unit.Tag = this; + Fixture f = Unit.CreateFixture(new PolygonShape(new Vertices(wheel_poly), 0.1f)); + f.CollisionCategories = Category.All; + f.CollidesWith = Category.Cat1; + f.Restitution = 0f; + f.Tag = this; + WheelRad = front_k * WHEEL_R * SIZE; + Gas = 0f; + Brake = 0f; + Steer = 0f; + Phase = 0f; + Omega = 0f; + SkidStart = null; + SkidParticle = null; + // The wheel revolution physics + RevoluteJoint rjd = new RevoluteJoint(car, Unit, new Vector2(pos.X * SIZE, pos.Y * SIZE), new Vector2(0f, 0f)); + rjd.MotorEnabled = true; + rjd.LimitEnabled = true; + rjd.MaxMotorTorque = 180f * 900f * SIZE * SIZE; + rjd.MotorSpeed = 0f; + rjd.LowerLimit = -0.4f; + rjd.UpperLimit = 0.4f; + Joint = rjd; + car.World.Add(rjd); + Tiles = new List(); + return (this); + } + } + #endregion + + + public CarDynamics(World world, float init_angle, float init_x, float init_y) + { + World = world; + Wheels = new List(); + Hull = world.CreateBody(position: new Vector2(init_x, init_y), rotation: init_angle, bodyType: BodyType.Dynamic); + Fixture f = Hull.CreateFixture(new PolygonShape(ScaleIt(HULL_POLY1), 1f)); + f = Hull.CreateFixture(new PolygonShape(ScaleIt(HULL_POLY2), 1f)); + f = Hull.CreateFixture(new PolygonShape(ScaleIt(HULL_POLY3), 1f)); + f = Hull.CreateFixture(new PolygonShape(ScaleIt(HULL_POLY4), 1f)); + foreach (Vector2 v in WHEELPOS) + { + Wheel w = new Wheel(); + w.CreateBody(Hull, v); + Wheels.Add(w); + w.Color = WheelColor; + } + } + + private Vertices ScaleIt(Vertices vx, float scale = SIZE) + { + Vertices n = new Vertices(); + foreach (Vector2 v in vx) + { + n.Add(new Vector2(v.X * scale, v.Y * scale)); + } + return (n); + } + + #region Car Interface + /// + /// Apply gas to rear 2 wheels + /// + /// How much gas gets applied. Gets clipped between 0 and 1 + /// + public float Gas(float gas) + { + gas = np.clip(gas, 0f, 1f); + for(int i=2; i < Wheels.Count; i++) + { + // Rear-wheel drive, positive traction + Wheel w = Wheels[i]; + float diff = gas - w.Gas; + if (diff > 0.1f) + { + diff = 0.1f; // gradually increase, but stop immediately + } + w.Gas += diff; + } + return (gas); + } + /// + /// Apply braking across all 4 wheels + /// + /// Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation, between 0 and 1.0 + public void Brake(float b) + { + foreach (Wheel w in Wheels) + { + w.Brake = b; + } + } + /// + /// Assumes 2-wheel control, sign is direction, turning the front wheels of the car left or right. + /// + /// target position, it takes time to rotate steering wheel from side-to-side, range is -1 to 1 + public void Steer(float s) + { + Wheels[0].Steer = s; + Wheels[1].Steer = s; + } + + /// + /// Computes the updae on the given wheel and returns the fuel spent by the wheel + /// + /// The time step + /// The wheel being simulated + /// The amount of fuel spent in the step + private float Step(float dt, Wheel w) + { + float steer_angle = w.Steer - w.Joint.JointAngle; + float dir = (steer_angle < 0f ? -1f : (steer_angle > 0f ? 1f : 0f)); + float val = Math.Abs(steer_angle); + w.Joint.MotorSpeed = dir * Math.Min(50f * val, 3f); + + // Position -> friction_limit + bool grass = true; + float friction_limit = FRICTION_LIMIT * 0.6f; // Grass friction if no tile + foreach (RoadTile tile in w.Tiles) + { + friction_limit = Math.Max(friction_limit, FRICTION_LIMIT * tile.Friction); + grass = false; + } + + // Force + Vector2 forw = w.Unit.GetWorldVector(new Vector2(0f, 1f)); + Vector2 side = w.Unit.GetWorldVector(new Vector2(1f, 0f)); + Vector2 v = w.Unit.LinearVelocity; + float vf = forw.X * v.X + forw.Y * v.Y; + float vs = side.X * v.X + side.Y * v.Y; + + // # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy + // # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power + // # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega + + // # add small coef not to divide by zero + w.Omega += dt * ENGINE_POWER * w.Gas / WHEEL_MOMENT_OF_INERTIA / (Math.Abs(w.Omega) + 5f); + float fuel_spent = dt * ENGINE_POWER * w.Gas; + if (w.Brake >= 0.9f) + { + w.Omega = 0f; + } + else if(w.Brake > 0f) + { + float brake_force = 15f; // Radians per second + dir = (w.Omega < 0f ? 1f : (w.Omega > 0f ? -1f : 0f)); // Oppposite sign + val = brake_force * w.Brake; + if (Math.Abs(val) > Math.Abs(w.Omega)) + { + val = Math.Abs(w.Omega); // low speed => same as = 0 + } + w.Omega += dir * val; + } + w.Phase += w.Omega * dt; + + float vr = w.Omega * w.WheelRad; // rotating wheel speed + float f_force = -vf + vr;// force direction is direction of speed difference + float p_force = -vs; + + // Physically correct is to always apply friction_limit until speed is equal. + // But dt is finite, that will lead to oscillations if difference is already near zero. + + // Random coefficient to cut oscillations in few steps (have no effect on friction_limit) + f_force *= 205000f * SIZE * SIZE; + p_force *= 205000f * SIZE * SIZE; + float force = np.sqrt(f_force * f_force + p_force * p_force); + + // Skid trace + if (Math.Abs(force) > 2f * friction_limit) + { + if (w.SkidParticle != null && w.SkidParticle.IsGrass && w.SkidParticle.Poly.Count < 30) + { + w.SkidParticle.Poly.Add(w.Unit.Position); + } + else if (!w.SkidStart.HasValue) + { + w.SkidStart = w.Unit.Position; + } + else + { + w.SkidParticle = new CarParticle(w.SkidStart.Value, w.Unit.Position, grass); + Particles.Add(w.SkidParticle); + if (Particles.Count > 30) + { + Particles.RemoveAt(0); + } + w.SkidStart = null; + } + } + else + { + w.SkidStart = null; + w.SkidParticle = null; + } + if (Math.Abs(force) > friction_limit) + { + f_force /= force; + p_force /= force; + force = friction_limit; // Correct physics here + f_force *= force; + p_force *= force; + } + + w.Omega -= dt * f_force * w.WheelRad / WHEEL_MOMENT_OF_INERTIA; + + w.Unit.ApplyForce(new Vector2(p_force * side.X + f_force * forw.X, p_force * side.Y + f_force * forw.Y)); + + return (fuel_spent); + + } + + public CarDynamics Step(float dt) + { + foreach (Wheel w in Wheels) + { + FuelSpent += Step(dt, w); + } + SteerAngle = Hull.Rotation; + Speed = Hull.LinearVelocity.Length(); + return (this); + } + + public static Vector2 RotateVec(Vector2 v, float angle) { + float cos = (float)Math.Cos(angle); + float sin = (float)Math.Sin(angle); + return new Vector2(v.X * cos - v.Y * sin, v.X * sin + v.Y * cos); + } + public void Draw(Image img, float zoom, Vector2 translation, float angle, bool draw_particles = true) + { + if (draw_particles) + { + foreach (CarParticle p in Particles) + { + PointF[] poly = new PointF[p.Poly.Count]; + + for (int i = 0; i < p.Poly.Count; i++) + { + Vector2 px = RotateVec(p.Poly[i], angle) * zoom + translation; + poly[i] = new PointF(px.X, px.Y); + } + img.Mutate(i => i.DrawLines(p.RenderColor, 2f, poly)); + } + } + // Draw the Hull + foreach (Fixture f in Hull.FixtureList) + { + Transform trans = f.Body.GetTransform(); + Vertices polyVerts = ((PolygonShape)f.Shape).Vertices; + PointF[] path = new PointF[polyVerts.Count]; + for (int i = 0; i < polyVerts.Count; i++) + { + Vector2 px = RotateVec(Transform.Multiply(polyVerts[i], ref trans), angle) * zoom + translation; + path[i] = new PointF(px.X, px.Y); + } + img.Mutate(i => i.FillPolygon(BodyColor, path)); + } + + foreach (Wheel w in Wheels) + { + Transform trans = w.Unit.GetTransform(); + PointF[] path = null; + foreach (Fixture f in w.Unit.FixtureList) + { + Vertices polyVerts = ((PolygonShape)f.Shape).Vertices; + path = new PointF[polyVerts.Count]; + for (int i = 0; i < polyVerts.Count; i++) + { + Vector2 px = RotateVec(Transform.Multiply(polyVerts[i], ref trans), angle) * zoom + translation; + path[i] = new PointF(px.X, px.Y); + } + img.Mutate(i => i.FillPolygon(w.Color, path)); + } + float a1 = w.Phase; + float a2 = a1 + 1.2f; // radians + float s1 = (float)Math.Sin(a1); + float s2 = (float)Math.Sin(a2); + float c1 = (float)Math.Cos(a1); + float c2 = (float)Math.Cos(a2); + if (s1 > 0f && s2 > 0f) + { + continue; + } + if (s1 > 0f) + { + c1 = (c1 < 0f ? -1f : (c1 > 0f ? 1f : 0f)); + } + if (s2 > 0f) + { + c2 = (c2 < 0f ? -1f : (c2 > 0f ? 1f : 0f)); + } + Vector2[] wp = new Vector2[4] { + new Vector2(-WHEEL_W*SIZE,WHEEL_R*c1*SIZE), + new Vector2( WHEEL_W*SIZE,WHEEL_R*c1*SIZE), + new Vector2( WHEEL_W*SIZE,WHEEL_R*c2*SIZE), + new Vector2(-WHEEL_W*SIZE,WHEEL_R*c2*SIZE) + }; + path = new PointF[wp.Length]; + for (int i = 0; i < wp.Length; i++) + { + Vector2 vec = RotateVec(Transform.Multiply(wp[i], ref trans), angle) * zoom + translation; + path[i] = new PointF(vec.X, vec.Y); + } + img.Mutate(i => i.FillPolygon(WheelWhite, path)); + } + } + + #endregion + } +} diff --git a/src/Gym.Environments/Envs/Aether/CarRacingEnv.cs b/src/Gym.Environments/Envs/Aether/CarRacingEnv.cs new file mode 100644 index 0000000..83fd455 --- /dev/null +++ b/src/Gym.Environments/Envs/Aether/CarRacingEnv.cs @@ -0,0 +1,851 @@ +using Gym.Collections; +using Gym.Envs; +using Gym.Observations; +using Gym.Spaces; +using NumSharp; +using SixLabors.ImageSharp; +using System; +using System.Collections.Generic; +using System.ComponentModel; +using tainicom.Aether.Physics2D.Common; +using tainicom.Aether.Physics2D.Collision.Shapes; +using tainicom.Aether.Physics2D.Dynamics; +using tainicom.Aether.Physics2D.Dynamics.Contacts; +using tainicom.Aether.Physics2D.Dynamics.Joints; +using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Drawing; +using SixLabors.ImageSharp.Drawing.Processing; +using System.Linq; +using Gym.Threading; +using SixLabors.ImageSharp.ColorSpaces; +using Gym.Exceptions; +using System.Diagnostics; +using System.Runtime.InteropServices; +using System.IO; + +namespace Gym.Environments.Envs.Aether +{ + /// + ///## Description + ///The easiest control task to learn from pixels - a top-down + ///racing environment. The generated xtrack is random every episode. + + ///Some indicators are shown at the bottom of the window along with the + ///state RGB buffer. From left to right: true speed, four ABS sensors, + ///steering wheel position, and gyroscope. + ///To play yourself (it's rather fast for humans), type: + ///``` + ///python gymnasium/envs/box2d/car_racing.py + ///``` + ///Remember: it's a powerful rear-wheel drive car - don't press the accelerator + ///and turn at the same time. + + ///## Action Space + ///If continuous there are 3 actions : + ///- 0: steering, -1 is full left, +1 is full right + ///- 1: gas + ///- 2: breaking + + ///If discrete there are 5 actions: + ///- 0: do nothing + ///- 1: steer left + ///- 2: steer right + ///- 3: gas + ///- 4: brake + + ///## Observation Space + + ///A top-down 96x96 RGB image of the car and race xtrack. + + ///## Rewards + ///The reward is -0.1 every frame and +1000/N for every xtrack tile visited, + ///where N is the total number of tiles visited in the xtrack. For example, + ///if you have finished in 732 frames, your reward is + ///1000 - 0.1*732 = 926.8 points. + + ///## Starting State + ///The car starts at rest in the center of the road. + + ///## Episode Termination + ///The episode finishes when all the tiles are visited. The car can also go + ///outside the playfield - that is, far off the xtrack, in which case it will + ///receive -100 reward and die. + + ///## Arguments + ///`lap_complete_percent` dictates the percentage of tiles that must be visited by + ///the agent before a lap is considered complete. + + ///Passing `domain_randomize=True` enables the domain randomized variant of the environment. + ///In this scenario, the background and xtrack colours are different on every reset. + + ///Passing `continuous=False` converts the environment to use discrete action space. + ///The discrete action space has 5 actions: [do nothing, left, right, gas, brake]. + + ///## Reset Arguments + ///Passing the option `options["randomize"] = True` will change the current colour of the environment on demand. + ///Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment. + ///`domain_randomize` must be `True` on init for this argument to work. + ///Example usage: + ///```python + ///import gymnasium as gym + ///env = gym.make("CarRacing-v1", domain_randomize=True) + + ///# normal reset, this changes the colour scheme by default + ///env.reset() + + ///# reset with colour scheme change + ///env.reset(options={"randomize": True}) + + ///# reset with no colour scheme change + ///env.reset(options={"randomize": False}) + ///``` + + ///## Version History + ///- v1: Change xtrack completion logic and add domain randomization (0.24.0) + ///- v0: Original version + + ///## References + ///- Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car. + + ///## Credits + ///Created by Oleg Klimov + ///Converted to C# by Jacob Anderson + /// + public class CarRacingEnv : Env + { + public enum StateFormat { + /// + /// State data is pixel data from rendered images in [w,h,3] format + /// + Pixels, + /// + /// State data is the telemetry of the car + /// + Telemetry + }; + + #region Constants + public const int STATE_W = 96; // Atari was 160 x 192 + public const int STATE_H = 96; + public const int VIDEO_W = 600; + public const int VIDEO_H = 400; + public const int WINDOW_W = 1000; + public const int WINDOW_H = 800; + + public const float SCALE = 6; // Track scale + public const float TRACK_RAD = 900f / SCALE; + public const float PLAYFIELD = 2000f / SCALE; + public const float FPS = 50.0f; // Frames per second + public const float ZOOM = 2.7f; // Camera zoom + public const bool ZOOM_FOLLOW = true; // Set to false for a fixed view + + public const float TRACK_DETAIL_STEP = 21f / SCALE; + public const float TRACK_TURN_RATE = 0.31f; + public const float TRACK_WIDTH = 40f / SCALE; + public const float BORDER = 8f / SCALE; + public const int BORDER_MIN_COUNT = 4; + public const float GRASS_DIM = PLAYFIELD / 20f; + private float MAX_SHAPE_DIM = Math.Max(Math.Max(GRASS_DIM, TRACK_WIDTH), TRACK_DETAIL_STEP) * 1.414213f * ZOOM * SCALE; + #endregion + + #region Display Members + private IEnvironmentViewerFactoryDelegate _viewerFactory; + private IEnvViewer _viewer; + public Rgba32 RoadColor { get; set; } = new Rgba32(102, 102, 102); + public Rgba32 BackgroundColor { get; set; } = new Rgba32(102, 204, 102); + public Rgba32 GrassColor { get; set; } = new Rgba32(102, 230, 102); + #endregion + + /// + /// How the state data is output from the step. + /// + public StateFormat StateOutputFormat { get; set; } = StateFormat.Pixels; + private bool ContinuousMode { get; set; } = true; + private bool DomainRandomize { get; set; } = false; + private float LapCompletePercent { get; set; } = 0.95f; + private bool Verbose { get; set; } = false; + private NumPyRandom RandomState { get; set; } + private FrictionDetector ContactListener { get; set; } + + private CarDynamics Car { get; set; } + private float Reward { get; set; } = 0f; + private float PreviousReward { get; set; } = 0f; + private float T { get; set; } = 0f; + internal bool NewLap { get; set; } = false; + private float StartAlpha { get; set; } = 0f; + private int TileVisitedCount { get; set; } = 0; + + private Image _LastRender = null; + private NDArray _LastImageArray = null; + private World _World; + + internal struct RoadPolygon + { + internal Vertices Verts; + internal Rgba32 Color; + } + + private List RoadPoly { get; set; } = new List(); + private List Road { get; set; } = new List(); + private float[] Track { get; set; } + + internal class FrictionDetector + { + private CarRacingEnv _env { get; set; } + private float LapCompletePercent { get; set; } + + public FrictionDetector(CarRacingEnv env, float lap_complete_percent) + { + _env = env; + LapCompletePercent = lap_complete_percent; + } + + private void DoContact(Contact contact,bool begin) + { + RoadTile tile = contact.FixtureA.Body.Tag as RoadTile; + CarDynamics.Wheel car = contact.FixtureB.Body.Tag as CarDynamics.Wheel; + if (tile == null) + { + tile = contact.FixtureB.Body.Tag as RoadTile; + car = contact.FixtureA.Body.Tag as CarDynamics.Wheel; + } + if (tile == null || car == null) + { + // Contacts are only with Car and Tile instances + return; + } + if (begin) + { + tile.Color = _env.RoadColor; + car.Tiles.Add(tile); + if (!tile.RoadVisited) + { + tile.RoadVisited = true; + _env.Reward += 1000f / (float)_env.Track.Length; + _env.TileVisitedCount++; + float visitpct = (float)_env.TileVisitedCount / (float)_env.Track.Length; + if (tile.Index == 0 && visitpct > LapCompletePercent) + { + _env.NewLap = true; + } + } + } + else + { + car.Tiles.Remove(tile); + } + } + public bool BeginContact(Contact contact) + { + DoContact(contact,true); + return (false); + } + + public void EndContact(Contact contact) + { + DoContact(contact,false); + } + + } + + public CarRacingEnv(IEnvViewer viewer) : this((IEnvironmentViewerFactoryDelegate)null) + { + _viewer = viewer ?? throw new ArgumentNullException(nameof(viewer)); + } + + public CarRacingEnv(IEnvironmentViewerFactoryDelegate viewerFactory, float lap_complete_percent = 0.95f, bool domain_randomize = false, bool continuous = true, bool verbose = false, NumPyRandom random_state = null) + { + RandomState = random_state; + if (RandomState == null) + { + RandomState = np.random; + } + _viewerFactory = viewerFactory; + LapCompletePercent = lap_complete_percent; + DomainRandomize = domain_randomize; + ContinuousMode = continuous; + Verbose = verbose; + if (continuous) + { + ActionSpace = new Box(new float[] { -1f, 0f, 0f }, new float[] { 1f, 1f, 1f }); + } + else + { + ActionSpace = new Discrete(5); + } + ObservationSpace = new Box(0f, 255f, shape: new NumSharp.Shape(STATE_H, STATE_W, 3)); + InitColors(); + ContactListener = new FrictionDetector(this, lap_complete_percent); + } + + private void InitColors() + { + if (DomainRandomize) + { + NDArray rando = RandomState.uniform(0, 210, 3).astype(NPTypeCode.Int32); + RoadColor = new Rgba32(rando[0], rando[1], rando[2]); + rando = RandomState.uniform(0, 210, 3).astype(NPTypeCode.Int32); + BackgroundColor = new Rgba32(rando[0], rando[1], rando[2]); + int[] offset = new int[] { 0, 0, 0 }; + int idx = RandomState.randint(0, 3); + offset[idx] = 20; + GrassColor = new Rgba32(rando[0]+offset[0], rando[1] + offset[1], rando[2] + offset[2]); + } + else + { + RoadColor = new Rgba32(102, 102, 102); + BackgroundColor = new Rgba32(102, 204, 102); + GrassColor = new Rgba32(102, 230, 102); + } + } + + private bool CreateTrack() + { + int CHECKPOINTS = 12; + float[] checkpoints = new float[CHECKPOINTS * 3]; + float frac = 2f * (float)Math.PI / (float)CHECKPOINTS; + // Create checkpoints + int i = 0; + for (i = 0; i < CHECKPOINTS; i++) + { + int j = i * 3; // index into the checkpoints array + float noise = RandomState.uniform(0f, frac); + float alpha = frac * (float)i + noise; + float rad = RandomState.uniform(TRACK_RAD / 3f, TRACK_RAD); + + if (i == 0) + { + alpha = 0f; + rad = 1.5f * TRACK_RAD; + } + else if(i == CHECKPOINTS-1) + { + alpha = frac * (float)i; + StartAlpha = frac * (-0.5f); + rad = 1.5f * TRACK_RAD; + } + checkpoints[j++] = alpha; + checkpoints[j++] = rad * (float)Math.Cos(alpha); + checkpoints[j++] = rad * (float)Math.Sin(alpha); + } + Road.Clear(); + // Go from one checkpoint to another to create the xtrack. + float x = 1.5f * TRACK_RAD; + float y = 0f; + float beta = 0f; + int dest_i = 0; + int laps = 0; + int no_freeze = 2500; + bool visited_other_size = false; + List xtrack = new List(); + while (no_freeze > 0) + { + float alpha = (float)Math.Atan2(y, x); + if (visited_other_size && alpha > 0f) + { + laps++; + visited_other_size = false; + } + if (alpha < 0f) + { + visited_other_size = true; + alpha += 2f * (float)Math.PI; + } + float dest_x = 0f; + float dest_y = 0f; + bool failed = false; + while (true) + { + failed = true; + while (true) + { + float dest_alpha = checkpoints[dest_i % checkpoints.Length]; + dest_x = checkpoints[dest_i % checkpoints.Length + 1]; + dest_y = checkpoints[dest_i % checkpoints.Length + 2]; + if (alpha <= dest_alpha) + { + failed = false; + break; + } + dest_i += 3; // each element is 3 + if (dest_i % checkpoints.Length == 0) + break; + } + if (!failed) + { + break; + } + alpha -= 2f * (float)Math.PI; + } + + float r1x = (float)Math.Cos(beta); + float r1y = (float)Math.Sin(beta); + float p1x = -r1y; + float p1y = r1x; + float dest_dx = dest_x - x; // Vector towards destination + float dest_dy = dest_y - y; + // Destination vector projected on rad: + float proj = r1x * dest_dx + r1y * dest_dy; + while ((beta - alpha) > (1.5f * (float)Math.PI)) + { + beta -= 2f * (float)Math.PI; + } + while ((beta - alpha) < (1.5f * (float)Math.PI)) + { + beta += 2f * (float)Math.PI; + } + float prev_beta = beta; + proj *= SCALE; + if (proj > 0.3f) + beta -= Math.Min(TRACK_TURN_RATE, (float)Math.Abs(0.001 * proj)); + if (proj < -0.3f) + beta += Math.Min(TRACK_TURN_RATE, (float)Math.Abs(0.001 * proj)); + x += p1x * TRACK_DETAIL_STEP; + y += p1y * TRACK_DETAIL_STEP; + xtrack.Add(alpha); + xtrack.Add(prev_beta * 0.5f + beta * 0.5f); + xtrack.Add(x); + xtrack.Add(y); + if (laps > 4) + { + break; + } + no_freeze--; + } + + // Find closed loop range i1 .. i2, first loop should be ignored, second is OK + int i1 = -1; + int i2 = -1; + i = xtrack.Count; // Each element is 4 floats + while (true) + { + i -= 4; // 4 floats per element + if (i == 0) + { + // Failed + return (false); + } + bool pass_through_start = (xtrack[i] > StartAlpha && xtrack[i - 4] <= StartAlpha); + if (pass_through_start && i2 == -1) + { + i2 = i; + } + else if (pass_through_start && i1 == -1) + { + i1 = i; + break; + } + } + if (Verbose) + { + Debug.WriteLine("Track generation {0}..{1} -> {2}-tiles track", i1, i2, i2 - i1); + } + Debug.Assert(i1 != -1); + Debug.Assert(i2 != -1); + float[] track = new float[i2 - i1]; + Array.Copy(xtrack.ToArray().Skip(i1).ToArray(), track, i2 - i1); + float first_beta = xtrack[1]; + float first_perp_x = (float)Math.Cos(first_beta); + float first_perp_y = (float)Math.Sin(first_beta); + // Length of perpendicular jump to put together head and tail + float a = first_perp_x * (track[2] - track[track.Length - 4 + 2]); // 4 floats per element + float b = first_perp_y * (track[3] - track[track.Length - 4 + 3]); + float well_glued_together = (float)Math.Sqrt(a * a + b * b); + if (well_glued_together > TRACK_DETAIL_STEP) + { + return (false); + } + // Red-white border on hard turns + bool[] border = new bool[track.Length >> 2]; + for (i = 0; i < track.Length; i += 4) + { + bool good = true; + int oneside = 0; + for (int neg = 0; neg < BORDER_MIN_COUNT; neg++) { + int idx = i - neg * 4; + while (idx < 0) + { + idx += track.Length; + } + float beta1 = track[idx + 1]; + idx = i - neg*4 - 4; + while (idx < 0) + { + idx += track.Length; + } + float beta2 = track[idx+1]; // index out of bounds TODO! + float dbeta = beta1 - beta2; + good &= (Math.Abs(dbeta) > TRACK_TURN_RATE * 0.2f); + oneside += (dbeta < 0f ? -1 : (dbeta > 0f ? 1 : 0)); + } + good = good && (Math.Abs(oneside) == BORDER_MIN_COUNT); + border[i >> 2] = good; + } + for (i = 0; i < border.Length; i++) + { + for (int neg = 0; neg < BORDER_MIN_COUNT; neg++) + { + int j = i - neg; + if (j < 0) + { + j += border.Length; + } + + border[j] |= border[i]; + } + } + // Create tiles + int prev_track_index = track.Length - 4; + for (i = 0; i < track.Length; i += 4, prev_track_index = (prev_track_index+4)%track.Length) // (alpha, beta, x, y) + { + // position 1 + float beta1 = track[i + 1]; + float x1 = track[i + 2]; + float y1 = track[i + 3]; + float cos_beta1 = (float)Math.Cos(beta1); + float sin_beta1 = (float)Math.Sin(beta1); + // previous position + float beta2 = track[prev_track_index + 1]; + float x2 = track[prev_track_index + 2]; + float y2 = track[prev_track_index + 3]; + float cos_beta2 = (float)Math.Cos(beta2); + float sin_beta2 = (float)Math.Sin(beta2); + // Polygon + Vector2 road1_l = new Vector2(x1 - TRACK_WIDTH * cos_beta1, y1 - TRACK_WIDTH * sin_beta1); + Vector2 road1_r = new Vector2(x1 + TRACK_WIDTH * cos_beta1, y1 + TRACK_WIDTH * sin_beta1); + Vector2 road2_l = new Vector2(x2 - TRACK_WIDTH * cos_beta2, y2 - TRACK_WIDTH * sin_beta2); + Vector2 road2_r = new Vector2(x2 + TRACK_WIDTH * cos_beta2, y2 + TRACK_WIDTH * sin_beta2); + Vertices vx = new Vertices(); + vx.Add(road1_l); + vx.Add(road1_r); + vx.Add(road2_r); + vx.Add(road2_l); + // fixtureDef(shape = polygonShape(vertices =[(0, 0), (1, 0), (1, -1), (0, -1)])) + Body bx = _World.CreateBody(bodyType:BodyType.Static); + Fixture t = bx.CreateFixture(new PolygonShape(vx,1f)); + RoadTile tile = new RoadTile(); + bx.Tag = tile; + tile.Index = i / 4; + int c = 25 * (tile.Index % 3); + tile.Color = new Rgba32(RoadColor.R + c, RoadColor.G + c, RoadColor.B + c); + tile.RoadVisited = false; + tile.Friction = 1f; + t.IsSensor = true; + tile.PhysicsFixture = t; + RoadPolygon poly = new RoadPolygon(); + poly.Verts = vx; + poly.Color = tile.Color; + RoadPoly.Add(poly); + Road.Add(bx); + if (border[tile.Index]) + { + int side = ((beta2-beta1) < 0) ? -1 : 1; + Vector2 b1_l = new Vector2(x1 + side * TRACK_WIDTH * cos_beta1, y1 + side * TRACK_WIDTH * sin_beta1); + Vector2 b1_r = new Vector2(x1 + side * (TRACK_WIDTH+BORDER) * cos_beta1, y1 + side * (TRACK_WIDTH+BORDER) * sin_beta1); + Vector2 b2_l = new Vector2(x2 + side * TRACK_WIDTH * cos_beta2, y2 + side * TRACK_WIDTH * sin_beta2); + Vector2 b2_r = new Vector2(x2 + side * (TRACK_WIDTH + BORDER) * cos_beta2, y2 + side * (TRACK_WIDTH + BORDER) * sin_beta2); + vx = new Vertices(); + vx.Add(b1_l); + vx.Add(b1_r); + vx.Add(b2_r); + vx.Add(b2_l); + poly = new RoadPolygon(); + poly.Verts = vx; + poly.Color = (tile.Index % 2 == 0) ? new Rgba32(255,255,255) : new Rgba32(255, 0, 0); + RoadPoly.Add(poly); + } + } + Track = track; + return (true); + } + + public override void CloseEnvironment() + { + if (_viewer != null) + { + _viewer.CloseEnvironment(); + _viewer = null; + } + } + + public override void Seed(int seed) + { + RandomState.seed(seed); + } + + /// + /// Useful method to draw a filled polygon that is zoomed, translated, and rotated. + /// + /// Target drawing surface + /// The path to fill + /// The color to fill it with + /// Zoom factor + /// Translation factor + /// Rotation factor + private void FillPoly(Image img, Vector2[] poly, Color c, float zoom, Vector2 trans, float angle) + { + PointF[] path = new PointF[poly.Length]; + for (int i = 0; i < poly.Length; i++) + { + Vector2 v = CarDynamics.RotateVec(poly[i], angle) * zoom + trans; + path[i] = new PointF(v.X, v.Y); + } + img.Mutate(i => i.FillPolygon(c, path)); + } + + #region Indicator Rendering + private void RenderIndicatorIfMin(Image img, float v, PointF[] path, Color c) + { + if (Math.Abs(v) < 1e-4) + { + return; + } + img.Mutate(i => i.FillPolygon(c, path)); + + } + private PointF[] _indicator(float place, float s, float h, float W, float H, float v, bool vertical=true) + { + PointF[] path = null; + if (vertical) + { + path = new PointF[] { + new PointF(place*s ,H-(h+h*v)), + new PointF((place+1f)*s,H-(h+h*v)), + new PointF((place+1f)*s,H-h), + new PointF(place*s ,H-h), + }; + } + else + { + path = new PointF[] { + new PointF(place*s ,H-4*h), + new PointF((place+v)*s,H-4*h), + new PointF((place+v)*s,H-2*h), + new PointF(place*s ,H-2*h), + }; + } + return (path); + } + private void RenderIndicators(Image img, int W, int H) + { + float s = W / 40f; + float h = H / 40f; + Rgba32 color = new Rgba32(0, 0, 0); + PointF[] poly = new PointF[] { + new PointF(W,H), + new PointF(W,H-5f*h), + new PointF(0f,H-5f*h), + new PointF(0f,H) + }; + // Blank out the indicator area + img.Mutate(i => i.FillPolygon(color, poly)); + // Draw the telemetry indicators + float v = Car.Hull.LinearVelocity.Length(); + RenderIndicatorIfMin(img, v, _indicator(5f, s, h, W, H, 0.02f * v), new Rgba32(255, 255, 255)); + // ABS indicators + for (int i = 0; i < 4; i++) + { + v = Car.Wheels[i].Omega; + RenderIndicatorIfMin(img, v, _indicator(7f+i, s, h, W, H, 0.01f * v), i < 2 ? new Rgba32(0, 0, 255) : new Rgba32(51, 0, 255)); + } + v = Car.Wheels[0].Joint.JointAngle; + RenderIndicatorIfMin(img, v, _indicator(20f, s, h, W, H, -10f * v, false), new Rgba32(0, 255, 0)); + v = Car.Hull.AngularVelocity; + RenderIndicatorIfMin(img, v, _indicator(30f, s, h, W, H, -0.8f * v, false), new Rgba32(255, 0, 0)); + } + #endregion + private void RenderRoad(Image img, float zoom, Vector2 trans, float angle) + { + float bounds = PLAYFIELD; + Vector2[] field = new Vector2[] { + new Vector2(bounds,bounds), + new Vector2(bounds,-bounds), + new Vector2(-bounds,-bounds), + new Vector2(-bounds, bounds) + }; + FillPoly(img, field, new Rgba32(102, 204, 102), zoom, trans, angle); + float k = GRASS_DIM; + for (int x = -20; x < 20; x += 2) + { + for (int y = -20; y < 20; y += 2) + { + Vector2[] poly = new Vector2[] { + new Vector2(k*x+k, k*y), + new Vector2(k*x , k*y), + new Vector2(k*x , k*y+k), + new Vector2(k*x+k, k*y+k) + }; + FillPoly(img, poly, new Rgba32(102, 230, 102), zoom, trans, angle); + } + } + Vector2 add = new Vector2(PLAYFIELD, PLAYFIELD); + for (int i = 0; i < RoadPoly.Count; i++) + { + FillPoly(img, RoadPoly[i].Verts.ToArray(), RoadPoly[i].Color, zoom, trans, angle); + } + } + + /// + /// Renders the current state of the race + /// + /// Rendering mode, either "human" or "state". State rendering will return the RGB array as the state in the observation. + /// + public override Image Render(string mode = "human") + { + if (_viewer == null && mode == "human") + { + lock (this) + { + //to prevent double initalization. + if (_viewer == null) + { + if (_viewerFactory == null) + _viewerFactory = NullEnvViewer.Factory; + _viewer = _viewerFactory(WINDOW_W, WINDOW_H, "carracing").GetAwaiter().GetResult(); + } + } + } + Image img = null; + if (_LastRender == null) + { + // Define the buffer image for drawing + img = new Image(WINDOW_W, WINDOW_H); + img.Mutate(i => i.BackgroundColor(new Rgba32(0, 0, 0))); // Space is black + // Computing transforms + float angle = -Car.Hull.Rotation; + // Animating first second zoom. + float zoom = 0.1f * SCALE * (float)Math.Max(1f - T, 0f) + ZOOM * SCALE * (float)Math.Min(T, 1f); + float scroll_x = -(Car.Hull.Position.X) * zoom; + float scroll_y = -(Car.Hull.Position.Y) * zoom; + Vector2 trans = CarDynamics.RotateVec(new Vector2(scroll_x, scroll_y), angle) + new Vector2(WINDOW_W/2,WINDOW_H/4); + RenderRoad(img, zoom, trans, angle); + Car.Draw(img, zoom, trans, angle, mode != "state"); + // flip the surface? + // Show stats + RenderIndicators(img, WINDOW_W, WINDOW_H); + // TODO: Display the reward as 42 pt text at (60, WINDOW_H - WINDOW_H * 2.5 / 40.0) + + _LastRender = img; + } + if (mode == "state" && img != null) + { + // Convert the image to an NDArray + _LastImageArray = new float[WINDOW_W, WINDOW_H, 3]; + for (int j = 0; j < WINDOW_H; j++) + { + for (int i = 0; i < WINDOW_W; i++) + { + _LastImageArray[i, j, 0] = (float)img[i, j].R / 255f; + _LastImageArray[i, j, 1] = (float)img[i, j].G / 255f; + _LastImageArray[i, j, 2] = (float)img[i, j].B / 255f; + } + } + } + // Clear out the last render so a new one is created + if (mode == "human") + { + _viewer.Render(_LastRender); + _LastRender = null; + } + return (img); + } + + public override NDArray Reset() + { + _World = new World(new Vector2(0f, 0f)); + _World.ContactManager.BeginContact = new BeginContactDelegate(ContactListener.BeginContact); + _World.ContactManager.EndContact = new EndContactDelegate(ContactListener.EndContact); + Reward = 0f; + PreviousReward = 0f; + TileVisitedCount = 0; + T = 0f; + NewLap = false; + RoadPoly.Clear(); + InitColors(); + bool done = false; + int iters = 0; + while (!done && iters < 100) + { + done = CreateTrack(); + iters++; + if (Verbose) + { + Debug.WriteLine("Retry to generate track (normal if there are not many instances of this message)."); + } + } + Car = new CarDynamics(_World, Track[1], Track[2], Track[3]); + return (Step(null).Observation); + } + + public override Step Step(object action) + { + bool null_action = true; + NDArray a = null; + if (action != null) + { + null_action = false; + a = (NDArray)action; + if (ContinuousMode) + { + Car.Steer(-1f * a[0]); + Car.Gas(a[1]); + Car.Brake(a[2]); + } + else + { + int i_action = (int)action; + if (!ActionSpace.Contains(i_action)) + { + throw (new InvalidActionError(string.Format("Action is invalid."))); + } + Car.Steer(-0.6f * (i_action == 1 ? 1f : 0f) + 0.6f * (i_action == 2 ? 1f : 0f)); + Car.Gas(0.2f * (i_action == 3 ? 1f : 0f)); + Car.Brake(0.8f * (i_action == 4 ? 1f : 0f)); + } + } + Car.Step(1f / FPS); + SolverIterations si = new SolverIterations(); + si.PositionIterations = 2 * 30; + si.VelocityIterations = 6 * 30; + _World.Step(1f / FPS, ref si); + T += 1f / FPS; + + // TODO: gather the 'state' pixels + + float step_reward = 0f; + bool done = false; + + if (!null_action) // First step without action, called from reset() + { + Reward -= 0.1f; + //# We actually don't want to count fuel spent, we want car to be faster. + //# self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER + Car.FuelSpent = 0f; + step_reward -= PreviousReward; + PreviousReward = Reward; + if (TileVisitedCount == Track.Length || NewLap) + { + done = true; + } + Vector2 pos = Car.Hull.Position; + if (Math.Abs(pos.X) > PLAYFIELD || Math.Abs(pos.Y) > PLAYFIELD) + { + done = true; + step_reward = -100f; + } + } + Step step = new Step(); + step.Done = done; + step.Reward = step_reward; + if (StateOutputFormat == StateFormat.Pixels) { + Image img = Render("state"); + step.Observation = _LastImageArray; + } + else { + step.Observation = new float[] { Car.Hull.Position.X, Car.Hull.Position.Y, Car.Speed, Car.SteerAngle }; + } + return step; + } + } +} diff --git a/src/Gym.Environments/Envs/Aether/LunarLanderEnv.cs b/src/Gym.Environments/Envs/Aether/LunarLanderEnv.cs index e0db0ce..fdc479d 100644 --- a/src/Gym.Environments/Envs/Aether/LunarLanderEnv.cs +++ b/src/Gym.Environments/Envs/Aether/LunarLanderEnv.cs @@ -146,6 +146,7 @@ public enum LunarLanderDiscreteActions : int // ## Credits // Created by Oleg Klimov + // Converted to C# by Jacob Anderson /// public class LunarLanderEnv : Env { @@ -421,6 +422,7 @@ public LunarLanderEnv(IEnvironmentViewerFactoryDelegate viewerFactory, bool cont ActionSpace = new Discrete(4, random_state: random_state); } + Metadata = new Dict("render.modes", new[] { "human", "rgb_array" }, "video.frames_per_second", 50); } #region Particles @@ -775,7 +777,7 @@ public override Step Step(object action) public override Image Render(string mode = "human") { - if (_viewer == null) { + if (_viewer == null && mode == "human") { lock (this) { //to prevent double initalization. if (_viewer == null) { diff --git a/src/Gym.Environments/Envs/Classic/LEM.cs b/src/Gym.Environments/Envs/Classic/LEM.cs new file mode 100644 index 0000000..04732fc --- /dev/null +++ b/src/Gym.Environments/Envs/Classic/LEM.cs @@ -0,0 +1,249 @@ +using System; +using System.Collections.Generic; + +namespace Gym.Environments.Envs.Classic +{ + /// + /// LEM physics for a simple landing vehicle with a straight drop to the landing pad. + /// + public class LEM + { + /// + /// The miles above the surface + /// + public float Altitude { get; private set; } + /// + /// The residual feet in the altitude + /// + public float AltitudeFeet + { + get + { + return (float)Math.Truncate(5280f * (Altitude - Math.Truncate(Altitude))); + } + } + public float Mass { get; private set; } + public float FuelMass { get; private set; } + public float Speed { get; private set; } + public float SpeedMPH { get { return (float)Math.Truncate(Speed * 3600.0f); } } + public float Gravity { get; private set; } + + private const float Z = 1.8f; + /// + /// + /// + /// Initial Altitude + /// Initial Total Mass of LEM (includes fuel) + /// Initial Mass of Fuel + /// Initial Speed + /// The gravity + public LEM(float a =120f, float m =33000f, float n =16500f, float v =1f, float g =0.001f) + { + Altitude = a; + Mass = m; + FuelMass = n; + Speed = v; + Gravity = g; + } + + public float NetFuel + { + get + { + return (Mass - FuelMass); + } + } + /// + /// Returns true when the total mass of the LEM is less than the initial fuel mass + /// + public bool OutOfFuel + { + get + { + return (NetFuel < 0.001); + } + } + + /// + /// Computes the drift time for the lander to reach zero altitude and returns the time + /// + /// + public float Drift() + { + float step = (-Speed + (float)Math.Sqrt(Speed * Speed + 2 * Altitude * Gravity)) / Gravity; + Speed = Speed + Gravity * step; + return (step); + } + /// + /// Computes the next speed and altitude of the LEM given the step and burn rate + /// + /// + /// + /// (speed,altitude) + private Tuple Update(float step, float burn) + { + float Q = step * burn / Mass; + float Q2 = Q * Q; + float Q3 = Q2 * Q; + float Q4 = Q3 * Q; + float Q5 = Q4 * Q; + float J = Speed + Gravity * step + Z * (-Q - Q2 / 2 - Q3 / 3 - Q4 / 4 - Q5 / 5); + float I = Altitude - Gravity * step * step / 2 - Speed * step + Z * step * (Q / 2 + Q2 / 6 + Q3 / 12 + Q4 / 20 + Q5 / 30); + return new Tuple(J, I); + } + + /// + /// Updates the mass of the LEM for the given burn for the given step time. The altitude and + /// speed are updated from the result parameter. + /// + /// Time of the burn + /// Amount of fuel burn + /// Output from a call to Update(step,burn) + private void Apply(float step, float burn, Tuple result) + { + Mass -= step * burn; + Altitude = result.Item2; + Speed = result.Item1; + } + + /// + /// Main method to apply the descent burn to the LEM. Computes the new flight parameters. + /// + /// The number of pounds of fuel to burn per second + /// The number of seconds to perform the burn + /// The elapsed time for the burn procedure + public float ApplyBurn(float burn, float time =10.0f) + { + float elapsed = 0; + // Time decay loop + while (time > 0.001) + { + float step = time; + if (OutOfFuel) + { + break; + } + if (Mass < FuelMass + step * burn) + { + step = (Mass - FuelMass) / burn; // line 190 + } + Tuple r = Update(step, burn); // Line 200 -> 420 + if (r.Item2 <= 0.0) // Line 200 + { + while (step > 5e-3) // line 340 + { + float D = Speed + (float)Math.Sqrt(Speed * Speed + 2 * Altitude * (Gravity - Z * burn / Mass)); + step = 2 * Altitude / D; + r = Update(step, burn); // -> line 420 + // Line 330 in basic + elapsed += step; + time -= step; + Apply(step, burn, r); // -> Line 330 + } + // Done + return elapsed; + } + if (Speed <= 0.0) // Line 210 + { + elapsed += step; + time -= step; + Apply(step, burn, r); + continue; // Line 230 + } + if (r.Item1 < 0.0) + { + do + { + // Line 370 + float U = (1 - Mass * Gravity / (Z * burn)) / 2; + step = Mass * Speed / (Z * burn * (U + (float)Math.Sqrt(U * U + Speed / Z))) + 0.5f; + r = Update(step, burn); + if (r.Item2 <= 0.0) + { + while (step > 5e-3) + { + float D = Speed + (float)Math.Sqrt(Speed * Speed + 2 * Altitude * (Gravity - Z * burn / Mass)); + step = 2 * Altitude / D; + r = Update(step, burn); + // Line 330 in basic + elapsed += step; + time -= step; + Apply(step, burn, r); + } + return elapsed; + } + elapsed += step; + time -= step; + Apply(step, burn, r); + if (r.Item1 > 0.0) + { + // Line 390 -> 160 + continue; + } + } + while (Speed > 0.0); + } + else + { + elapsed += step; + time -= step; + Apply(step, burn, r); + } + } + return elapsed; + } + } +} +/* + * ORIGINAL BASIC CODE TRANSCRIBED FROM COMPUTE MAGAZINE + * LUNAR LEM ROCKET + * +10 PRINT TAB(33);"LUNAR" +20 PRINT TAB(15);"CREATIVE COMPUTING MORRISTOWN, NEW JERSEY" +25 PRINT:PRINT:PRINT +30 PRINT "THIS IS A COMPUTER SIMULATION OF AN APOLLO LUNAR" +40 PRINT "LANDING CAPSULE.": PRINT: PRINT +50 PRINT "THE ON-BOARD COMPUTER HAS FAILED (IT WAS MADE BY" +60 PRINT "(XEROX) SO YOU HAVE TO LAND THE CASULE MANUALLY." +70 PRINT: PRINT "SET BURN RATE OF RETRO ROCKETS TO ANY VALUE BETWEEN" +80 PRINT "0 (FREE FALL) AND 200 (MAX BURN) POUNDS PER SECOND." +90 PRINT "SET NEW BURN RATE EVERY 10 SECONDS." :PRINT +100 PRINT "CAPSULE WEIGHT 32,500 LBS; FUEL WEIGHT 16,500 LBS." +110 PRINT: PRINT: PRINT: PRINT "GOOD LUCK" +120 L=0 +130 PRINT: PRINT "SEC","MI + FT","MPH","LB FUEL","BURN RATE" :PRINT +140 A=120: V=1: M=33000: N=16500: G=1E-03: Z=1.8 +150 PRINT L, INT(A); INT(5280*(A-INT(A))),3600*V,M-N,:INPUT K:T=10 +160 IF M-N <1E-03 THEN 240 +170 IF T<1E-03 THEN 150 +180 S=T: IF M>= N+S*K THEN 200 +190 S=(M-N)/K +200 GOSUB 420: IF I<= 0 THEN 340 +210 IF V<=0 THEN 230 +220 IF J<0 THEN 370 +230 GOSUB 330: GOTO 160 +240 PRINT "FUEL OUT AT";L;"SECONDS": S=(-V+SQR(V*V+2*A*G))/G +250 V=V+G*S: L=L+S +260 U=3600*V:PRINT "ON MOON AT";L;"SECONDS - IMPACT VELOCITY";U;"MPH" +270 IF U <= 1.2 THEN PRINT "PERFECT LANDING!": GOTO 440 +280 IF U <= 10 THEN PRINT "GOOD LANDING (COULD BE BETTER)" : GOTO 440 +282 IF U > 60 THEN 300 +284 PRINT "CRAFT DAMAGE ... YOU'RE STRANDED HERE UNTIL A RESCUE" +286 PRINT "PARTY ARRIVES. HOPE YOU HAVE ENOUGH OXYGEN!" +288 GOTO 440 +300 PRINT "SORRY THERE WERE NO SURVIVORS. YOU BLEW IT!" +310 PRINT "IN FACT, YOU BLASTED A NEW LUNAR CRATER";U*.277;"FEET DEEP!" +320 GOTO 440 +330 L=L+S: T=T-S: M=M-S*K: A=I: V=J: RETURN +340 IF S < 5E-03 THEN 260 +350 D=V+SQR(V*V+2*A*(G-Z*K/M)): S=2*A/D +360 GOSUB 420: GOSUB 330: GOTO 340 +370 U=(1-M*G/(Z*K))/2: S=M*V/(Z*K*(U+SQR(U*U+V/Z)))+0.05: GOSUB 420 +380 IF I<=0 THEN 340 +390 GOSUB 330: IF J>0 THEN 160 +400 IF V>0 THEN 370 +410 GOTO 160 +420 Q=S*K/M: J=V+G*S+Z*(-Q-Q*Q/2-Q^3/3-Q^4/4-Q^5/5) +430 I=A-G*S*S/2-V*S+Z*S*(Q/2+Q^2/6+Q^3/12+Q^4/20+Q^5/30):RETURN +440 PRINT:PRINT:PRINT:PRINT "TRY AGAIN??": GOTO 70 +*/ \ No newline at end of file diff --git a/src/Gym.Environments/Envs/Classic/LEMLanderEnv.cs b/src/Gym.Environments/Envs/Classic/LEMLanderEnv.cs new file mode 100644 index 0000000..d0a4417 --- /dev/null +++ b/src/Gym.Environments/Envs/Classic/LEMLanderEnv.cs @@ -0,0 +1,303 @@ +using Gym.Collections; +using Gym.Envs; +using Gym.Observations; +using Gym.Spaces; +using NumSharp; +using SixLabors.ImageSharp; +using System; +using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Drawing; +using SixLabors.ImageSharp.Drawing.Processing; +using System.Linq; +using Gym.Threading; +using SixLabors.ImageSharp.ColorSpaces; +using Gym.Exceptions; +using System.Diagnostics; +using System.Collections.Generic; + +namespace Gym.Environments.Envs.Classic +{ + /// + /// The status of the lander during the simulation. Starts in Landing status and + /// is updated periodically during the simulation. + /// + public enum LanderStatus + { + Landing, // In the process of landing + Crashed, // Smash + Landed, // Successful + FreeFall // Out of fuel + } + public class LEMLanderEnv : Env + { + // Screen + public const int VIEWPORT_W = 600; + public const int VIEWPORT_H = 400; + + private IEnvironmentViewerFactoryDelegate _viewerFactory; + private IEnvViewer _viewer; + + private const float START_ALTITUDE = 120; + private const float START_VELOCITY = 1; + private const float START_MASS = 32500; + + private float _ElapsedTime; + private float _StepTimeDuration = 10.0f; + + public LanderStatus Status { get; private set; } + /// + /// The LEM physics + /// + private LEM _LEM = null; + private NumPyRandom RandomState { get; set; } + /// + /// Set to true to get the original output transcript + /// + public bool Verbose { get; set; } = false; + + public LEMLanderEnv(IEnvViewer viewer) : this((IEnvironmentViewerFactoryDelegate)null) + { + _viewer = viewer ?? throw new ArgumentNullException(nameof(viewer)); + } + + public LEMLanderEnv(IEnvironmentViewerFactoryDelegate viewerFactory, float g = 0.001f, float netmass = 16500f, Random ran = null, bool quiet = false, NumPyRandom random_state = null) + { + RandomState = random_state; + if (RandomState == null) + { + RandomState = np.random; + } + _viewerFactory = viewerFactory; + _LEM = new LEM(a: START_ALTITUDE, m: START_MASS, v: START_VELOCITY, g: g, n: netmass); + // Altitude, Speed, Fuel + NDArray low = np.array(new float[] { 0f, -1000, 0 }); + NDArray high = np.array(new float[] { START_ALTITUDE, 1000f, netmass }); + ObservationSpace = new Box(low, high); + // Burn, Time + low = np.array(new float[] { 0f, 0f }); + high = np.array(new float[] { 200f, 10f }); + ActionSpace = new Box(low, high, random_state: random_state); + Metadata = new Dict("render.modes", new[] { "human", "rgb_array" }, "video.frames_per_second", 50); + } + /// + /// Resets the LEM to the original game parameters and runs a step with 0 burn and 0 time. + /// + /// + public override NDArray Reset() + { + _LEM = new LEM(a: START_ALTITUDE, m: START_MASS, v: START_VELOCITY, g: _LEM.Gravity, n: _LEM.FuelMass); + _ElapsedTime = 0; + Status = LanderStatus.Landing; + return Step(np.array(new float[] { 0f, 0f })).Observation; + } + + /// + /// Continuous action step that takes an array of 2 floats for the action: (thrust, duration of thrust). + /// Original game was a 10 second fixed thrust period and variable burn amount. In this environment you will + /// specify the burn rate and the amount of time for that burn. + /// + /// The action as a 2 element array of float (burn,time) + /// Step has (Altitude, Speed, Fuel) as the state. Information dictionary uses the same keys. + public override Step Step(object action) + { + NDArray c_action = (NDArray)action; + float burn = c_action[0]; + float time = c_action[1]; + if (burn > 200f) + { + burn = 200f; + } + if (burn < 0f) + { + burn = 0f; + } + if (time < 0f) + { + time = 0f; + } + float score = 0f; + _ElapsedTime += _LEM.ApplyBurn(burn, time: time); + if (Verbose) + System.Diagnostics.Debug.WriteLine("K=: {0:F2}", burn); + if (_LEM.OutOfFuel) + { + _ElapsedTime += _LEM.Drift(); + Status = LanderStatus.FreeFall; + if (Verbose) + { + System.Diagnostics.Debug.WriteLine("FUEL OUT AT {0} SECONDS", _ElapsedTime); + } + } + if (_LEM.Altitude < 1e-5) + { + float U = _LEM.SpeedMPH; + // Check for landing + if (U <= 1.2) + { + if (Verbose) + System.Diagnostics.Debug.WriteLine("PERFECT LANDING!"); + Status = LanderStatus.Landed; + score = 10.0f - U; + } + else if (U <= 10.0) + { + if (Verbose) + System.Diagnostics.Debug.WriteLine("GOOD LANDING (COULD BE BETTER) AT {0:F2} MPH",U); + score = 2.0f; + Status = LanderStatus.Landed; + } + else if (U > 60.0) + { + if (Verbose) + { + System.Diagnostics.Debug.WriteLine("SORRY THERE WERE NO SURVIVORS. YOU BLEW IT!"); + System.Diagnostics.Debug.WriteLine("IN FACT, YOU BLASTED A NEW LUNAR CRATER {0} FEET DEEP!", 0.277 * U); + } + Status = LanderStatus.Crashed; + score = -U; + } + else + { + if (Verbose) + { + System.Diagnostics.Debug.WriteLine("LANDED AT {0:F2} MPH",U); + System.Diagnostics.Debug.WriteLine("CRAFT DAMAGE ... YOU'RE STRANDED HERE UNTIL A RESCUE"); + System.Diagnostics.Debug.WriteLine("PARTY ARRIVES. HOPE YOU HAVE ENOUGH OXYGEN!"); + } + score = 0.5f; + Status = LanderStatus.Landed; + } + } + else if (_LEM.OutOfFuel) + { + score = -_LEM.SpeedMPH; + } + Step step = new Step(); + step.Information = new Dict(); + step.Information["Distance"] = _LEM.Altitude; + step.Information["Speed"] = _LEM.Speed; + step.Information["Fuel"] = _LEM.NetFuel; + step.Observation = new float[] { _LEM.Altitude, _LEM.Speed, _LEM.NetFuel }; + step.Done = (Status != LanderStatus.Landing); + step.Reward = score; + return (step); + } + + public override Image Render(string mode = "human") + { + float ms = 1f / 50f * 1000f; // ms left for this frame + long lstart = DateTime.Now.Ticks; + if (Verbose) + { + if (_ElapsedTime < 1e-5) + { + System.Diagnostics.Debug.WriteLine("CONTROL CALLING LUNAR MODULE. MANUAL CONTROL IS NECESSARY"); + System.Diagnostics.Debug.WriteLine("YOU MAY RESET FUEL RATE K EACH 10 SECS TO 0 OR ANY VALUE"); + System.Diagnostics.Debug.WriteLine("BETWEEN 8 & 200 LBS/SEC. YOU'VE 16000 LBS FUEL. ESTIMATED"); + System.Diagnostics.Debug.WriteLine("FREE FALL IMPACT TIME-120 SECS. CAPSULE WEIGHT-32500 LBS"); + System.Diagnostics.Debug.WriteLine("FIRST RADAR CHECK COMING UP"); + System.Diagnostics.Debug.WriteLine("COMMENCE LANDING PROCEDURE"); + System.Diagnostics.Debug.WriteLine("TIME,SECS ALTITUDE,MILES+FEET VELOCITY,MPH FUEL,LBS FUEL RATE"); + } + else + { + System.Diagnostics.Debug.WriteLine("{0,8:F0}{1,15}{2,7}{3,15:F2}{4,12:F1}", _ElapsedTime, Math.Truncate(_LEM.Altitude), Math.Truncate(_LEM.AltitudeFeet), _LEM.SpeedMPH, _LEM.NetFuel); + } + } + if (_viewer == null && mode == "human") + { + lock (this) + { + //to prevent double initalization. + if (_viewer == null) + { + if (_viewerFactory == null) + _viewerFactory = NullEnvViewer.Factory; + _viewer = _viewerFactory(VIEWPORT_W, VIEWPORT_H, "lemlander-v1").GetAwaiter().GetResult(); + } + } + } // Define the buffer image for drawing + var img = new Image(VIEWPORT_W, VIEWPORT_H); + var draw = new List<(IPath, Rgba32)>(); + var fill = new List<(IPath, Rgba32)>(); + // Setup the gauge for fuel and speed + int gauge_w = 100; + int gauge_h = VIEWPORT_H / 3; + + var fuel_gauge = new RectangularPolygon(VIEWPORT_W/4 - gauge_w/2, VIEWPORT_H/2 - gauge_h, gauge_w, gauge_h); + draw.Add((fuel_gauge, new Rgba32(204, 153, 102))); + float net_fuel = _LEM.NetFuel / _LEM.FuelMass*(gauge_h-4); // [0,1] + fuel_gauge = new RectangularPolygon(fuel_gauge.Left+3, fuel_gauge.Bottom-net_fuel-2, gauge_w-4, net_fuel); + if (net_fuel < 0.1) + { + fill.Add((fuel_gauge, new Rgba32(255, 0, 0))); + } + else + { + fill.Add((fuel_gauge, new Rgba32(0, 255, 0))); + } + + var speed_gauge = new RectangularPolygon(3*VIEWPORT_W / 4 - gauge_w / 2, VIEWPORT_H / 2 - gauge_h, gauge_w, gauge_h); + draw.Add((speed_gauge, new Rgba32(204, 153, 102))); + float speed = Math.Abs(_LEM.SpeedMPH / 3600 * (gauge_h - 4)); // [0,1] + speed_gauge = new RectangularPolygon(speed_gauge.Left + 3, speed_gauge.Bottom - speed - 2, gauge_w - 4, speed); + if (_LEM.Speed < 0f) + { + fill.Add((speed_gauge, new Rgba32(255, 0, 0))); + } + else + { + fill.Add((speed_gauge, new Rgba32(0, 255, 0))); + } + + var alt_gauge = new RectangularPolygon(VIEWPORT_W / 2 - gauge_w / 2, VIEWPORT_H / 2 - gauge_h, gauge_w, gauge_h); + draw.Add((alt_gauge, new Rgba32(204, 153, 102))); + float alt = Math.Abs(_LEM.Altitude / 118 * (gauge_h - 4)); // [0,1] + alt_gauge = new RectangularPolygon(alt_gauge.Left + 3, alt_gauge.Bottom - alt - 2, gauge_w - 4, alt); + if (_LEM.Altitude < 15) + { + fill.Add((alt_gauge, new Rgba32(255, 0, 0))); + } + else + { + fill.Add((alt_gauge, new Rgba32(0, 255, 0))); + } + + // Do the drawing + img.Mutate(i => i.BackgroundColor(new Rgba32(0, 0, 0))); // Space is black + foreach (var (path, rgba32) in draw) + { + img.Mutate(i => i.Draw(rgba32,1f, path)); + } + foreach (var (path, rgba32) in fill) + { + img.Mutate(i => i.Fill(rgba32, path)); + } + //img.Mutate(x => x.DrawText("FUEL", ..., Color.Yellow, new PointF(100, 100))); + //img.Mutate(x => x.DrawText("ALTITUDE", ..., Color.Yellow, new PointF(100, 100))); + //img.Mutate(x => x.DrawText("SPEED", ..., Color.Yellow, new PointF(100, 100))); + + _viewer.Render(img); + ms -= (float)(new TimeSpan(DateTime.Now.Ticks - lstart).TotalMilliseconds); + if (ms > 0) + { + System.Threading.Thread.Sleep((int)ms); + } + return (img); + } + public override void CloseEnvironment() + { + if (_viewer != null) + { + _viewer.CloseEnvironment(); + _viewer = null; + } + } + + public override void Seed(int seed) + { + RandomState.seed(seed); + } + } +} \ No newline at end of file diff --git a/tests/Gym.Tests/Envs/Aether/CarRacingEnvironment.cs b/tests/Gym.Tests/Envs/Aether/CarRacingEnvironment.cs new file mode 100644 index 0000000..0678b7f --- /dev/null +++ b/tests/Gym.Tests/Envs/Aether/CarRacingEnvironment.cs @@ -0,0 +1,163 @@ +using Gym.Environments; +using Gym.Environments.Envs.Classic; +using Gym.Rendering.Avalonia; +using Gym.Tests.Helpers; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Gym.Environments.Envs.Aether; +using Gym.Rendering.WinForm; +using NumSharp; +using static System.Net.Mime.MediaTypeNames; +using System.Security.Policy; +using System.Windows.Documents; +using System.Xml.Linq; +using tainicom.Aether.Physics2D.Common; + +namespace Gym.Tests.Envs.Aether +{ + [TestClass] + public class CarRacingEnvironment + { + private Dictionary _ExpectedScoreForRandomSeed = new Dictionary(); + private Dictionary _ExpectedStepsForRandomSeed = new Dictionary(); + private const int MAX_STEPS = 5000; + private const bool VERBOSE = true; + public void Run(IEnvironmentViewerFactoryDelegate factory, int? seed, bool continuous = false) + { + seed = seed ?? 0; + NumPyRandom rando = np.random.RandomState(seed.Value); + CarRacingEnv env = new CarRacingEnv(factory, random_state:rando , continuous: continuous, verbose : VERBOSE); + try + { + // Run a PID test + float total_reward = 0f; + int steps = 0; + NDArray state = env.Reset(); + while (true) + { + object a = null; + if (env.StateOutputFormat == CarRacingEnv.StateFormat.Pixels) + { + a = Driver(env, (float[,,])state, rando); + } + else + { + a = Driver(env, (float[])state, rando); + } + var (observation, reward, done, information) = env.Step(a); + total_reward += reward; + steps++; + if (VERBOSE && (steps % 1000 == 0)) + { + System.Diagnostics.Debug.WriteLine("{0}: a={1}, reward={2}, total={3}", steps, a, reward, total_reward); + } + + if (done || steps > MAX_STEPS) + { + break; + } + + state = observation; + env.Render(); + } + if (rando != null) + { + float escore = _ExpectedScoreForRandomSeed[rando.Seed]; + int esteps = _ExpectedStepsForRandomSeed[rando.Seed]; + Assert.AreEqual(esteps, steps, string.Format("Expected number of steps for seed {0} did not match.", rando.Seed)); + Assert.AreEqual(escore, total_reward, 1e-5f, string.Format("Expected score for seed {0} did not match.", rando.Seed)); + } + Assert.IsTrue(steps < MAX_STEPS, "Too many steps."); + System.Diagnostics.Debug.WriteLine("Total reward: {0} in {1} steps.", total_reward, steps); + } + finally + { + env.CloseEnvironment(); + } + } + private object Driver(CarRacingEnv env, float[,,] s, NumPyRandom random_state) + { + // { steering, gas, brake } + NDArray guidance = random_state.rand(3); + return (guidance); + } + private object Driver(CarRacingEnv env, float[] s, NumPyRandom random_state) + { + // { steering, gas, brake } + NDArray guidance = new float[] { .15f, .5f, 0f }; + return (guidance); + } + + [TestCleanup] + public void Cleanup() + { + StaticAvaloniaApp.Shutdown(); + } + + [TestMethod] + public void Run_Discrete_WinFormEnv() + { + Run(WinFormEnvViewer.Factory, null); + } + + [TestMethod] + public void Run_Discrete_AvaloniaEnv() + { + Run(AvaloniaEnvViewer.Factory, null); + } + + [TestMethod] + public void Run_Discrete_NullEnv() + { + Run(NullEnvViewer.Factory, null); + } + + [TestMethod] + public void Run_Continuous_WinFormEnv() + { + Run(WinFormEnvViewer.Factory, null, true); + } + + [TestMethod] + public void Run_Continuous_AvaloniaEnv() + { + Run(AvaloniaEnvViewer.Factory, null, true); + } + + [TestMethod] + public async Task Run_TwoInstances_Continuous_AvaloniaEnv() + { + var task1 = Task.Run(() => Run(AvaloniaEnvViewer.Factory, null, true)); + var task2 = Task.Run(() => Run(AvaloniaEnvViewer.Factory, null, true)); + await Task.WhenAll(task1, task2); + } + + [TestMethod] + public void Run_Continuous_NullEnv() + { + Run(NullEnvViewer.Factory, null, true); + } + + [TestMethod] + public void Run_Discrete_WinFormEnv_ConsistencyCheck() + { + Run(WinFormEnvViewer.Factory, 1000); + } + + [TestMethod] + public void Run_Discrete_AvaloniaEnv_ConsistencyCheck() + { + Run(AvaloniaEnvViewer.Factory, 1000); + } + + [TestMethod] + public void Run_Discrete_NullEnv_ConsistencyCheck() + { + Run(NullEnvViewer.Factory, 1000); + } + } +} diff --git a/tests/Gym.Tests/Envs/Classic/LEMLanderEnvironment.cs b/tests/Gym.Tests/Envs/Classic/LEMLanderEnvironment.cs new file mode 100644 index 0000000..29a0731 --- /dev/null +++ b/tests/Gym.Tests/Envs/Classic/LEMLanderEnvironment.cs @@ -0,0 +1,216 @@ +using Gym.Environments; +using Gym.Environments.Envs.Classic; +using Gym.Rendering.Avalonia; +using Gym.Tests.Helpers; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Gym.Environments.Envs.Aether; +using Gym.Rendering.WinForm; +using NumSharp; +using static System.Net.Mime.MediaTypeNames; +using System.Security.Policy; +using System.Windows.Documents; +using System.Xml.Linq; +using tainicom.Aether.Physics2D.Common; + +namespace Gym.Tests.Envs.Classic +{ + [TestClass] + public class LEMLanderEnvironment + { + private const bool VERBOSE = true; + // Test1 : Fuel runs out + private float[] _BurnsTest1 = new float[] { 35.6f, 177f, 180.2f, 8.2f, 13.3f, 121, 3f, 133.6f, 68.2f, 124.10f, 147.5f, 91f, 133.9f, 199.1f, 184.2f, 145.8f, 178.9f, 30.3f, 101.7f, 69.4f, 152.6f, 58.1f, 74.2f, 187.1f, 42.4f, 170.10f, 154.8f, 24.9f, 189.7f, 65.8f, 106.2f, 74.6f, 140.9f, 128f, 37f, 190.5f, 161f, 104.6f, 86f, 160.4f, 197.6f, 96.8f, 86f, 180.4f, 180.5f, 121, 8f, 178.2f }; + private float[] _TimeTest1 = new float[] { 3f }; + // Test2 : Crash + private float[] _BurnsTest2 = new float[] { 64f, 195.8f, 121.3f, 120.5f, 82.8f, 68.9f, 41.2f, 63.7f, 47.2f, 25.1f, 16.5f, 12.7f, 31.5f, 73f, 143.1f, 69.7f, 51.8f, 12.5f, 183.7f, 50f, 15f, 15f, 12f, 12.5f, 12.5f, 57.95f, 0.05f }; + private float[] _TimeTest2 = new float[] { 10f }; + // Test4 : Good Landing + private float[] _BurnsTest4 = new float[] { 164.8f,13.6f,96.1f,70.8f,64.5f,33.7f,29f,11.1f,20.3f,196f,86.5f,21.6f,27.6f,116.8f,162.8f,189f,28.9f,112.7f,110.1f,21.1f, 12.1f }; + private float[] _TimeTest4 = new float[] { 10f }; + // Test5 : Perfect Landing + private float[] _BurnsTest5 = new float[] { 164.8f, 13.6f, 96.1f, 70.8f, 64.5f, 33.7f, 29f, 11.1f, 20.3f, 196f, 86.5f, 21.6f, 27.6f, 116.8f, 162.8f, 189f, 28.9f, 112.7f, 110.1f, 21.1f, 13.7f }; + private float[] _TimeTest5 = new float[] { 10f }; + // Test6 : So-So Landing + private float[] _BurnsTest6 = new float[] { 164.8f, 13.6f, 96.1f, 70.8f, 64.5f, 33.7f, 29f, 11.1f, 20.3f, 196f, 86.5f, 21.6f, 27.6f, 116.8f, 162.8f, 189f, 28.9f, 112.7f, 110.1f, 23.1f, 11.5f, 7f, 2f, 0f }; + private float[] _TimeTest6 = new float[] { 10f }; + private int MAX_STEPS = 100; + public LEMLanderEnv Run(IEnvironmentViewerFactoryDelegate factory, NumPyRandom random_state, float[] burns, float[] time) + { + LEMLanderEnv env = new LEMLanderEnv(factory, random_state: random_state); + env.Verbose = VERBOSE; + try + { + // Run a PID test + float total_reward = 0f; + int steps = 0; + NDArray state = env.Reset(); + int ix = 0; + while (true) + { + object a = np.array(new float[] { burns[ix % burns.Length], time[ix % time.Length] }); + ix = (ix + 1) % burns.Length; + var (observation, reward, done, information) = env.Step(a); + total_reward += reward; + steps++; + if (VERBOSE && (steps % 1000 == 0)) + { + System.Diagnostics.Debug.WriteLine("{0}: a={1}, reward={2}, total={3}", steps, a, reward, total_reward); + } + + if (done || steps > MAX_STEPS) + { + break; + } + + state = observation; + env.Render(); + } + System.Diagnostics.Debug.WriteLine("Total reward: {0} in {1} steps.", total_reward, steps); + } + finally + { + env.CloseEnvironment(); + } + return (env); + } + #region Test 1 - A Known Series of Burns + [TestMethod] + public void Run_Test1_WinFormEnv() + { + LEMLanderEnv env = Run(WinFormEnvViewer.Factory, null, _BurnsTest1, _TimeTest1); + Assert.AreEqual(LanderStatus.FreeFall, env.Status); + } + + [TestMethod] + public void Run_Test1_AvaloniaEnv() + { + LEMLanderEnv env = Run(AvaloniaEnvViewer.Factory, null, _BurnsTest1, _TimeTest1); + Assert.AreEqual(LanderStatus.FreeFall, env.Status); + } + + [TestMethod] + public void Run_Test1_NullEnv() + { + LEMLanderEnv env = Run(NullEnvViewer.Factory, null, _BurnsTest1, _TimeTest1); + Assert.AreEqual(LanderStatus.FreeFall, env.Status); + } + #endregion + + #region Test 2 - Crash + [TestMethod] + public void Run_Test2_WinFormEnv() + { + LEMLanderEnv env = Run(WinFormEnvViewer.Factory, null, _BurnsTest2, _TimeTest2); + Assert.AreEqual(LanderStatus.Crashed, env.Status); + } + + [TestMethod] + public void Run_Test2_AvaloniaEnv() + { + LEMLanderEnv env = Run(AvaloniaEnvViewer.Factory, null, _BurnsTest2, _TimeTest2); + Assert.AreEqual(LanderStatus.Crashed, env.Status); + } + + [TestMethod] + public void Run_Test2_NullEnv() + { + LEMLanderEnv env = Run(NullEnvViewer.Factory, null, _BurnsTest2, _TimeTest2); + Assert.AreEqual(LanderStatus.Crashed, env.Status); + } + #endregion + + #region Test 3 - Force The GUI Display + [TestMethod] + public void Run_Test3_WinFormEnv() + { + Run(WinFormEnvViewer.Factory, null, new float[] { 5f }, new float[] { 2f }); + } + /// + /// Test the avalonia viewer with the slow run to force the display of the GUI + /// + [TestMethod] + public void Run_Test3_AvaloniaEnv() + { + Run(AvaloniaEnvViewer.Factory, null, new float[] { 5f }, new float[] { 2f }); + } + + [TestMethod] + public void Run_Test3_NullEnv() + { + Run(NullEnvViewer.Factory, null, new float[] { 5f }, new float[] { 2f }); + } + #endregion + #region Test 4 - Good Landing Solution + [TestMethod] + public void Run_Test4_WinFormEnv() + { + LEMLanderEnv env = Run(WinFormEnvViewer.Factory, null, _BurnsTest4, _TimeTest4); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + + [TestMethod] + public void Run_Test4_AvaloniaEnv() + { + LEMLanderEnv env = Run(AvaloniaEnvViewer.Factory, null, _BurnsTest4, _TimeTest4); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + + [TestMethod] + public void Run_Test4_NullEnv() + { + LEMLanderEnv env = Run(NullEnvViewer.Factory, null, _BurnsTest4, _TimeTest4); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + #endregion + + #region Test 5 - Perfect Landing Solution + [TestMethod] + public void Run_Test5_WinFormEnv() + { + LEMLanderEnv env = Run(WinFormEnvViewer.Factory, null, _BurnsTest5, _TimeTest5); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + + [TestMethod] + public void Run_Test5_AvaloniaEnv() + { + LEMLanderEnv env = Run(AvaloniaEnvViewer.Factory, null, _BurnsTest5, _TimeTest5); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + + [TestMethod] + public void Run_Test5_NullEnv() + { + LEMLanderEnv env = Run(NullEnvViewer.Factory, null, _BurnsTest5, _TimeTest5); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + #endregion + #region Test 6 - So-So Landing Solution + [TestMethod] + public void Run_Test6_WinFormEnv() + { + LEMLanderEnv env = Run(WinFormEnvViewer.Factory, null, _BurnsTest6, _TimeTest6); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + + [TestMethod] + public void Run_Test6_AvaloniaEnv() + { + LEMLanderEnv env = Run(AvaloniaEnvViewer.Factory, null, _BurnsTest6, _TimeTest6); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + + [TestMethod] + public void Run_Test6_NullEnv() + { + LEMLanderEnv env = Run(NullEnvViewer.Factory, null, _BurnsTest6, _TimeTest6); + Assert.AreEqual(LanderStatus.Landed, env.Status); + } + #endregion + } +} \ No newline at end of file