artificial intelligence - Q-learning in game not working as expected -


I have attempted to implement Q-learning in a simple game that I have written. The game is based around the player having to "jump" to avoid oncoming boxes.

I have designed the system with two actions, jump and do_nothing, and the states are the distances to the next block (divided and floored to ensure that there is not a large number of states).

My issue seems to be that my implementation of the algorithm isn't considering "future reward", and so it ends up jumping at the wrong times.

Here is my implementation of the Q-learning algorithm:

// NOTE(review): this listing was recovered from a whitespace-collapsed,
// lower-cased copy. JavaScript built-ins (Math, Object, `for`) have been
// restored; project identifiers (jumpgameaiclass, playerx, canjump, ...) are
// kept exactly as they appear here — confirm their casing against the game
// sources before running.

/**
 * Returns the Q-value row for a state, creating it on first access.
 *
 * @param {number|string} state - Discretised state key (used as an object key).
 * @returns {Object} Map from action name to Q-value; new rows start at 0 for
 *     every entry of this.actions.
 */
jumpgameaiclass.prototype.getq = function getq(state) {
    // Called through Object.prototype so the lookup cannot be shadowed by a key in q.
    if (!Object.prototype.hasOwnProperty.call(this.q, state)) {
        this.q[state] = {};

        for (var actionindex = 0; actionindex < this.actions.length; actionindex++) {
            var action = this.actions[actionindex];

            this.q[state][action] = 0;
        }
    }

    return this.q[state];
};

/**
 * Computes the discretised distance from the player to the nearest block that
 * is at or ahead of the player (block.x - this.playerx >= 0).
 *
 * @returns {number} floor(distance * this.resolution), clamped to >= 0. When no
 *     block qualifies the sentinel -1 is clamped too, so the result is 0 — the
 *     same state as a block directly at the player.
 */
jumpgameaiclass.prototype.getblockdistance = function getblockdistance() {
    var closest = -1;

    for (var blockindex = 0; blockindex < this.blocks.length; blockindex++) {
        var block = this.blocks[blockindex];

        var distance = block.x - this.playerx;

        if (distance >= 0 && (closest === -1 || distance < closest)) {
            closest = distance;
        }
    }

    return Math.max(0, Math.floor(closest * this.resolution));
};

/**
 * Picks the greedy action for a state.
 *
 * @param {number} distance - Discretised state, as returned by getblockdistance().
 * @returns {string} this.actions[0] or this.actions[1], whichever has the larger
 *     Q-value. On a tie: this.actions[1] if canjump() is false, otherwise a
 *     uniformly random entry of this.actions.
 */
jumpgameaiclass.prototype.getactionwithhighestq = function getactionwithhighestq(distance) {
    var jumpreward = this.getq(distance)[this.actions[0]];
    var donothingreward = this.getq(distance)[this.actions[1]];

    if (jumpreward > donothingreward) {
        return this.actions[0];
    } else if (donothingreward > jumpreward) {
        return this.actions[1];
    } else {
        if (!this.canjump()) {
            return this.actions[1];
        }

        return this.actions[Math.floor(Math.random() * this.actions.length)];
    }
};

/**
 * Epsilon-greedy action selection for the current frame.
 *
 * @returns {string} this.actions[1] whenever canjump() is false; otherwise a
 *     random action with probability this.epsilon, else the greedy action for
 *     the current block distance.
 */
jumpgameaiclass.prototype.getactionepsilongreedy = function getactionepsilongreedy() {
    // can't jump while in mid-air
    if (!this.canjump()) {
        return this.actions[1];
    }

    if (Math.random() < this.epsilon) {
        return this.actions[Math.floor(Math.random() * this.actions.length)];
    } else {
        return this.getactionwithhighestq(this.getblockdistance());
    }
};

/**
 * Performs one learning step: rewards the previous (state, action) pair based
 * on what happened since, then chooses and executes the next action.
 *
 * Reads score/playeralive, writes q, lastscore, lastaction and lastdistance,
 * and calls drawdistance() and (when the chosen action is actions[0]) jump().
 */
jumpgameaiclass.prototype.think = function think() {
    var reward = this.livereward;

    // A score change takes precedence over death when both are true.
    if (this.score !== this.lastscore) {
        this.lastscore = this.score;
        reward = this.scorereward;
    } else if (!this.playeralive) {
        reward = this.deathreward;
    }

    this.drawdistance();

    var distance = this.getblockdistance(),
        maxq = this.getq(distance)[this.getactionwithhighestq(distance)],
        previousq = this.getq(this.lastdistance)[this.lastaction];

    // Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)),
    // where (s, a) is the previous frame's state/action and s' the current state.
    this.getq(this.lastdistance)[this.lastaction] = previousq + this.alpha * (reward + (this.gamma * maxq) - previousq);

    this.lastaction = this.getactionepsilongreedy();
    this.lastdistance = distance;

    switch (this.lastaction) {
        case this.actions[0]:
            this.jump();
            break;
    }
};

And here are some of the properties that are used by it:

// Learning parameters, reward values and initial bookkeeping state read by the
// Q-learning methods (a property-list fragment, not a standalone statement).
epsilon: 0.05, alpha: 1, gamma: 1, resolution: 0.1, actions: [ 'jump', 'do_nothing' ], q: {}, livereward: 0, scorereward: 100, deathreward: -1000, lastaction: 'do_nothing', lastdistance: 0, lastscore: 0 

I am having to use lastaction/lastdistance to calculate the Q-value, as I cannot use the current data (it would be acting on the action performed in the frame before).

The think method is called once every frame, after rendering and all the game stuff is done (physics, controls, death, etc.).

// NOTE(review): this listing was recovered from a whitespace-collapsed,
// lower-cased copy. JavaScript built-ins (Math, Object, `for`) and the canvas
// 2D members used on this.context (textAlign, textBaseline, fillText) have been
// restored on the assumption that this.context is a CanvasRenderingContext2D.
// Project identifiers (game.jumpgame, playerx, canjump, canvaswidth, ...) are
// kept exactly as they appear here — confirm their casing against game.js /
// jump.js before running.

/**
 * Q-learning player for the jump game. Calls the game.jumpgame constructor on
 * the new instance and then defines the learning parameters, reward values and
 * bookkeeping state as own properties.
 *
 * @param {*} canvas - Passed straight through to game.jumpgame.
 */
var jumpgameaiclass = function jumpgame(canvas) {
    game.jumpgame.call(this, canvas);

    // Descriptors without `writable: true` are read-only; only the Q-table and
    // the last-frame bookkeeping values are reassigned after construction.
    Object.defineProperties(this, {
        epsilon: {
            value: 0.05
        },

        alpha: {
            value: 1
        },

        gamma: {
            value: 1
        },

        resolution: {
            value: 0.1
        },

        actions: {
            value: [ 'jump', 'do_nothing' ]
        },

        q: {
            value: { },
            writable: true
        },

        livereward: {
            value: 0
        },

        scorereward: {
            value: 100
        },

        deathreward: {
            value: -1000
        },

        lastaction: {
            value: 'do_nothing',
            writable: true
        },

        lastdistance: {
            value: 0,
            writable: true
        },

        lastscore: {
            value: 0,
            writable: true
        }
    });
};

jumpgameaiclass.prototype = Object.create(game.jumpgame.prototype);

/**
 * Returns the Q-value row for a state, creating it on first access.
 *
 * @param {number|string} state - Discretised state key (used as an object key).
 * @returns {Object} Map from action name to Q-value; new rows start at 0 for
 *     every entry of this.actions.
 */
jumpgameaiclass.prototype.getq = function getq(state) {
    // Called through Object.prototype so the lookup cannot be shadowed by a key in q.
    if (!Object.prototype.hasOwnProperty.call(this.q, state)) {
        this.q[state] = {};

        for (var actionindex = 0; actionindex < this.actions.length; actionindex++) {
            var action = this.actions[actionindex];

            this.q[state][action] = 0;
        }
    }

    return this.q[state];
};

/**
 * Computes the discretised distance from the player to the nearest block that
 * is at or ahead of the player (block.x - this.playerx >= 0).
 *
 * @returns {number} floor(distance * this.resolution), clamped to >= 0. When no
 *     block qualifies the sentinel -1 is clamped too, so the result is 0 — the
 *     same state as a block directly at the player.
 */
jumpgameaiclass.prototype.getblockdistance = function getblockdistance() {
    var closest = -1;

    for (var blockindex = 0; blockindex < this.blocks.length; blockindex++) {
        var block = this.blocks[blockindex];

        var distance = block.x - this.playerx;

        if (distance >= 0 && (closest === -1 || distance < closest)) {
            closest = distance;
        }
    }

    return Math.max(0, Math.floor(closest * this.resolution));
};

/**
 * Picks the greedy action for a state.
 *
 * @param {number} distance - Discretised state, as returned by getblockdistance().
 * @returns {string} this.actions[0] or this.actions[1], whichever has the larger
 *     Q-value. On a tie: this.actions[1] if canjump() is false, otherwise a
 *     uniformly random entry of this.actions.
 */
jumpgameaiclass.prototype.getactionwithhighestq = function getactionwithhighestq(distance) {
    var jumpreward = this.getq(distance)[this.actions[0]];
    var donothingreward = this.getq(distance)[this.actions[1]];

    if (jumpreward > donothingreward) {
        return this.actions[0];
    } else if (donothingreward > jumpreward) {
        return this.actions[1];
    } else {
        if (!this.canjump()) {
            return this.actions[1];
        }

        return this.actions[Math.floor(Math.random() * this.actions.length)];
    }
};

/**
 * Epsilon-greedy action selection for the current frame.
 *
 * @returns {string} this.actions[1] whenever canjump() is false; otherwise a
 *     random action with probability this.epsilon, else the greedy action for
 *     the current block distance.
 */
jumpgameaiclass.prototype.getactionepsilongreedy = function getactionepsilongreedy() {
    if (!this.canjump()) {
        return this.actions[1];
    }

    if (Math.random() < this.epsilon) {
        return this.actions[Math.floor(Math.random() * this.actions.length)];
    } else {
        return this.getactionwithhighestq(this.getblockdistance());
    }
};

/**
 * Death handler: restarts the game so learning continues.
 */
jumpgameaiclass.prototype.ondeath = function ondeath() {
    this.restart();
};

/**
 * Performs one learning step: rewards the previous (state, action) pair based
 * on what happened since, then chooses and executes the next action.
 */
jumpgameaiclass.prototype.think = function think() {
    var reward = this.livereward;

    // A score change takes precedence over death when both are true.
    if (this.score !== this.lastscore) {
        this.lastscore = this.score;
        reward = this.scorereward;
    } else if (!this.playeralive) {
        reward = this.deathreward;
    }

    this.drawdistance();

    var distance = this.getblockdistance(),
        maxq = this.getq(distance)[this.getactionwithhighestq(distance)],
        previousq = this.getq(this.lastdistance)[this.lastaction];

    // Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)),
    // where (s, a) is the previous frame's state/action and s' the current state.
    this.getq(this.lastdistance)[this.lastaction] = previousq + this.alpha * (reward + (this.gamma * maxq) - previousq);

    this.lastaction = this.getactionepsilongreedy();
    this.lastdistance = distance;

    switch (this.lastaction) {
        case this.actions[0]:
            this.jump();
            break;
    }
};

/**
 * Debug overlay: draws the current and the previous discretised distance,
 * both anchored at (canvaswidth / 2, canvasheight / 4) — the current value
 * above the anchor (bottom baseline), the previous one below it (top baseline).
 */
jumpgameaiclass.prototype.drawdistance = function drawdistance() {
    this.context.save();

    this.context.textAlign = 'center';
    this.context.textBaseline = 'bottom';

    this.context.fillText('distance: ' + this.getblockdistance(), this.canvaswidth / 2, this.canvasheight / 4);

    this.context.textBaseline = 'top';

    this.context.fillText('last distance: ' + this.lastdistance, this.canvaswidth / 2, this.canvasheight / 4);

    this.context.restore();
};

/**
 * Per-frame hook: runs the base game's frame logic first, then the learner.
 */
jumpgameaiclass.prototype.onframe = function onframe() {
    game.jumpgame.prototype.onframe.apply(this, arguments);

    this.think();
};

game.jumpgameai = jumpgameaiclass;
/* Page chrome: light grey backdrop with centred inline content. */
body {
    text-align: center;
    background-color: #eee;
}

/* The game surface: white canvas with a thin light-grey outline. */
canvas#game {
    border: 1px solid #ddd;
    background-color: #fff;
}
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>jump</title>
</head>
<body>
    <canvas id="game" width="512" height="512">
        <h1>your browser doesn't support canvas!</h1>
    </canvas>

    <!-- NOTE(review): the script hosts below may no longer work: raw.githubusercontent.com
         serves files as text/plain, and the RawGit CDN has been retired. Confirm each URL
         still loads, or host the files locally. -->
    <script src="https://raw.githubusercontent.com/cagosta/requestanimationframe/master/app/requestanimationframe.js"></script>

    <!-- https://gist.github.com/jackwilsdon/d06bffa6b32c53321478 -->
    <script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/4e467f82590e76543bf55ff788504e26afc3d694/game.js"></script>
    <script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/2b7ce2c3dd268c4aef9ad27316edb0b235ad0d06/canvasgame.js"></script>
    <script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/2696c72e001e48359a6ce880f1c475613fe359f5/jump.js"></script>
    <script src="https://cdn.rawgit.com/jackwilsdon/d06bffa6b32c53321478/raw/249c92f3385757b6edf2ceb49e26f14b89ffdcfe/bootstrap.js"></script>
</body>
</html>

You have a simplified version of:

enter image description here

Source: Flappy Bird RL

I used the following values:

    // Replacement property descriptors (a fragment, not a standalone statement).
    // NOTE(review): scorereward is negative and deathreward positive here, the
    // opposite signs of the question's values (100 / -1000) — confirm this is
    // intended and not a transcription swap.
    epsilon: {         value: 0.01     },     alpha: {         value: 0.7     },     gamma: {         value: 0.9     },     resolution: {         value: 0.1     },       livereward: {         value: 10     },     scorereward: {         value: -100     },     deathreward: {         value: 1000     }, 

It had no trouble getting beyond 100 within the first 20 attempts.


Q-learning can be described by the following temporal-difference update rule:

$Q(s, a) = r(s, a) + \gamma \max_{a'} Q(s', a')$

where

  • r(s,a) = r = immediate reward
  • gamma = relative value of delayed vs. immediate rewards (between 0 and 1)
  • s' = new state after action a
  • a = action in state s
  • a' = action in state s'

You should execute it as follows:

select action , execute it

  1. For each state-action pair (s, a), initialize the table entry Q(s, a) to zero.
  2. Observe the current state s.
  3. Do forever:
    • Select an action a and execute it.
    • Receive the immediate reward r, i.e. r(s, a).
    • Observe the new state s'.
    • Update the table entry: $Q(s, a) = r(s, a) + \gamma \max_{a'} Q(s', a')$.
    • Set s = s'.

Comments

Popular posts from this blog

javascript - Google App Script ContentService downloadAsFile not working -

javascript - Function overwritting -

php - Find a regex to take part of Email -