Source code for MABpy.ActionRewardAgents

import numpy as np

from MABpy.base import Agent

class RandomAgent(Agent):
    """Choose a random action and never learn."""

    def _makeDesicion(self, context=None):
        """Return a uniformly random action index in ``[0, N_bandits - 1]``.

        ``context`` is accepted for interface compatibility and is ignored.
        """
        # ``np.random.random_integers`` is deprecated.  ``randint`` uses an
        # exclusive upper bound, so ``N_bandits`` replaces ``N_bandits - 1``.
        return np.random.randint(low=0, high=self._envParams["N_bandits"])
class SimpleAgent(Agent):
    """Greedy agent that keeps a running mean reward for every action.

    ``optimistic`` is the initial reward estimate assigned to each action;
    it is replaced by the observed reward the first time an action is learned.
    """

    def __init__(self, optimistic=0, verbose=0):
        super().__init__(verbose)
        self._optimistic = optimistic

    def initEnviromentParams(self, params):
        """Reset per-action estimates, try counters and the step counter.

        One slot is created per element of ``self._envParams["ActionRange"]``
        (presumably populated by the parent implementation -- confirm in
        ``Agent``).
        """
        super().initEnviromentParams(params)
        initial_estimate = self._optimistic
        self._qreward = [initial_estimate for _ in self._envParams["ActionRange"]]
        self._qtry = [0 for _ in self._envParams["ActionRange"]]
        self._iter = 0

    def _makeDesicion(self, context=None):
        """Return the index of the action with the highest reward estimate."""
        estimates = self._qreward
        return np.argmax(estimates)

    def _learn(self, action, reward, context=None):
        """Fold ``reward`` into the running mean kept for ``action``."""
        self._iter += 1
        self._qtry[action] += 1
        tries = self._qtry[action]
        # Incremental mean: rebuild the previous sum from the old mean and
        # the previous count, add the new reward, divide by the new count.
        previous_sum = self._qreward[action] * (tries - 1)
        self._qreward[action] = (previous_sum + reward) / tries
class eGreedyAgent(SimpleAgent):
    """Epsilon-greedy agent with a fixed exploration probability.

    With probability ``greedy`` a uniformly random action is chosen;
    otherwise the action with the best current estimate is played.
    """

    def __init__(self, greedy=0.1, optimistic=0, verbose=0):
        """Create the agent.

        Raises:
            ValueError: if ``greedy`` is not strictly between 0 and 1.
        """
        super().__init__(optimistic, verbose)
        self._greedy = greedy
        if greedy >= 1 or greedy <= 0:
            raise ValueError("greedy must be between 0 and 1")

    def _makeDesicion(self, context=None):
        """Return the greedy action, or a random one with probability ``greedy``."""
        action = super()._makeDesicion(context)
        e = np.random.rand()
        if e > self._greedy:
            return action
        # ``np.random.random_integers`` is deprecated.  ``randint`` uses an
        # exclusive upper bound, so ``N_bandits`` replaces ``N_bandits - 1``.
        return np.random.randint(low=0, high=self._envParams["N_bandits"])
class enGreedyAgent(SimpleAgent):
    """Epsilon-greedy agent whose exploration probability decays over time.

    The exploration probability at step ``t`` (``self._iter``) is
    ``min(1, c * N_bandits / (d**2 * (t + 1)))``.
    """

    def __init__(self, c=0.1, d=1.0, optimistic=0, verbose=0):
        """Create the agent.

        Raises:
            ValueError: if ``d`` is not in the interval ``(0, 1]``.
        """
        super().__init__(optimistic, verbose)
        self._c = c
        self._d = d
        if d > 1 or d <= 0:
            # The original message named ``greedy``, which is not a parameter
            # of this class; the check is on ``d``.
            raise ValueError("d must be between 0 and 1")

    def _makeDesicion(self, context=None):
        """Return the greedy action, or a random one with decaying probability."""
        action = super()._makeDesicion(context)
        e = np.random.rand()
        n_bandits = self._envParams["N_bandits"]
        en = self._c * n_bandits / (self._d * self._d * (self._iter + 1))
        en = 1 if en > 1 else en  # a probability cannot exceed 1
        if e > en:
            return action
        # ``np.random.random_integers`` is deprecated.  ``randint`` uses an
        # exclusive upper bound, so ``N_bandits`` replaces ``N_bandits - 1``.
        return np.random.randint(low=0, high=n_bandits)
class UCB1Agent(SimpleAgent):
    """Upper-confidence-bound agent (UCB1-style exploration bonus).

    Each action is played once, in index order, during the first
    ``N_bandits`` steps.  Afterwards the action maximising
    ``estimate + c * sqrt(2 * ln(t + 1) / tries)`` is chosen.
    """

    def __init__(self, optimistic=0, c=1.5, verbose=0):
        super().__init__(optimistic, verbose)
        self._c = c

    def _makeDesicion(self, context=None):
        """Return the next action index.

        ``context`` is accepted (and ignored) so the signature matches the
        other agents in this module; the original omitted it, so a call
        passing a context argument would raise ``TypeError``.
        """
        if self._iter < self._envParams["N_bandits"]:
            # Warm-up: try every action once, in index order.
            return self._iter
        tries = np.asarray(self._qtry, dtype=float)
        bonus = self._c * np.sqrt(2 * np.log(self._iter + 1) / tries)
        return np.argmax(np.asarray(self._qreward) + bonus)
class UCB2Agent(SimpleAgent):
    """Epoch-based upper-confidence-bound agent (UCB2-style).

    After a warm-up in which each action is played once, an action is picked
    by estimate plus bonus and then replayed for a whole epoch.  Epoch
    lengths are driven by ``_tf(r) = ceil((1 + a) ** r)``, where ``r`` counts
    the epochs already completed for that action.
    """

    def __init__(self, optimistic=0, a=0.01, verbose=0):
        """Create the agent.

        Raises:
            ValueError: if ``a`` is not in the interval ``(0, 1]``.
        """
        super().__init__(optimistic, verbose)
        self._a = a
        if a > 1 or a <= 0:
            raise ValueError("a must be between 0 and 1")

    def initEnviromentParams(self, params):
        """Reset the per-action epoch counters and the epoch state."""
        super().initEnviromentParams(params)
        self._r = [0 for _ in self._envParams["ActionRange"]]
        # Define the epoch state up front so the attributes always exist;
        # they are only read after the first bonus-based decision sets them.
        self._steps_to_play = 0
        self._action_to_play = None

    def _makeDesicion(self, context=None):
        """Return the next action index.

        ``context`` is accepted (and ignored) so the signature matches the
        other agents in this module; the original omitted it, so a call
        passing a context argument would raise ``TypeError``.
        """
        n_bandits = self._envParams["N_bandits"]
        if self._iter < n_bandits:
            # Warm-up: try every action once, in index order.
            return self._iter
        if self._iter > n_bandits:
            # An epoch has been started by an earlier call (at exactly
            # ``_iter == n_bandits`` no epoch exists yet, so this is skipped).
            if self._steps_to_play > 0:
                self._steps_to_play -= 1
                return self._action_to_play
            else:
                # Epoch finished: count it for the action just played.
                self._r[self._action_to_play] += 1
        action = np.argmax(self._qreward + self._af(self._iter + 1, self._r))
        # The action is returned once right now, hence the extra ``- 1``.
        self._steps_to_play = (
            self._tf(self._r[action] + 1) - self._tf(self._r[action]) - 1
        )
        self._action_to_play = action
        return action

    def _af(self, n, r):
        """Return the exploration bonus for step ``n`` and epoch count(s) ``r``."""
        tau = self._tf(r)
        return np.sqrt((1 + self._a) * np.log(np.e * n / tau) / (2 * tau))

    def _tf(self, r):
        """Return ``ceil((1 + a) ** r)``; ``r`` may be a scalar or a sequence."""
        return np.ceil(np.power(1 + self._a, r))