# Source code for MABpy.ActionRewardAgents

import numpy as np

from MABpy.base import Agent

class RandomAgent(Agent):
    """
    Choose a random action and never learn.
    """

    def _makeDesicion(self, context=None):
        """Return a random action index in ``0 .. N_bandits - 1``.

        Args:
            context: Accepted for interface compatibility; not used.

        Returns:
            An integer action index read against
            ``self._envParams["N_bandits"]``.
        """
        # ``randint`` excludes its upper bound, so passing ``N_bandits``
        # yields the same inclusive range 0..N_bandits-1 as the deprecated
        # ``np.random.random_integers(low=0, high=N_bandits-1)`` it replaces.
        return np.random.randint(0, self._envParams["N_bandits"])


class SimpleAgent(Agent):
    """
    Greedy agent that keeps a running mean reward for every action.

    The agent always picks the action with the highest current estimate.
    Estimates start at ``optimistic`` and are updated in ``_learn``.
    """

    def __init__(self, optimistic=0, verbose=0):
        """Store the initial estimate and forward ``verbose`` to the base.

        Args:
            optimistic: Starting reward estimate assigned to every action.
            verbose: Passed unchanged to the base-class initialiser.
        """
        super().__init__(verbose)
        self._optimistic = optimistic

    def initEnviromentParams(self, params):
        """Reset the per-action statistics for a new environment.

        Args:
            params: Environment description, handed to the base class;
                this method then reads ``self._envParams["ActionRange"]``
                (presumably populated by the base class -- confirm there).
        """
        super().initEnviromentParams(params)

        actions = self._envParams["ActionRange"]
        # One slot per action: current mean reward and number of pulls.
        self._qreward = [self._optimistic for _ in actions]
        self._qtry = [0 for _ in actions]
        # Total number of learning steps seen so far.
        self._iter = 0

    def _makeDesicion(self, context=None):
        """Return the index of the action with the highest estimate.

        ``np.argmax`` returns the first index on ties. ``context`` is
        accepted for interface compatibility and ignored.
        """
        return np.argmax(self._qreward)

    def _learn(self, action, reward, context=None):
        """Fold ``reward`` into the running mean of ``action``.

        Args:
            action: Index of the action that was played.
            reward: Reward observed for that action.
            context: Accepted for interface compatibility; not used.
        """
        self._iter += 1
        self._qtry[action] += 1
        pulls = self._qtry[action]
        # Running mean: previous mean weighted by the previous pull count.
        # On the first pull that weight is zero, so the initial
        # (optimistic) estimate is replaced outright by the reward.
        self._qreward[action] = (
            self._qreward[action] * (pulls - 1) + reward
        ) / pulls


class eGreedyAgent(SimpleAgent):
    """
    Epsilon-greedy agent with a fixed exploration rate.

    Most of the time the greedy action of ``SimpleAgent`` is played; with
    probability of roughly ``greedy`` a random action is played instead.
    """

    def __init__(self, greedy=0.1, optimistic=0, verbose=0):
        """Validate and store the exploration rate.

        Args:
            greedy: Exploration rate, strictly between 0 and 1.
            optimistic: Starting reward estimate for every action.
            verbose: Passed unchanged to the base-class initialiser.

        Raises:
            ValueError: If ``greedy`` is not strictly between 0 and 1.
        """
        # Fail before touching the base class so that an invalid argument
        # never leaves a half-initialised agent behind.
        if greedy >= 1 or greedy <= 0:
            raise ValueError("greedy must be between 0 and 1")

        super().__init__(optimistic, verbose)
        self._greedy = greedy

    def _makeDesicion(self, context=None):
        """Return the greedy action, or a random one when exploring.

        Args:
            context: Forwarded to the parent decision, which ignores it.

        Returns:
            An integer action index.
        """
        action = super()._makeDesicion(context)
        e = np.random.rand()
        if e > self._greedy:
            return action
        # Explore: ``randint`` excludes the upper bound, matching the
        # inclusive 0..N_bandits-1 range of the deprecated
        # ``np.random.random_integers`` used previously.
        return np.random.randint(0, self._envParams["N_bandits"])

class enGreedyAgent(SimpleAgent):
    """
    Epsilon-greedy agent whose exploration rate decays over time.

    The exploration rate at each decision is
    ``min(1, c * N_bandits / (d**2 * (iter + 1)))``, where ``iter`` is the
    number of learning steps seen so far.
    """

    def __init__(self, c=0.1, d=1.0, optimistic=0, verbose=0):
        """Validate and store the decay parameters.

        Args:
            c: Scale factor of the exploration rate.
            d: Divisor parameter, must satisfy ``0 < d <= 1``.
            optimistic: Starting reward estimate for every action.
            verbose: Passed unchanged to the base-class initialiser.

        Raises:
            ValueError: If ``d`` is outside ``(0, 1]``.
        """
        # The message used to name "greedy", a parameter this class does
        # not have; it now names the argument that is actually checked.
        if d > 1 or d <= 0:
            raise ValueError("d must be between 0 and 1")

        super().__init__(optimistic, verbose)
        self._c = c
        self._d = d

    def _makeDesicion(self, context=None):
        """Return the greedy action, or a random one when exploring.

        Args:
            context: Forwarded to the parent decision, which ignores it.

        Returns:
            An integer action index.
        """
        action = super()._makeDesicion(context)
        e = np.random.rand()
        n_bandits = self._envParams["N_bandits"]
        en = self._c * n_bandits / (self._d * self._d * (self._iter + 1))
        # A probability above 1 is meaningless; clamp it.
        en = 1 if en > 1 else en
        if e > en:
            return action
        # Explore: ``randint`` excludes the upper bound, matching the
        # inclusive 0..N_bandits-1 range of the deprecated
        # ``np.random.random_integers`` used previously.
        return np.random.randint(0, n_bandits)

class UCB1Agent(SimpleAgent):
    """
    Upper-confidence-bound agent.

    Each action is scored as its mean reward plus an exploration bonus
    ``c * sqrt(2 * ln(iter + 1) / pulls)``; the highest score is played.
    """

    def __init__(self, optimistic=0, c=1.5, verbose=0):
        """Store the exploration weight.

        Args:
            optimistic: Starting reward estimate for every action.
            c: Multiplier applied to the exploration bonus.
            verbose: Passed unchanged to the base-class initialiser.
        """
        super().__init__(optimistic, verbose)
        self._c = c

    def _makeDesicion(self, context=None):
        """Return the action with the highest upper confidence bound.

        Args:
            context: Accepted so the signature matches the sibling agents;
                not used.

        Returns:
            An integer action index.
        """
        # While fewer learning steps than actions have happened, play the
        # action whose index equals the step count, so every action gets
        # one pull before the bonus (which divides by pulls) is evaluated.
        if self._iter < self._envParams["N_bandits"]:
            return self._iter

        # Explicit arrays instead of relying on list/ndarray coercion.
        means = np.asarray(self._qreward, dtype=float)
        pulls = np.asarray(self._qtry, dtype=float)
        bonus = self._c * np.sqrt(2 * np.log(self._iter + 1) / pulls)
        return np.argmax(means + bonus)

class UCB2Agent(SimpleAgent):
    """
    Epoch-based upper-confidence-bound agent.

    A selected action is replayed for a whole epoch whose length is
    ``tau(r + 1) - tau(r)`` with ``tau(r) = ceil((1 + a) ** r)``, where
    ``r`` counts the epochs already completed for that action.
    """

    def __init__(self, optimistic=0, a=0.01, verbose=0):
        """Validate and store the epoch growth parameter.

        Args:
            optimistic: Starting reward estimate for every action.
            a: Epoch growth parameter, must satisfy ``0 < a <= 1``.
            verbose: Passed unchanged to the base-class initialiser.

        Raises:
            ValueError: If ``a`` is outside ``(0, 1]``.
        """
        if a > 1 or a <= 0:
            raise ValueError("a must be between 0 and 1")

        super().__init__(optimistic, verbose)
        self._a = a

    def initEnviromentParams(self, params):
        """Reset the per-action statistics and the epoch bookkeeping.

        Args:
            params: Environment description, handed to the parent class.
        """
        super().initEnviromentParams(params)
        # Completed epochs per action.
        self._r = [0 for _ in self._envParams["ActionRange"]]
        # Epoch state is reset here so nothing stale survives a re-init
        # and the attributes always exist before the first decision.
        self._steps_to_play = 0
        self._action_to_play = None

    def _makeDesicion(self, context=None):
        """Return the next action, continuing the current epoch if any.

        Args:
            context: Accepted so the signature matches the sibling agents;
                not used.

        Returns:
            An integer action index.
        """
        n_bandits = self._envParams["N_bandits"]

        # While fewer learning steps than actions have happened, play the
        # action whose index equals the step count.
        if self._iter < n_bandits:
            return self._iter

        if self._iter > n_bandits and self._action_to_play is not None:
            if self._steps_to_play > 0:
                # Still inside the running epoch: replay the same action.
                self._steps_to_play -= 1
                return self._action_to_play
            # Epoch exhausted: credit it to the action before re-selecting.
            self._r[self._action_to_play] += 1

        means = np.asarray(self._qreward, dtype=float)
        bonus = self._af(self._iter + 1, np.asarray(self._r))
        action = np.argmax(means + bonus)

        # This call already plays the action once, hence the extra -1.
        self._steps_to_play = (
            self._tf(self._r[action] + 1) - self._tf(self._r[action]) - 1
        )
        self._action_to_play = action

        return action

    def _af(self, n, r):
        """Return the exploration bonus for step ``n`` and epoch count(s) ``r``.

        Computes ``sqrt((1 + a) * ln(e * n / tau(r)) / (2 * tau(r)))``;
        ``r`` may be a scalar or an array of per-action epoch counts.
        """
        tau = self._tf(r)
        return np.sqrt((1 + self._a) * np.log(np.e * n / tau) / (2 * tau))

    def _tf(self, r):
        """Return ``tau(r) = ceil((1 + a) ** r)`` for scalar or array ``r``."""
        return np.ceil(np.power(1 + self._a, r))