# Source code for MABpy.SimpleMAB

from MABpy.base import GameEnviroment
import numpy as np


class DummyEnviroment(GameEnviroment):
    """Deterministic environment whose reward is the chosen action itself.

    Useful as a trivially predictable environment: pulling arm ``k``
    always yields the reward ``k``.
    """

    def __init__(self, n_bandits):
        """Create the environment.

        Args:
            n_bandits: Number of arms; forwarded to ``GameEnviroment``.
        """
        super().__init__(n_bandits)
        # The reward equals the action, so the best reward is the largest
        # action value -- assumes actions are the indices 0..n_bandits-1
        # (TODO confirm against GameEnviroment).
        self._bestAvgReward = n_bandits - 1

    def _getReward(self, action):
        """Return ``action`` unchanged as the reward."""
        return action



class GaussianEnviroment(GameEnviroment):
    """Environment whose arms pay normally distributed rewards.

    Each arm gets its own mean and standard deviation, both drawn
    uniformly at construction time from the ranges passed to ``__init__``.
    """

    # Immutable class-level placeholders; every instance replaces them with
    # its own arrays in __init__, so no mutable state is shared via the class.
    _mu = ()
    _sigma = ()

    def __init__(self, n_bandits, min_mu=0, max_mu=1, min_sigma=1, max_sigma=1):
        """Draw the per-arm reward distributions.

        Args:
            n_bandits: Number of arms; forwarded to ``GameEnviroment``.
            min_mu: Lower bound of the uniform range for the arm means.
            max_mu: Upper bound of the uniform range for the arm means.
            min_sigma: Lower bound of the uniform range for the arm
                standard deviations.
            max_sigma: Upper bound of the uniform range for the arm
                standard deviations.
        """
        super().__init__(n_bandits)
        self._mu = np.random.uniform(min_mu, max_mu, n_bandits)
        self._sigma = np.random.uniform(min_sigma, max_sigma, n_bandits)
        # The best expected reward is the mean of the best arm.
        self._bestAvgReward = np.max(self._mu)

    def _getReward(self, action):
        """Sample one reward from the normal distribution of arm ``action``."""
        # Draw a length-1 sample and unwrap it to a scalar.
        return np.random.normal(self._mu[action], self._sigma[action], 1)[0]



class BernoulliEnviroment(GameEnviroment):
    """Environment whose arms pay 1 with a fixed probability, otherwise 0.

    The success probabilities are either supplied by the caller or drawn
    uniformly from ``[0, 1)`` when only the number of arms is given.
    """

    # Immutable class-level placeholder; every instance replaces it with its
    # own array in __init__, so no mutable state is shared via the class.
    _p = ()

    def __init__(self, p=None, n_bandits=None):
        """Create the environment from explicit or random probabilities.

        The class used to define ``__init__`` twice; Python keeps only the
        last definition, so the "number of arms" constructor was unreachable.
        Both forms are handled here.

        Args:
            p: Either a sequence of per-arm success probabilities, or an
                integer number of arms whose probabilities are then drawn
                uniformly at random.
            n_bandits: Number of arms with randomly drawn probabilities;
                keyword alternative to passing an integer as ``p``.

        Raises:
            TypeError: If neither or both of ``p`` and ``n_bandits`` are given.
        """
        if (p is None) == (n_bandits is None):
            raise TypeError("specify exactly one of 'p' or 'n_bandits'")
        if p is None:
            p = n_bandits
        if isinstance(p, (int, np.integer)):
            # An integer means "this many arms": draw their probabilities.
            p = np.random.uniform(size=p)
        else:
            p = np.asarray(p, dtype=float)
        super().__init__(len(p))
        self._p = p
        # The best expected reward is the success probability of the best arm.
        self._bestAvgReward = np.max(self._p)

    def _afterGetReward(self, rewards, action):
        """Record the best arm and the best possible single reward.

        ``rewards`` and ``action`` are accepted but not used here.
        NOTE(review): presumably a hook called by ``GameEnviroment`` after a
        reward is drawn -- confirm against the base class.
        """
        self._bestAction = np.argmax(self._p)
        self._bestReward = 1

    def _getReward(self, action):
        """Return 1 with probability ``self._p[action]``, otherwise 0."""
        # np.random.rand() samples [0, 1); the strict comparison makes the
        # success probability exactly p, so an arm with p == 0 never pays.
        return 1 if np.random.rand() < self._p[action] else 0