Source code for MABpy.SimpleMAB

from MABpy.base import GameEnviroment
import numpy as np


[docs]class DummyEnviroment(GameEnviroment): def __init__(self, n_bandits): super().__init__(n_bandits) self._bestAvgReward = n_bandits-1 def _getReward(self, action): return action
[docs]class GaussianEnviroment(GameEnviroment): _mu = [] _sigma = [] def __init__(self, n_bandits, min_mu=0, max_mu=1, min_sigma=1, max_sigma=1): super().__init__(n_bandits) self._mu = np.random.uniform(min_mu,max_mu,n_bandits) self._sigma = np.random.uniform(min_sigma, max_sigma, n_bandits) self._bestAvgReward = np.max(self._mu) def _getReward(self, action): return np.random.normal(self._mu[action],self._sigma[action],1)[0]
[docs]class BernoulliEnviroment(GameEnviroment): _p = [] def __init__(self, n_bandits): super().__init__(n_bandits) self._p = np.random.uniform(size=n_bandits) self._bestAvgReward = np.max(self._p) def __init__(self, p): super().__init__(len(p)) self._p = p self._bestAvgReward = np.max(self._p) def _afterGetReward(self,rewards,action): self._bestAction = np.argmax(self._p) self._bestReward = 1 pass def _getReward(self, action): return 1 if np.random.rand()<=self._p[action] else 0