# Source code for MABpy.base

from abc import abstractmethod, ABCMeta
import numpy as np

class IteractionModel(metaclass=ABCMeta):
    """
    Abstract base for the interaction between agents and environments.

    Subclasses have to implement ``Reset``, ``GetGameLogs`` and ``Play``
    before they can be instantiated.

    Attributes:
        _verbose - verbosity level (class-level default is 0)
    """

    _verbose = 0

    def __init__(self, verbose=0):
        """
        Remember the requested verbosity level on the instance.

        :param verbose: verbosity level
        """
        self._verbose = verbose

    @abstractmethod
    def Reset(self):
        """
        Reset the interaction to its initial state.
        """

    @abstractmethod
    def GetGameLogs(self):
        """
        Get game logs.

        :return: game logs
        """

    @abstractmethod
    def Play(self, max_iter):
        """
        Start the interaction.

        :param max_iter: maximum iteration number
        :return: nothing in this base implementation
        """
class Agent(metaclass=ABCMeta):
    """
    Base agent class. An agent makes decisions based on an algorithm.

    Subclasses must implement ``_makeDesicion``; the ``_before*`` / ``_after*``
    and ``_learn`` hooks may be overridden as needed.

    Attributes:
        _verbose - verbosity level
        _envParams - environment parameters (None until
                     ``initEnviromentParams`` is called)
    """

    _verbose = 0
    _envParams = None

    def __init__(self, verbose=0):
        """
        Initialize the agent with a verbosity level.

        :param verbose: verbosity level
        """
        self._verbose = verbose

    def initEnviromentParams(self, params):
        """
        Save environment parameters.

        :param params: environment params
        :return: nothing
        """
        self._envParams = params

    def _beforeMakingDesicion(self, context=None):
        """
        Hook run before a decision is made.

        :param context: context passed to ``MakeDecision``
        :raises ValueError: if environment parameters were never set
        """
        if self._envParams is None:
            raise ValueError("can't make decision, initEnviromentParams first")

    def _afterMakingDesicion(self, action, context=None):
        """
        Hook run after a decision is made; does nothing by default.

        :param action: action returned by ``_makeDesicion``
        :param context: context passed to ``MakeDecision``
        """
        pass

    @abstractmethod
    def _makeDesicion(self, context=None):
        """
        Choose an action; must be implemented by subclasses.

        :param context: context passed to ``MakeDecision``
        :return: the chosen action (this base body returns 0)
        """
        return 0

    def MakeDecision(self, context=None):
        """
        Run the before-hook, choose an action and run the after-hook.

        :param context: context forwarded to all three steps
        :return: the action returned by ``_makeDesicion``
        :raises ValueError: if environment parameters were never set
        """
        self._beforeMakingDesicion(context)
        # Forward the context: it was previously dropped here, so
        # _makeDesicion always saw context=None.
        action = self._makeDesicion(context)
        self._afterMakingDesicion(action, context)
        return action

    def _beforeLearn(self, action, reward, context=None):
        """Hook run before ``_learn``; does nothing by default."""
        pass

    def _afterLearn(self, action, reward, context=None):
        """Hook run after ``_learn``; does nothing by default."""
        pass

    def _learn(self, action, reward, context=None):
        """Learning step; does nothing by default."""
        pass

    def Learn(self, action, reward, context=None):
        """
        Main learn function: runs ``_beforeLearn``, ``_learn`` and
        ``_afterLearn`` in that order with the same arguments.

        :param action: action
        :param reward: reward
        :param context: context vector
        :return: nothing
        """
        self._beforeLearn(action, reward, context)
        self._learn(action, reward, context)
        self._afterLearn(action, reward, context)
class EnvParams(dict):
    """
    Dictionary of public environment parameters.

    Keys set on construction:
        "N_bandits" - the value passed as ``n_bandits``
        "ActionRange" - ``range(n_bandits)``
    """

    def __init__(self, n_bandits):
        """
        Fill the dictionary from the number of bandits.

        :param n_bandits: number of bandits
        """
        super().__init__()
        self.update(N_bandits=n_bandits, ActionRange=range(n_bandits))
class GameEnviroment(metaclass=ABCMeta):
    """
    Base class for a game environment.

    Attributes:
        done - flag for end of game
        params - public environment params (an ``EnvParams`` instance)
        _bestAction - index of the highest reward from the last ``getReward``
        _bestReward - highest reward from the last ``getReward``
        _bestAvgReward - best average reward (0 in this base class)
    """

    done = False
    params = None
    _bestAction = None
    _bestReward = None
    _bestAvgReward = 0

    def __init__(self, n_bandits):
        """
        Build the public parameters for ``n_bandits`` bandits.

        :param n_bandits: number of bandits
        """
        self.params = EnvParams(n_bandits)

    def _beforeGetReward(self, action):
        """
        Validate the action before rewards are drawn.

        :param action: index of the chosen bandit
        :raises ValueError: if action is negative or >= N_bandits
        """
        n_bandits = self.params["N_bandits"]
        if action >= n_bandits:
            raise ValueError("Got action= %d but action must be < N_bandits=%d" % (action, n_bandits))
        # A negative index would otherwise silently pick a bandit counted
        # from the end of the rewards list in getReward.
        if action < 0:
            raise ValueError("Got action= %d but action must be >= 0" % action)

    def _getReward(self, action):
        """
        Reward for a single action; this base implementation returns 0.

        :param action: index of the bandit
        :return: reward for that bandit
        """
        return 0

    def _afterGetReward(self, rewards, action):
        """
        Record the best action and best reward among ``rewards``.

        :param rewards: rewards of every action for this round
        :param action: the action that was actually played
        """
        self._bestAction = np.argmax(rewards)
        self._bestReward = np.max(rewards)

    def getReward(self, action):
        """
        Play ``action`` and return its reward.

        Rewards are drawn for every action in "ActionRange" so that the
        best action and best reward of the round can be recorded.

        :param action: index of the chosen bandit
        :return: reward of the chosen bandit
        :raises ValueError: if action is outside [0, N_bandits)
        """
        self._beforeGetReward(action)
        rewards = [self._getReward(a) for a in self.params["ActionRange"]]
        self._afterGetReward(rewards, action)
        return rewards[action]

    def getRewardSample(self, sample_size=100):
        """
        Draw ``sample_size`` rewards for every action, then reset.

        :param sample_size: number of draws per action
        :return: list with one list of rewards per action
        """
        result = [
            [self._getReward(action) for _ in range(sample_size)]
            for action in self.params["ActionRange"]
        ]
        self.reset()
        return result

    def getBestReward(self):
        """Return the best reward recorded by the last ``getReward``."""
        return self._bestReward

    def getBestAction(self):
        """Return the best action recorded by the last ``getReward``."""
        return self._bestAction

    def getBestAvgReward(self):
        """Return the best average reward."""
        return self._bestAvgReward

    def reset(self):
        """Clear the ``done`` flag and the recorded best action/reward."""
        self.done = False
        self._bestAction = None
        self._bestReward = None