Scippy

SCIP

Solving Constraint Integer Programs

pub_bandit_epsgreedy.h
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2019 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not visit scip.zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file pub_bandit_epsgreedy.h
17  * @ingroup PublicBanditMethods
18  * @brief public methods for the epsilon greedy bandit selector
19  * @author Gregor Hendel
20  */
21 
22 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
23 
24 #ifndef SRC_SCIP_PUB_BANDIT_EPSGREEDY_H_
25 #define SRC_SCIP_PUB_BANDIT_EPSGREEDY_H_
26 
27 
28 #include "scip/def.h"
29 #include "scip/type_scip.h"
30 #include "scip/type_bandit.h"
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
36 /**@addtogroup PublicBanditMethods
37  *
38  * ## Epsilon greedy
39  *
40  * Epsilon greedy is a randomized algorithm for the multi-armed bandit problem.
41  *
42  * In every iteration, it either
43  * selects an action uniformly at random with
44  * probability \f$ \varepsilon_t\f$
45  * or it greedily exploits the best action seen so far with
46  * probability \f$ 1 - \varepsilon_t \f$.
47  * In this implementation, \f$ \varepsilon_t \f$ decreases over time
48  * (number of selections performed), controlled by the epsilon parameter.
49  *
50  * @{
51  */
52 
53 /** creates and resets an epsilon greedy bandit algorithm */
54 SCIP_EXPORT
55 SCIP_RETCODE SCIPcreateBanditEpsgreedy(
56  SCIP* scip, /**< SCIP data structure */
57  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
58  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
59  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
60  SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
61  SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
62  int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
63  * only relevant if exponential decay is enabled
64  */
65  int nactions, /**< the number of possible actions */
66  unsigned int initseed /**< initial seed for random number generation */
67  );
68 
69 /** get weights array of epsilon greedy bandit algorithm */
70 SCIP_EXPORT
71 SCIP_Real* SCIPgetWeightsEpsgreedy(
72  SCIP_BANDIT* epsgreedy /**< epsilon greedy bandit algorithm */
73  );
74 
75 /** set epsilon parameter of epsilon greedy bandit algorithm */
76 SCIP_EXPORT
77 void SCIPsetEpsilonEpsgreedy(
78  SCIP_BANDIT* epsgreedy, /**< epsilon greedy bandit algorithm */
79  SCIP_Real eps /**< parameter to increase probability for exploration between all actions */
80  );
81 
82 /** @} */
83 
84 
85 
86 #ifdef __cplusplus
87 }
88 #endif
89 
90 #endif
SCIP_EXPORT SCIP_RETCODE SCIPcreateBanditEpsgreedy(SCIP *scip, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
#define SCIP_EXPORT
Definition: def.h:98
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:53
real eps
type definitions for SCIP's main datastructure
type definitions for bandit selection algorithms
#define SCIP_Bool
Definition: def.h:70
#define SCIP_Real
Definition: def.h:164
SCIP_EXPORT void SCIPsetEpsilonEpsgreedy(SCIP_BANDIT *epsgreedy, SCIP_Real eps)
common defines and data types used in all packages of SCIP
SCIP_EXPORT SCIP_Real * SCIPgetWeightsEpsgreedy(SCIP_BANDIT *epsgreedy)