Scippy

SCIP

Solving Constraint Integer Programs

pub_bandit_exp3.h
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2022 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not visit scipopt.org. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file pub_bandit_exp3.h
17  * @ingroup PublicBanditMethods
18  * @brief public methods for Exp.3
19  * @author Gregor Hendel
20  */
21 
22 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
23 
24 #ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_
25 #define SRC_SCIP_PUB_BANDIT_EXP3_H_
26 
27 #include "scip/def.h"
28 #include "scip/type_bandit.h"
29 #include "scip/type_retcode.h"
30 #include "scip/type_scip.h"
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
36 /**@addtogroup PublicBanditMethods
37  *
38  * ## Exp.3
39  *
40  * Exp.3 is a randomized selection method for the multi-armed bandit problem.
41  *
42  * Exp.3 maintains a probability distribution
43  * according to which an action is drawn
44  * in every iteration.
45  * The probability distribution is a mixture between
46  * a uniform distribution and a softmax distribution
47  * based on the cumulative rewards of the actions.
48  * The weight of the uniform distribution in the mixture
49  * is controlled by the parameter \f$ \gamma \f$, i.e.,
50  * setting \f$ \gamma = 1\f$ uses a uniform distribution
51  * in every selection step.
52  * The cumulative reward for the actions can be
53  * fine-tuned by adding a general bias for all actions.
54  * The bias is given by the parameter \f$ \beta \f$.
55  *
56  * @{
57  */
58 
59 /** creates and resets an Exp.3 bandit algorithm using \p scip pointer */
60 SCIP_EXPORT
61 SCIP_RETCODE SCIPcreateBanditExp3(
62  SCIP* scip, /**< SCIP data structure */
63  SCIP_BANDIT** exp3, /**< pointer to store bandit algorithm */
64  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
65  SCIP_Real gammaparam, /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
66  SCIP_Real beta, /**< gain offset between 0 and 1 at every observation */
67  int nactions, /**< the positive number of actions for this bandit algorithm */
68  unsigned int initseed /**< initial seed for random number generation */
69  );
70 
71 /** set gamma parameter of Exp.3 bandit algorithm to increase weight of uniform distribution */
72 SCIP_EXPORT
73 void SCIPsetGammaExp3(
74  SCIP_BANDIT* exp3, /**< bandit algorithm */
75  SCIP_Real gammaparam /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
76  );
77 
78 /** set beta parameter of Exp.3 bandit algorithm to increase gain offset for actions that were not played */
79 SCIP_EXPORT
80 void SCIPsetBetaExp3(
81  SCIP_BANDIT* exp3, /**< bandit algorithm */
82  SCIP_Real beta /**< gain offset between 0 and 1 at every observation */
83  );
84 
85 /** returns probability to play an action */
86 SCIP_EXPORT
87 SCIP_Real SCIPgetProbabilityExp3(
88  SCIP_BANDIT* exp3, /**< bandit algorithm */
89  int action /**< index of the requested action */
90  );
91 
92 /** @}*/
93 
94 #ifdef __cplusplus
95 }
96 #endif
97 
98 #endif
SCIP_RETCODE SCIPcreateBanditExp3(SCIP *scip, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
Definition: bandit_exp3.c:302
void SCIPsetBetaExp3(SCIP_BANDIT *exp3, SCIP_Real beta)
Definition: bandit_exp3.c:341
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:54
type definitions for return codes for SCIP methods
type definitions for SCIP's main datastructure
type definitions for bandit selection algorithms
SCIP_Real SCIPgetProbabilityExp3(SCIP_BANDIT *exp3, int action)
Definition: bandit_exp3.c:354
void SCIPsetGammaExp3(SCIP_BANDIT *exp3, SCIP_Real gammaparam)
Definition: bandit_exp3.c:328
#define SCIP_Real
Definition: def.h:177
common defines and data types used in all packages of SCIP