From 49803949038744bf975dcfe7171cb0395777916d Mon Sep 17 00:00:00 2001 From: Varuna Jayasiri Date: Mon, 21 Jun 2021 19:27:15 +0530 Subject: [PATCH] run link --- docs/cfr/kuhn/index.html | 181 +++++++++++++++++----------------- labml_nn/cfr/kuhn/__init__.py | 1 + 2 files changed, 92 insertions(+), 90 deletions(-) diff --git a/docs/cfr/kuhn/index.html b/docs/cfr/kuhn/index.html index 76380cd5..2d90bd8a 100644 --- a/docs/cfr/kuhn/index.html +++ b/docs/cfr/kuhn/index.html @@ -88,17 +88,18 @@ This game is played repeatedly and a good strategy will optimize for the long te

Here we extend the InfoSet class and History class defined in __init__.py with Kuhn Poker specifics.

-

Open In Colab

+

Open In Colab +View Run

-
37from typing import List, cast, Dict
-38
-39import numpy as np
-40
-41from labml import experiment
-42from labml.configs import option
-43from labml_nn.cfr import History as _History, InfoSet as _InfoSet, Action, Player, CFRConfigs
-44from labml_nn.cfr.infoset_saver import InfoSetSaver
+
38from typing import List, cast, Dict
+39
+40import numpy as np
+41
+42from labml import experiment
+43from labml.configs import option
+44from labml_nn.cfr import History as _History, InfoSet as _InfoSet, Action, Player, CFRConfigs
+45from labml_nn.cfr.infoset_saver import InfoSetSaver
@@ -109,7 +110,7 @@ with Kuhn Poker specifics.

Kuhn poker actions are pass (p) or bet (b)

-
47ACTIONS = cast(List[Action], ['p', 'b'])
+
48ACTIONS = cast(List[Action], ['p', 'b'])
@@ -120,7 +121,7 @@ with Kuhn Poker specifics.

The three cards in play are Ace, King and Queen

-
49CHANCES = cast(List[Action], ['A', 'K', 'Q'])
+
50CHANCES = cast(List[Action], ['A', 'K', 'Q'])
@@ -131,7 +132,7 @@ with Kuhn Poker specifics.

There are two players

-
51PLAYERS = cast(List[Player], [0, 1])
+
52PLAYERS = cast(List[Player], [0, 1])
@@ -142,7 +143,7 @@ with Kuhn Poker specifics.

Information set

-
54class InfoSet(_InfoSet):
+
55class InfoSet(_InfoSet):
@@ -153,8 +154,8 @@ with Kuhn Poker specifics.

Does not support save/load

-
59    @staticmethod
-60    def from_dict(data: Dict[str, any]) -> 'InfoSet':
+
60    @staticmethod
+61    def from_dict(data: Dict[str, any]) -> 'InfoSet':
@@ -165,7 +166,7 @@ with Kuhn Poker specifics.

-
62        pass
+
63        pass
@@ -176,7 +177,7 @@ with Kuhn Poker specifics.

Return the list of actions. Terminal states are handled by the History class.

-
64    def actions(self) -> List[Action]:
+
65    def actions(self) -> List[Action]:
@@ -187,7 +188,7 @@ with Kuhn Poker specifics.

-
68        return ACTIONS
+
69        return ACTIONS
@@ -198,7 +199,7 @@ with Kuhn Poker specifics.

Human readable string representation - it gives the betting probability

-
70    def __repr__(self):
+
71    def __repr__(self):
@@ -209,10 +210,10 @@ with Kuhn Poker specifics.

-
74        total = sum(self.cumulative_strategy.values())
-75        total = max(total, 1e-6)
-76        bet = self.cumulative_strategy[cast(Action, 'b')] / total
-77        return f'{bet * 100: .1f}%'
+
75        total = sum(self.cumulative_strategy.values())
+76        total = max(total, 1e-6)
+77        bet = self.cumulative_strategy[cast(Action, 'b')] / total
+78        return f'{bet * 100: .1f}%'
@@ -228,7 +229,7 @@ with Kuhn Poker specifics.

* Fourth character is the action by the second player

-
80class History(_History):
+
81class History(_History):
@@ -239,7 +240,7 @@ with Kuhn Poker specifics.

History

-
93    history: str
+
94    history: str
@@ -250,7 +251,7 @@ with Kuhn Poker specifics.

Initialize with a given history string

-
95    def __init__(self, history: str = ''):
+
96    def __init__(self, history: str = ''):
@@ -261,7 +262,7 @@ with Kuhn Poker specifics.

-
99        self.history = history
+
100        self.history = history
@@ -272,7 +273,7 @@ with Kuhn Poker specifics.

Whether the history is terminal (game over).

-
101    def is_terminal(self):
+
102    def is_terminal(self):
@@ -283,8 +284,8 @@ with Kuhn Poker specifics.

Players are yet to take actions

-
106        if len(self.history) <= 2:
-107            return False
+
107        if len(self.history) <= 2:
+108            return False
@@ -295,8 +296,8 @@ with Kuhn Poker specifics.

Last player to play passed (game over)

-
109        elif self.history[-1] == 'p':
-110            return True
+
110        elif self.history[-1] == 'p':
+111            return True
@@ -307,8 +308,8 @@ with Kuhn Poker specifics.

Both players called (bet) (game over)

-
112        elif self.history[-2:] == 'bb':
-113            return True
+
113        elif self.history[-2:] == 'bb':
+114            return True
@@ -319,8 +320,8 @@ with Kuhn Poker specifics.

Any other combination

-
115        else:
-116            return False
+
116        else:
+117            return False
@@ -331,7 +332,7 @@ with Kuhn Poker specifics.

Calculate the terminal utility for player $1$, $u_1(z)$

-
118    def _terminal_utility_p1(self) -> float:
+
119    def _terminal_utility_p1(self) -> float:
@@ -342,7 +343,7 @@ with Kuhn Poker specifics.

$+1$ if Player 1 has a better card and $-1$ otherwise

-
123        winner = -1 + 2 * (self.history[0] < self.history[1])
+
124        winner = -1 + 2 * (self.history[0] < self.history[1])
@@ -353,8 +354,8 @@ with Kuhn Poker specifics.

Second player passed

-
126        if self.history[-2:] == 'bp':
-127            return 1
+
127        if self.history[-2:] == 'bp':
+128            return 1
@@ -365,8 +366,8 @@ with Kuhn Poker specifics.

Both players called, the player with the better card wins $2$ chips

-
129        elif self.history[-2:] == 'bb':
-130            return winner * 2
+
130        elif self.history[-2:] == 'bb':
+131            return winner * 2
@@ -377,8 +378,8 @@ with Kuhn Poker specifics.

First player passed, the player with the better card wins $1$ chip

-
132        elif self.history[-1] == 'p':
-133            return winner
+
133        elif self.history[-1] == 'p':
+134            return winner
@@ -389,8 +390,8 @@ with Kuhn Poker specifics.

History is non-terminal

-
135        else:
-136            raise RuntimeError()
+
136        else:
+137            raise RuntimeError()
@@ -401,7 +402,7 @@ with Kuhn Poker specifics.

Get the terminal utility for player $i$

-
138    def terminal_utility(self, i: Player) -> float:
+
139    def terminal_utility(self, i: Player) -> float:
@@ -412,8 +413,8 @@ with Kuhn Poker specifics.

If $i$ is Player 1

-
143        if i == PLAYERS[0]:
-144            return self._terminal_utility_p1()
+
144        if i == PLAYERS[0]:
+145            return self._terminal_utility_p1()
@@ -424,8 +425,8 @@ with Kuhn Poker specifics.

Otherwise, $u_2(z) = -u_1(z)$

-
146        else:
-147            return -1 * self._terminal_utility_p1()
+
147        else:
+148            return -1 * self._terminal_utility_p1()
@@ -436,7 +437,7 @@ with Kuhn Poker specifics.

The first two events are card dealing; i.e. chance events

-
149    def is_chance(self) -> bool:
+
150    def is_chance(self) -> bool:
@@ -447,7 +448,7 @@ with Kuhn Poker specifics.

-
153        return len(self.history) < 2
+
154        return len(self.history) < 2
@@ -458,7 +459,7 @@ with Kuhn Poker specifics.

Add an action to the history and return a new history

-
155    def __add__(self, other: Action):
+
156    def __add__(self, other: Action):
@@ -469,7 +470,7 @@ with Kuhn Poker specifics.

-
159        return History(self.history + other)
+
160        return History(self.history + other)
@@ -480,7 +481,7 @@ with Kuhn Poker specifics.

Current player

-
161    def player(self) -> Player:
+
162    def player(self) -> Player:
@@ -491,7 +492,7 @@ with Kuhn Poker specifics.

-
165        return cast(Player, len(self.history) % 2)
+
166        return cast(Player, len(self.history) % 2)
@@ -502,7 +503,7 @@ with Kuhn Poker specifics.

Sample a chance action

-
167    def sample_chance(self) -> Action:
+
168    def sample_chance(self) -> Action:
@@ -513,7 +514,7 @@ with Kuhn Poker specifics.

-
171        while True:
+
172        while True:
@@ -524,8 +525,8 @@ with Kuhn Poker specifics.

Randomly pick a card

-
173            r = np.random.randint(len(CHANCES))
-174            chance = CHANCES[r]
+
174            r = np.random.randint(len(CHANCES))
+175            chance = CHANCES[r]
@@ -536,10 +537,10 @@ with Kuhn Poker specifics.

See if the card was dealt before

-
176            for c in self.history:
-177                if c == chance:
-178                    chance = None
-179                    break
+
177            for c in self.history:
+178                if c == chance:
+179                    chance = None
+180                    break
@@ -550,8 +551,8 @@ with Kuhn Poker specifics.

Return the card if it was not dealt before

-
182            if chance is not None:
-183                return cast(Action, chance)
+
183            if chance is not None:
+184                return cast(Action, chance)
@@ -562,7 +563,7 @@ with Kuhn Poker specifics.

Human readable representation

-
185    def __repr__(self):
+
186    def __repr__(self):
@@ -573,7 +574,7 @@ with Kuhn Poker specifics.

-
189        return repr(self.history)
+
190        return repr(self.history)
@@ -585,7 +586,7 @@ with Kuhn Poker specifics.

This is a string of actions only visible to the current player.

-
191    def info_set_key(self) -> str:
+
192    def info_set_key(self) -> str:
@@ -596,7 +597,7 @@ This is a string of actions only visible to the current player.

Get current player

-
197        i = self.player()
+
198        i = self.player()
@@ -607,7 +608,7 @@ This is a string of actions only visible to the current player.

Current player sees her card and the betting actions

-
199        return self.history[i] + self.history[2:]
+
200        return self.history[i] + self.history[2:]
@@ -618,7 +619,7 @@ This is a string of actions only visible to the current player.

-
201    def new_info_set(self) -> InfoSet:
+
202    def new_info_set(self) -> InfoSet:
@@ -629,7 +630,7 @@ This is a string of actions only visible to the current player.

Create a new information set object

-
203        return InfoSet(self.info_set_key())
+
204        return InfoSet(self.info_set_key())
@@ -640,7 +641,7 @@ This is a string of actions only visible to the current player.

A function to create an empty history object

-
206def create_new_history():
+
207def create_new_history():
@@ -651,7 +652,7 @@ This is a string of actions only visible to the current player.

-
208    return History()
+
209    return History()
@@ -662,7 +663,7 @@ This is a string of actions only visible to the current player.

Configurations extends the CFR configurations class

-
211class Configs(CFRConfigs):
+
212class Configs(CFRConfigs):
@@ -673,7 +674,7 @@ This is a string of actions only visible to the current player.

-
215    pass
+
216    pass
@@ -684,8 +685,8 @@ This is a string of actions only visible to the current player.

Set the create_new_history method for Kuhn Poker

-
218@option(Configs.create_new_history)
-219def _cnh():
+
219@option(Configs.create_new_history)
+220def _cnh():
@@ -696,7 +697,7 @@ This is a string of actions only visible to the current player.

-
223    return create_new_history
+
224    return create_new_history
@@ -707,7 +708,7 @@ This is a string of actions only visible to the current player.

Run the experiment

-
226def main():
+
227def main():
@@ -721,7 +722,7 @@ other destinations such as Tensorboard can be relatively time consuming. SQLite is enough for our analytics.

-
235    experiment.create(name='kuhn_poker', writers={'sqlite'})
+
236    experiment.create(name='kuhn_poker', writers={'sqlite'})
@@ -732,7 +733,7 @@ SQLite is enough for our analytics.

Initialize configuration

-
237    conf = Configs()
+
238    conf = Configs()
@@ -743,7 +744,7 @@ SQLite is enough for our analytics.

Load configuration

-
239    experiment.configs(conf)
+
240    experiment.configs(conf)
@@ -754,7 +755,7 @@ SQLite is enough for our analytics.

Set models for saving

-
241    experiment.add_model_savers({'info_sets': InfoSetSaver(conf.cfr.info_sets)})
+
242    experiment.add_model_savers({'info_sets': InfoSetSaver(conf.cfr.info_sets)})
@@ -765,7 +766,7 @@ SQLite is enough for our analytics.

Start the experiment

-
243    with experiment.start():
+
244    with experiment.start():
@@ -776,7 +777,7 @@ SQLite is enough for our analytics.

Start iterating

-
245        conf.cfr.iterate()
+
246        conf.cfr.iterate()
@@ -787,8 +788,8 @@ SQLite is enough for our analytics.

-
249if __name__ == '__main__':
-250    main()
+
250if __name__ == '__main__':
+251    main()
diff --git a/labml_nn/cfr/kuhn/__init__.py b/labml_nn/cfr/kuhn/__init__.py index a3deada9..ab8886b0 100644 --- a/labml_nn/cfr/kuhn/__init__.py +++ b/labml_nn/cfr/kuhn/__init__.py @@ -32,6 +32,7 @@ He we extend the `InfoSet` class and `History` class defined in [`__init__.py`]( with Kuhn Poker specifics. [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/cfr/kuhn/experiment.ipynb) +[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/7c35d3fad29711eba588acde48001122) """ from typing import List, cast, Dict