this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Continue rapport

+21 -10
slides/main.pdf

This is a binary file and will not be displayed.

+21 -10
slides/main.typ
···
65  65
66  66
67  67
68      - #let what_is_surrogate_adv = [
69      - == C'est quoi $cal(L)$ ?
    68  + == C'est quoi $cal(L)$ ?
70  69
71      - #centered[
    70  + #centered[
72  71
73      - C'est très simple:
    72  + C'est très simple:
74  73
75      - $
76      - cal(L)_r (pi', pi) := exp_((s_t, a_t)_(t in NN) in cal(C)) sum_(t=0)^oo (Q_pi (s_t, a_t)) / (Q_pi' (s_t, a_t)) A_(pi, r)(s_t, a_t)
77      - $
    74  + $
    75  + cal(L)_r (pi', pi) := exp_((s_t, a_t)_(t in NN) in cal(C)) sum_(t=0)^oo (Q_pi (s_t, a_t)) / (Q_pi' (s_t, a_t)) A_(pi, r)(s_t, a_t)
    76  + $
78  77
79      - ]
80  78    ]
81  79
82      - #what_is_surrogate_adv
83  80
84  81    #title-slide[
85  82
···
207 204   #pagebreak()
208 205
209 206
210     - #what_is_surrogate_adv
    207 + == C'est quoi $cal(L)$ ?
    208 +
    209 + #centered[
    210 +
    211 + $
    212 + cal(L)_r (pi', pi) := pause exp_((s_t, a_t)_(t in NN) in cal(C)) pause sum_(t=0)^oo pause (Q_pi (s_t, a_t)) / (Q_pi' (s_t, a_t)) pause A_(pi, r)(s_t, a_t)
    213 + $
    214 +
    215 + ]
    216 +
    217 + #title-slide[
    218 + == Optimisation de $Pi$
    219 + Mise à jour de la politique RL
    220 + ]
    221 +