Continue rapport · gwen.works/internshiplogs@2c625cb

+19 -25

autopush.ps1

··· 1 1 #!/usr/bin/env pwsh 2 - # PowerShell version of the Fish script using `mutool` for PDF page count 2 + # PowerShell equivalent of the provided Fish script 3 3 4 - # Checkout the PDF from Git 5 4 git checkout -- rapport/main.pdf 6 - 7 - # Pull latest changes with rebase and autostash 8 5 git pull --rebase --autostash 9 6 10 7 while ($true) { 11 - # --- Get the page count using mutool --- 12 - try { 13 - $pages = mutool info rapport/main.pdf | 14 - Select-String "^Pages:" | 15 - ForEach-Object { ($_ -split "\s+")[1] } 16 - } catch { 17 - Write-Host "Error: Could not get page count from rapport/main.pdf" 18 - $pages = "0" 19 - } 8 + # Get page count using mutool instead of pdfinfo 9 + $pageCount = & mutool info rapport/main.pdf | 10 + Select-String '^Pages:' | 11 + ForEach-Object { ($_ -split '\s+')[-1] } 20 12 21 - # Save page count to a file 22 - Set-Content -Path "pages_count" -Value $pages 13 + $pageCount > pages_count 23 14 24 - # --- Stage files --- 25 - git add rapport/*.typ bib.yaml *.ps1 rapport/*.dot pages_count rapport/*.png 15 + # Check for changes in relevant files 16 + git diff --no-patch --exit-code slides/*.typ slides/*.dot slides/*.png rapport/*.typ rapport/*.dot rapport/*.png bib.yaml | Out-Null 17 + $pdfChanges = $LASTEXITCODE 26 18 27 - # --- Commit quietly --- 28 - git commit --quiet -m "Continue rapport" 2>$null 19 + Write-Host "PDF updates with these changes: $pdfChanges" 29 20 30 - # --- Push quietly and force --- 31 - git push --quiet --force 21 + # Stage relevant files 22 + git add rapport/ slides/ bib.yaml *.fish *.ps1 pages_count 23 + git commit --quiet -m "Continue rapport" 32 24 33 - # --- Print timestamp --- 34 - Write-Host "Pushed at $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')" 25 + # Push if there were changes 26 + if ($pdfChanges -ne 0) { 27 + git push --quiet --force 28 + Write-Host "Pushed at $(Get-Date)" 29 + } 35 30 36 - # --- Sleep for 30 minutes --- 37 - Start-Sleep -Seconds 1800 31 + Start-Sleep -Seconds (60 * 30) 38 32 }

+1 -1

pages_count

··· 1 - 49 1 + 49

+5 -4

rapport/context.typ

··· 1 - #import "utils.typ": comment, refneeded, todo 1 + #import "utils.typ": comment, refneeded, todo, dontbreak 2 2 #import "@preview/fletcher:0.5.8": edge, node 3 3 #import "@preview/fletcher:0.5.8" 4 4 #import "@preview/diagraph:0.3.6" ··· 8 8 caption: caption, 9 9 fletcher.diagram(..args), 10 10 ) 11 - #let dontbreak = content => block(breakable: false, content) 12 11 13 12 #show math.equation.where(block: true): set block(spacing: 2em) 14 13 ··· 252 251 253 252 On définit également l'ensemble de _tous_ les chemins d'états possibles, peu importe la politique, $cal(C)$ : 254 253 255 - $ 256 - cal(C) := 254 + #let definitions_paths_set = $ 255 + cal(C) &:= 257 256 setbuilder( 258 257 cases( 259 258 & c_0 & = (s_0, a_0), ··· 262 261 (s_0, a) in S times A^NN 263 262 ) 264 263 $ 264 + 265 + #definitions_paths_set 265 266 266 267 On notera que, selon $M$, on peut avoir $cal(C) subset.neq (S times A)^NN$: par exemple, certains états de l'environnement peuvent représenter des "impasses", où il est impossible d'évoluer vers un autre état, peu importe l'action choisie. 267 268

slides/main.pdf

This is a binary file and will not be displayed.

+195 -1

slides/main.typ

··· 1 - #import "../rapport/utils.typ": dontbreak 1 + #import "../rapport/utils.typ": dontbreak, todo 2 + #import "../rapport/context.typ": definitions_paths_set, exp 2 3 #import "@preview/touying:0.6.1": * 3 4 #import themes.simple: * 4 5 6 + #import "@preview/fletcher:0.5.8": edge, node 7 + #import "@preview/fletcher:0.5.8" 8 + #import "@preview/diagraph:0.3.6" 9 + 10 + #show figure: set block(spacing: 4em) 11 + #let diagram = (caption: none, ..args) => figure( 12 + caption: caption, 13 + touying-reducer.with(reduce: fletcher.diagram, cover: fletcher.hide)(..args), 14 + ) 15 + 16 + #let centered = content => { 17 + v(1fr) 18 + align(center, content) 19 + v(1fr) 20 + } 21 + 5 22 #show: simple-theme.with(aspect-ratio: "16-9") 6 23 #set text(font: "New Computer Modern") 7 24 #show raw: set text(font: "Martian Mono", size: 0.8em) ··· 10 27 11 28 Gwenn Le Bihan `<gwenn.lebihan@etu.inp-n7.fr>` \ 12 29 #datetime.today().display("[day padding:none] Novembre [year]") \ 30 + 31 + #title-slide[ 32 + == Reinforcement Learning 33 + 34 + Et son application à la robotique 35 + ] 36 + 37 + 38 + == Bases du RL 39 + 40 + #diagram( 41 + node((0, 0))[Agent], 42 + node((1, 0))[Environnement], 43 + node((2, 0))[Score], 44 + 45 + pause, 46 + edge((0, 0), (1, 0), "->")[Action], 47 + 48 + pause, 49 + edge((1, 0), (2, 0), "-->")[Fonction coût], 50 + 51 + pause, 52 + edge((2, 0), (0, 0), "->", bend: 45deg)[Mise à jour], 53 + ) 54 + 55 + == RL en robotique 56 + 57 + #diagram( 58 + node((0, 0), todo[Photo H1v2]), 59 + node((1, 0), todo[H1v2 dans gz]), 60 + node((2, 0))[Score], 61 + edge((0, 0), (1, 0), "->")[genou gauche +0.5°], 62 + edge((1, 0), (2, 0), "-->", $cal(L)$), 63 + edge((2, 0), (0, 0), "->", bend: 45deg)[Mise à jour], 64 + ) 65 + 66 + 67 + 68 + #let what_is_surrogate_adv = [ 69 + == C'est quoi $cal(L)$ ? 
70 + 71 + #centered[ 72 + 73 + C'est très simple: 74 + 75 + $ 76 + cal(L)_r (pi', pi) := exp_((s_t, a_t)_(t in NN) in cal(C)) sum_(t=0)^oo (Q_pi (s_t, a_t)) / (Q_pi' (s_t, a_t)) A_(pi, r)(s_t, a_t) 77 + $ 78 + 79 + ] 80 + ] 81 + 82 + #what_is_surrogate_adv 83 + 84 + #title-slide[ 85 + 86 + == Comparaison des politiques 87 + 88 + En Reinforcement Learning 89 + 90 + ] 91 + 92 + #let loop = (pauses: false) => diagram( 93 + node((0, 0), $s_t$), 94 + if pauses { pause } else { none }, 95 + if pauses { edge(corner: right, label-pos: 2 / 8, label-side: left)[choix de l'action] } else { none }, 96 + edge("->", corner: right, label-pos: 3 / 8, label-side: left)[$Pi$], 97 + node((1, -1))[$a_t$], 98 + if pauses { pause } else { none }, 99 + edge("->", corner: right, label-pos: 5 / 8, label-side: left)[$M$], 100 + if pauses { edge(corner: right, label-pos: 6 / 8, label-side: left)[simulation] } else { none }, 101 + node((2, 0))[$s_(t+1)$], 102 + if pauses { pause } else { none }, 103 + edge((2, 0), (2, .75), (0, .75), (0, 0), "-->", label-side: left)[itération], 104 + ) 105 + 106 + #centered(loop(pauses: true)) 107 + #pagebreak() 108 + 109 + 110 + #centered[ 111 + #grid( 112 + columns: 2, 113 + gutter: 3em, 114 + 115 + loop(pauses: false), 116 + 117 + [ 118 + #diagram( 119 + $ 120 + s_0 edge(a_0, ->) & s_1 edge(a_1, ->) & s_2 edge(a_2, ->) & dots.c 121 + $, 122 + ) 123 + 124 + #pause 125 + 126 + $ 127 + ((s_0, a_0), (s_1, a_1), (s_2, a_2), ...) 128 + pause 129 + in cal(C) 130 + $ 131 + 132 + ], 133 + ) 134 + ] 135 + 136 + #pagebreak() 137 + 138 + #centered[ 139 + $ 140 + A & := "actions possibles" \ 141 + S & := "états possibles" \ 142 + #definitions_paths_set 143 + $ 144 + ] 145 + 146 + #pagebreak() 147 + 148 + == Comparaison des politiques: Avantage $A$ 149 + 150 + #centered[ 151 + À quel point est-il mieux de choisir $a_t$ plutôt qu'une autre action? 
152 + ] 153 + 154 + #pagebreak() 155 + 156 + #centered[ 157 + 158 + 159 + #let height = 2 160 + #scale(70%, reflow: true, diagram(( 161 + // Prior path 162 + node((0, 0))[$dots.c$], 163 + edge("->")[$a_(t-2)$], 164 + node((1, 0))[$s_(t-1)$], 165 + edge("->")[$a_(t-1)$], 166 + pause, 167 + node((2, 0), name: <break>)[$s_t$], 168 + edge("-")[], 169 + node((3.5, 0)), 170 + edge("->", label-pos: 0%)[$a_t$], 171 + node((4.5, 0))[$sum_(i=t+1)^oo gamma^t r(s_i)$], 172 + node(name: <bottom>, (4.5, +1.5))[$sum_(i=t+1)^oo gamma^t r(s'_i)$ ], 173 + node(name: <top>, (4.5, -1.5))[$sum_(i=t+1)^oo gamma^t r(s''_i)$], 174 + edge(<break>, <bottom>, "->", bend: -25deg)[$a'_t$], 175 + edge(<break>, <top>, "->", bend: 25deg)[$a''_t$], 176 + pause, 177 + // Expectation bar V(s) 178 + node((5, height)), 179 + edge("--"), 180 + node((1.85, height)), 181 + edge("-", label-side: left, label-pos: 75%)[$exp$], 182 + node((1.85, -height)), 183 + edge("--")[], 184 + node((5, -height)), 185 + // Expectation bar Q(s, a) 186 + node((5, 0.5)), 187 + edge("--"), 188 + node((3.25, 0.5)), 189 + edge("-", label-side: left, label-pos: 75%)[$exp$], 190 + node((3.25, -0.5)), 191 + edge("--")[], 192 + node((5, -0.5)), 193 + ))) 194 + 195 + #pause 196 + 197 + $ 198 + A_(pi, r)(s, a) := exp("avec" a_t) - exp("à" thick t-1) 199 + $ 200 + 201 + ] 202 + 203 + #pagebreak() 204 + 205 + 206 + #what_is_surrogate_adv

Configure Feed

Configure Feed