#lang racket/base (require racket/contract/base) (provide (contract-out [transition-system? (-> any/c boolean?)] [make-transition-system (->* (#:states (sequence/c any/c) #:actions (sequence/c any/c) #:transition-function (-> any/c any/c distribution?)) (#:applicable-action-selector (or/c (-> any/c set?) #false) #:name (or/c symbol? #false)) transition-system?)] [transition-system-states (-> transition-system? set?)] [transition-system-actions (-> transition-system? set?)] [transition-system-contains-state? (-> transition-system? any/c boolean?)] [transition-system-contains-action? (-> transition-system? any/c boolean?)] [transition-system-action-applicable? (-> transition-system? any/c any/c boolean?)] [transition-system-applicable-actions (-> transition-system? any/c set?)] [transition-system-outcomes (-> transition-system? any/c any/c distribution?)] [transition-system-perform-action (-> transition-system? any/c any/c any/c)])) (require math/distributions racket/match racket/sequence racket/set rebellion/custom-write) (struct transition-system (states actions transition-function applicable-action-selector name) #:omit-define-syntaxes #:constructor-name constructor:transition-system #:property prop:object-name (struct-field-index name) #:property prop:custom-write (make-named-object-custom-write 'transition-system)) (define (make-transition-system #:states states #:actions actions #:transition-function transition-function #:applicable-action-selector [selector #false] #:name [name #false]) (let* ([states (for/set ([s states]) s)] [actions (for/set ([a actions]) a)] [selector (or selector (λ (_) actions))]) (constructor:transition-system states actions transition-function selector name))) ; Gridworld, from Reinforcement Learning: An Introduction Chapter 4.1: ; ; T 1 2 3 ; 4 5 6 7 ; 8 9 10 11 ; 12 13 14 T (define gridworld-states (set 'terminal 1 2 3 4 5 6 7 8 9 10 11 12 13 14)) (define gridworld-actions (set 'left 'up 'right 'down)) (define (gridworld-transition state action) (define new-state (match* (state action) [(1 'left) 'terminal] [(1 'up) 1] [(1 'right) 2] [(1 'down) 5] [(2 'left) 1] [(2 'up) 2] [(2 'right) 3] [(2 'down) 6] [(3 'left) 2] [(3 'up) 3] [(3 'right) 3] [(3 'down) 7] [(4 'left) 4] [(4 'up) 'terminal] [(4 'right) 5] [(4 'down) 8] [(5 'left) 4] [(5 'up) 1] [(5 'right) 6] [(5 'down) 9] [(6 'left) 5] [(6 'up) 2] [(6 'right) 7] [(6 'down) 10] [(7 'left) 6] [(7 'up) 3] [(7 'right) 7] [(7 'down) 11] [(8 'left) 8] [(8 'up) 4] [(8 'right) 9] [(8 'down) 12] [(9 'left) 8] [(9 'up) 5] [(9 'right) 10] [(9 'down) 13] [(10 'left) 9] [(10 'up) 6] [(10 'right) 11] [(10 'down) 14] [(11 'left) 10] [(11 'up) 7] [(11 'right) 11] [(11 'down) 'terminal] [(12 'left) 12] [(12 'up) 8] [(12 'right) 13] [(12 'down) 12] [(13 'left) 12] [(13 'up) 9] [(13 'right) 14] [(13 'down) 13] [(14 'left) 13] [(14 'up) 10] [(14 'right) 'terminal] [(14 'down) 14] [('terminal _) 'terminal])) (discrete-dist (list new-state))) (define gridworld (make-transition-system #:states gridworld-states #:actions gridworld-actions #:transition-function gridworld-transition #:name 'gridworld)) (define (transition-system-contains-state? system state) (set-member? (transition-system-states system) state)) (define (transition-system-contains-action? system action) (set-member? (transition-system-actions system) action)) (define (transition-system-action-applicable? system state action) (set-member? (transition-system-applicable-actions system state) action)) (define (transition-system-applicable-actions system state) (check-transition-system-contains-state #:who 'transition-system-applicable-actions system state) ((transition-system-applicable-action-selector system) state)) (define (transition-system-outcomes system state action) (check-transition-system-contains-state #:who 'transition-system-outcomes system state) (check-transition-system-contains-action #:who 'transition-system-outcomes system action) (check-transition-system-action-applicable #:who 'transition-system-outcomes system state action) ((transition-system-transition-function system) state action)) (define (transition-system-perform-action system state action) (check-transition-system-contains-state #:who 'transition-system-perform-action system state) (check-transition-system-contains-action #:who 'transition-system-perform-action system action) (check-transition-system-action-applicable #:who 'transition-system-perform-action system state action) (sample ((transition-system-transition-function system) state action))) (define (check-transition-system-contains-state #:who who system state) (unless (transition-system-contains-state? system state) (raise-arguments-error who "transition system does not contain given state" "state" state "system" system))) (define (check-transition-system-contains-action #:who who system action) (unless (transition-system-contains-action? system action) (raise-arguments-error who "transition system does not contain given action" "action" action "system" system))) (define (check-transition-system-action-applicable #:who who system state action) (unless (transition-system-action-applicable? system state action) (raise-arguments-error who "action is not applicable in given state" "action" action "state" state "system" system)))