···1919 /* The mmu context belongs to a secure guest. */2020 atomic_t protected_count;2121 /*2222- * The following bitfields need a down_write on the mm2323- * semaphore when they are written to. As they are only2424- * written once, they can be read without a lock.2525- */2626- /* The mmu context uses extended page tables. */2727- unsigned int has_pgste:1;2828- /* The mmu context uses storage keys. */2929- unsigned int uses_skeys:1;3030- /* The mmu context uses CMM. */3131- unsigned int uses_cmm:1;3232- /*3322 * The mmu context allows COW-sharing of memory pages (KSM, zeropage).3423 * Note that COW-sharing during fork() is currently always allowed.3524 */3625 unsigned int allow_cow_sharing:1;3737- /* The gmaps associated with this context are allowed to use huge pages. */3838- unsigned int allow_gmap_hpage_1m:1;3926} mm_context_t;40274128#define INIT_MM_CONTEXT(name) \
-4
arch/s390/include/asm/page.h
···7878#ifdef STRICT_MM_TYPECHECKS79798080typedef struct { unsigned long pgprot; } pgprot_t;8181-typedef struct { unsigned long pgste; } pgste_t;8281typedef struct { unsigned long pte; } pte_t;8382typedef struct { unsigned long pmd; } pmd_t;8483typedef struct { unsigned long pud; } pud_t;···9394#else /* STRICT_MM_TYPECHECKS */94959596typedef unsigned long pgprot_t;9696-typedef unsigned long pgste_t;9797typedef unsigned long pte_t;9898typedef unsigned long pmd_t;9999typedef unsigned long pud_t;···108110#endif /* STRICT_MM_TYPECHECKS */109111110112DEFINE_PGVAL_FUNC(pgprot)111111-DEFINE_PGVAL_FUNC(pgste)112113DEFINE_PGVAL_FUNC(pte)113114DEFINE_PGVAL_FUNC(pmd)114115DEFINE_PGVAL_FUNC(pud)···117120typedef pte_t *pgtable_t;118121119122#define __pgprot(x) ((pgprot_t) { (x) } )120120-#define __pgste(x) ((pgste_t) { (x) } )121123#define __pte(x) ((pte_t) { (x) } )122124#define __pmd(x) ((pmd_t) { (x) } )123125#define __pud(x) ((pud_t) { (x) } )
-4
arch/s390/include/asm/pgalloc.h
···2727#define page_table_alloc(...) alloc_hooks(page_table_alloc_noprof(__VA_ARGS__))2828void page_table_free(struct mm_struct *, unsigned long *);29293030-struct ptdesc *page_table_alloc_pgste_noprof(struct mm_struct *mm);3131-#define page_table_alloc_pgste(...) alloc_hooks(page_table_alloc_pgste_noprof(__VA_ARGS__))3232-void page_table_free_pgste(struct ptdesc *ptdesc);3333-3430static inline void crst_table_init(unsigned long *crst, unsigned long entry)3531{3632 memset64((u64 *)crst, entry, _CRST_ENTRIES);
+7-114
arch/s390/include/asm/pgtable.h
···413413 * SW-bits: y young, d dirty, r read, w write414414 */415415416416-/* Page status table bits for virtualization */417417-#define PGSTE_ACC_BITS 0xf000000000000000UL418418-#define PGSTE_FP_BIT 0x0800000000000000UL419419-#define PGSTE_PCL_BIT 0x0080000000000000UL420420-#define PGSTE_HR_BIT 0x0040000000000000UL421421-#define PGSTE_HC_BIT 0x0020000000000000UL422422-#define PGSTE_GR_BIT 0x0004000000000000UL423423-#define PGSTE_GC_BIT 0x0002000000000000UL424424-#define PGSTE_ST2_MASK 0x0000ffff00000000UL425425-#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */426426-#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */427427-#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */428428-429429-/* Guest Page State used for virtualization */430430-#define _PGSTE_GPS_ZERO 0x0000000080000000UL431431-#define _PGSTE_GPS_NODAT 0x0000000040000000UL432432-#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL433433-#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL434434-#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL435435-#define _PGSTE_GPS_USAGE_POT_VOLATILE 0x0000000002000000UL436436-#define _PGSTE_GPS_USAGE_VOLATILE _PGSTE_GPS_USAGE_MASK437437-438416/*439417 * A user page table pointer has the space-switch-event bit, the440418 * private-space-control bit and the storage-alteration-event-control···544566}545567#define mm_pmd_folded(mm) mm_pmd_folded(mm)546568547547-static inline int mm_has_pgste(struct mm_struct *mm)548548-{549549-#ifdef CONFIG_PGSTE550550- if (unlikely(mm->context.has_pgste))551551- return 1;552552-#endif553553- return 0;554554-}555555-556569static inline int mm_is_protected(struct mm_struct *mm)557570{558571#if IS_ENABLED(CONFIG_KVM)···551582 return 1;552583#endif553584 return 0;554554-}555555-556556-static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask)557557-{558558- return __pgste(pgste_val(pgste) & ~mask);559559-}560560-561561-static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask)562562-{563563- return __pgste(pgste_val(pgste) | mask);564585}565586566587static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)···593634{594635#if IS_ENABLED(CONFIG_KVM)595636 if (!mm->context.allow_cow_sharing)596596- return 1;597597-#endif598598- return 0;599599-}600600-601601-static inline int mm_uses_skeys(struct mm_struct *mm)602602-{603603-#ifdef CONFIG_PGSTE604604- if (mm->context.uses_skeys)605637 return 1;606638#endif607639 return 0;···13061356{13071357 if (pte_same(*ptep, entry))13081358 return 0;13091309- if (cpu_has_rdp() && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))13591359+ if (cpu_has_rdp() && pte_allow_rdp(*ptep, entry))13101360 ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);13111361 else13121362 ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);13131363 return 1;13141364}13151315-13161316-/*13171317- * Additional functions to handle KVM guest page tables13181318- */13191319-void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,13201320- pte_t *ptep, pte_t entry);13211321-void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);13221322-int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,13231323- pte_t *ptep, int prot, unsigned long bit);13241324-void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,13251325- pte_t *ptep , int reset);13261326-void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);13271327-int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,13281328- pte_t *sptep, pte_t *tptep, pte_t pte);13291329-void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);13301330-13311331-bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,13321332- pte_t *ptep);13331333-int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,13341334- unsigned char key, bool nq);13351335-int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,13361336- unsigned char key, unsigned char *oldkey,13371337- bool nq, bool mr, bool mc);13381338-int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);13391339-int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,13401340- unsigned char *key);13411341-13421342-int set_pgste_bits(struct mm_struct *mm, unsigned long addr,13431343- unsigned long bits, unsigned long value);13441344-int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);13451345-int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,13461346- unsigned long *oldpte, unsigned long *oldpgste);1347136513481366#define pgprot_writecombine pgprot_writecombine13491367pgprot_t pgprot_writecombine(pgprot_t prot);···13271409{13281410 if (pte_present(entry))13291411 entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));13301330- if (mm_has_pgste(mm)) {13311331- for (;;) {13321332- ptep_set_pte_at(mm, addr, ptep, entry);13331333- if (--nr == 0)13341334- break;13351335- ptep++;13361336- entry = __pte(pte_val(entry) + PAGE_SIZE);13371337- addr += PAGE_SIZE;13381338- }13391339- } else {13401340- for (;;) {13411341- set_pte(ptep, entry);13421342- if (--nr == 0)13431343- break;13441344- ptep++;13451345- entry = __pte(pte_val(entry) + PAGE_SIZE);13461346- }14121412+ for (;;) {14131413+ set_pte(ptep, entry);14141414+ if (--nr == 0)14151415+ break;14161416+ ptep++;14171417+ entry = __pte(pte_val(entry) + PAGE_SIZE);13471418 }13481419}13491420#define set_ptes set_ptes···1932202519332026#define pmd_pgtable(pmd) \19342027 ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))19351935-19361936-static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)19371937-{19381938- unsigned long *pgstes, res;19391939-19401940- pgstes = pgt + _PAGE_ENTRIES;19411941-19421942- res = (pgstes[0] & PGSTE_ST2_MASK) << 16;19431943- res |= pgstes[1] & PGSTE_ST2_MASK;19441944- res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;19451945- res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;19461946-19471947- return res;19481948-}1949202819502029#endif /* _S390_PAGE_H */
+1
arch/s390/kvm/dat.h
···108108#define _PAGE_SD 0x002109109110110/* Needed as macro to perform atomic operations */111111+#define PGSTE_PCL_BIT 0x0080000000000000UL /* PCL lock, HW bit */111112#define PGSTE_CMMA_D_BIT 0x0000000000008000UL /* CMMA dirty soft-bit */112113113114enum pgste_gps_usage {
···115115 return old;116116}117117118118-static inline pgste_t pgste_get_lock(pte_t *ptep)119119-{120120- unsigned long value = 0;121121-#ifdef CONFIG_PGSTE122122- unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);123123-124124- do {125125- value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);126126- } while (value & PGSTE_PCL_BIT);127127- value |= PGSTE_PCL_BIT;128128-#endif129129- return __pgste(value);130130-}131131-132132-static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)133133-{134134-#ifdef CONFIG_PGSTE135135- barrier();136136- WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);137137-#endif138138-}139139-140140-static inline pgste_t pgste_get(pte_t *ptep)141141-{142142- unsigned long pgste = 0;143143-#ifdef CONFIG_PGSTE144144- pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);145145-#endif146146- return __pgste(pgste);147147-}148148-149149-static inline void pgste_set(pte_t *ptep, pgste_t pgste)150150-{151151-#ifdef CONFIG_PGSTE152152- *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;153153-#endif154154-}155155-156156-static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,157157- struct mm_struct *mm)158158-{159159-#ifdef CONFIG_PGSTE160160- unsigned long address, bits, skey;161161-162162- if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)163163- return pgste;164164- address = pte_val(pte) & PAGE_MASK;165165- skey = (unsigned long) page_get_storage_key(address);166166- bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);167167- /* Transfer page changed & referenced bit to guest bits in pgste */168168- pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */169169- /* Copy page access key and fetch protection bit to pgste */170170- pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);171171- pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);172172-#endif173173- return pgste;174174-175175-}176176-177177-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,178178- struct mm_struct *mm)179179-{180180-#ifdef CONFIG_PGSTE181181- unsigned long address;182182- unsigned long nkey;183183-184184- if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)185185- return;186186- VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));187187- address = pte_val(entry) & PAGE_MASK;188188- /*189189- * Set page access key and fetch protection bit from pgste.190190- * The guest C/R information is still in the PGSTE, set real191191- * key C/R to 0.192192- */193193- nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;194194- nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;195195- page_set_storage_key(address, nkey, 0);196196-#endif197197-}198198-199199-static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)200200-{201201-#ifdef CONFIG_PGSTE202202- if ((pte_val(entry) & _PAGE_PRESENT) &&203203- (pte_val(entry) & _PAGE_WRITE) &&204204- !(pte_val(entry) & _PAGE_INVALID)) {205205- if (!machine_has_esop()) {206206- /*207207- * Without enhanced suppression-on-protection force208208- * the dirty bit on for all writable ptes.209209- */210210- entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));211211- entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));212212- }213213- if (!(pte_val(entry) & _PAGE_PROTECT))214214- /* This pte allows write access, set user-dirty */215215- pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);216216- }217217-#endif218218- set_pte(ptep, entry);219219- return pgste;220220-}221221-222222-static inline pgste_t pgste_pte_notify(struct mm_struct *mm,223223- unsigned long addr,224224- pte_t *ptep, pgste_t pgste)225225-{226226-#ifdef CONFIG_PGSTE227227- unsigned long bits;228228-229229- bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);230230- if (bits) {231231- pgste = __pgste(pgste_val(pgste) ^ bits);232232- ptep_notify(mm, addr, ptep, bits);233233- }234234-#endif235235- return pgste;236236-}237237-238238-static inline pgste_t ptep_xchg_start(struct mm_struct *mm,239239- unsigned long addr, pte_t *ptep)240240-{241241- pgste_t pgste = __pgste(0);242242-243243- if (mm_has_pgste(mm)) {244244- pgste = pgste_get_lock(ptep);245245- pgste = pgste_pte_notify(mm, addr, ptep, pgste);246246- }247247- return pgste;248248-}249249-250250-static inline pte_t ptep_xchg_commit(struct mm_struct *mm,251251- unsigned long addr, pte_t *ptep,252252- pgste_t pgste, pte_t old, pte_t new)253253-{254254- if (mm_has_pgste(mm)) {255255- if (pte_val(old) & _PAGE_INVALID)256256- pgste_set_key(ptep, pgste, new, mm);257257- if (pte_val(new) & _PAGE_INVALID) {258258- pgste = pgste_update_all(old, pgste, mm);259259- if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==260260- _PGSTE_GPS_USAGE_UNUSED)261261- old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));262262- }263263- pgste = pgste_set_pte(ptep, pgste, new);264264- pgste_set_unlock(ptep, pgste);265265- } else {266266- set_pte(ptep, new);267267- }268268- return old;269269-}270270-271118pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,272119 pte_t *ptep, pte_t new)273120{274274- pgste_t pgste;275121 pte_t old;276276- int nodat;277122278123 preempt_disable();279279- pgste = ptep_xchg_start(mm, addr, ptep);280280- nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);281281- old = ptep_flush_direct(mm, addr, ptep, nodat);282282- old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);124124+ old = ptep_flush_direct(mm, addr, ptep, 1);125125+ set_pte(ptep, new);283126 preempt_enable();284127 return old;285128}···156313pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,157314 pte_t *ptep, pte_t new)158315{159159- pgste_t pgste;160316 pte_t old;161161- int nodat;162317163318 preempt_disable();164164- pgste = ptep_xchg_start(mm, addr, ptep);165165- nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);166166- old = ptep_flush_lazy(mm, addr, ptep, nodat);167167- old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);319319+ old = ptep_flush_lazy(mm, addr, ptep, 1);320320+ set_pte(ptep, new);168321 preempt_enable();169322 return old;170323}···169330pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,170331 pte_t *ptep)171332{172172- pgste_t pgste;173173- pte_t old;174174- int nodat;175175- struct mm_struct *mm = vma->vm_mm;176176-177177- pgste = ptep_xchg_start(mm, addr, ptep);178178- nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);179179- old = ptep_flush_lazy(mm, addr, ptep, nodat);180180- if (mm_has_pgste(mm)) {181181- pgste = pgste_update_all(old, pgste, mm);182182- pgste_set(ptep, pgste);183183- }184184- return old;333333+ return ptep_flush_lazy(vma->vm_mm, addr, ptep, 1);185334}186335187336void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,188337 pte_t *ptep, pte_t old_pte, pte_t pte)189338{190190- pgste_t pgste;191191- struct mm_struct *mm = vma->vm_mm;192192-193193- if (mm_has_pgste(mm)) {194194- pgste = pgste_get(ptep);195195- pgste_set_key(ptep, pgste, pte, mm);196196- pgste = pgste_set_pte(ptep, pgste, pte);197197- pgste_set_unlock(ptep, pgste);198198- } else {199199- set_pte(ptep, pte);200200- }339339+ set_pte(ptep, pte);201340}202341203342static inline void pmdp_idte_local(struct mm_struct *mm,204343 unsigned long addr, pmd_t *pmdp)205344{206345 if (machine_has_tlb_guest())207207- __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,208208- mm->context.asce, IDTE_LOCAL);346346+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL);209347 else210348 __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);211349}···235419 atomic_dec(&mm->context.flush_count);236420 return old;237421}238238-239239-#ifdef CONFIG_PGSTE240240-static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)241241-{242242- struct vm_area_struct *vma;243243- pgd_t *pgd;244244- p4d_t *p4d;245245- pud_t *pud;246246-247247- /* We need a valid VMA, otherwise this is clearly a fault. */248248- vma = vma_lookup(mm, addr);249249- if (!vma)250250- return -EFAULT;251251-252252- pgd = pgd_offset(mm, addr);253253- if (!pgd_present(*pgd))254254- return -ENOENT;255255-256256- p4d = p4d_offset(pgd, addr);257257- if (!p4d_present(*p4d))258258- return -ENOENT;259259-260260- pud = pud_offset(p4d, addr);261261- if (!pud_present(*pud))262262- return -ENOENT;263263-264264- /* Large PUDs are not supported yet. */265265- if (pud_leaf(*pud))266266- return -EFAULT;267267-268268- *pmdp = pmd_offset(pud, addr);269269- return 0;270270-}271271-#endif272422273423pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,274424 pmd_t *pmdp, pmd_t new)···353571 return pgtable;354572}355573#endif /* CONFIG_TRANSPARENT_HUGEPAGE */356356-357357-#ifdef CONFIG_PGSTE358358-void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,359359- pte_t *ptep, pte_t entry)360360-{361361- pgste_t pgste;362362-363363- /* the mm_has_pgste() check is done in set_pte_at() */364364- preempt_disable();365365- pgste = pgste_get_lock(ptep);366366- pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);367367- pgste_set_key(ptep, pgste, entry, mm);368368- pgste = pgste_set_pte(ptep, pgste, entry);369369- pgste_set_unlock(ptep, pgste);370370- preempt_enable();371371-}372372-373373-void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)374374-{375375- pgste_t pgste;376376-377377- preempt_disable();378378- pgste = pgste_get_lock(ptep);379379- pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);380380- pgste_set_unlock(ptep, pgste);381381- preempt_enable();382382-}383383-384384-/**385385- * ptep_force_prot - change access rights of a locked pte386386- * @mm: pointer to the process mm_struct387387- * @addr: virtual address in the guest address space388388- * @ptep: pointer to the page table entry389389- * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE390390- * @bit: pgste bit to set (e.g. for notification)391391- *392392- * Returns 0 if the access rights were changed and -EAGAIN if the current393393- * and requested access rights are incompatible.394394- */395395-int ptep_force_prot(struct mm_struct *mm, unsigned long addr,396396- pte_t *ptep, int prot, unsigned long bit)397397-{398398- pte_t entry;399399- pgste_t pgste;400400- int pte_i, pte_p, nodat;401401-402402- pgste = pgste_get_lock(ptep);403403- entry = *ptep;404404- /* Check pte entry after all locks have been acquired */405405- pte_i = pte_val(entry) & _PAGE_INVALID;406406- pte_p = pte_val(entry) & _PAGE_PROTECT;407407- if ((pte_i && (prot != PROT_NONE)) ||408408- (pte_p && (prot & PROT_WRITE))) {409409- pgste_set_unlock(ptep, pgste);410410- return -EAGAIN;411411- }412412- /* Change access rights and set pgste bit */413413- nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);414414- if (prot == PROT_NONE && !pte_i) {415415- ptep_flush_direct(mm, addr, ptep, nodat);416416- pgste = pgste_update_all(entry, pgste, mm);417417- entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));418418- }419419- if (prot == PROT_READ && !pte_p) {420420- ptep_flush_direct(mm, addr, ptep, nodat);421421- entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));422422- entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));423423- }424424- pgste = set_pgste_bit(pgste, bit);425425- pgste = pgste_set_pte(ptep, pgste, entry);426426- pgste_set_unlock(ptep, pgste);427427- return 0;428428-}429429-430430-int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,431431- pte_t *sptep, pte_t *tptep, pte_t pte)432432-{433433- pgste_t spgste, tpgste;434434- pte_t spte, tpte;435435- int rc = -EAGAIN;436436-437437- if (!(pte_val(*tptep) & _PAGE_INVALID))438438- return 0; /* already shadowed */439439- spgste = pgste_get_lock(sptep);440440- spte = *sptep;441441- if (!(pte_val(spte) & _PAGE_INVALID) &&442442- !((pte_val(spte) & _PAGE_PROTECT) &&443443- !(pte_val(pte) & _PAGE_PROTECT))) {444444- spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);445445- tpgste = pgste_get_lock(tptep);446446- tpte = __pte((pte_val(spte) & PAGE_MASK) |447447- (pte_val(pte) & _PAGE_PROTECT));448448- /* don't touch the storage key - it belongs to parent pgste */449449- tpgste = pgste_set_pte(tptep, tpgste, tpte);450450- pgste_set_unlock(tptep, tpgste);451451- rc = 1;452452- }453453- pgste_set_unlock(sptep, spgste);454454- return rc;455455-}456456-457457-void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)458458-{459459- pgste_t pgste;460460- int nodat;461461-462462- pgste = pgste_get_lock(ptep);463463- /* notifier is called by the caller */464464- nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);465465- ptep_flush_direct(mm, saddr, ptep, nodat);466466- /* don't touch the storage key - it belongs to parent pgste */467467- pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));468468- pgste_set_unlock(ptep, pgste);469469-}470470-471471-static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)472472-{473473- if (softleaf_is_swap(entry))474474- dec_mm_counter(mm, MM_SWAPENTS);475475- else if (softleaf_is_migration(entry)) {476476- struct folio *folio = softleaf_to_folio(entry);477477-478478- dec_mm_counter(mm, mm_counter(folio));479479- }480480- free_swap_and_cache(entry);481481-}482482-483483-void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,484484- pte_t *ptep, int reset)485485-{486486- unsigned long pgstev;487487- pgste_t pgste;488488- pte_t pte;489489-490490- /* Zap unused and logically-zero pages */491491- preempt_disable();492492- pgste = pgste_get_lock(ptep);493493- pgstev = pgste_val(pgste);494494- pte = *ptep;495495- if (!reset && pte_swap(pte) &&496496- ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||497497- (pgstev & _PGSTE_GPS_ZERO))) {498498- ptep_zap_softleaf_entry(mm, softleaf_from_pte(pte));499499- pte_clear(mm, addr, ptep);500500- }501501- if (reset)502502- pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);503503- pgste_set_unlock(ptep, pgste);504504- preempt_enable();505505-}506506-507507-void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)508508-{509509- unsigned long ptev;510510- pgste_t pgste;511511-512512- /* Clear storage key ACC and F, but set R/C */513513- preempt_disable();514514- pgste = pgste_get_lock(ptep);515515- pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);516516- pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);517517- ptev = pte_val(*ptep);518518- if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))519519- page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);520520- pgste_set_unlock(ptep, pgste);521521- preempt_enable();522522-}523523-524524-/*525525- * Test and reset if a guest page is dirty526526- */527527-bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,528528- pte_t *ptep)529529-{530530- pgste_t pgste;531531- pte_t pte;532532- bool dirty;533533- int nodat;534534-535535- pgste = pgste_get_lock(ptep);536536- dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);537537- pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);538538- pte = *ptep;539539- if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {540540- pgste = pgste_pte_notify(mm, addr, ptep, pgste);541541- nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);542542- ptep_ipte_global(mm, addr, ptep, nodat);543543- if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE))544544- pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));545545- else546546- pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));547547- set_pte(ptep, pte);548548- }549549- pgste_set_unlock(ptep, pgste);550550- return dirty;551551-}552552-EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);553553-554554-int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,555555- unsigned char key, bool nq)556556-{557557- unsigned long keyul, paddr;558558- spinlock_t *ptl;559559- pgste_t old, new;560560- pmd_t *pmdp;561561- pte_t *ptep;562562-563563- /*564564- * If we don't have a PTE table and if there is no huge page mapped,565565- * we can ignore attempts to set the key to 0, because it already is 0.566566- */567567- switch (pmd_lookup(mm, addr, &pmdp)) {568568- case -ENOENT:569569- return key ? -EFAULT : 0;570570- case 0:571571- break;572572- default:573573- return -EFAULT;574574- }575575-again:576576- ptl = pmd_lock(mm, pmdp);577577- if (!pmd_present(*pmdp)) {578578- spin_unlock(ptl);579579- return key ? -EFAULT : 0;580580- }581581-582582- if (pmd_leaf(*pmdp)) {583583- paddr = pmd_val(*pmdp) & HPAGE_MASK;584584- paddr |= addr & ~HPAGE_MASK;585585- /*586586- * Huge pmds need quiescing operations, they are587587- * always mapped.588588- */589589- page_set_storage_key(paddr, key, 1);590590- spin_unlock(ptl);591591- return 0;592592- }593593- spin_unlock(ptl);594594-595595- ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);596596- if (!ptep)597597- goto again;598598- new = old = pgste_get_lock(ptep);599599- new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |600600- PGSTE_ACC_BITS | PGSTE_FP_BIT);601601- keyul = (unsigned long) key;602602- new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);603603- new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);604604- if (!(pte_val(*ptep) & _PAGE_INVALID)) {605605- unsigned long bits, skey;606606-607607- paddr = pte_val(*ptep) & PAGE_MASK;608608- skey = (unsigned long) page_get_storage_key(paddr);609609- bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);610610- skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);611611- /* Set storage key ACC and FP */612612- page_set_storage_key(paddr, skey, !nq);613613- /* Merge host changed & referenced into pgste */614614- new = set_pgste_bit(new, bits << 52);615615- }616616- /* changing the guest storage key is considered a change of the page */617617- if ((pgste_val(new) ^ pgste_val(old)) &618618- (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))619619- new = set_pgste_bit(new, PGSTE_UC_BIT);620620-621621- pgste_set_unlock(ptep, new);622622- pte_unmap_unlock(ptep, ptl);623623- return 0;624624-}625625-EXPORT_SYMBOL(set_guest_storage_key);626626-627627-/*628628- * Conditionally set a guest storage key (handling csske).629629- * oldkey will be updated when either mr or mc is set and a pointer is given.630630- *631631- * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest632632- * storage key was updated and -EFAULT on access errors.633633- */634634-int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,635635- unsigned char key, unsigned char *oldkey,636636- bool nq, bool mr, bool mc)637637-{638638- unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;639639- int rc;640640-641641- /* we can drop the pgste lock between getting and setting the key */642642- if (mr | mc) {643643- rc = get_guest_storage_key(current->mm, addr, &tmp);644644- if (rc)645645- return rc;646646- if (oldkey)647647- *oldkey = tmp;648648- if (!mr)649649- mask |= _PAGE_REFERENCED;650650- if (!mc)651651- mask |= _PAGE_CHANGED;652652- if (!((tmp ^ key) & mask))653653- return 0;654654- }655655- rc = set_guest_storage_key(current->mm, addr, key, nq);656656- return rc < 0 ? rc : 1;657657-}658658-EXPORT_SYMBOL(cond_set_guest_storage_key);659659-660660-/*661661- * Reset a guest reference bit (rrbe), returning the reference and changed bit.662662- *663663- * Returns < 0 in case of error, otherwise the cc to be reported to the guest.664664- */665665-int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)666666-{667667- spinlock_t *ptl;668668- unsigned long paddr;669669- pgste_t old, new;670670- pmd_t *pmdp;671671- pte_t *ptep;672672- int cc = 0;673673-674674- /*675675- * If we don't have a PTE table and if there is no huge page mapped,676676- * the storage key is 0 and there is nothing for us to do.677677- */678678- switch (pmd_lookup(mm, addr, &pmdp)) {679679- case -ENOENT:680680- return 0;681681- case 0:682682- break;683683- default:684684- return -EFAULT;685685- }686686-again:687687- ptl = pmd_lock(mm, pmdp);688688- if (!pmd_present(*pmdp)) {689689- spin_unlock(ptl);690690- return 0;691691- }692692-693693- if (pmd_leaf(*pmdp)) {694694- paddr = pmd_val(*pmdp) & HPAGE_MASK;695695- paddr |= addr & ~HPAGE_MASK;696696- cc = page_reset_referenced(paddr);697697- spin_unlock(ptl);698698- return cc;699699- }700700- spin_unlock(ptl);701701-702702- ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);703703- if (!ptep)704704- goto again;705705- new = old = pgste_get_lock(ptep);706706- /* Reset guest reference bit only */707707- new = clear_pgste_bit(new, PGSTE_GR_BIT);708708-709709- if (!(pte_val(*ptep) & _PAGE_INVALID)) {710710- paddr = pte_val(*ptep) & PAGE_MASK;711711- cc = page_reset_referenced(paddr);712712- /* Merge real referenced bit into host-set */713713- new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);714714- }715715- /* Reflect guest's logical view, not physical */716716- cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;717717- /* Changing the guest storage key is considered a change of the page */718718- if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)719719- new = set_pgste_bit(new, PGSTE_UC_BIT);720720-721721- pgste_set_unlock(ptep, new);722722- pte_unmap_unlock(ptep, ptl);723723- return cc;724724-}725725-EXPORT_SYMBOL(reset_guest_reference_bit);726726-727727-int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,728728- unsigned char *key)729729-{730730- unsigned long paddr;731731- spinlock_t *ptl;732732- pgste_t pgste;733733- pmd_t *pmdp;734734- pte_t *ptep;735735-736736- /*737737- * If we don't have a PTE table and if there is no huge page mapped,738738- * the storage key is 0.739739- */740740- *key = 0;741741-742742- switch (pmd_lookup(mm, addr, &pmdp)) {743743- case -ENOENT:744744- return 0;745745- case 0:746746- break;747747- default:748748- return -EFAULT;749749- }750750-again:751751- ptl = pmd_lock(mm, pmdp);752752- if (!pmd_present(*pmdp)) {753753- spin_unlock(ptl);754754- return 0;755755- }756756-757757- if (pmd_leaf(*pmdp)) {758758- paddr = pmd_val(*pmdp) & HPAGE_MASK;759759- paddr |= addr & ~HPAGE_MASK;760760- *key = page_get_storage_key(paddr);761761- spin_unlock(ptl);762762- return 0;763763- }764764- spin_unlock(ptl);765765-766766- ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);767767- if (!ptep)768768- goto again;769769- pgste = pgste_get_lock(ptep);770770- *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;771771- paddr = pte_val(*ptep) & PAGE_MASK;772772- if (!(pte_val(*ptep) & _PAGE_INVALID))773773- *key = page_get_storage_key(paddr);774774- /* Reflect guest's logical view, not physical */775775- *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;776776- pgste_set_unlock(ptep, pgste);777777- pte_unmap_unlock(ptep, ptl);778778- return 0;779779-}780780-EXPORT_SYMBOL(get_guest_storage_key);781781-782782-/**783783- * pgste_perform_essa - perform ESSA actions on the PGSTE.784784- * @mm: the memory context. It must have PGSTEs, no check is performed here!785785- * @hva: the host virtual address of the page whose PGSTE is to be processed786786- * @orc: the specific action to perform, see the ESSA_SET_* macros.787787- * @oldpte: the PTE will be saved there if the pointer is not NULL.788788- * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.789789- *790790- * Return: 1 if the page is to be added to the CBRL, otherwise 0,791791- * or < 0 in case of error. -EINVAL is returned for invalid values792792- * of orc, -EFAULT for invalid addresses.793793- */794794-int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,795795- unsigned long *oldpte, unsigned long *oldpgste)796796-{797797- struct vm_area_struct *vma;798798- unsigned long pgstev;799799- spinlock_t *ptl;800800- pgste_t pgste;801801- pte_t *ptep;802802- int res = 0;803803-804804- WARN_ON_ONCE(orc > ESSA_MAX);805805- if (unlikely(orc > ESSA_MAX))806806- return -EINVAL;807807-808808- vma = vma_lookup(mm, hva);809809- if (!vma || is_vm_hugetlb_page(vma))810810- return -EFAULT;811811- ptep = get_locked_pte(mm, hva, &ptl);812812- if (unlikely(!ptep))813813- return -EFAULT;814814- pgste = pgste_get_lock(ptep);815815- pgstev = pgste_val(pgste);816816- if (oldpte)817817- *oldpte = pte_val(*ptep);818818- if (oldpgste)819819- *oldpgste = pgstev;820820-821821- switch (orc) {822822- case ESSA_GET_STATE:823823- break;824824- case ESSA_SET_STABLE:825825- pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);826826- pgstev |= _PGSTE_GPS_USAGE_STABLE;827827- break;828828- case ESSA_SET_UNUSED:829829- pgstev &= ~_PGSTE_GPS_USAGE_MASK;830830- pgstev |= _PGSTE_GPS_USAGE_UNUSED;831831- if (pte_val(*ptep) & _PAGE_INVALID)832832- res = 1;833833- break;834834- case ESSA_SET_VOLATILE:835835- pgstev &= ~_PGSTE_GPS_USAGE_MASK;836836- pgstev |= _PGSTE_GPS_USAGE_VOLATILE;837837- if (pte_val(*ptep) & _PAGE_INVALID)838838- res = 1;839839- break;840840- case ESSA_SET_POT_VOLATILE:841841- pgstev &= ~_PGSTE_GPS_USAGE_MASK;842842- if (!(pte_val(*ptep) & _PAGE_INVALID)) {843843- pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;844844- break;845845- }846846- if (pgstev & _PGSTE_GPS_ZERO) {847847- pgstev |= _PGSTE_GPS_USAGE_VOLATILE;848848- break;849849- }850850- if (!(pgstev & PGSTE_GC_BIT)) {851851- pgstev |= _PGSTE_GPS_USAGE_VOLATILE;852852- res = 1;853853- break;854854- }855855- break;856856- case ESSA_SET_STABLE_RESIDENT:857857- pgstev &= ~_PGSTE_GPS_USAGE_MASK;858858- pgstev |= _PGSTE_GPS_USAGE_STABLE;859859- /*860860- * Since the resident state can go away any time after this861861- * call, we will not make this page resident. We can revisit862862- * this decision if a guest will ever start using this.863863- */864864- break;865865- case ESSA_SET_STABLE_IF_RESIDENT:866866- if (!(pte_val(*ptep) & _PAGE_INVALID)) {867867- pgstev &= ~_PGSTE_GPS_USAGE_MASK;868868- pgstev |= _PGSTE_GPS_USAGE_STABLE;869869- }870870- break;871871- case ESSA_SET_STABLE_NODAT:872872- pgstev &= ~_PGSTE_GPS_USAGE_MASK;873873- pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;874874- break;875875- default:876876- /* we should never get here! */877877- break;878878- }879879- /* If we are discarding a page, set it to logical zero */880880- if (res)881881- pgstev |= _PGSTE_GPS_ZERO;882882-883883- pgste = __pgste(pgstev);884884- pgste_set_unlock(ptep, pgste);885885- pte_unmap_unlock(ptep, ptl);886886- return res;887887-}888888-EXPORT_SYMBOL(pgste_perform_essa);889889-890890-/**891891- * set_pgste_bits - set specific PGSTE bits.892892- * @mm: the memory context. It must have PGSTEs, no check is performed here!893893- * @hva: the host virtual address of the page whose PGSTE is to be processed894894- * @bits: a bitmask representing the bits that will be touched895895- * @value: the values of the bits to be written. Only the bits in the mask896896- * will be written.897897- *898898- * Return: 0 on success, < 0 in case of error.899899- */900900-int set_pgste_bits(struct mm_struct *mm, unsigned long hva,901901- unsigned long bits, unsigned long value)902902-{903903- struct vm_area_struct *vma;904904- spinlock_t *ptl;905905- pgste_t new;906906- pte_t *ptep;907907-908908- vma = vma_lookup(mm, hva);909909- if (!vma || is_vm_hugetlb_page(vma))910910- return -EFAULT;911911- ptep = get_locked_pte(mm, hva, &ptl);912912- if (unlikely(!ptep))913913- return -EFAULT;914914- new = pgste_get_lock(ptep);915915-916916- new = clear_pgste_bit(new, bits);917917- new = set_pgste_bit(new, value & bits);918918-919919- pgste_set_unlock(ptep, new);920920- pte_unmap_unlock(ptep, ptl);921921- return 0;922922-}923923-EXPORT_SYMBOL(set_pgste_bits);924924-925925-/**926926- * get_pgste - get the current PGSTE for the given address.927927- * @mm: the memory context. It must have PGSTEs, no check is performed here!928928- * @hva: the host virtual address of the page whose PGSTE is to be processed929929- * @pgstep: will be written with the current PGSTE for the given address.930930- *931931- * Return: 0 on success, < 0 in case of error.932932- */933933-int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)934934-{935935- struct vm_area_struct *vma;936936- spinlock_t *ptl;937937- pte_t *ptep;938938-939939- vma = vma_lookup(mm, hva);940940- if (!vma || is_vm_hugetlb_page(vma))941941- return -EFAULT;942942- ptep = get_locked_pte(mm, hva, &ptl);943943- if (unlikely(!ptep))944944- return -EFAULT;945945- *pgstep = pgste_val(pgste_get(ptep));946946- pte_unmap_unlock(ptep, ptl);947947- return 0;948948-}949949-EXPORT_SYMBOL(get_pgste);950950-#endif
-9
mm/khugepaged.c
···343343{344344 switch (advice) {345345 case MADV_HUGEPAGE:346346-#ifdef CONFIG_S390347347- /*348348- * qemu blindly sets MADV_HUGEPAGE on all allocations, but s390349349- * can't handle this properly after s390_enable_sie, so we simply350350- * ignore the madvise to prevent qemu from causing a SIGSEGV.351351- */352352- if (mm_has_pgste(vma->vm_mm))353353- return 0;354354-#endif355346 *vm_flags &= ~VM_NOHUGEPAGE;356347 *vm_flags |= VM_HUGEPAGE;357348 /*