···11-/* SPDX-License-Identifier: GPL-2.0 */22-/*33- * KVM guest address space mapping code44- *55- * Copyright IBM Corp. 2007, 201666- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>77- */88-99-#ifndef _ASM_S390_GMAP_H1010-#define _ASM_S390_GMAP_H1111-1212-#include <linux/radix-tree.h>1313-#include <linux/refcount.h>1414-1515-/* Generic bits for GMAP notification on DAT table entry changes. */1616-#define GMAP_NOTIFY_SHADOW 0x21717-#define GMAP_NOTIFY_MPROT 0x11818-1919-/* Status bits only for huge segment entries */2020-#define _SEGMENT_ENTRY_GMAP_IN 0x0800 /* invalidation notify bit */2121-#define _SEGMENT_ENTRY_GMAP_UC 0x0002 /* dirty (migration) */2222-2323-/**2424- * struct gmap_struct - guest address space2525- * @list: list head for the mm->context gmap list2626- * @mm: pointer to the parent mm_struct2727- * @guest_to_host: radix tree with guest to host address translation2828- * @host_to_guest: radix tree with pointer to segment table entries2929- * @guest_table_lock: spinlock to protect all entries in the guest page table3030- * @ref_count: reference counter for the gmap structure3131- * @table: pointer to the page directory3232- * @asce: address space control element for gmap page table3333- * @pfault_enabled: defines if pfaults are applicable for the guest3434- * @guest_handle: protected virtual machine handle for the ultravisor3535- * @host_to_rmap: radix tree with gmap_rmap lists3636- * @children: list of shadow gmap structures3737- * @shadow_lock: spinlock to protect the shadow gmap list3838- * @parent: pointer to the parent gmap for shadow guest address spaces3939- * @orig_asce: ASCE for which the shadow page table has been created4040- * @edat_level: edat level to be used for the shadow translation4141- * @removed: flag to indicate if a shadow guest address space has been removed4242- * @initialized: flag to indicate if a shadow guest address space can be used4343- */4444-struct gmap {4545- struct list_head list;4646- struct mm_struct *mm;4747- struct radix_tree_root guest_to_host;4848- struct radix_tree_root host_to_guest;4949- spinlock_t guest_table_lock;5050- refcount_t ref_count;5151- unsigned long *table;5252- unsigned long asce;5353- unsigned long asce_end;5454- void *private;5555- bool pfault_enabled;5656- /* only set for protected virtual machines */5757- unsigned long guest_handle;5858- /* Additional data for shadow guest address spaces */5959- struct radix_tree_root host_to_rmap;6060- struct list_head children;6161- spinlock_t shadow_lock;6262- struct gmap *parent;6363- unsigned long orig_asce;6464- int edat_level;6565- bool removed;6666- bool initialized;6767-};6868-6969-/**7070- * struct gmap_rmap - reverse mapping for shadow page table entries7171- * @next: pointer to next rmap in the list7272- * @raddr: virtual rmap address in the shadow guest address space7373- */7474-struct gmap_rmap {7575- struct gmap_rmap *next;7676- unsigned long raddr;7777-};7878-7979-#define gmap_for_each_rmap(pos, head) \8080- for (pos = (head); pos; pos = pos->next)8181-8282-#define gmap_for_each_rmap_safe(pos, n, head) \8383- for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n)8484-8585-/**8686- * struct gmap_notifier - notify function block for page invalidation8787- * @notifier_call: address of callback function8888- */8989-struct gmap_notifier {9090- struct list_head list;9191- struct rcu_head rcu;9292- void (*notifier_call)(struct gmap *gmap, unsigned long start,9393- unsigned long end);9494-};9595-9696-static inline int gmap_is_shadow(struct gmap *gmap)9797-{9898- return !!gmap->parent;9999-}100100-101101-struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);102102-void gmap_remove(struct gmap *gmap);103103-struct gmap *gmap_get(struct gmap *gmap);104104-void gmap_put(struct gmap *gmap);105105-void gmap_free(struct gmap *gmap);106106-struct gmap *gmap_alloc(unsigned long limit);107107-108108-int gmap_map_segment(struct gmap *gmap, unsigned long from,109109- unsigned long to, unsigned long len);110110-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);111111-unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);112112-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);113113-void __gmap_zap(struct gmap *, unsigned long gaddr);114114-void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);115115-116116-int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);117117-118118-void gmap_unshadow(struct gmap *sg);119119-int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,120120- int fake);121121-int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,122122- int fake);123123-int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,124124- int fake);125125-int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,126126- int fake);127127-int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);128128-129129-void gmap_register_pte_notifier(struct gmap_notifier *);130130-void gmap_unregister_pte_notifier(struct gmap_notifier *);131131-132132-int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits);133133-134134-void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],135135- unsigned long gaddr, unsigned long vmaddr);136136-int s390_replace_asce(struct gmap *gmap);137137-void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);138138-int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,139139- unsigned long end, bool interruptible);140140-unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);141141-142142-/**143143- * s390_uv_destroy_range - Destroy a range of pages in the given mm.144144- * @mm: the mm on which to operate on145145- * @start: the start of the range146146- * @end: the end of the range147147- *148148- * This function will call cond_sched, so it should not generate stalls, but149149- * it will otherwise only return when it completed.150150- */151151-static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,152152- unsigned long end)153153-{154154- (void)__s390_uv_destroy_range(mm, start, end, false);155155-}156156-157157-/**158158- * s390_uv_destroy_range_interruptible - Destroy a range of pages in the159159- * given mm, but stop when a fatal signal is received.160160- * @mm: the mm on which to operate on161161- * @start: the start of the range162162- * @end: the end of the range163163- *164164- * This function will call cond_sched, so it should not generate stalls. If165165- * a fatal signal is received, it will return with -EINTR immediately,166166- * without finishing destroying the whole range. Upon successful167167- * completion, 0 is returned.168168- */169169-static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start,170170- unsigned long end)171171-{172172- return __s390_uv_destroy_range(mm, start, end, true);173173-}174174-#endif /* _ASM_S390_GMAP_H */
-8
arch/s390/include/asm/pgtable.h
···13691369void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,13701370 pte_t *ptep, pte_t entry);13711371void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);13721372-void ptep_notify(struct mm_struct *mm, unsigned long addr,13731373- pte_t *ptep, unsigned long bits);13741372int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,13751373 pte_t *ptep, int prot, unsigned long bit);13761374void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,···13941396int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);13951397int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,13961398 unsigned long *oldpte, unsigned long *oldpgste);13971397-void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);13981398-void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);13991399-void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);1400139914011400#define pgprot_writecombine pgprot_writecombine14021401pgprot_t pgprot_writecombine(pgprot_t prot);···20182023extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);20192024extern void vmem_unmap_4k_page(unsigned long addr);20202025extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);20212021-extern int s390_enable_sie(void);20222022-extern int s390_enable_skey(void);20232023-extern void s390_reset_cmma(struct mm_struct *mm);2024202620252027/* s390 has a private copy of get unmapped area to deal with cache synonyms */20262028#define HAVE_ARCH_UNMAPPED_AREA
···11-// SPDX-License-Identifier: GPL-2.022-/*33- * KVM guest address space mapping code44- *55- * Copyright IBM Corp. 2007, 202066- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>77- * David Hildenbrand <david@redhat.com>88- * Janosch Frank <frankja@linux.vnet.ibm.com>99- */1010-1111-#include <linux/cpufeature.h>1212-#include <linux/export.h>1313-#include <linux/kernel.h>1414-#include <linux/pagewalk.h>1515-#include <linux/swap.h>1616-#include <linux/smp.h>1717-#include <linux/spinlock.h>1818-#include <linux/slab.h>1919-#include <linux/swapops.h>2020-#include <linux/ksm.h>2121-#include <linux/mman.h>2222-#include <linux/pgtable.h>2323-#include <asm/page-states.h>2424-#include <asm/pgalloc.h>2525-#include <asm/machine.h>2626-#include <asm/gmap_helpers.h>2727-#include <asm/gmap.h>2828-#include <asm/page.h>2929-3030-/*3131- * The address is saved in a radix tree directly; NULL would be ambiguous,3232- * since 0 is a valid address, and NULL is returned when nothing was found.3333- * The lower bits are ignored by all users of the macro, so it can be used3434- * to distinguish a valid address 0 from a NULL.3535- */3636-#define VALID_GADDR_FLAG 13737-#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)3838-#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)3939-4040-#define GMAP_SHADOW_FAKE_TABLE 1ULL4141-4242-static struct page *gmap_alloc_crst(void)4343-{4444- struct page *page;4545-4646- page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);4747- if (!page)4848- return NULL;4949- __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);5050- return page;5151-}5252-5353-/**5454- * gmap_alloc - allocate and initialize a guest address space5555- * @limit: maximum address of the gmap address space5656- *5757- * Returns a guest address space structure.5858- */5959-struct gmap *gmap_alloc(unsigned long limit)6060-{6161- struct gmap *gmap;6262- struct page *page;6363- unsigned long *table;6464- unsigned long etype, atype;6565-6666- if (limit < _REGION3_SIZE) {6767- limit = _REGION3_SIZE - 1;6868- atype = _ASCE_TYPE_SEGMENT;6969- etype = _SEGMENT_ENTRY_EMPTY;7070- } else if (limit < _REGION2_SIZE) {7171- limit = _REGION2_SIZE - 1;7272- atype = _ASCE_TYPE_REGION3;7373- etype = _REGION3_ENTRY_EMPTY;7474- } else if (limit < _REGION1_SIZE) {7575- limit = _REGION1_SIZE - 1;7676- atype = _ASCE_TYPE_REGION2;7777- etype = _REGION2_ENTRY_EMPTY;7878- } else {7979- limit = -1UL;8080- atype = _ASCE_TYPE_REGION1;8181- etype = _REGION1_ENTRY_EMPTY;8282- }8383- gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);8484- if (!gmap)8585- goto out;8686- INIT_LIST_HEAD(&gmap->children);8787- INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);8888- INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);8989- INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);9090- spin_lock_init(&gmap->guest_table_lock);9191- spin_lock_init(&gmap->shadow_lock);9292- refcount_set(&gmap->ref_count, 1);9393- page = gmap_alloc_crst();9494- if (!page)9595- goto out_free;9696- table = page_to_virt(page);9797- crst_table_init(table, etype);9898- gmap->table = table;9999- gmap->asce = atype | _ASCE_TABLE_LENGTH |100100- _ASCE_USER_BITS | __pa(table);101101- gmap->asce_end = limit;102102- return gmap;103103-104104-out_free:105105- kfree(gmap);106106-out:107107- return NULL;108108-}109109-EXPORT_SYMBOL_GPL(gmap_alloc);110110-111111-/**112112- * gmap_create - create a guest address space113113- * @mm: pointer to the parent mm_struct114114- * @limit: maximum size of the gmap address space115115- *116116- * Returns a guest address space structure.117117- */118118-struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)119119-{120120- struct gmap *gmap;121121- unsigned long gmap_asce;122122-123123- gmap = gmap_alloc(limit);124124- if (!gmap)125125- return NULL;126126- gmap->mm = mm;127127- spin_lock(&mm->context.lock);128128- list_add_rcu(&gmap->list, &mm->context.gmap_list);129129- if (list_is_singular(&mm->context.gmap_list))130130- gmap_asce = gmap->asce;131131- else132132- gmap_asce = -1UL;133133- WRITE_ONCE(mm->context.gmap_asce, gmap_asce);134134- spin_unlock(&mm->context.lock);135135- return gmap;136136-}137137-EXPORT_SYMBOL_GPL(gmap_create);138138-139139-static void gmap_flush_tlb(struct gmap *gmap)140140-{141141- __tlb_flush_idte(gmap->asce);142142-}143143-144144-static void gmap_radix_tree_free(struct radix_tree_root *root)145145-{146146- struct radix_tree_iter iter;147147- unsigned long indices[16];148148- unsigned long index;149149- void __rcu **slot;150150- int i, nr;151151-152152- /* A radix tree is freed by deleting all of its entries */153153- index = 0;154154- do {155155- nr = 0;156156- radix_tree_for_each_slot(slot, root, &iter, index) {157157- indices[nr] = iter.index;158158- if (++nr == 16)159159- break;160160- }161161- for (i = 0; i < nr; i++) {162162- index = indices[i];163163- radix_tree_delete(root, index);164164- }165165- } while (nr > 0);166166-}167167-168168-static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)169169-{170170- struct gmap_rmap *rmap, *rnext, *head;171171- struct radix_tree_iter iter;172172- unsigned long indices[16];173173- unsigned long index;174174- void __rcu **slot;175175- int i, nr;176176-177177- /* A radix tree is freed by deleting all of its entries */178178- index = 0;179179- do {180180- nr = 0;181181- radix_tree_for_each_slot(slot, root, &iter, index) {182182- indices[nr] = iter.index;183183- if (++nr == 16)184184- break;185185- }186186- for (i = 0; i < nr; i++) {187187- index = indices[i];188188- head = radix_tree_delete(root, index);189189- gmap_for_each_rmap_safe(rmap, rnext, head)190190- kfree(rmap);191191- }192192- } while (nr > 0);193193-}194194-195195-static void gmap_free_crst(unsigned long *table, bool free_ptes)196196-{197197- bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;198198- int i;199199-200200- if (is_segment) {201201- if (!free_ptes)202202- goto out;203203- for (i = 0; i < _CRST_ENTRIES; i++)204204- if (!(table[i] & _SEGMENT_ENTRY_INVALID))205205- page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));206206- } else {207207- for (i = 0; i < _CRST_ENTRIES; i++)208208- if (!(table[i] & _REGION_ENTRY_INVALID))209209- gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);210210- }211211-212212-out:213213- free_pages((unsigned long)table, CRST_ALLOC_ORDER);214214-}215215-216216-/**217217- * gmap_free - free a guest address space218218- * @gmap: pointer to the guest address space structure219219- *220220- * No locks required. There are no references to this gmap anymore.221221- */222222-void gmap_free(struct gmap *gmap)223223-{224224- /* Flush tlb of all gmaps (if not already done for shadows) */225225- if (!(gmap_is_shadow(gmap) && gmap->removed))226226- gmap_flush_tlb(gmap);227227- /* Free all segment & region tables. */228228- gmap_free_crst(gmap->table, gmap_is_shadow(gmap));229229-230230- gmap_radix_tree_free(&gmap->guest_to_host);231231- gmap_radix_tree_free(&gmap->host_to_guest);232232-233233- /* Free additional data for a shadow gmap */234234- if (gmap_is_shadow(gmap)) {235235- gmap_rmap_radix_tree_free(&gmap->host_to_rmap);236236- /* Release reference to the parent */237237- gmap_put(gmap->parent);238238- }239239-240240- kfree(gmap);241241-}242242-EXPORT_SYMBOL_GPL(gmap_free);243243-244244-/**245245- * gmap_get - increase reference counter for guest address space246246- * @gmap: pointer to the guest address space structure247247- *248248- * Returns the gmap pointer249249- */250250-struct gmap *gmap_get(struct gmap *gmap)251251-{252252- refcount_inc(&gmap->ref_count);253253- return gmap;254254-}255255-EXPORT_SYMBOL_GPL(gmap_get);256256-257257-/**258258- * gmap_put - decrease reference counter for guest address space259259- * @gmap: pointer to the guest address space structure260260- *261261- * If the reference counter reaches zero the guest address space is freed.262262- */263263-void gmap_put(struct gmap *gmap)264264-{265265- if (refcount_dec_and_test(&gmap->ref_count))266266- gmap_free(gmap);267267-}268268-EXPORT_SYMBOL_GPL(gmap_put);269269-270270-/**271271- * gmap_remove - remove a guest address space but do not free it yet272272- * @gmap: pointer to the guest address space structure273273- */274274-void gmap_remove(struct gmap *gmap)275275-{276276- struct gmap *sg, *next;277277- unsigned long gmap_asce;278278-279279- /* Remove all shadow gmaps linked to this gmap */280280- if (!list_empty(&gmap->children)) {281281- spin_lock(&gmap->shadow_lock);282282- list_for_each_entry_safe(sg, next, &gmap->children, list) {283283- list_del(&sg->list);284284- gmap_put(sg);285285- }286286- spin_unlock(&gmap->shadow_lock);287287- }288288- /* Remove gmap from the pre-mm list */289289- spin_lock(&gmap->mm->context.lock);290290- list_del_rcu(&gmap->list);291291- if (list_empty(&gmap->mm->context.gmap_list))292292- gmap_asce = 0;293293- else if (list_is_singular(&gmap->mm->context.gmap_list))294294- gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,295295- struct gmap, list)->asce;296296- else297297- gmap_asce = -1UL;298298- WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);299299- spin_unlock(&gmap->mm->context.lock);300300- synchronize_rcu();301301- /* Put reference */302302- gmap_put(gmap);303303-}304304-EXPORT_SYMBOL_GPL(gmap_remove);305305-306306-/*307307- * gmap_alloc_table is assumed to be called with mmap_lock held308308- */309309-static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,310310- unsigned long init, unsigned long gaddr)311311-{312312- struct page *page;313313- unsigned long *new;314314-315315- /* since we dont free the gmap table until gmap_free we can unlock */316316- page = gmap_alloc_crst();317317- if (!page)318318- return -ENOMEM;319319- new = page_to_virt(page);320320- crst_table_init(new, init);321321- spin_lock(&gmap->guest_table_lock);322322- if (*table & _REGION_ENTRY_INVALID) {323323- *table = __pa(new) | _REGION_ENTRY_LENGTH |324324- (*table & _REGION_ENTRY_TYPE_MASK);325325- page = NULL;326326- }327327- spin_unlock(&gmap->guest_table_lock);328328- if (page)329329- __free_pages(page, CRST_ALLOC_ORDER);330330- return 0;331331-}332332-333333-static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)334334-{335335- return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);336336-}337337-338338-static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)339339-{340340- return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);341341-}342342-343343-static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,344344- unsigned long *gaddr)345345-{346346- *gaddr = host_to_guest_delete(gmap, vmaddr);347347- if (IS_GADDR_VALID(*gaddr))348348- return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);349349- return NULL;350350-}351351-352352-/**353353- * __gmap_unlink_by_vmaddr - unlink a single segment via a host address354354- * @gmap: pointer to the guest address space structure355355- * @vmaddr: address in the host process address space356356- *357357- * Returns 1 if a TLB flush is required358358- */359359-static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)360360-{361361- unsigned long gaddr;362362- int flush = 0;363363- pmd_t *pmdp;364364-365365- BUG_ON(gmap_is_shadow(gmap));366366- spin_lock(&gmap->guest_table_lock);367367-368368- pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);369369- if (pmdp) {370370- flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);371371- *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);372372- }373373-374374- spin_unlock(&gmap->guest_table_lock);375375- return flush;376376-}377377-378378-/**379379- * __gmap_unmap_by_gaddr - unmap a single segment via a guest address380380- * @gmap: pointer to the guest address space structure381381- * @gaddr: address in the guest address space382382- *383383- * Returns 1 if a TLB flush is required384384- */385385-static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)386386-{387387- unsigned long vmaddr;388388-389389- vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,390390- gaddr >> PMD_SHIFT);391391- return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;392392-}393393-394394-/**395395- * gmap_unmap_segment - unmap segment from the guest address space396396- * @gmap: pointer to the guest address space structure397397- * @to: address in the guest address space398398- * @len: length of the memory area to unmap399399- *400400- * Returns 0 if the unmap succeeded, -EINVAL if not.401401- */402402-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)403403-{404404- unsigned long off;405405- int flush;406406-407407- BUG_ON(gmap_is_shadow(gmap));408408- if ((to | len) & (PMD_SIZE - 1))409409- return -EINVAL;410410- if (len == 0 || to + len < to)411411- return -EINVAL;412412-413413- flush = 0;414414- mmap_write_lock(gmap->mm);415415- for (off = 0; off < len; off += PMD_SIZE)416416- flush |= __gmap_unmap_by_gaddr(gmap, to + off);417417- mmap_write_unlock(gmap->mm);418418- if (flush)419419- gmap_flush_tlb(gmap);420420- return 0;421421-}422422-EXPORT_SYMBOL_GPL(gmap_unmap_segment);423423-424424-/**425425- * gmap_map_segment - map a segment to the guest address space426426- * @gmap: pointer to the guest address space structure427427- * @from: source address in the parent address space428428- * @to: target address in the guest address space429429- * @len: length of the memory area to map430430- *431431- * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.432432- */433433-int gmap_map_segment(struct gmap *gmap, unsigned long from,434434- unsigned long to, unsigned long len)435435-{436436- unsigned long off;437437- int flush;438438-439439- BUG_ON(gmap_is_shadow(gmap));440440- if ((from | to | len) & (PMD_SIZE - 1))441441- return -EINVAL;442442- if (len == 0 || from + len < from || to + len < to ||443443- from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)444444- return -EINVAL;445445-446446- flush = 0;447447- mmap_write_lock(gmap->mm);448448- for (off = 0; off < len; off += PMD_SIZE) {449449- /* Remove old translation */450450- flush |= __gmap_unmap_by_gaddr(gmap, to + off);451451- /* Store new translation */452452- if (radix_tree_insert(&gmap->guest_to_host,453453- (to + off) >> PMD_SHIFT,454454- (void *) from + off))455455- break;456456- }457457- mmap_write_unlock(gmap->mm);458458- if (flush)459459- gmap_flush_tlb(gmap);460460- if (off >= len)461461- return 0;462462- gmap_unmap_segment(gmap, to, len);463463- return -ENOMEM;464464-}465465-EXPORT_SYMBOL_GPL(gmap_map_segment);466466-467467-/**468468- * __gmap_translate - translate a guest address to a user space address469469- * @gmap: pointer to guest mapping meta data structure470470- * @gaddr: guest address471471- *472472- * Returns user space address which corresponds to the guest address or473473- * -EFAULT if no such mapping exists.474474- * This function does not establish potentially missing page table entries.475475- * The mmap_lock of the mm that belongs to the address space must be held476476- * when this function gets called.477477- *478478- * Note: Can also be called for shadow gmaps.479479- */480480-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)481481-{482482- unsigned long vmaddr;483483-484484- vmaddr = (unsigned long)485485- radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);486486- /* Note: guest_to_host is empty for a shadow gmap */487487- return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;488488-}489489-EXPORT_SYMBOL_GPL(__gmap_translate);490490-491491-/**492492- * gmap_unlink - disconnect a page table from the gmap shadow tables493493- * @mm: pointer to the parent mm_struct494494- * @table: pointer to the host page table495495- * @vmaddr: vm address associated with the host page table496496- */497497-void gmap_unlink(struct mm_struct *mm, unsigned long *table,498498- unsigned long vmaddr)499499-{500500- struct gmap *gmap;501501- int flush;502502-503503- rcu_read_lock();504504- list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {505505- flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);506506- if (flush)507507- gmap_flush_tlb(gmap);508508- }509509- rcu_read_unlock();510510-}511511-512512-static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,513513- unsigned long gaddr);514514-515515-/**516516- * __gmap_link - set up shadow page tables to connect a host to a guest address517517- * @gmap: pointer to guest mapping meta data structure518518- * @gaddr: guest address519519- * @vmaddr: vm address520520- *521521- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT522522- * if the vm address is already mapped to a different guest segment.523523- * The mmap_lock of the mm that belongs to the address space must be held524524- * when this function gets called.525525- */526526-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)527527-{528528- struct mm_struct *mm;529529- unsigned long *table;530530- spinlock_t *ptl;531531- pgd_t *pgd;532532- p4d_t *p4d;533533- pud_t *pud;534534- pmd_t *pmd;535535- u64 unprot;536536- int rc;537537-538538- BUG_ON(gmap_is_shadow(gmap));539539- /* Create higher level tables in the gmap page table */540540- table = gmap->table;541541- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {542542- table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;543543- if ((*table & _REGION_ENTRY_INVALID) &&544544- gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,545545- gaddr & _REGION1_MASK))546546- return -ENOMEM;547547- table = __va(*table & _REGION_ENTRY_ORIGIN);548548- }549549- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {550550- table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;551551- if ((*table & _REGION_ENTRY_INVALID) &&552552- gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,553553- gaddr & _REGION2_MASK))554554- return -ENOMEM;555555- table = __va(*table & _REGION_ENTRY_ORIGIN);556556- }557557- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {558558- table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;559559- if ((*table & _REGION_ENTRY_INVALID) &&560560- gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,561561- gaddr & _REGION3_MASK))562562- return -ENOMEM;563563- table = __va(*table & _REGION_ENTRY_ORIGIN);564564- }565565- table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;566566- /* Walk the parent mm page table */567567- mm = gmap->mm;568568- pgd = pgd_offset(mm, vmaddr);569569- VM_BUG_ON(pgd_none(*pgd));570570- p4d = p4d_offset(pgd, vmaddr);571571- VM_BUG_ON(p4d_none(*p4d));572572- pud = pud_offset(p4d, vmaddr);573573- VM_BUG_ON(pud_none(*pud));574574- /* large puds cannot yet be handled */575575- if (pud_leaf(*pud))576576- return -EFAULT;577577- pmd = pmd_offset(pud, vmaddr);578578- VM_BUG_ON(pmd_none(*pmd));579579- /* Are we allowed to use huge pages? */580580- if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)581581- return -EFAULT;582582- /* Link gmap segment table entry location to page table. */583583- rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);584584- if (rc)585585- return rc;586586- ptl = pmd_lock(mm, pmd);587587- spin_lock(&gmap->guest_table_lock);588588- if (*table == _SEGMENT_ENTRY_EMPTY) {589589- rc = radix_tree_insert(&gmap->host_to_guest,590590- vmaddr >> PMD_SHIFT,591591- (void *)MAKE_VALID_GADDR(gaddr));592592- if (!rc) {593593- if (pmd_leaf(*pmd)) {594594- *table = (pmd_val(*pmd) &595595- _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)596596- | _SEGMENT_ENTRY_GMAP_UC597597- | _SEGMENT_ENTRY;598598- } else599599- *table = (pmd_val(*pmd) &600600- _SEGMENT_ENTRY_HARDWARE_BITS)601601- | _SEGMENT_ENTRY;602602- }603603- } else if (*table & _SEGMENT_ENTRY_PROTECT &&604604- !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {605605- unprot = (u64)*table;606606- unprot &= ~_SEGMENT_ENTRY_PROTECT;607607- unprot |= _SEGMENT_ENTRY_GMAP_UC;608608- gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);609609- }610610- spin_unlock(&gmap->guest_table_lock);611611- spin_unlock(ptl);612612- radix_tree_preload_end();613613- return rc;614614-}615615-EXPORT_SYMBOL(__gmap_link);616616-617617-/*618618- * this function is assumed to be called with mmap_lock held619619- */620620-void __gmap_zap(struct gmap *gmap, unsigned long gaddr)621621-{622622- unsigned long vmaddr;623623-624624- mmap_assert_locked(gmap->mm);625625-626626- /* Find the vm address for the guest address */627627- vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,628628- gaddr >> PMD_SHIFT);629629- if (vmaddr) {630630- vmaddr |= gaddr & ~PMD_MASK;631631- gmap_helper_zap_one_page(gmap->mm, vmaddr);632632- }633633-}634634-EXPORT_SYMBOL_GPL(__gmap_zap);635635-636636-static LIST_HEAD(gmap_notifier_list);637637-static DEFINE_SPINLOCK(gmap_notifier_lock);638638-639639-/**640640- * gmap_register_pte_notifier - register a pte invalidation callback641641- * @nb: pointer to the gmap notifier block642642- */643643-void gmap_register_pte_notifier(struct gmap_notifier *nb)644644-{645645- spin_lock(&gmap_notifier_lock);646646- list_add_rcu(&nb->list, &gmap_notifier_list);647647- spin_unlock(&gmap_notifier_lock);648648-}649649-EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);650650-651651-/**652652- * gmap_unregister_pte_notifier - remove a pte invalidation callback653653- * @nb: pointer to the gmap notifier block654654- */655655-void gmap_unregister_pte_notifier(struct gmap_notifier *nb)656656-{657657- spin_lock(&gmap_notifier_lock);658658- list_del_rcu(&nb->list);659659- spin_unlock(&gmap_notifier_lock);660660- synchronize_rcu();661661-}662662-EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);663663-664664-/**665665- * gmap_call_notifier - call all registered invalidation callbacks666666- * @gmap: pointer to guest mapping meta data structure667667- * @start: start virtual address in the guest address space668668- * @end: end virtual address in the guest address space669669- */670670-static void gmap_call_notifier(struct gmap *gmap, unsigned long start,671671- unsigned long end)672672-{673673- struct gmap_notifier *nb;674674-675675- list_for_each_entry(nb, &gmap_notifier_list, list)676676- nb->notifier_call(gmap, start, end);677677-}678678-679679-/**680680- * gmap_table_walk - walk the gmap page tables681681- * @gmap: pointer to guest mapping meta data structure682682- * @gaddr: virtual address in the guest address space683683- * @level: page table level to stop at684684- *685685- * Returns a table entry pointer for the given guest address and @level686686- * @level=0 : returns a pointer to a page table table entry (or NULL)687687- * @level=1 : returns a pointer to a segment table entry (or NULL)688688- * @level=2 : returns a pointer to a region-3 table entry (or NULL)689689- * @level=3 : returns a pointer to a region-2 table entry (or NULL)690690- * @level=4 : returns a pointer to a region-1 table entry (or NULL)691691- *692692- * Returns NULL if the gmap page tables could not be walked to the693693- * requested level.694694- *695695- * Note: Can also be called for shadow gmaps.696696- */697697-unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)698698-{699699- const int asce_type = gmap->asce & _ASCE_TYPE_MASK;700700- unsigned long *table = gmap->table;701701-702702- if (gmap_is_shadow(gmap) && gmap->removed)703703- return NULL;704704-705705- if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))706706- return NULL;707707-708708- if (asce_type != _ASCE_TYPE_REGION1 &&709709- gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))710710- return NULL;711711-712712- switch (asce_type) {713713- case _ASCE_TYPE_REGION1:714714- table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;715715- if (level == 4)716716- break;717717- if (*table & _REGION_ENTRY_INVALID)718718- return NULL;719719- table = __va(*table & _REGION_ENTRY_ORIGIN);720720- fallthrough;721721- case _ASCE_TYPE_REGION2:722722- table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;723723- if (level == 3)724724- break;725725- if (*table & _REGION_ENTRY_INVALID)726726- return NULL;727727- table = __va(*table & _REGION_ENTRY_ORIGIN);728728- fallthrough;729729- case _ASCE_TYPE_REGION3:730730- table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;731731- if (level == 2)732732- break;733733- if (*table & _REGION_ENTRY_INVALID)734734- return NULL;735735- table = __va(*table & _REGION_ENTRY_ORIGIN);736736- fallthrough;737737- case _ASCE_TYPE_SEGMENT:738738- table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;739739- if (level == 1)740740- break;741741- if (*table & _REGION_ENTRY_INVALID)742742- return NULL;743743- table = __va(*table & _SEGMENT_ENTRY_ORIGIN);744744- table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;745745- }746746- return table;747747-}748748-EXPORT_SYMBOL(gmap_table_walk);749749-750750-/**751751- * gmap_pte_op_walk - walk the gmap page table, get the page table lock752752- * and return the pte pointer753753- * @gmap: pointer to guest mapping meta data structure754754- * @gaddr: virtual address in the guest address space755755- * @ptl: pointer to the spinlock pointer756756- *757757- * Returns a pointer to the locked pte for a guest address, or NULL758758- */759759-static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,760760- spinlock_t **ptl)761761-{762762- unsigned long *table;763763-764764- BUG_ON(gmap_is_shadow(gmap));765765- /* Walk the gmap page table, lock and get pte pointer */766766- table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */767767- if (!table || *table & _SEGMENT_ENTRY_INVALID)768768- return NULL;769769- return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);770770-}771771-772772-/**773773- * gmap_pte_op_fixup - force a page in and connect the gmap page table774774- * @gmap: pointer to guest mapping meta data structure775775- * @gaddr: virtual address in the guest address space776776- * @vmaddr: address in the host process address space777777- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE778778- *779779- * Returns 0 if the caller can retry __gmap_translate (might fail again),780780- * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing781781- * up or connecting the gmap page table.782782- */783783-static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,784784- unsigned long vmaddr, int prot)785785-{786786- struct mm_struct *mm = gmap->mm;787787- unsigned int fault_flags;788788- bool unlocked = false;789789-790790- BUG_ON(gmap_is_shadow(gmap));791791- fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;792792- if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))793793- return -EFAULT;794794- if (unlocked)795795- /* lost mmap_lock, caller has to retry __gmap_translate */796796- return 0;797797- /* Connect the page tables */798798- return __gmap_link(gmap, gaddr, vmaddr);799799-}800800-801801-/**802802- * gmap_pte_op_end - release the page table lock803803- * @ptep: pointer to the locked pte804804- * @ptl: pointer to the page table spinlock805805- */806806-static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)807807-{808808- pte_unmap_unlock(ptep, ptl);809809-}810810-811811-/**812812- * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock813813- * and return the pmd pointer814814- * @gmap: pointer to guest mapping meta data structure815815- * @gaddr: virtual address in the guest address space816816- *817817- * Returns a pointer to the pmd for a guest address, or NULL818818- */819819-static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)820820-{821821- pmd_t *pmdp;822822-823823- BUG_ON(gmap_is_shadow(gmap));824824- pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);825825- if (!pmdp)826826- return NULL;827827-828828- /* without huge pages, there is no need to take the table lock */829829- if (!gmap->mm->context.allow_gmap_hpage_1m)830830- return pmd_none(*pmdp) ? NULL : pmdp;831831-832832- spin_lock(&gmap->guest_table_lock);833833- if (pmd_none(*pmdp)) {834834- spin_unlock(&gmap->guest_table_lock);835835- return NULL;836836- }837837-838838- /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */839839- if (!pmd_leaf(*pmdp))840840- spin_unlock(&gmap->guest_table_lock);841841- return pmdp;842842-}843843-844844-/**845845- * gmap_pmd_op_end - release the guest_table_lock if needed846846- * @gmap: pointer to the guest mapping meta data structure847847- * @pmdp: pointer to the pmd848848- */849849-static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)850850-{851851- if (pmd_leaf(*pmdp))852852- spin_unlock(&gmap->guest_table_lock);853853-}854854-855855-/*856856- * gmap_protect_pmd - remove access rights to memory and set pmd notification bits857857- * @pmdp: pointer to the pmd to be protected858858- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE859859- * @bits: notification bits to set860860- *861861- * Returns:862862- * 0 if successfully protected863863- * -EAGAIN if a fixup is needed864864- * -EINVAL if unsupported notifier bits have been specified865865- *866866- * Expected to be called with sg->mm->mmap_lock in read and867867- * guest_table_lock held.868868- */869869-static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,870870- pmd_t *pmdp, int prot, unsigned long bits)871871-{872872- int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;873873- int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;874874- pmd_t new = *pmdp;875875-876876- /* Fixup needed */877877- if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))878878- return -EAGAIN;879879-880880- if (prot == PROT_NONE && !pmd_i) {881881- new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));882882- gmap_pmdp_xchg(gmap, pmdp, new, gaddr);883883- }884884-885885- if (prot == PROT_READ && !pmd_p) {886886- new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));887887- new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));888888- gmap_pmdp_xchg(gmap, pmdp, new, gaddr);889889- }890890-891891- if (bits & GMAP_NOTIFY_MPROT)892892- set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));893893-894894- /* Shadow GMAP protection needs split PMDs */895895- if (bits & GMAP_NOTIFY_SHADOW)896896- return -EINVAL;897897-898898- return 0;899899-}900900-901901-/*902902- * gmap_protect_pte - remove access rights to memory and set pgste bits903903- * @gmap: pointer to guest mapping meta data structure904904- * @gaddr: virtual address in the guest address space905905- * @pmdp: pointer to the pmd associated with the pte906906- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE907907- * @bits: notification bits to set908908- *909909- * Returns 0 if successfully protected, -ENOMEM if out of memory and910910- * -EAGAIN if a fixup is needed.911911- *912912- * Expected to be called with sg->mm->mmap_lock in read913913- */914914-static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,915915- pmd_t *pmdp, int prot, unsigned long bits)916916-{917917- int rc;918918- pte_t *ptep;919919- spinlock_t *ptl;920920- unsigned long pbits = 0;921921-922922- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)923923- return -EAGAIN;924924-925925- ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);926926- if (!ptep)927927- return -ENOMEM;928928-929929- pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;930930- pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;931931- /* Protect and unlock. */932932- rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);933933- gmap_pte_op_end(ptep, ptl);934934- return rc;935935-}936936-937937-/*938938- * gmap_protect_range - remove access rights to memory and set pgste bits939939- * @gmap: pointer to guest mapping meta data structure940940- * @gaddr: virtual address in the guest address space941941- * @len: size of area942942- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE943943- * @bits: pgste notification bits to set944944- *945945- * Returns:946946- * PAGE_SIZE if a small page was successfully protected;947947- * HPAGE_SIZE if a large page was successfully protected;948948- * -ENOMEM if out of memory;949949- * -EFAULT if gaddr is invalid (or mapping for shadows is missing);950950- * -EAGAIN if the guest mapping is missing and should be fixed by the caller.951951- *952952- * Context: Called with sg->mm->mmap_lock in read.953953- */954954-int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)955955-{956956- pmd_t *pmdp;957957- int rc = 0;958958-959959- BUG_ON(gmap_is_shadow(gmap));960960-961961- pmdp = gmap_pmd_op_walk(gmap, gaddr);962962- if (!pmdp)963963- return -EAGAIN;964964-965965- if (!pmd_leaf(*pmdp)) {966966- rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);967967- if (!rc)968968- rc = PAGE_SIZE;969969- } else {970970- rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);971971- if (!rc)972972- rc = HPAGE_SIZE;973973- }974974- gmap_pmd_op_end(gmap, pmdp);975975-976976- return rc;977977-}978978-EXPORT_SYMBOL_GPL(gmap_protect_one);979979-980980-/**981981- * gmap_read_table - get an unsigned long value from a guest page table using982982- * absolute addressing, without marking the page referenced.983983- * @gmap: pointer to guest mapping meta data structure984984- * @gaddr: virtual address in the guest address space985985- * @val: pointer to the unsigned long value to return986986- *987987- * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT988988- * if reading using the virtual address failed. -EINVAL if called on a gmap989989- * shadow.990990- *991991- * Called with gmap->mm->mmap_lock in read.992992- */993993-int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)994994-{995995- unsigned long address, vmaddr;996996- spinlock_t *ptl;997997- pte_t *ptep, pte;998998- int rc;999999-10001000- if (gmap_is_shadow(gmap))10011001- return -EINVAL;10021002-10031003- while (1) {10041004- rc = -EAGAIN;10051005- ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);10061006- if (ptep) {10071007- pte = *ptep;10081008- if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {10091009- address = pte_val(pte) & PAGE_MASK;10101010- address += gaddr & ~PAGE_MASK;10111011- *val = *(unsigned long *)__va(address);10121012- set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));10131013- /* Do *NOT* clear the _PAGE_INVALID bit! */10141014- rc = 0;10151015- }10161016- gmap_pte_op_end(ptep, ptl);10171017- }10181018- if (!rc)10191019- break;10201020- vmaddr = __gmap_translate(gmap, gaddr);10211021- if (IS_ERR_VALUE(vmaddr)) {10221022- rc = vmaddr;10231023- break;10241024- }10251025- rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);10261026- if (rc)10271027- break;10281028- }10291029- return rc;10301030-}10311031-EXPORT_SYMBOL_GPL(gmap_read_table);10321032-10331033-/**10341034- * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree10351035- * @sg: pointer to the shadow guest address space structure10361036- * @vmaddr: vm address associated with the rmap10371037- * @rmap: pointer to the rmap structure10381038- *10391039- * Called with the sg->guest_table_lock10401040- */10411041-static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,10421042- struct gmap_rmap *rmap)10431043-{10441044- struct gmap_rmap *temp;10451045- void __rcu **slot;10461046-10471047- BUG_ON(!gmap_is_shadow(sg));10481048- slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);10491049- if (slot) {10501050- rmap->next = radix_tree_deref_slot_protected(slot,10511051- &sg->guest_table_lock);10521052- for (temp = rmap->next; temp; temp = temp->next) {10531053- if (temp->raddr == rmap->raddr) {10541054- kfree(rmap);10551055- return;10561056- }10571057- }10581058- radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);10591059- } else {10601060- rmap->next = NULL;10611061- radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,10621062- rmap);10631063- }10641064-}10651065-10661066-/**10671067- * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap10681068- * @sg: pointer to the shadow guest address space structure10691069- * @raddr: rmap address in the shadow gmap10701070- * @paddr: address in the parent guest address space10711071- * @len: length of the memory area to protect10721072- *10731073- * Returns 0 if successfully protected and the rmap was created, -ENOMEM10741074- * if out of memory and -EFAULT if paddr is invalid.10751075- */10761076-static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,10771077- unsigned long paddr, unsigned long len)10781078-{10791079- struct gmap *parent;10801080- struct gmap_rmap *rmap;10811081- unsigned long vmaddr;10821082- spinlock_t *ptl;10831083- pte_t *ptep;10841084- int rc;10851085-10861086- BUG_ON(!gmap_is_shadow(sg));10871087- parent = sg->parent;10881088- while (len) {10891089- vmaddr = __gmap_translate(parent, paddr);10901090- if (IS_ERR_VALUE(vmaddr))10911091- return vmaddr;10921092- rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);10931093- if (!rmap)10941094- return -ENOMEM;10951095- rmap->raddr = raddr;10961096- rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);10971097- if (rc) {10981098- kfree(rmap);10991099- return rc;11001100- }11011101- rc = -EAGAIN;11021102- ptep = gmap_pte_op_walk(parent, paddr, &ptl);11031103- if (ptep) {11041104- spin_lock(&sg->guest_table_lock);11051105- rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,11061106- PGSTE_VSIE_BIT);11071107- if (!rc)11081108- gmap_insert_rmap(sg, vmaddr, rmap);11091109- spin_unlock(&sg->guest_table_lock);11101110- gmap_pte_op_end(ptep, ptl);11111111- }11121112- radix_tree_preload_end();11131113- if (rc) {11141114- kfree(rmap);11151115- rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);11161116- if (rc)11171117- return rc;11181118- continue;11191119- }11201120- paddr += PAGE_SIZE;11211121- len -= PAGE_SIZE;11221122- }11231123- return 0;11241124-}11251125-11261126-#define _SHADOW_RMAP_MASK 0x711271127-#define _SHADOW_RMAP_REGION1 0x511281128-#define _SHADOW_RMAP_REGION2 0x411291129-#define _SHADOW_RMAP_REGION3 0x311301130-#define _SHADOW_RMAP_SEGMENT 0x211311131-#define _SHADOW_RMAP_PGTABLE 0x111321132-11331133-/**11341134- * gmap_idte_one - invalidate a single region or segment table entry11351135- * @asce: region or segment table *origin* + table-type bits11361136- * @vaddr: virtual address to identify the table entry to flush11371137- *11381138- * The invalid bit of a single region or segment table entry is set11391139- * and the associated TLB entries depending on the entry are flushed.11401140- * The table-type of the @asce identifies the portion of the @vaddr11411141- * that is used as the invalidation index.11421142- */11431143-static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)11441144-{11451145- asm volatile(11461146- " idte %0,0,%1"11471147- : : "a" (asce), "a" (vaddr) : "cc", "memory");11481148-}11491149-11501150-/**11511151- * gmap_unshadow_page - remove a page from a shadow page table11521152- * @sg: pointer to the shadow guest address space structure11531153- * @raddr: rmap address in the shadow guest address space11541154- *11551155- * Called with the sg->guest_table_lock11561156- */11571157-static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)11581158-{11591159- unsigned long *table;11601160-11611161- BUG_ON(!gmap_is_shadow(sg));11621162- table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */11631163- if (!table || *table & _PAGE_INVALID)11641164- return;11651165- gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1);11661166- ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);11671167-}11681168-11691169-/**11701170- * __gmap_unshadow_pgt - remove all entries from a shadow page table11711171- * @sg: pointer to the shadow guest address space structure11721172- * @raddr: rmap address in the shadow guest address space11731173- * @pgt: pointer to the start of a shadow page table11741174- *11751175- * Called with the sg->guest_table_lock11761176- */11771177-static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,11781178- unsigned long *pgt)11791179-{11801180- int i;11811181-11821182- BUG_ON(!gmap_is_shadow(sg));11831183- for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE)11841184- pgt[i] = _PAGE_INVALID;11851185-}11861186-11871187-/**11881188- * gmap_unshadow_pgt - remove a shadow page table from a segment entry11891189- * @sg: pointer to the shadow guest address space structure11901190- * @raddr: address in the shadow guest address space11911191- *11921192- * Called with the sg->guest_table_lock11931193- */11941194-static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)11951195-{11961196- unsigned long *ste;11971197- phys_addr_t sto, pgt;11981198- struct ptdesc *ptdesc;11991199-12001200- BUG_ON(!gmap_is_shadow(sg));12011201- ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */12021202- if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))12031203- return;12041204- gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);12051205- sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));12061206- gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);12071207- pgt = *ste & _SEGMENT_ENTRY_ORIGIN;12081208- *ste = _SEGMENT_ENTRY_EMPTY;12091209- __gmap_unshadow_pgt(sg, raddr, __va(pgt));12101210- /* Free page table */12111211- ptdesc = page_ptdesc(phys_to_page(pgt));12121212- page_table_free_pgste(ptdesc);12131213-}12141214-12151215-/**12161216- * __gmap_unshadow_sgt - remove all entries from a shadow segment table12171217- * @sg: pointer to the shadow guest address space structure12181218- * @raddr: rmap address in the shadow guest address space12191219- * @sgt: pointer to the start of a shadow segment table12201220- *12211221- * Called with the sg->guest_table_lock12221222- */12231223-static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,12241224- unsigned long *sgt)12251225-{12261226- struct ptdesc *ptdesc;12271227- phys_addr_t pgt;12281228- int i;12291229-12301230- BUG_ON(!gmap_is_shadow(sg));12311231- for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {12321232- if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))12331233- continue;12341234- pgt = sgt[i] & _REGION_ENTRY_ORIGIN;12351235- sgt[i] = _SEGMENT_ENTRY_EMPTY;12361236- __gmap_unshadow_pgt(sg, raddr, __va(pgt));12371237- /* Free page table */12381238- ptdesc = page_ptdesc(phys_to_page(pgt));12391239- page_table_free_pgste(ptdesc);12401240- }12411241-}12421242-12431243-/**12441244- * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry12451245- * @sg: pointer to the shadow guest address space structure12461246- * @raddr: rmap address in the shadow guest address space12471247- *12481248- * Called with the shadow->guest_table_lock12491249- */12501250-static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)12511251-{12521252- unsigned long r3o, *r3e;12531253- phys_addr_t sgt;12541254- struct page *page;12551255-12561256- BUG_ON(!gmap_is_shadow(sg));12571257- r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */12581258- if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))12591259- return;12601260- gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);12611261- r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));12621262- gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);12631263- sgt = *r3e & _REGION_ENTRY_ORIGIN;12641264- *r3e = _REGION3_ENTRY_EMPTY;12651265- __gmap_unshadow_sgt(sg, raddr, __va(sgt));12661266- /* Free segment table */12671267- page = phys_to_page(sgt);12681268- __free_pages(page, CRST_ALLOC_ORDER);12691269-}12701270-12711271-/**12721272- * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table12731273- * @sg: pointer to the shadow guest address space structure12741274- * @raddr: address in the shadow guest address space12751275- * @r3t: pointer to the start of a shadow region-3 table12761276- *12771277- * Called with the sg->guest_table_lock12781278- */12791279-static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,12801280- unsigned long *r3t)12811281-{12821282- struct page *page;12831283- phys_addr_t sgt;12841284- int i;12851285-12861286- BUG_ON(!gmap_is_shadow(sg));12871287- for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {12881288- if (!(r3t[i] & _REGION_ENTRY_ORIGIN))12891289- continue;12901290- sgt = r3t[i] & _REGION_ENTRY_ORIGIN;12911291- r3t[i] = _REGION3_ENTRY_EMPTY;12921292- __gmap_unshadow_sgt(sg, raddr, __va(sgt));12931293- /* Free segment table */12941294- page = phys_to_page(sgt);12951295- __free_pages(page, CRST_ALLOC_ORDER);12961296- }12971297-}12981298-12991299-/**13001300- * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry13011301- * @sg: pointer to the shadow guest address space structure13021302- * @raddr: rmap address in the shadow guest address space13031303- *13041304- * Called with the sg->guest_table_lock13051305- */13061306-static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)13071307-{13081308- unsigned long r2o, *r2e;13091309- phys_addr_t r3t;13101310- struct page *page;13111311-13121312- BUG_ON(!gmap_is_shadow(sg));13131313- r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */13141314- if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))13151315- return;13161316- gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);13171317- r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));13181318- gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);13191319- r3t = *r2e & _REGION_ENTRY_ORIGIN;13201320- *r2e = _REGION2_ENTRY_EMPTY;13211321- __gmap_unshadow_r3t(sg, raddr, __va(r3t));13221322- /* Free region 3 table */13231323- page = phys_to_page(r3t);13241324- __free_pages(page, CRST_ALLOC_ORDER);13251325-}13261326-13271327-/**13281328- * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table13291329- * @sg: pointer to the shadow guest address space structure13301330- * @raddr: rmap address in the shadow guest address space13311331- * @r2t: pointer to the start of a shadow region-2 table13321332- *13331333- * Called with the sg->guest_table_lock13341334- */13351335-static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,13361336- unsigned long *r2t)13371337-{13381338- phys_addr_t r3t;13391339- struct page *page;13401340- int i;13411341-13421342- BUG_ON(!gmap_is_shadow(sg));13431343- for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {13441344- if (!(r2t[i] & _REGION_ENTRY_ORIGIN))13451345- continue;13461346- r3t = r2t[i] & _REGION_ENTRY_ORIGIN;13471347- r2t[i] = _REGION2_ENTRY_EMPTY;13481348- __gmap_unshadow_r3t(sg, raddr, __va(r3t));13491349- /* Free region 3 table */13501350- page = phys_to_page(r3t);13511351- __free_pages(page, CRST_ALLOC_ORDER);13521352- }13531353-}13541354-13551355-/**13561356- * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry13571357- * @sg: pointer to the shadow guest address space structure13581358- * @raddr: rmap address in the shadow guest address space13591359- *13601360- * Called with the sg->guest_table_lock13611361- */13621362-static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)13631363-{13641364- unsigned long r1o, *r1e;13651365- struct page *page;13661366- phys_addr_t r2t;13671367-13681368- BUG_ON(!gmap_is_shadow(sg));13691369- r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */13701370- if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))13711371- return;13721372- gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);13731373- r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));13741374- gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);13751375- r2t = *r1e & _REGION_ENTRY_ORIGIN;13761376- *r1e = _REGION1_ENTRY_EMPTY;13771377- __gmap_unshadow_r2t(sg, raddr, __va(r2t));13781378- /* Free region 2 table */13791379- page = phys_to_page(r2t);13801380- __free_pages(page, CRST_ALLOC_ORDER);13811381-}13821382-13831383-/**13841384- * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table13851385- * @sg: pointer to the shadow guest address space structure13861386- * @raddr: rmap address in the shadow guest address space13871387- * @r1t: pointer to the start of a shadow region-1 table13881388- *13891389- * Called with the shadow->guest_table_lock13901390- */13911391-static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,13921392- unsigned long *r1t)13931393-{13941394- unsigned long asce;13951395- struct page *page;13961396- phys_addr_t r2t;13971397- int i;13981398-13991399- BUG_ON(!gmap_is_shadow(sg));14001400- asce = __pa(r1t) | _ASCE_TYPE_REGION1;14011401- for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {14021402- if (!(r1t[i] & _REGION_ENTRY_ORIGIN))14031403- continue;14041404- r2t = r1t[i] & _REGION_ENTRY_ORIGIN;14051405- __gmap_unshadow_r2t(sg, raddr, __va(r2t));14061406- /* Clear entry and flush translation r1t -> r2t */14071407- gmap_idte_one(asce, raddr);14081408- r1t[i] = _REGION1_ENTRY_EMPTY;14091409- /* Free region 2 table */14101410- page = phys_to_page(r2t);14111411- __free_pages(page, CRST_ALLOC_ORDER);14121412- }14131413-}14141414-14151415-/**14161416- * gmap_unshadow - remove a shadow page table completely14171417- * @sg: pointer to the shadow guest address space structure14181418- *14191419- * Called with sg->guest_table_lock14201420- */14211421-void gmap_unshadow(struct gmap *sg)14221422-{14231423- unsigned long *table;14241424-14251425- BUG_ON(!gmap_is_shadow(sg));14261426- if (sg->removed)14271427- return;14281428- sg->removed = 1;14291429- gmap_call_notifier(sg, 0, -1UL);14301430- gmap_flush_tlb(sg);14311431- table = __va(sg->asce & _ASCE_ORIGIN);14321432- switch (sg->asce & _ASCE_TYPE_MASK) {14331433- case _ASCE_TYPE_REGION1:14341434- __gmap_unshadow_r1t(sg, 0, table);14351435- break;14361436- case _ASCE_TYPE_REGION2:14371437- __gmap_unshadow_r2t(sg, 0, table);14381438- break;14391439- case _ASCE_TYPE_REGION3:14401440- __gmap_unshadow_r3t(sg, 0, table);14411441- break;14421442- case _ASCE_TYPE_SEGMENT:14431443- __gmap_unshadow_sgt(sg, 0, table);14441444- break;14451445- }14461446-}14471447-EXPORT_SYMBOL(gmap_unshadow);14481448-14491449-/**14501450- * gmap_shadow_r2t - create an empty shadow region 2 table14511451- * @sg: pointer to the shadow guest address space structure14521452- * @saddr: faulting address in the shadow gmap14531453- * @r2t: parent gmap address of the region 2 table to get shadowed14541454- * @fake: r2t references contiguous guest memory block, not a r2t14551455- *14561456- * The r2t parameter specifies the address of the source table. The14571457- * four pages of the source table are made read-only in the parent gmap14581458- * address space. A write to the source table area @r2t will automatically14591459- * remove the shadow r2 table and all of its descendants.14601460- *14611461- * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the14621462- * shadow table structure is incomplete, -ENOMEM if out of memory and14631463- * -EFAULT if an address in the parent gmap could not be resolved.14641464- *14651465- * Called with sg->mm->mmap_lock in read.14661466- */14671467-int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,14681468- int fake)14691469-{14701470- unsigned long raddr, origin, offset, len;14711471- unsigned long *table;14721472- phys_addr_t s_r2t;14731473- struct page *page;14741474- int rc;14751475-14761476- BUG_ON(!gmap_is_shadow(sg));14771477- /* Allocate a shadow region second table */14781478- page = gmap_alloc_crst();14791479- if (!page)14801480- return -ENOMEM;14811481- s_r2t = page_to_phys(page);14821482- /* Install shadow region second table */14831483- spin_lock(&sg->guest_table_lock);14841484- table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */14851485- if (!table) {14861486- rc = -EAGAIN; /* Race with unshadow */14871487- goto out_free;14881488- }14891489- if (!(*table & _REGION_ENTRY_INVALID)) {14901490- rc = 0; /* Already established */14911491- goto out_free;14921492- } else if (*table & _REGION_ENTRY_ORIGIN) {14931493- rc = -EAGAIN; /* Race with shadow */14941494- goto out_free;14951495- }14961496- crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);14971497- /* mark as invalid as long as the parent table is not protected */14981498- *table = s_r2t | _REGION_ENTRY_LENGTH |14991499- _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;15001500- if (sg->edat_level >= 1)15011501- *table |= (r2t & _REGION_ENTRY_PROTECT);15021502- if (fake) {15031503- /* nothing to protect for fake tables */15041504- *table &= ~_REGION_ENTRY_INVALID;15051505- spin_unlock(&sg->guest_table_lock);15061506- return 0;15071507- }15081508- spin_unlock(&sg->guest_table_lock);15091509- /* Make r2t read-only in parent gmap page table */15101510- raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;15111511- origin = r2t & _REGION_ENTRY_ORIGIN;15121512- offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;15131513- len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;15141514- rc = gmap_protect_rmap(sg, raddr, origin + offset, len);15151515- spin_lock(&sg->guest_table_lock);15161516- if (!rc) {15171517- table = gmap_table_walk(sg, saddr, 4);15181518- if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)15191519- rc = -EAGAIN; /* Race with unshadow */15201520- else15211521- *table &= ~_REGION_ENTRY_INVALID;15221522- } else {15231523- gmap_unshadow_r2t(sg, raddr);15241524- }15251525- spin_unlock(&sg->guest_table_lock);15261526- return rc;15271527-out_free:15281528- spin_unlock(&sg->guest_table_lock);15291529- __free_pages(page, CRST_ALLOC_ORDER);15301530- return rc;15311531-}15321532-EXPORT_SYMBOL_GPL(gmap_shadow_r2t);15331533-15341534-/**15351535- * gmap_shadow_r3t - create a shadow region 3 table15361536- * @sg: pointer to the shadow guest address space structure15371537- * @saddr: faulting address in the shadow gmap15381538- * @r3t: parent gmap address of the region 3 table to get shadowed15391539- * @fake: r3t references contiguous guest memory block, not a r3t15401540- *15411541- * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the15421542- * shadow table structure is incomplete, -ENOMEM if out of memory and15431543- * -EFAULT if an address in the parent gmap could not be resolved.15441544- *15451545- * Called with sg->mm->mmap_lock in read.15461546- */15471547-int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,15481548- int fake)15491549-{15501550- unsigned long raddr, origin, offset, len;15511551- unsigned long *table;15521552- phys_addr_t s_r3t;15531553- struct page *page;15541554- int rc;15551555-15561556- BUG_ON(!gmap_is_shadow(sg));15571557- /* Allocate a shadow region second table */15581558- page = gmap_alloc_crst();15591559- if (!page)15601560- return -ENOMEM;15611561- s_r3t = page_to_phys(page);15621562- /* Install shadow region second table */15631563- spin_lock(&sg->guest_table_lock);15641564- table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */15651565- if (!table) {15661566- rc = -EAGAIN; /* Race with unshadow */15671567- goto out_free;15681568- }15691569- if (!(*table & _REGION_ENTRY_INVALID)) {15701570- rc = 0; /* Already established */15711571- goto out_free;15721572- } else if (*table & _REGION_ENTRY_ORIGIN) {15731573- rc = -EAGAIN; /* Race with shadow */15741574- goto out_free;15751575- }15761576- crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);15771577- /* mark as invalid as long as the parent table is not protected */15781578- *table = s_r3t | _REGION_ENTRY_LENGTH |15791579- _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;15801580- if (sg->edat_level >= 1)15811581- *table |= (r3t & _REGION_ENTRY_PROTECT);15821582- if (fake) {15831583- /* nothing to protect for fake tables */15841584- *table &= ~_REGION_ENTRY_INVALID;15851585- spin_unlock(&sg->guest_table_lock);15861586- return 0;15871587- }15881588- spin_unlock(&sg->guest_table_lock);15891589- /* Make r3t read-only in parent gmap page table */15901590- raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;15911591- origin = r3t & _REGION_ENTRY_ORIGIN;15921592- offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;15931593- len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;15941594- rc = gmap_protect_rmap(sg, raddr, origin + offset, len);15951595- spin_lock(&sg->guest_table_lock);15961596- if (!rc) {15971597- table = gmap_table_walk(sg, saddr, 3);15981598- if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)15991599- rc = -EAGAIN; /* Race with unshadow */16001600- else16011601- *table &= ~_REGION_ENTRY_INVALID;16021602- } else {16031603- gmap_unshadow_r3t(sg, raddr);16041604- }16051605- spin_unlock(&sg->guest_table_lock);16061606- return rc;16071607-out_free:16081608- spin_unlock(&sg->guest_table_lock);16091609- __free_pages(page, CRST_ALLOC_ORDER);16101610- return rc;16111611-}16121612-EXPORT_SYMBOL_GPL(gmap_shadow_r3t);16131613-16141614-/**16151615- * gmap_shadow_sgt - create a shadow segment table16161616- * @sg: pointer to the shadow guest address space structure16171617- * @saddr: faulting address in the shadow gmap16181618- * @sgt: parent gmap address of the segment table to get shadowed16191619- * @fake: sgt references contiguous guest memory block, not a sgt16201620- *16211621- * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the16221622- * shadow table structure is incomplete, -ENOMEM if out of memory and16231623- * -EFAULT if an address in the parent gmap could not be resolved.16241624- *16251625- * Called with sg->mm->mmap_lock in read.16261626- */16271627-int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,16281628- int fake)16291629-{16301630- unsigned long raddr, origin, offset, len;16311631- unsigned long *table;16321632- phys_addr_t s_sgt;16331633- struct page *page;16341634- int rc;16351635-16361636- BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));16371637- /* Allocate a shadow segment table */16381638- page = gmap_alloc_crst();16391639- if (!page)16401640- return -ENOMEM;16411641- s_sgt = page_to_phys(page);16421642- /* Install shadow region second table */16431643- spin_lock(&sg->guest_table_lock);16441644- table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */16451645- if (!table) {16461646- rc = -EAGAIN; /* Race with unshadow */16471647- goto out_free;16481648- }16491649- if (!(*table & _REGION_ENTRY_INVALID)) {16501650- rc = 0; /* Already established */16511651- goto out_free;16521652- } else if (*table & _REGION_ENTRY_ORIGIN) {16531653- rc = -EAGAIN; /* Race with shadow */16541654- goto out_free;16551655- }16561656- crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);16571657- /* mark as invalid as long as the parent table is not protected */16581658- *table = s_sgt | _REGION_ENTRY_LENGTH |16591659- _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;16601660- if (sg->edat_level >= 1)16611661- *table |= sgt & _REGION_ENTRY_PROTECT;16621662- if (fake) {16631663- /* nothing to protect for fake tables */16641664- *table &= ~_REGION_ENTRY_INVALID;16651665- spin_unlock(&sg->guest_table_lock);16661666- return 0;16671667- }16681668- spin_unlock(&sg->guest_table_lock);16691669- /* Make sgt read-only in parent gmap page table */16701670- raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;16711671- origin = sgt & _REGION_ENTRY_ORIGIN;16721672- offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;16731673- len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;16741674- rc = gmap_protect_rmap(sg, raddr, origin + offset, len);16751675- spin_lock(&sg->guest_table_lock);16761676- if (!rc) {16771677- table = gmap_table_walk(sg, saddr, 2);16781678- if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)16791679- rc = -EAGAIN; /* Race with unshadow */16801680- else16811681- *table &= ~_REGION_ENTRY_INVALID;16821682- } else {16831683- gmap_unshadow_sgt(sg, raddr);16841684- }16851685- spin_unlock(&sg->guest_table_lock);16861686- return rc;16871687-out_free:16881688- spin_unlock(&sg->guest_table_lock);16891689- __free_pages(page, CRST_ALLOC_ORDER);16901690- return rc;16911691-}16921692-EXPORT_SYMBOL_GPL(gmap_shadow_sgt);16931693-16941694-static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)16951695-{16961696- unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));16971697-16981698- pgstes += _PAGE_ENTRIES;16991699-17001700- pgstes[0] &= ~PGSTE_ST2_MASK;17011701- pgstes[1] &= ~PGSTE_ST2_MASK;17021702- pgstes[2] &= ~PGSTE_ST2_MASK;17031703- pgstes[3] &= ~PGSTE_ST2_MASK;17041704-17051705- pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;17061706- pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;17071707- pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;17081708- pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;17091709-}17101710-17111711-/**17121712- * gmap_shadow_pgt - instantiate a shadow page table17131713- * @sg: pointer to the shadow guest address space structure17141714- * @saddr: faulting address in the shadow gmap17151715- * @pgt: parent gmap address of the page table to get shadowed17161716- * @fake: pgt references contiguous guest memory block, not a pgtable17171717- *17181718- * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the17191719- * shadow table structure is incomplete, -ENOMEM if out of memory,17201720- * -EFAULT if an address in the parent gmap could not be resolved and17211721- *17221722- * Called with gmap->mm->mmap_lock in read17231723- */17241724-int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,17251725- int fake)17261726-{17271727- unsigned long raddr, origin;17281728- unsigned long *table;17291729- struct ptdesc *ptdesc;17301730- phys_addr_t s_pgt;17311731- int rc;17321732-17331733- BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));17341734- /* Allocate a shadow page table */17351735- ptdesc = page_table_alloc_pgste(sg->mm);17361736- if (!ptdesc)17371737- return -ENOMEM;17381738- origin = pgt & _SEGMENT_ENTRY_ORIGIN;17391739- if (fake)17401740- origin |= GMAP_SHADOW_FAKE_TABLE;17411741- gmap_pgste_set_pgt_addr(ptdesc, origin);17421742- s_pgt = page_to_phys(ptdesc_page(ptdesc));17431743- /* Install shadow page table */17441744- spin_lock(&sg->guest_table_lock);17451745- table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */17461746- if (!table) {17471747- rc = -EAGAIN; /* Race with unshadow */17481748- goto out_free;17491749- }17501750- if (!(*table & _SEGMENT_ENTRY_INVALID)) {17511751- rc = 0; /* Already established */17521752- goto out_free;17531753- } else if (*table & _SEGMENT_ENTRY_ORIGIN) {17541754- rc = -EAGAIN; /* Race with shadow */17551755- goto out_free;17561756- }17571757- /* mark as invalid as long as the parent table is not protected */17581758- *table = (unsigned long) s_pgt | _SEGMENT_ENTRY |17591759- (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;17601760- if (fake) {17611761- /* nothing to protect for fake tables */17621762- *table &= ~_SEGMENT_ENTRY_INVALID;17631763- spin_unlock(&sg->guest_table_lock);17641764- return 0;17651765- }17661766- spin_unlock(&sg->guest_table_lock);17671767- /* Make pgt read-only in parent gmap page table (not the pgste) */17681768- raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;17691769- origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;17701770- rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);17711771- spin_lock(&sg->guest_table_lock);17721772- if (!rc) {17731773- table = gmap_table_walk(sg, saddr, 1);17741774- if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)17751775- rc = -EAGAIN; /* Race with unshadow */17761776- else17771777- *table &= ~_SEGMENT_ENTRY_INVALID;17781778- } else {17791779- gmap_unshadow_pgt(sg, raddr);17801780- }17811781- spin_unlock(&sg->guest_table_lock);17821782- return rc;17831783-out_free:17841784- spin_unlock(&sg->guest_table_lock);17851785- page_table_free_pgste(ptdesc);17861786- return rc;17871787-17881788-}17891789-EXPORT_SYMBOL_GPL(gmap_shadow_pgt);17901790-17911791-/**17921792- * gmap_shadow_page - create a shadow page mapping17931793- * @sg: pointer to the shadow guest address space structure17941794- * @saddr: faulting address in the shadow gmap17951795- * @pte: pte in parent gmap address space to get shadowed17961796- *17971797- * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the17981798- * shadow table structure is incomplete, -ENOMEM if out of memory and17991799- * -EFAULT if an address in the parent gmap could not be resolved.18001800- *18011801- * Called with sg->mm->mmap_lock in read.18021802- */18031803-int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)18041804-{18051805- struct gmap *parent;18061806- struct gmap_rmap *rmap;18071807- unsigned long vmaddr, paddr;18081808- spinlock_t *ptl;18091809- pte_t *sptep, *tptep;18101810- int prot;18111811- int rc;18121812-18131813- BUG_ON(!gmap_is_shadow(sg));18141814- parent = sg->parent;18151815- prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;18161816-18171817- rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);18181818- if (!rmap)18191819- return -ENOMEM;18201820- rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;18211821-18221822- while (1) {18231823- paddr = pte_val(pte) & PAGE_MASK;18241824- vmaddr = __gmap_translate(parent, paddr);18251825- if (IS_ERR_VALUE(vmaddr)) {18261826- rc = vmaddr;18271827- break;18281828- }18291829- rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);18301830- if (rc)18311831- break;18321832- rc = -EAGAIN;18331833- sptep = gmap_pte_op_walk(parent, paddr, &ptl);18341834- if (sptep) {18351835- spin_lock(&sg->guest_table_lock);18361836- /* Get page table pointer */18371837- tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);18381838- if (!tptep) {18391839- spin_unlock(&sg->guest_table_lock);18401840- gmap_pte_op_end(sptep, ptl);18411841- radix_tree_preload_end();18421842- break;18431843- }18441844- rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);18451845- if (rc > 0) {18461846- /* Success and a new mapping */18471847- gmap_insert_rmap(sg, vmaddr, rmap);18481848- rmap = NULL;18491849- rc = 0;18501850- }18511851- gmap_pte_op_end(sptep, ptl);18521852- spin_unlock(&sg->guest_table_lock);18531853- }18541854- radix_tree_preload_end();18551855- if (!rc)18561856- break;18571857- rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);18581858- if (rc)18591859- break;18601860- }18611861- kfree(rmap);18621862- return rc;18631863-}18641864-EXPORT_SYMBOL_GPL(gmap_shadow_page);18651865-18661866-/*18671867- * gmap_shadow_notify - handle notifications for shadow gmap18681868- *18691869- * Called with sg->parent->shadow_lock.18701870- */18711871-static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,18721872- unsigned long gaddr)18731873-{18741874- struct gmap_rmap *rmap, *rnext, *head;18751875- unsigned long start, end, bits, raddr;18761876-18771877- BUG_ON(!gmap_is_shadow(sg));18781878-18791879- spin_lock(&sg->guest_table_lock);18801880- if (sg->removed) {18811881- spin_unlock(&sg->guest_table_lock);18821882- return;18831883- }18841884- /* Check for top level table */18851885- start = sg->orig_asce & _ASCE_ORIGIN;18861886- end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;18871887- if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&18881888- gaddr < end) {18891889- /* The complete shadow table has to go */18901890- gmap_unshadow(sg);18911891- spin_unlock(&sg->guest_table_lock);18921892- list_del(&sg->list);18931893- gmap_put(sg);18941894- return;18951895- }18961896- /* Remove the page table tree from on specific entry */18971897- head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);18981898- gmap_for_each_rmap_safe(rmap, rnext, head) {18991899- bits = rmap->raddr & _SHADOW_RMAP_MASK;19001900- raddr = rmap->raddr ^ bits;19011901- switch (bits) {19021902- case _SHADOW_RMAP_REGION1:19031903- gmap_unshadow_r2t(sg, raddr);19041904- break;19051905- case _SHADOW_RMAP_REGION2:19061906- gmap_unshadow_r3t(sg, raddr);19071907- break;19081908- case _SHADOW_RMAP_REGION3:19091909- gmap_unshadow_sgt(sg, raddr);19101910- break;19111911- case _SHADOW_RMAP_SEGMENT:19121912- gmap_unshadow_pgt(sg, raddr);19131913- break;19141914- case _SHADOW_RMAP_PGTABLE:19151915- gmap_unshadow_page(sg, raddr);19161916- break;19171917- }19181918- kfree(rmap);19191919- }19201920- spin_unlock(&sg->guest_table_lock);19211921-}19221922-19231923-/**19241924- * ptep_notify - call all invalidation callbacks for a specific pte.19251925- * @mm: pointer to the process mm_struct19261926- * @vmaddr: virtual address in the process address space19271927- * @pte: pointer to the page table entry19281928- * @bits: bits from the pgste that caused the notify call19291929- *19301930- * This function is assumed to be called with the page table lock held19311931- * for the pte to notify.19321932- */19331933-void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,19341934- pte_t *pte, unsigned long bits)19351935-{19361936- unsigned long offset, gaddr = 0;19371937- struct gmap *gmap, *sg, *next;19381938-19391939- offset = ((unsigned long) pte) & (255 * sizeof(pte_t));19401940- offset = offset * (PAGE_SIZE / sizeof(pte_t));19411941- rcu_read_lock();19421942- list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {19431943- spin_lock(&gmap->guest_table_lock);19441944- gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;19451945- spin_unlock(&gmap->guest_table_lock);19461946- if (!IS_GADDR_VALID(gaddr))19471947- continue;19481948-19491949- if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {19501950- spin_lock(&gmap->shadow_lock);19511951- list_for_each_entry_safe(sg, next,19521952- &gmap->children, list)19531953- gmap_shadow_notify(sg, vmaddr, gaddr);19541954- spin_unlock(&gmap->shadow_lock);19551955- }19561956- if (bits & PGSTE_IN_BIT)19571957- gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);19581958- }19591959- rcu_read_unlock();19601960-}19611961-EXPORT_SYMBOL_GPL(ptep_notify);19621962-19631963-static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,19641964- unsigned long gaddr)19651965-{19661966- set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));19671967- gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);19681968-}19691969-19701970-/**19711971- * gmap_pmdp_xchg - exchange a gmap pmd with another19721972- * @gmap: pointer to the guest address space structure19731973- * @pmdp: pointer to the pmd entry19741974- * @new: replacement entry19751975- * @gaddr: the affected guest address19761976- *19771977- * This function is assumed to be called with the guest_table_lock19781978- * held.19791979- */19801980-static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,19811981- unsigned long gaddr)19821982-{19831983- gaddr &= HPAGE_MASK;19841984- pmdp_notify_gmap(gmap, pmdp, gaddr);19851985- new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));19861986- if (machine_has_tlb_guest())19871987- __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,19881988- IDTE_GLOBAL);19891989- else19901990- __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);19911991- set_pmd(pmdp, new);19921992-}19931993-19941994-static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,19951995- int purge)19961996-{19971997- pmd_t *pmdp;19981998- struct gmap *gmap;19991999- unsigned long gaddr;20002000-20012001- rcu_read_lock();20022002- list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {20032003- spin_lock(&gmap->guest_table_lock);20042004- pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);20052005- if (pmdp) {20062006- pmdp_notify_gmap(gmap, pmdp, gaddr);20072007- WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |20082008- _SEGMENT_ENTRY_GMAP_UC |20092009- _SEGMENT_ENTRY));20102010- if (purge)20112011- __pmdp_cspg(pmdp);20122012- set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));20132013- }20142014- spin_unlock(&gmap->guest_table_lock);20152015- }20162016- rcu_read_unlock();20172017-}20182018-20192019-/**20202020- * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without20212021- * flushing20222022- * @mm: pointer to the process mm_struct20232023- * @vmaddr: virtual address in the process address space20242024- */20252025-void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)20262026-{20272027- gmap_pmdp_clear(mm, vmaddr, 0);20282028-}20292029-EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);20302030-20312031-/**20322032- * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry20332033- * @mm: pointer to the process mm_struct20342034- * @vmaddr: virtual address in the process address space20352035- */20362036-void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)20372037-{20382038- unsigned long gaddr;20392039- struct gmap *gmap;20402040- pmd_t *pmdp;20412041-20422042- rcu_read_lock();20432043- list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {20442044- spin_lock(&gmap->guest_table_lock);20452045- pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);20462046- if (pmdp) {20472047- pmdp_notify_gmap(gmap, pmdp, gaddr);20482048- WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |20492049- _SEGMENT_ENTRY_GMAP_UC |20502050- _SEGMENT_ENTRY));20512051- if (machine_has_tlb_guest())20522052- __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,20532053- gmap->asce, IDTE_LOCAL);20542054- else20552055- __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);20562056- *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);20572057- }20582058- spin_unlock(&gmap->guest_table_lock);20592059- }20602060- rcu_read_unlock();20612061-}20622062-EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);20632063-20642064-/**20652065- * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry20662066- * @mm: pointer to the process mm_struct20672067- * @vmaddr: virtual address in the process address space20682068- */20692069-void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)20702070-{20712071- unsigned long gaddr;20722072- struct gmap *gmap;20732073- pmd_t *pmdp;20742074-20752075- rcu_read_lock();20762076- list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {20772077- spin_lock(&gmap->guest_table_lock);20782078- pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);20792079- if (pmdp) {20802080- pmdp_notify_gmap(gmap, pmdp, gaddr);20812081- WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |20822082- _SEGMENT_ENTRY_GMAP_UC |20832083- _SEGMENT_ENTRY));20842084- if (machine_has_tlb_guest())20852085- __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,20862086- gmap->asce, IDTE_GLOBAL);20872087- else20882088- __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);20892089- *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);20902090- }20912091- spin_unlock(&gmap->guest_table_lock);20922092- }20932093- rcu_read_unlock();20942094-}20952095-EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);20962096-20972097-/**20982098- * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status20992099- * @gmap: pointer to guest address space21002100- * @pmdp: pointer to the pmd to be tested21012101- * @gaddr: virtual address in the guest address space21022102- *21032103- * This function is assumed to be called with the guest_table_lock21042104- * held.21052105- */21062106-static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,21072107- unsigned long gaddr)21082108-{21092109- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)21102110- return false;21112111-21122112- /* Already protected memory, which did not change is clean */21132113- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&21142114- !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))21152115- return false;21162116-21172117- /* Clear UC indication and reset protection */21182118- set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));21192119- gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);21202120- return true;21212121-}21222122-21232123-/**21242124- * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment21252125- * @gmap: pointer to guest address space21262126- * @bitmap: dirty bitmap for this pmd21272127- * @gaddr: virtual address in the guest address space21282128- * @vmaddr: virtual address in the host address space21292129- *21302130- * This function is assumed to be called with the guest_table_lock21312131- * held.21322132- */21332133-void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],21342134- unsigned long gaddr, unsigned long vmaddr)21352135-{21362136- int i;21372137- pmd_t *pmdp;21382138- pte_t *ptep;21392139- spinlock_t *ptl;21402140-21412141- pmdp = gmap_pmd_op_walk(gmap, gaddr);21422142- if (!pmdp)21432143- return;21442144-21452145- if (pmd_leaf(*pmdp)) {21462146- if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))21472147- bitmap_fill(bitmap, _PAGE_ENTRIES);21482148- } else {21492149- for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {21502150- ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);21512151- if (!ptep)21522152- continue;21532153- if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))21542154- set_bit(i, bitmap);21552155- pte_unmap_unlock(ptep, ptl);21562156- }21572157- }21582158- gmap_pmd_op_end(gmap, pmdp);21592159-}21602160-EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);21612161-21622162-#ifdef CONFIG_TRANSPARENT_HUGEPAGE21632163-static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,21642164- unsigned long end, struct mm_walk *walk)21652165-{21662166- struct vm_area_struct *vma = walk->vma;21672167-21682168- split_huge_pmd(vma, pmd, addr);21692169- return 0;21702170-}21712171-21722172-static const struct mm_walk_ops thp_split_walk_ops = {21732173- .pmd_entry = thp_split_walk_pmd_entry,21742174- .walk_lock = PGWALK_WRLOCK_VERIFY,21752175-};21762176-21772177-static inline void thp_split_mm(struct mm_struct *mm)21782178-{21792179- struct vm_area_struct *vma;21802180- VMA_ITERATOR(vmi, mm, 0);21812181-21822182- for_each_vma(vmi, vma) {21832183- vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);21842184- walk_page_vma(vma, &thp_split_walk_ops, NULL);21852185- }21862186- mm->def_flags |= VM_NOHUGEPAGE;21872187-}21882188-#else21892189-static inline void thp_split_mm(struct mm_struct *mm)21902190-{21912191-}21922192-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */21932193-21942194-/*21952195- * switch on pgstes for its userspace process (for kvm)21962196- */21972197-int s390_enable_sie(void)21982198-{21992199- struct mm_struct *mm = current->mm;22002200-22012201- /* Do we have pgstes? if yes, we are done */22022202- if (mm_has_pgste(mm))22032203- return 0;22042204- mmap_write_lock(mm);22052205- mm->context.has_pgste = 1;22062206- /* split thp mappings and disable thp for future mappings */22072207- thp_split_mm(mm);22082208- mmap_write_unlock(mm);22092209- return 0;22102210-}22112211-EXPORT_SYMBOL_GPL(s390_enable_sie);22122212-22132213-/*22142214- * Enable storage key handling from now on and initialize the storage22152215- * keys with the default key.22162216- */22172217-static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,22182218- unsigned long next, struct mm_walk *walk)22192219-{22202220- /* Clear storage key */22212221- ptep_zap_key(walk->mm, addr, pte);22222222- return 0;22232223-}22242224-22252225-/*22262226- * Give a chance to schedule after setting a key to 256 pages.22272227- * We only hold the mm lock, which is a rwsem and the kvm srcu.22282228- * Both can sleep.22292229- */22302230-static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,22312231- unsigned long next, struct mm_walk *walk)22322232-{22332233- cond_resched();22342234- return 0;22352235-}22362236-22372237-static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,22382238- unsigned long hmask, unsigned long next,22392239- struct mm_walk *walk)22402240-{22412241- pmd_t *pmd = (pmd_t *)pte;22422242- unsigned long start, end;22432243- struct folio *folio = page_folio(pmd_page(*pmd));22442244-22452245- /*22462246- * The write check makes sure we do not set a key on shared22472247- * memory. This is needed as the walker does not differentiate22482248- * between actual guest memory and the process executable or22492249- * shared libraries.22502250- */22512251- if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||22522252- !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))22532253- return 0;22542254-22552255- start = pmd_val(*pmd) & HPAGE_MASK;22562256- end = start + HPAGE_SIZE;22572257- __storage_key_init_range(start, end);22582258- set_bit(PG_arch_1, &folio->flags.f);22592259- cond_resched();22602260- return 0;22612261-}22622262-22632263-static const struct mm_walk_ops enable_skey_walk_ops = {22642264- .hugetlb_entry = __s390_enable_skey_hugetlb,22652265- .pte_entry = __s390_enable_skey_pte,22662266- .pmd_entry = __s390_enable_skey_pmd,22672267- .walk_lock = PGWALK_WRLOCK,22682268-};22692269-22702270-int s390_enable_skey(void)22712271-{22722272- struct mm_struct *mm = current->mm;22732273- int rc = 0;22742274-22752275- mmap_write_lock(mm);22762276- if (mm_uses_skeys(mm))22772277- goto out_up;22782278-22792279- mm->context.uses_skeys = 1;22802280- rc = gmap_helper_disable_cow_sharing();22812281- if (rc) {22822282- mm->context.uses_skeys = 0;22832283- goto out_up;22842284- }22852285- walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);22862286-22872287-out_up:22882288- mmap_write_unlock(mm);22892289- return rc;22902290-}22912291-EXPORT_SYMBOL_GPL(s390_enable_skey);22922292-22932293-/*22942294- * Reset CMMA state, make all pages stable again.22952295- */22962296-static int __s390_reset_cmma(pte_t *pte, unsigned long addr,22972297- unsigned long next, struct mm_walk *walk)22982298-{22992299- ptep_zap_unused(walk->mm, addr, pte, 1);23002300- return 0;23012301-}23022302-23032303-static const struct mm_walk_ops reset_cmma_walk_ops = {23042304- .pte_entry = __s390_reset_cmma,23052305- .walk_lock = PGWALK_WRLOCK,23062306-};23072307-23082308-void s390_reset_cmma(struct mm_struct *mm)23092309-{23102310- mmap_write_lock(mm);23112311- walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);23122312- mmap_write_unlock(mm);23132313-}23142314-EXPORT_SYMBOL_GPL(s390_reset_cmma);23152315-23162316-#define GATHER_GET_PAGES 3223172317-23182318-struct reset_walk_state {23192319- unsigned long next;23202320- unsigned long count;23212321- unsigned long pfns[GATHER_GET_PAGES];23222322-};23232323-23242324-static int s390_gather_pages(pte_t *ptep, unsigned long addr,23252325- unsigned long next, struct mm_walk *walk)23262326-{23272327- struct reset_walk_state *p = walk->private;23282328- pte_t pte = READ_ONCE(*ptep);23292329-23302330- if (pte_present(pte)) {23312331- /* we have a reference from the mapping, take an extra one */23322332- get_page(phys_to_page(pte_val(pte)));23332333- p->pfns[p->count] = phys_to_pfn(pte_val(pte));23342334- p->next = next;23352335- p->count++;23362336- }23372337- return p->count >= GATHER_GET_PAGES;23382338-}23392339-23402340-static const struct mm_walk_ops gather_pages_ops = {23412341- .pte_entry = s390_gather_pages,23422342- .walk_lock = PGWALK_RDLOCK,23432343-};23442344-23452345-/*23462346- * Call the Destroy secure page UVC on each page in the given array of PFNs.23472347- * Each page needs to have an extra reference, which will be released here.23482348- */23492349-void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)23502350-{23512351- struct folio *folio;23522352- unsigned long i;23532353-23542354- for (i = 0; i < count; i++) {23552355- folio = pfn_folio(pfns[i]);23562356- /* we always have an extra reference */23572357- uv_destroy_folio(folio);23582358- /* get rid of the extra reference */23592359- folio_put(folio);23602360- cond_resched();23612361- }23622362-}23632363-EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);23642364-23652365-/**23662366- * __s390_uv_destroy_range - Call the destroy secure page UVC on each page23672367- * in the given range of the given address space.23682368- * @mm: the mm to operate on23692369- * @start: the start of the range23702370- * @end: the end of the range23712371- * @interruptible: if not 0, stop when a fatal signal is received23722372- *23732373- * Walk the given range of the given address space and call the destroy23742374- * secure page UVC on each page. Optionally exit early if a fatal signal is23752375- * pending.23762376- *23772377- * Return: 0 on success, -EINTR if the function stopped before completing23782378- */23792379-int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,23802380- unsigned long end, bool interruptible)23812381-{23822382- struct reset_walk_state state = { .next = start };23832383- int r = 1;23842384-23852385- while (r > 0) {23862386- state.count = 0;23872387- mmap_read_lock(mm);23882388- r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);23892389- mmap_read_unlock(mm);23902390- cond_resched();23912391- s390_uv_destroy_pfns(state.count, state.pfns);23922392- if (interruptible && fatal_signal_pending(current))23932393- return -EINTR;23942394- }23952395- return 0;23962396-}23972397-EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);23982398-23992399-/**24002400- * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy24012401- * @gmap: the gmap whose ASCE needs to be replaced24022402- *24032403- * If the ASCE is a SEGMENT type then this function will return -EINVAL,24042404- * otherwise the pointers in the host_to_guest radix tree will keep pointing24052405- * to the wrong pages, causing use-after-free and memory corruption.24062406- * If the allocation of the new top level page table fails, the ASCE is not24072407- * replaced.24082408- * In any case, the old ASCE is always removed from the gmap CRST list.24092409- * Therefore the caller has to make sure to save a pointer to it24102410- * beforehand, unless a leak is actually intended.24112411- */24122412-int s390_replace_asce(struct gmap *gmap)24132413-{24142414- unsigned long asce;24152415- struct page *page;24162416- void *table;24172417-24182418- /* Replacing segment type ASCEs would cause serious issues */24192419- if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)24202420- return -EINVAL;24212421-24222422- page = gmap_alloc_crst();24232423- if (!page)24242424- return -ENOMEM;24252425- table = page_to_virt(page);24262426- memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));24272427-24282428- /* Set new table origin while preserving existing ASCE control bits */24292429- asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);24302430- WRITE_ONCE(gmap->asce, asce);24312431- WRITE_ONCE(gmap->mm->context.gmap_asce, asce);24322432- WRITE_ONCE(gmap->table, table);24332433-24342434- return 0;24352435-}24362436-EXPORT_SYMBOL_GPL(s390_replace_asce);