Skip to content

Commit 16a866a

Browse files
committed
NVIDIA: SAUCE: vfio/nvgrace-egm: register EGM PFNMAP range with memory_failure
EGM carveout memory is mapped directly into userspace (QEMU) and is not added to the kernel. It is not managed by the kernel page allocator and has no struct pages. The module can thus utilize the Linux memory manager's memory_failure mechanism for regions with no struct pages. The Linux MM code exposes register/unregister APIs allowing modules to register such memory regions for memory_failure handling. Register the EGM PFN range with the MM memory_failure infrastructure on open, and unregister it on the last close. Provide a PFN-to-VMA offset callback that validates the PFN is within the EGM region and the VMA, then converts it to a file offset and records the poisoned offset in the existing hashtable for reporting to userspace. Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
1 parent f79b17c commit 16a866a

File tree

1 file changed

+98
-1
lines changed
  • drivers/vfio/pci/nvgrace-gpu

1 file changed

+98
-1
lines changed

drivers/vfio/pci/nvgrace-gpu/egm.c

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/egm.h>
99
#include <linux/nvgrace-egm.h>
1010
#include <linux/vmalloc.h>
11+
#include <linux/memory-failure.h>
1112

1213
#define MAX_EGM_NODES 256
1314

@@ -26,6 +27,7 @@ struct egm_region {
2627
struct cdev cdev;
2728
struct list_head gpus;
2829
DECLARE_HASHTABLE(htbl, 0x10);
30+
struct pfn_address_space pfn_address_space;
2931
};
3032

3133
struct h_node {
@@ -37,11 +39,97 @@ static dev_t dev;
3739
static struct class *class;
3840
static struct list_head egm_list;
3941

42+
static int pfn_memregion_offset(struct egm_region *region,
43+
unsigned long pfn,
44+
pgoff_t *pfn_offset_in_region)
45+
{
46+
unsigned long start_pfn, num_pages;
47+
48+
start_pfn = PHYS_PFN(region->egmphys);
49+
num_pages = region->egmlength >> PAGE_SHIFT;
50+
51+
if (pfn < start_pfn || pfn >= start_pfn + num_pages)
52+
return -EFAULT;
53+
54+
*pfn_offset_in_region = pfn - start_pfn;
55+
56+
return 0;
57+
}
58+
59+
static int track_ecc_offset(struct egm_region *region,
60+
unsigned long mem_offset)
61+
{
62+
struct h_node *cur_page, *ecc_page;
63+
unsigned long bkt;
64+
65+
hash_for_each(region->htbl, bkt, cur_page, node) {
66+
if (cur_page->mem_offset == mem_offset)
67+
return 0;
68+
}
69+
70+
ecc_page = (struct h_node *)(vzalloc(sizeof(struct h_node)));
71+
if (!ecc_page)
72+
return -ENOMEM;
73+
74+
ecc_page->mem_offset = mem_offset;
75+
76+
hash_add(region->htbl, &ecc_page->node, ecc_page->mem_offset);
77+
78+
return 0;
79+
}
80+
81+
static int nvgrace_egm_pfn_to_vma_pgoff(struct vm_area_struct *vma,
82+
unsigned long pfn,
83+
pgoff_t *pgoff)
84+
{
85+
struct egm_region *region = vma->vm_file->private_data;
86+
pgoff_t vma_offset_in_region = vma->vm_pgoff &
87+
((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
88+
pgoff_t pfn_offset_in_region;
89+
int ret;
90+
91+
ret = pfn_memregion_offset(region, pfn, &pfn_offset_in_region);
92+
if (ret)
93+
return ret;
94+
95+
/* Ensure PFN is not before VMA's start within the region */
96+
if (pfn_offset_in_region < vma_offset_in_region)
97+
return -EFAULT;
98+
99+
/* Calculate offset from VMA start */
100+
*pgoff = vma->vm_pgoff +
101+
(pfn_offset_in_region - vma_offset_in_region);
102+
103+
/* Track and save the poisoned offset */
104+
return track_ecc_offset(region, *pgoff << PAGE_SHIFT);
105+
}
106+
107+
static int
108+
nvgrace_egm_vfio_pci_register_pfn_range(struct inode *inode,
109+
struct egm_region *region)
110+
{
111+
int ret;
112+
unsigned long pfn, nr_pages;
113+
114+
pfn = PHYS_PFN(region->egmphys);
115+
nr_pages = region->egmlength >> PAGE_SHIFT;
116+
117+
region->pfn_address_space.node.start = pfn;
118+
region->pfn_address_space.node.last = pfn + nr_pages - 1;
119+
region->pfn_address_space.mapping = inode->i_mapping;
120+
region->pfn_address_space.pfn_to_vma_pgoff = nvgrace_egm_pfn_to_vma_pgoff;
121+
122+
ret = register_pfn_address_space(&region->pfn_address_space);
123+
124+
return ret;
125+
}
126+
40127
static int nvgrace_egm_open(struct inode *inode, struct file *file)
41128
{
42129
void *memaddr;
43130
struct egm_region *region = container_of(inode->i_cdev,
44131
struct egm_region, cdev);
132+
int ret;
45133

46134
if (atomic_inc_return(&region->open_count) > 1)
47135
return 0;
@@ -56,6 +144,12 @@ static int nvgrace_egm_open(struct inode *inode, struct file *file)
56144
memunmap(memaddr);
57145
file->private_data = region;
58146

147+
ret = nvgrace_egm_vfio_pci_register_pfn_range(inode, region);
148+
if (ret && ret != -EOPNOTSUPP) {
149+
file->private_data = NULL;
150+
return ret;
151+
}
152+
59153
return 0;
60154
}
61155

@@ -64,8 +158,11 @@ static int nvgrace_egm_release(struct inode *inode, struct file *file)
64158
struct egm_region *region = container_of(inode->i_cdev,
65159
struct egm_region, cdev);
66160

67-
if (atomic_dec_and_test(&region->open_count))
161+
if (atomic_dec_and_test(&region->open_count)) {
162+
unregister_pfn_address_space(&region->pfn_address_space);
163+
68164
file->private_data = NULL;
165+
}
69166

70167
return 0;
71168
}

0 commit comments

Comments
 (0)