net: mana: Assign interrupts to CPUs based on NUMA nodes

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

In large VMs with multiple NUMA nodes, network performance is usually
best if network interrupts are all assigned to the same virtual NUMA
node. This patch assigns online CPUs according to a NUMA-aware policy:
local CPUs are returned first, followed by non-local ones, and then the
assignment wraps around.

Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Link: https://lore.kernel.org/r/1667282761-11547-1-git-send-email-ssengar@linux.microsoft.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Saurabh Sengar and committed by

Paolo Abeni 3 years ago 71fa6887 6d6b39f1

+28 -3

2 changed files

expand all

drivers

net

ethernet

microsoft

mana

gdma.h

gdma_main.c

drivers/net/ethernet/microsoft/mana/gdma.h

··· 353 353 void __iomem *shm_base; 354 354 void __iomem *db_page_base; 355 355 u32 db_page_size; 356 + int numa_node; 356 357 357 358 /* Shared memory chanenl (used to bootstrap HWC) */ 358 359 struct shm_channel shm_channel;

+27 -3

drivers/net/ethernet/microsoft/mana/gdma_main.c

··· 1208 1208 struct gdma_context *gc = pci_get_drvdata(pdev); 1209 1209 struct gdma_irq_context *gic; 1210 1210 unsigned int max_irqs; 1211 + u16 *cpus; 1212 + cpumask_var_t req_mask; 1211 1213 int nvec, irq; 1212 - int err, i, j; 1214 + int err, i = 0, j; 1213 1215 1214 1216 if (max_queues_per_port > MANA_MAX_NUM_QUEUES) 1215 1217 max_queues_per_port = MANA_MAX_NUM_QUEUES; ··· 1230 1228 goto free_irq_vector; 1231 1229 } 1232 1230 1231 + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) { 1232 + err = -ENOMEM; 1233 + goto free_irq; 1234 + } 1235 + 1236 + cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL); 1237 + if (!cpus) { 1238 + err = -ENOMEM; 1239 + goto free_mask; 1240 + } 1241 + for (i = 0; i < nvec; i++) 1242 + cpus[i] = cpumask_local_spread(i, gc->numa_node); 1243 + 1233 1244 for (i = 0; i < nvec; i++) { 1245 + cpumask_set_cpu(cpus[i], req_mask); 1234 1246 gic = &gc->irq_contexts[i]; 1235 1247 gic->handler = NULL; 1236 1248 gic->arg = NULL; ··· 1252 1236 irq = pci_irq_vector(pdev, i); 1253 1237 if (irq < 0) { 1254 1238 err = irq; 1255 - goto free_irq; 1239 + goto free_mask; 1256 1240 } 1257 1241 1258 1242 err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic); 1259 1243 if (err) 1260 - goto free_irq; 1244 + goto free_mask; 1245 + irq_set_affinity_and_hint(irq, req_mask); 1246 + cpumask_clear(req_mask); 1261 1247 } 1248 + free_cpumask_var(req_mask); 1249 + kfree(cpus); 1262 1250 1263 1251 err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); 1264 1252 if (err) ··· 1273 1253 1274 1254 return 0; 1275 1255 1256 + free_mask: 1257 + free_cpumask_var(req_mask); 1258 + kfree(cpus); 1276 1259 free_irq: 1277 1260 for (j = i - 1; j >= 0; j--) { 1278 1261 irq = pci_irq_vector(pdev, j); ··· 1405 1382 if (!bar0_va) 1406 1383 goto free_gc; 1407 1384 1385 + gc->numa_node = dev_to_node(&pdev->dev); 1408 1386 gc->is_pf = mana_is_pf(pdev->device); 1409 1387 gc->bar0_va = bar0_va; 1410 1388 gc->dev = &pdev->dev;

Configure Feed

Configure Feed