Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ptp: vmclock: support device notifications

Add optional support for device notifications in VMClock. When
supported, the hypervisor will send a device notification every time it
updates the seq_count to a new even value.

Moreover, add support for poll() in VMClock as a means to propagate this
notification to user space. poll() will return a POLLIN event to
listeners every time seq_count changes to a value different from the one
last seen (since open() or the last read()/pread()). This means that when
poll() returns a POLLIN event, listeners need to call read() to observe
what has changed and to update the reader's view of seq_count. In other
words, once poll() has returned POLLIN, all subsequent calls to poll()
will immediately return with a POLLIN event until the listener calls
read().

The device advertises support for the notification mechanism by setting
the VMCLOCK_FLAG_NOTIFICATION_PRESENT flag in the flags field of
vmclock_abi. If the flag is not present, the driver won't set up the ACPI
notification handler and poll() will always immediately return POLLHUP.

Signed-off-by: Babis Chalios <bchalios@amazon.es>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Takahiro Itazuri <itazur@amazon.com>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Tested-by: Takahiro Itazuri <itazur@amazon.com>
Link: https://patch.msgid.link/20260130173704.12575-3-itazur@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Babis Chalios and committed by
Jakub Kicinski
3b1526dd 3495064b

+148 -19
+143 -19
drivers/ptp/ptp_vmclock.c
··· 5 5 * Copyright © 2024 Amazon.com, Inc. or its affiliates. 6 6 */ 7 7 8 + #include "linux/poll.h" 9 + #include "linux/types.h" 10 + #include "linux/wait.h" 8 11 #include <linux/acpi.h> 9 12 #include <linux/device.h> 10 13 #include <linux/err.h> ··· 42 39 struct resource res; 43 40 struct vmclock_abi *clk; 44 41 struct miscdevice miscdev; 42 + wait_queue_head_t disrupt_wait; 45 43 struct ptp_clock_info ptp_clock_info; 46 44 struct ptp_clock *ptp_clock; 47 45 enum clocksource_ids cs_id, sys_cs_id; ··· 361 357 return ptp_clock_register(&st->ptp_clock_info, dev); 362 358 } 363 359 360 + struct vmclock_file_state { 361 + struct vmclock_state *st; 362 + atomic_t seq; 363 + }; 364 + 364 365 static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) 365 366 { 366 - struct vmclock_state *st = container_of(fp->private_data, 367 - struct vmclock_state, miscdev); 367 + struct vmclock_file_state *fst = fp->private_data; 368 + struct vmclock_state *st = fst->st; 368 369 369 370 if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ) 370 371 return -EROFS; ··· 388 379 static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, 389 380 size_t count, loff_t *ppos) 390 381 { 391 - struct vmclock_state *st = container_of(fp->private_data, 392 - struct vmclock_state, miscdev); 393 382 ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT); 383 + struct vmclock_file_state *fst = fp->private_data; 384 + struct vmclock_state *st = fst->st; 385 + uint32_t seq, old_seq; 394 386 size_t max_count; 395 - uint32_t seq; 396 387 397 388 if (*ppos >= PAGE_SIZE) 398 389 return 0; ··· 401 392 if (count > max_count) 402 393 count = max_count; 403 394 395 + old_seq = atomic_read(&fst->seq); 404 396 while (1) { 405 397 seq = le32_to_cpu(st->clk->seq_count) & ~1U; 406 398 /* Pairs with hypervisor wmb */ ··· 412 402 413 403 /* Pairs with hypervisor wmb */ 414 404 virt_rmb(); 415 - if (seq == le32_to_cpu(st->clk->seq_count)) 416 - break; 405 + if (seq == 
le32_to_cpu(st->clk->seq_count)) { 406 + /* 407 + * Either we updated fst->seq to seq (the latest version we observed) 408 + * or someone else did (old_seq == seq), so we can break. 409 + */ 410 + if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) || 411 + old_seq == seq) { 412 + break; 413 + } 414 + } 417 415 418 416 if (ktime_after(ktime_get(), deadline)) 419 417 return -ETIMEDOUT; ··· 431 413 return count; 432 414 } 433 415 416 + static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait) 417 + { 418 + struct vmclock_file_state *fst = fp->private_data; 419 + struct vmclock_state *st = fst->st; 420 + uint32_t seq; 421 + 422 + /* 423 + * Hypervisor will not send us any notifications, so fail immediately 424 + * to avoid having caller sleeping for ever. 425 + */ 426 + if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) 427 + return POLLHUP; 428 + 429 + poll_wait(fp, &st->disrupt_wait, wait); 430 + 431 + seq = le32_to_cpu(st->clk->seq_count); 432 + if (atomic_read(&fst->seq) != seq) 433 + return POLLIN | POLLRDNORM; 434 + 435 + return 0; 436 + } 437 + 438 + static int vmclock_miscdev_open(struct inode *inode, struct file *fp) 439 + { 440 + struct vmclock_state *st = container_of(fp->private_data, 441 + struct vmclock_state, miscdev); 442 + struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL); 443 + 444 + if (!fst) 445 + return -ENOMEM; 446 + 447 + fst->st = st; 448 + atomic_set(&fst->seq, 0); 449 + 450 + fp->private_data = fst; 451 + 452 + return 0; 453 + } 454 + 455 + static int vmclock_miscdev_release(struct inode *inode, struct file *fp) 456 + { 457 + kfree(fp->private_data); 458 + return 0; 459 + } 460 + 434 461 static const struct file_operations vmclock_miscdev_fops = { 435 462 .owner = THIS_MODULE, 463 + .open = vmclock_miscdev_open, 464 + .release = vmclock_miscdev_release, 436 465 .mmap = vmclock_miscdev_mmap, 437 466 .read = vmclock_miscdev_read, 467 + .poll = vmclock_miscdev_poll, 438 468 }; 439 469 440 470 /* 
module operations */ 441 - 442 - static void vmclock_remove(void *data) 443 - { 444 - struct vmclock_state *st = data; 445 - 446 - if (st->ptp_clock) 447 - ptp_clock_unregister(st->ptp_clock); 448 - 449 - if (st->miscdev.minor != MISC_DYNAMIC_MINOR) 450 - misc_deregister(&st->miscdev); 451 - } 452 471 453 472 static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data) 454 473 { ··· 514 459 return AE_ERROR; 515 460 } 516 461 462 + static void 463 + vmclock_acpi_notification_handler(acpi_handle __always_unused handle, 464 + u32 __always_unused event, void *dev) 465 + { 466 + struct device *device = dev; 467 + struct vmclock_state *st = device->driver_data; 468 + 469 + wake_up_interruptible(&st->disrupt_wait); 470 + } 471 + 472 + static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) 473 + { 474 + struct acpi_device *adev = ACPI_COMPANION(dev); 475 + acpi_status status; 476 + 477 + /* 478 + * This should never happen as this function is only called when 479 + * has_acpi_companion(dev) is true, but the logic is sufficiently 480 + * complex that Coverity can't see the tautology. 481 + */ 482 + if (!adev) 483 + return -ENODEV; 484 + 485 + /* The device does not support notifications. 
Nothing else to do */ 486 + if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) 487 + return 0; 488 + 489 + status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, 490 + vmclock_acpi_notification_handler, 491 + dev); 492 + if (ACPI_FAILURE(status)) { 493 + dev_err(dev, "failed to install notification handler"); 494 + return -ENODEV; 495 + } 496 + 497 + return 0; 498 + } 499 + 517 500 static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) 518 501 { 519 502 struct acpi_device *adev = ACPI_COMPANION(dev); ··· 573 480 } 574 481 575 482 return 0; 483 + } 484 + 485 + static void vmclock_remove(void *data) 486 + { 487 + struct device *dev = data; 488 + struct vmclock_state *st = dev->driver_data; 489 + 490 + if (!st) { 491 + dev_err(dev, "%s called with NULL driver_data", __func__); 492 + return; 493 + } 494 + 495 + if (has_acpi_companion(dev)) 496 + acpi_remove_notify_handler(ACPI_COMPANION(dev)->handle, 497 + ACPI_DEVICE_NOTIFY, 498 + vmclock_acpi_notification_handler); 499 + 500 + if (st->ptp_clock) 501 + ptp_clock_unregister(st->ptp_clock); 502 + 503 + if (st->miscdev.minor != MISC_DYNAMIC_MINOR) 504 + misc_deregister(&st->miscdev); 505 + 506 + dev->driver_data = NULL; 576 507 } 577 508 578 509 static void vmclock_put_idx(void *data) ··· 662 545 663 546 st->miscdev.minor = MISC_DYNAMIC_MINOR; 664 547 665 - ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, st); 548 + init_waitqueue_head(&st->disrupt_wait); 549 + dev->driver_data = st; 550 + 551 + ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, dev); 552 + if (ret) 553 + return ret; 554 + 555 + ret = vmclock_setup_notification(dev, st); 666 556 if (ret) 667 557 return ret; 668 558
+5
include/uapi/linux/vmclock-abi.h
··· 121 121 * loaded from some save state (restored from a snapshot). 122 122 */ 123 123 #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) 124 + /* 125 + * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send 126 + * a notification every time it updates seq_count to a new even number. 127 + */ 128 + #define VMCLOCK_FLAG_NOTIFICATION_PRESENT (1 << 9) 124 129 125 130 __u8 pad[2]; 126 131 __u8 clock_status;