Merge tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Richard Weinberger:

- Removal of dead code (TT mode leftovers, etc)

- Fixes for the network vector driver

- Fixes for time-travel mode

* tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux:
um: fix time-travel syscall scheduling hack
um: Remove outdated asm/sysrq.h header
um: Remove the declaration of user_thread function
um: Remove the call to SUBARCH_EXECVE1 macro
um: Remove unused mm_fd field from mm_id
um: Remove unused fields from thread_struct
um: Remove the redundant newpage check in update_pte_range
um: Remove unused kpte_clear_flush macro
um: Remove obsoleted declaration for execute_syscall_skas
user_mode_linux_howto_v2: add VDE vector support in doc
vector_user: add VDE support
um: remove ARCH_NO_PREEMPT_DYNAMIC
um: vector: Fix NAPI budget handling
um: vector: Replace locks guarding queue depth with atomics
um: remove variable stack array in os_rcv_fd_msg()

Linus Torvalds 2 years ago 12cc5240 0c33037c

+285 -188

24 changed files

expand all collapse all

Documentation

virt

uml

user_mode_linux_howto_v2.rst

arch

Kconfig

drivers

vector_kern.c

vector_kern.h

vector_user.c

include

asm

pgtable.h

processor-generic.h

sysrq.h

shared

skas

mm_id.h

skas.h

kernel

exec.c

process.c

reboot.c

skas

mmu.c

process.c

syscall.c

sysrq.c

time.c

tlb.c

os-Linux

file.c

skas

mem.c

process.c

x86

sysrq_32.c

sysrq_64.c

+37

Documentation/virt/uml/user_mode_linux_howto_v2.rst

reviewed

··· 217 217 +-----------+--------+------------------------------------+------------+ 218 218 | fd | vector | dependent on fd type | varies | 219 219 +-----------+--------+------------------------------------+------------+ 220 220 + | vde | vector | dep. on VDE VPN: Virt.Net Locator | varies | 221 221 + +-----------+--------+------------------------------------+------------+ 220 222 | tuntap | legacy | none | ~ 500Mbit | 221 223 +-----------+--------+------------------------------------+------------+ 222 224 | daemon | legacy | none | ~ 450Mbit | ··· 574 572 https://github.com/NetSys/bess/wiki/Built-In-Modules-and-Ports 575 573 576 574 BESS transport does not require any special privileges. 575 575 + 576 576 + VDE vector transport 577 577 + -------------------- 578 578 + 579 579 + Virtual Distributed Ethernet (VDE) is a project whose main goal is to provide 580 580 + highly flexible support for virtual networking. 581 581 + 582 582 + http://wiki.virtualsquare.org/#/tutorials/vdebasics 583 583 + 584 584 + Common usages of VDE include fast prototyping and teaching. 585 585 + 586 586 + Examples: 587 587 + 588 588 + ``vecX:transport=vde,vnl=tap://tap0`` 589 589 + 590 590 + use tap0 591 591 + 592 592 + ``vecX:transport=vde,vnl=slirp://`` 593 593 + 594 594 + use slirp 595 595 + 596 596 + ``vec0:transport=vde,vnl=vde:///tmp/switch`` 597 597 + 598 598 + connect to a vde switch 599 599 + 600 600 + ``vecX:transport=\"vde,vnl=cmd://ssh remote.host //tmp/sshlirp\"`` 601 601 + 602 602 + connect to a remote slirp (instant VPN: convert ssh to VPN, it uses sshlirp) 603 603 + https://github.com/virtualsquare/sshlirp 604 604 + 605 605 + ``vec0:transport=vde,vnl=vxvde://234.0.0.1`` 606 606 + 607 607 + connect to a local area cloud (all the UML nodes using the same 608 608 + multicast address running on hosts in the same multicast domain (LAN) 609 609 + will be automagically connected together to a virtual LAN). 
577 610 578 611 Configuring Legacy transports 579 612 =============================

-1

arch/um/Kconfig

reviewed

··· 11 11 select ARCH_HAS_KCOV 12 12 select ARCH_HAS_STRNCPY_FROM_USER 13 13 select ARCH_HAS_STRNLEN_USER 14 14 - select ARCH_NO_PREEMPT_DYNAMIC 15 14 select HAVE_ARCH_AUDITSYSCALL 16 15 select HAVE_ARCH_KASAN if X86_64 17 16 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN

+109 -103

arch/um/drivers/vector_kern.c

reviewed

··· 22 22 #include <linux/interrupt.h> 23 23 #include <linux/firmware.h> 24 24 #include <linux/fs.h> 25 25 + #include <asm/atomic.h> 25 26 #include <uapi/linux/filter.h> 26 27 #include <init.h> 27 28 #include <irq_kern.h> ··· 103 102 104 103 static void vector_reset_stats(struct vector_private *vp) 105 104 { 105 105 + /* We reuse the existing queue locks for stats */ 106 106 + 107 107 + /* RX stats are modified with RX head_lock held 108 108 + * in vector_poll. 109 109 + */ 110 110 + 111 111 + spin_lock(&vp->rx_queue->head_lock); 106 112 vp->estats.rx_queue_max = 0; 107 113 vp->estats.rx_queue_running_average = 0; 108 108 - vp->estats.tx_queue_max = 0; 109 109 - vp->estats.tx_queue_running_average = 0; 110 114 vp->estats.rx_encaps_errors = 0; 115 115 + vp->estats.sg_ok = 0; 116 116 + vp->estats.sg_linearized = 0; 117 117 + spin_unlock(&vp->rx_queue->head_lock); 118 118 + 119 119 + /* TX stats are modified with TX head_lock held 120 120 + * in vector_send. 121 121 + */ 122 122 + 123 123 + spin_lock(&vp->tx_queue->head_lock); 111 124 vp->estats.tx_timeout_count = 0; 112 125 vp->estats.tx_restart_queue = 0; 113 126 vp->estats.tx_kicks = 0; 114 127 vp->estats.tx_flow_control_xon = 0; 115 128 vp->estats.tx_flow_control_xoff = 0; 116 116 - vp->estats.sg_ok = 0; 117 117 - vp->estats.sg_linearized = 0; 129 129 + vp->estats.tx_queue_max = 0; 130 130 + vp->estats.tx_queue_running_average = 0; 131 131 + spin_unlock(&vp->tx_queue->head_lock); 118 132 } 119 133 120 134 static int get_mtu(struct arglist *def) ··· 248 232 249 233 static char *drop_buffer; 250 234 251 251 - /* Array backed queues optimized for bulk enqueue/dequeue and 252 252 - * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios. 253 253 - * For more details and full design rationale see 254 254 - * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt 255 255 - */ 256 256 - 257 235 258 236 /* 259 237 * Advance the mmsg queue head by n = advance. 
Resets the queue to ··· 257 247 258 248 static int vector_advancehead(struct vector_queue *qi, int advance) 259 249 { 260 260 - int queue_depth; 261 261 - 262 250 qi->head = 263 251 (qi->head + advance) 264 252 % qi->max_depth; 265 253 266 254 267 267 - spin_lock(&qi->tail_lock); 268 268 - qi->queue_depth -= advance; 269 269 - 270 270 - /* we are at 0, use this to 271 271 - * reset head and tail so we can use max size vectors 272 272 - */ 273 273 - 274 274 - if (qi->queue_depth == 0) { 275 275 - qi->head = 0; 276 276 - qi->tail = 0; 277 277 - } 278 278 - queue_depth = qi->queue_depth; 279 279 - spin_unlock(&qi->tail_lock); 280 280 - return queue_depth; 255 255 + atomic_sub(advance, &qi->queue_depth); 256 256 + return atomic_read(&qi->queue_depth); 281 257 } 282 258 283 259 /* Advance the queue tail by n = advance. ··· 273 277 274 278 static int vector_advancetail(struct vector_queue *qi, int advance) 275 279 { 276 276 - int queue_depth; 277 277 - 278 280 qi->tail = 279 281 (qi->tail + advance) 280 282 % qi->max_depth; 281 281 - spin_lock(&qi->head_lock); 282 282 - qi->queue_depth += advance; 283 283 - queue_depth = qi->queue_depth; 284 284 - spin_unlock(&qi->head_lock); 285 285 - return queue_depth; 283 283 + atomic_add(advance, &qi->queue_depth); 284 284 + return atomic_read(&qi->queue_depth); 286 285 } 287 286 288 287 static int prep_msg(struct vector_private *vp, ··· 330 339 int iov_count; 331 340 332 341 spin_lock(&qi->tail_lock); 333 333 - spin_lock(&qi->head_lock); 334 334 - queue_depth = qi->queue_depth; 335 335 - spin_unlock(&qi->head_lock); 342 342 + queue_depth = atomic_read(&qi->queue_depth); 336 343 337 344 if (skb) 338 345 packet_len = skb->len; ··· 349 360 mmsg_vector->msg_hdr.msg_iovlen = iov_count; 350 361 mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr; 351 362 mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size; 363 363 + wmb(); /* Make the packet visible to the NAPI poll thread */ 352 364 queue_depth = vector_advancetail(qi, 1); 353 
365 } else 354 366 goto drop; ··· 388 398 } 389 399 390 400 /* 391 391 - * Generic vector deque via sendmmsg with support for forming headers 401 401 + * Generic vector dequeue via sendmmsg with support for forming headers 392 402 * using transport specific callback. Allows GRE, L2TPv3, RAW and 393 403 * other transports to use a common dequeue procedure in vector mode 394 404 */ ··· 398 408 { 399 409 struct vector_private *vp = netdev_priv(qi->dev); 400 410 struct mmsghdr *send_from; 401 401 - int result = 0, send_len, queue_depth = qi->max_depth; 411 411 + int result = 0, send_len; 402 412 403 413 if (spin_trylock(&qi->head_lock)) { 404 404 - if (spin_trylock(&qi->tail_lock)) { 405 405 - /* update queue_depth to current value */ 406 406 - queue_depth = qi->queue_depth; 407 407 - spin_unlock(&qi->tail_lock); 408 408 - while (queue_depth > 0) { 409 409 - /* Calculate the start of the vector */ 410 410 - send_len = queue_depth; 411 411 - send_from = qi->mmsg_vector; 412 412 - send_from += qi->head; 413 413 - /* Adjust vector size if wraparound */ 414 414 - if (send_len + qi->head > qi->max_depth) 415 415 - send_len = qi->max_depth - qi->head; 416 416 - /* Try to TX as many packets as possible */ 417 417 - if (send_len > 0) { 418 418 - result = uml_vector_sendmmsg( 419 419 - vp->fds->tx_fd, 420 420 - send_from, 421 421 - send_len, 422 422 - 0 423 423 - ); 424 424 - vp->in_write_poll = 425 425 - (result != send_len); 426 426 - } 427 427 - /* For some of the sendmmsg error scenarios 428 428 - * we may end being unsure in the TX success 429 429 - * for all packets. It is safer to declare 430 430 - * them all TX-ed and blame the network. 
414 414 + /* update queue_depth to current value */ 415 415 + while (atomic_read(&qi->queue_depth) > 0) { 416 416 + /* Calculate the start of the vector */ 417 417 + send_len = atomic_read(&qi->queue_depth); 418 418 + send_from = qi->mmsg_vector; 419 419 + send_from += qi->head; 420 420 + /* Adjust vector size if wraparound */ 421 421 + if (send_len + qi->head > qi->max_depth) 422 422 + send_len = qi->max_depth - qi->head; 423 423 + /* Try to TX as many packets as possible */ 424 424 + if (send_len > 0) { 425 425 + result = uml_vector_sendmmsg( 426 426 + vp->fds->tx_fd, 427 427 + send_from, 428 428 + send_len, 429 429 + 0 430 430 + ); 431 431 + vp->in_write_poll = 432 432 + (result != send_len); 433 433 + } 434 434 + /* For some of the sendmmsg error scenarios 435 435 + * we may end being unsure in the TX success 436 436 + * for all packets. It is safer to declare 437 437 + * them all TX-ed and blame the network. 438 438 + */ 439 439 + if (result < 0) { 440 440 + if (net_ratelimit()) 441 441 + netdev_err(vp->dev, "sendmmsg err=%i\n", 442 442 + result); 443 443 + vp->in_error = true; 444 444 + result = send_len; 445 445 + } 446 446 + if (result > 0) { 447 447 + consume_vector_skbs(qi, result); 448 448 + /* This is equivalent to an TX IRQ. 449 449 + * Restart the upper layers to feed us 450 450 + * more packets. 431 451 */ 432 432 - if (result < 0) { 433 433 - if (net_ratelimit()) 434 434 - netdev_err(vp->dev, "sendmmsg err=%i\n", 435 435 - result); 436 436 - vp->in_error = true; 437 437 - result = send_len; 438 438 - } 439 439 - if (result > 0) { 440 440 - queue_depth = 441 441 - consume_vector_skbs(qi, result); 442 442 - /* This is equivalent to an TX IRQ. 443 443 - * Restart the upper layers to feed us 444 444 - * more packets. 
445 445 - */ 446 446 - if (result > vp->estats.tx_queue_max) 447 447 - vp->estats.tx_queue_max = result; 448 448 - vp->estats.tx_queue_running_average = 449 449 - (vp->estats.tx_queue_running_average + result) >> 1; 450 450 - } 451 451 - netif_wake_queue(qi->dev); 452 452 - /* if TX is busy, break out of the send loop, 453 453 - * poll write IRQ will reschedule xmit for us 454 454 - */ 455 455 - if (result != send_len) { 456 456 - vp->estats.tx_restart_queue++; 457 457 - break; 458 458 - } 452 452 + if (result > vp->estats.tx_queue_max) 453 453 + vp->estats.tx_queue_max = result; 454 454 + vp->estats.tx_queue_running_average = 455 455 + (vp->estats.tx_queue_running_average + result) >> 1; 456 456 + } 457 457 + netif_wake_queue(qi->dev); 458 458 + /* if TX is busy, break out of the send loop, 459 459 + * poll write IRQ will reschedule xmit for us. 460 460 + */ 461 461 + if (result != send_len) { 462 462 + vp->estats.tx_restart_queue++; 463 463 + break; 459 464 } 460 465 } 461 466 spin_unlock(&qi->head_lock); 462 467 } 463 463 - return queue_depth; 468 468 + return atomic_read(&qi->queue_depth); 464 469 } 465 470 466 471 /* Queue destructor. 
Deliberately stateless so we can use ··· 574 589 } 575 590 spin_lock_init(&result->head_lock); 576 591 spin_lock_init(&result->tail_lock); 577 577 - result->queue_depth = 0; 592 592 + atomic_set(&result->queue_depth, 0); 578 593 result->head = 0; 579 594 result->tail = 0; 580 595 return result; ··· 653 668 } 654 669 655 670 656 656 - /* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/ 671 671 + /* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */ 657 672 658 673 static void prep_queue_for_rx(struct vector_queue *qi) 659 674 { 660 675 struct vector_private *vp = netdev_priv(qi->dev); 661 676 struct mmsghdr *mmsg_vector = qi->mmsg_vector; 662 677 void **skbuff_vector = qi->skbuff_vector; 663 663 - int i; 678 678 + int i, queue_depth; 664 679 665 665 - if (qi->queue_depth == 0) 680 680 + queue_depth = atomic_read(&qi->queue_depth); 681 681 + 682 682 + if (queue_depth == 0) 666 683 return; 667 667 - for (i = 0; i < qi->queue_depth; i++) { 684 684 + 685 685 + /* RX is always emptied 100% during each cycle, so we do not 686 686 + * have to do the tail wraparound math for it. 
687 687 + */ 688 688 + 689 689 + qi->head = qi->tail = 0; 690 690 + 691 691 + for (i = 0; i < queue_depth; i++) { 668 692 /* it is OK if allocation fails - recvmmsg with NULL data in 669 693 * iov argument still performs an RX, just drops the packet 670 694 * This allows us stop faffing around with a "drop buffer" ··· 683 689 skbuff_vector++; 684 690 mmsg_vector++; 685 691 } 686 686 - qi->queue_depth = 0; 692 692 + atomic_set(&qi->queue_depth, 0); 687 693 } 688 694 689 695 static struct vector_device *find_device(int n) ··· 966 972 budget = qi->max_depth; 967 973 968 974 packet_count = uml_vector_recvmmsg( 969 969 - vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); 975 975 + vp->fds->rx_fd, qi->mmsg_vector, budget, 0); 970 976 971 977 if (packet_count < 0) 972 978 vp->in_error = true; ··· 979 985 * many do we need to prep the next time prep_queue_for_rx() is called. 980 986 */ 981 987 982 982 - qi->queue_depth = packet_count; 988 988 + atomic_add(packet_count, &qi->queue_depth); 983 989 984 990 for (i = 0; i < packet_count; i++) { 985 991 skb = (*skbuff_vector); ··· 1166 1172 1167 1173 if ((vp->options & VECTOR_TX) != 0) 1168 1174 tx_enqueued = (vector_send(vp->tx_queue) > 0); 1175 1175 + spin_lock(&vp->rx_queue->head_lock); 1169 1176 if ((vp->options & VECTOR_RX) > 0) 1170 1177 err = vector_mmsg_rx(vp, budget); 1171 1178 else { ··· 1174 1179 if (err > 0) 1175 1180 err = 1; 1176 1181 } 1182 1182 + spin_unlock(&vp->rx_queue->head_lock); 1177 1183 if (err > 0) 1178 1184 work_done += err; 1179 1185 1180 1186 if (tx_enqueued || err > 0) 1181 1187 napi_schedule(napi); 1182 1182 - if (work_done < budget) 1188 1188 + if (work_done <= budget) 1183 1189 napi_complete_done(napi, work_done); 1184 1190 return work_done; 1185 1191 } ··· 1221 1225 vp->rx_header_size, 1222 1226 MAX_IOV_SIZE 1223 1227 ); 1224 1224 - vp->rx_queue->queue_depth = get_depth(vp->parsed); 1228 1228 + atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed)); 1225 1229 } else { 1226 1230 
vp->header_rxbuffer = kmalloc( 1227 1231 vp->rx_header_size, ··· 1463 1467 { 1464 1468 struct vector_private *vp = netdev_priv(dev); 1465 1469 1470 1470 + /* Stats are modified in the dequeue portions of 1471 1471 + * rx/tx which are protected by the head locks 1472 1472 + * grabbing these locks here ensures they are up 1473 1473 + * to date. 1474 1474 + */ 1475 1475 + 1476 1476 + spin_lock(&vp->tx_queue->head_lock); 1477 1477 + spin_lock(&vp->rx_queue->head_lock); 1466 1478 memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats)); 1479 1479 + spin_unlock(&vp->rx_queue->head_lock); 1480 1480 + spin_unlock(&vp->tx_queue->head_lock); 1467 1481 } 1468 1482 1469 1483 static int vector_get_coalesce(struct net_device *netdev,

+3 -1

arch/um/drivers/vector_kern.h

reviewed

··· 14 14 #include <linux/ctype.h> 15 15 #include <linux/workqueue.h> 16 16 #include <linux/interrupt.h> 17 17 + #include <asm/atomic.h> 17 18 18 19 #include "vector_user.h" 19 20 ··· 45 44 struct net_device *dev; 46 45 spinlock_t head_lock; 47 46 spinlock_t tail_lock; 48 48 - int queue_depth, head, tail, max_depth, max_iov_frags; 47 47 + atomic_t queue_depth; 48 48 + int head, tail, max_depth, max_iov_frags; 49 49 short options; 50 50 }; 51 51

+83

arch/um/drivers/vector_user.c

reviewed

··· 46 46 #define TRANS_FD "fd" 47 47 #define TRANS_FD_LEN strlen(TRANS_FD) 48 48 49 49 + #define TRANS_VDE "vde" 50 50 + #define TRANS_VDE_LEN strlen(TRANS_VDE) 51 51 + 49 52 #define VNET_HDR_FAIL "could not enable vnet headers on fd %d" 50 53 #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s" 51 54 #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i" ··· 437 434 return NULL; 438 435 } 439 436 437 437 + /* enough char to store an int type */ 438 438 + #define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2) 439 439 + #define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3) 440 440 + /* vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL) */ 441 441 + #define VDE_MAX_ARGC 12 442 442 + #define VDE_SEQPACKET_HEAD "seqpacket://" 443 443 + #define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1) 444 444 + #define VDE_DEFAULT_DESCRIPTION "UML" 445 445 + 446 446 + static struct vector_fds *user_init_vde_fds(struct arglist *ifspec) 447 447 + { 448 448 + char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1]; 449 449 + char *argv[VDE_MAX_ARGC] = {"vde_plug"}; 450 450 + int argc = 1; 451 451 + int rv; 452 452 + int sv[2]; 453 453 + struct vector_fds *result = NULL; 454 454 + 455 455 + char *vnl = uml_vector_fetch_arg(ifspec,"vnl"); 456 456 + char *descr = uml_vector_fetch_arg(ifspec,"descr"); 457 457 + char *port = uml_vector_fetch_arg(ifspec,"port"); 458 458 + char *mode = uml_vector_fetch_arg(ifspec,"mode"); 459 459 + char *group = uml_vector_fetch_arg(ifspec,"group"); 460 460 + if (descr == NULL) descr = VDE_DEFAULT_DESCRIPTION; 461 461 + 462 462 + argv[argc++] = "--descr"; 463 463 + argv[argc++] = descr; 464 464 + if (port != NULL) { 465 465 + argv[argc++] = "--port2"; 466 466 + argv[argc++] = port; 467 467 + } 468 468 + if (mode != NULL) { 469 469 + argv[argc++] = "--mod2"; 470 470 + argv[argc++] = mode; 471 471 + } 472 472 + if (group != NULL) { 473 473 + argv[argc++] = "--group2"; 474 474 + 
argv[argc++] = group; 475 475 + } 476 476 + argv[argc++] = seqpacketvnl; 477 477 + argv[argc++] = vnl; 478 478 + argv[argc++] = NULL; 479 479 + 480 480 + rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv); 481 481 + if (rv < 0) { 482 482 + printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno); 483 483 + return NULL; 484 484 + } 485 485 + rv = os_set_exec_close(sv[0]); 486 486 + if (rv < 0) { 487 487 + printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno); 488 488 + goto vde_cleanup_sv; 489 489 + } 490 490 + snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]); 491 491 + 492 492 + run_helper(NULL, NULL, argv); 493 493 + 494 494 + close(sv[1]); 495 495 + 496 496 + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); 497 497 + if (result == NULL) { 498 498 + printk(UM_KERN_ERR "fd open: allocation failed"); 499 499 + goto vde_cleanup; 500 500 + } 501 501 + 502 502 + result->rx_fd = sv[0]; 503 503 + result->tx_fd = sv[0]; 504 504 + result->remote_addr_size = 0; 505 505 + result->remote_addr = NULL; 506 506 + return result; 507 507 + 508 508 + vde_cleanup_sv: 509 509 + close(sv[1]); 510 510 + vde_cleanup: 511 511 + close(sv[0]); 512 512 + return NULL; 513 513 + } 514 514 + 440 515 static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) 441 516 { 442 517 int rxfd = -1, txfd = -1; ··· 754 673 return user_init_unix_fds(parsed, ID_BESS); 755 674 if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0) 756 675 return user_init_fd_fds(parsed); 676 676 + if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0) 677 677 + return user_init_vde_fds(parsed); 757 678 return NULL; 758 679 } 759 680

-7

arch/um/include/asm/pgtable.h

reviewed

··· 359 359 return pte; 360 360 } 361 361 362 362 - /* Clear a kernel PTE and flush it from the TLB */ 363 363 - #define kpte_clear_flush(ptep, vaddr) \ 364 364 - do { \ 365 365 - pte_clear(&init_mm, (vaddr), (ptep)); \ 366 366 - __flush_tlb_one((vaddr)); \ 367 367 - } while (0) 368 368 - 369 362 #endif

+5 -15

arch/um/include/asm/processor-generic.h

reviewed

··· 28 28 struct arch_thread arch; 29 29 jmp_buf switch_buf; 30 30 struct { 31 31 - int op; 32 32 - union { 33 33 - struct { 34 34 - int pid; 35 35 - } fork, exec; 36 36 - struct { 37 37 - int (*proc)(void *); 38 38 - void *arg; 39 39 - } thread; 40 40 - struct { 41 41 - void (*proc)(void *); 42 42 - void *arg; 43 43 - } cb; 44 44 - } u; 31 31 + struct { 32 32 + int (*proc)(void *); 33 33 + void *arg; 34 34 + } thread; 45 35 } request; 46 36 }; 47 37 ··· 41 51 .fault_addr = NULL, \ 42 52 .prev_sched = NULL, \ 43 53 .arch = INIT_ARCH_THREAD, \ 44 44 - .request = { 0 } \ 54 54 + .request = { } \ 45 55 } 46 56 47 57 /*

-8

arch/um/include/asm/sysrq.h

reviewed

··· 1 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 2 - #ifndef __UM_SYSRQ_H 3 3 - #define __UM_SYSRQ_H 4 4 - 5 5 - struct task_struct; 6 6 - extern void show_trace(struct task_struct* task, unsigned long *stack); 7 7 - 8 8 - #endif

+1 -4

arch/um/include/shared/skas/mm_id.h

reviewed

··· 7 7 #define __MM_ID_H 8 8 9 9 struct mm_id { 10 10 - union { 11 11 - int mm_fd; 12 12 - int pid; 13 13 - } u; 10 10 + int pid; 14 11 unsigned long stack; 15 12 int syscall_data_len; 16 13 };

-2

arch/um/include/shared/skas/skas.h

reviewed

··· 10 10 11 11 extern int userspace_pid[]; 12 12 13 13 - extern int user_thread(unsigned long stack, int flags); 14 13 extern void new_thread_handler(void); 15 14 extern void handle_syscall(struct uml_pt_regs *regs); 16 16 - extern long execute_syscall_skas(void *r); 17 15 extern unsigned long current_stub_stack(void); 18 16 extern struct mm_id *current_mm_id(void); 19 17 extern void current_mm_sync(void);

-3

arch/um/kernel/exec.c

reviewed

··· 35 35 PT_REGS_IP(regs) = eip; 36 36 PT_REGS_SP(regs) = esp; 37 37 clear_thread_flag(TIF_SINGLESTEP); 38 38 - #ifdef SUBARCH_EXECVE1 39 39 - SUBARCH_EXECVE1(regs->regs); 40 40 - #endif 41 38 } 42 39 EXPORT_SYMBOL(start_thread);

+4 -4

arch/um/kernel/process.c

reviewed

··· 109 109 schedule_tail(current->thread.prev_sched); 110 110 current->thread.prev_sched = NULL; 111 111 112 112 - fn = current->thread.request.u.thread.proc; 113 113 - arg = current->thread.request.u.thread.arg; 112 112 + fn = current->thread.request.thread.proc; 113 113 + arg = current->thread.request.thread.arg; 114 114 115 115 /* 116 116 * callback returns only if the kernel thread execs a process ··· 158 158 arch_copy_thread(&current->thread.arch, &p->thread.arch); 159 159 } else { 160 160 get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); 161 161 - p->thread.request.u.thread.proc = args->fn; 162 162 - p->thread.request.u.thread.arg = args->fn_arg; 161 161 + p->thread.request.thread.proc = args->fn; 162 162 + p->thread.request.thread.arg = args->fn_arg; 163 163 handler = new_thread_handler; 164 164 } 165 165

+1 -1

arch/um/kernel/reboot.c

reviewed

··· 29 29 t = find_lock_task_mm(p); 30 30 if (!t) 31 31 continue; 32 32 - pid = t->mm->context.id.u.pid; 32 32 + pid = t->mm->context.id.pid; 33 33 task_unlock(t); 34 34 os_kill_ptraced_process(pid, 1); 35 35 }

+6 -6

arch/um/kernel/skas/mmu.c

reviewed

··· 32 32 new_id->stack = stack; 33 33 34 34 block_signals_trace(); 35 35 - new_id->u.pid = start_userspace(stack); 35 35 + new_id->pid = start_userspace(stack); 36 36 unblock_signals_trace(); 37 37 38 38 - if (new_id->u.pid < 0) { 39 39 - ret = new_id->u.pid; 38 38 + if (new_id->pid < 0) { 39 39 + ret = new_id->pid; 40 40 goto out_free; 41 41 } 42 42 ··· 83 83 * whole UML suddenly dying. Also, cover negative and 84 84 * 1 cases, since they shouldn't happen either. 85 85 */ 86 86 - if (mmu->id.u.pid < 2) { 86 86 + if (mmu->id.pid < 2) { 87 87 printk(KERN_ERR "corrupt mm_context - pid = %d\n", 88 88 - mmu->id.u.pid); 88 88 + mmu->id.pid); 89 89 return; 90 90 } 91 91 - os_kill_ptraced_process(mmu->id.u.pid, 1); 91 91 + os_kill_ptraced_process(mmu->id.pid, 1); 92 92 93 93 free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); 94 94 }

+2 -2

arch/um/kernel/skas/process.c

reviewed

··· 39 39 40 40 init_new_thread_signals(); 41 41 42 42 - init_task.thread.request.u.thread.proc = start_kernel_proc; 43 43 - init_task.thread.request.u.thread.arg = NULL; 42 42 + init_task.thread.request.thread.proc = start_kernel_proc; 43 43 + init_task.thread.request.thread.arg = NULL; 44 44 return start_idle_thread(task_stack_page(&init_task), 45 45 &init_task.thread.switch_buf); 46 46 }

+20 -14

arch/um/kernel/skas/syscall.c

reviewed

··· 12 12 #include <sysdep/syscalls.h> 13 13 #include <linux/time-internal.h> 14 14 #include <asm/unistd.h> 15 15 + #include <asm/delay.h> 15 16 16 17 void handle_syscall(struct uml_pt_regs *r) 17 18 { 18 19 struct pt_regs *regs = container_of(r, struct pt_regs, regs); 19 20 int syscall; 20 20 - 21 21 - /* 22 22 - * If we have infinite CPU resources, then make every syscall also a 23 23 - * preemption point, since we don't have any other preemption in this 24 24 - * case, and kernel threads would basically never run until userspace 25 25 - * went to sleep, even if said userspace interacts with the kernel in 26 26 - * various ways. 27 27 - */ 28 28 - if (time_travel_mode == TT_MODE_INFCPU || 29 29 - time_travel_mode == TT_MODE_EXTERNAL) 30 30 - schedule(); 31 21 32 22 /* Initialize the syscall number and default return value. */ 33 23 UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); ··· 31 41 goto out; 32 42 33 43 syscall = UPT_SYSCALL_NR(r); 34 34 - if (syscall >= 0 && syscall < __NR_syscalls) 35 35 - PT_REGS_SET_SYSCALL_RETURN(regs, 36 36 - EXECUTE_SYSCALL(syscall, regs)); 44 44 + if (syscall >= 0 && syscall < __NR_syscalls) { 45 45 + unsigned long ret = EXECUTE_SYSCALL(syscall, regs); 46 46 + 47 47 + PT_REGS_SET_SYSCALL_RETURN(regs, ret); 48 48 + 49 49 + /* 50 50 + * An error value here can be some form of -ERESTARTSYS 51 51 + * and then we'd just loop. Make any error syscalls take 52 52 + * some time, so that it won't just loop if something is 53 53 + * not ready, and hopefully other things will make some 54 54 + * progress. 55 55 + */ 56 56 + if (IS_ERR_VALUE(ret) && 57 57 + (time_travel_mode == TT_MODE_INFCPU || 58 58 + time_travel_mode == TT_MODE_EXTERNAL)) { 59 59 + um_udelay(1); 60 60 + schedule(); 61 61 + } 62 62 + } 37 63 38 64 out: 39 65 syscall_trace_leave(regs);

-1

arch/um/kernel/sysrq.c

reviewed

··· 11 11 #include <linux/sched/debug.h> 12 12 #include <linux/sched/task_stack.h> 13 13 14 14 - #include <asm/sysrq.h> 15 14 #include <asm/stacktrace.h> 16 15 #include <os.h> 17 16

+1 -1

arch/um/kernel/time.c

reviewed

··· 839 839 if (get_current()->mm != NULL) 840 840 { 841 841 /* userspace - relay signal, results in correct userspace timers */ 842 842 - os_alarm_process(get_current()->mm->context.id.u.pid); 842 842 + os_alarm_process(get_current()->mm->context.id.pid); 843 843 } 844 844 845 845 (*timer_clockevent.event_handler)(&timer_clockevent);

+5 -9

arch/um/kernel/tlb.c

reviewed

··· 82 82 (x ? UM_PROT_EXEC : 0)); 83 83 if (pte_newpage(*pte)) { 84 84 if (pte_present(*pte)) { 85 85 - if (pte_newpage(*pte)) { 86 86 - __u64 offset; 87 87 - unsigned long phys = 88 88 - pte_val(*pte) & PAGE_MASK; 89 89 - int fd = phys_mapping(phys, &offset); 85 85 + __u64 offset; 86 86 + unsigned long phys = pte_val(*pte) & PAGE_MASK; 87 87 + int fd = phys_mapping(phys, &offset); 90 88 91 91 - ret = ops->mmap(ops->mm_idp, addr, 92 92 - PAGE_SIZE, prot, fd, 93 93 - offset); 94 94 - } 89 89 + ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, 90 90 + prot, fd, offset); 95 91 } else 96 92 ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); 97 93 } else if (pte_newprot(*pte))

+6 -2

arch/um/os-Linux/file.c

reviewed

··· 528 528 ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, 529 529 void *data, size_t data_len) 530 530 { 531 531 - char buf[CMSG_SPACE(sizeof(*fds) * n_fds)]; 531 531 + #define MAX_RCV_FDS 2 532 532 + char buf[CMSG_SPACE(sizeof(*fds) * MAX_RCV_FDS)]; 532 533 struct cmsghdr *cmsg; 533 534 struct iovec iov = { 534 535 .iov_base = data, ··· 539 538 .msg_iov = &iov, 540 539 .msg_iovlen = 1, 541 540 .msg_control = buf, 542 542 - .msg_controllen = sizeof(buf), 541 541 + .msg_controllen = CMSG_SPACE(sizeof(*fds) * n_fds), 543 542 }; 544 543 int n; 544 544 + 545 545 + if (n_fds > MAX_RCV_FDS) 546 546 + return -EINVAL; 545 547 546 548 n = recvmsg(fd, &msg, 0); 547 549 if (n < 0)

+1 -1

arch/um/os-Linux/skas/mem.c

reviewed

··· 78 78 { 79 79 struct stub_data *proc_data = (void *)mm_idp->stack; 80 80 int n, i; 81 81 - int err, pid = mm_idp->u.pid; 81 81 + int err, pid = mm_idp->pid; 82 82 83 83 n = ptrace_setregs(pid, syscall_regs); 84 84 if (n < 0) {

+1 -1

arch/um/os-Linux/skas/process.c

reviewed

··· 588 588 589 589 void __switch_mm(struct mm_id *mm_idp) 590 590 { 591 591 - userspace_pid[0] = mm_idp->u.pid; 591 591 + userspace_pid[0] = mm_idp->pid; 592 592 }

-1

arch/x86/um/sysrq_32.c

reviewed

··· 9 9 #include <linux/sched/debug.h> 10 10 #include <linux/kallsyms.h> 11 11 #include <asm/ptrace.h> 12 12 - #include <asm/sysrq.h> 13 12 14 13 /* This is declared by <linux/sched.h> */ 15 14 void show_regs(struct pt_regs *regs)

-1

arch/x86/um/sysrq_64.c

reviewed

··· 12 12 #include <linux/utsname.h> 13 13 #include <asm/current.h> 14 14 #include <asm/ptrace.h> 15 15 - #include <asm/sysrq.h> 16 15 17 16 void show_regs(struct pt_regs *regs) 18 17 {