Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'cxl-fixes-for-5.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull CXL memory class fixes from Dan Williams:
"A collection of fixes for the CXL memory class driver introduced in
this release cycle.

The driver was primarily developed on a work-in-progress QEMU
emulation of the interface, and we have since found a couple of places
where it hid spec compliance bugs in the driver, or had a spec
implementation bug itself.

The biggest change here is replacing a percpu_ref with an rwsem to
clean up a couple of bugs in the error unwind path during ioctl device
init. Lastly, there were some minor cleanups to not export the
power-management sysfs-ABI for the ioctl device, use the proper sysfs
helper for emitting values, and prevent subtle bugs as new
administration commands are added to the supported list.

The bulk of it has appeared in -next, save for the top commit, which was
found today and validated on a fixed-up QEMU model.

Summary:

- Fix support for CXL memory devices with registers offset from the
BAR base.

- Fix the reporting of device capacity.

- Fix the driver commands list definition to be disconnected from the
UAPI command list.

- Replace percpu_ref with rwsem to fix initialization error path.

- Fix leaks in the driver initialization error path.

- Drop the power/ directory from CXL device sysfs.

- Use the recommended sysfs helper for attribute 'show'
implementations"

* tag 'cxl-fixes-for-5.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
cxl/mem: Fix memory device capacity probing
cxl/mem: Fix register block offset calculation
cxl/mem: Force array size of mem_commands[] to CXL_MEM_COMMAND_ID_MAX
cxl/mem: Disable cxl device power management
cxl/mem: Do not rely on device_add() side effects for dev_set_name() failures
cxl/mem: Fix synchronization mechanism for device removal vs ioctl operations
cxl/mem: Use sysfs_emit() for attribute show routines

+91 -65
+91 -65
drivers/cxl/mem.c
··· 4 4 #include <linux/security.h> 5 5 #include <linux/debugfs.h> 6 6 #include <linux/module.h> 7 + #include <linux/sizes.h> 7 8 #include <linux/mutex.h> 8 9 #include <linux/cdev.h> 9 10 #include <linux/idr.h> ··· 97 96 * @dev: driver core device object 98 97 * @cdev: char dev core object for ioctl operations 99 98 * @cxlm: pointer to the parent device driver data 100 - * @ops_active: active user of @cxlm in ops handlers 101 - * @ops_dead: completion when all @cxlm ops users have exited 102 99 * @id: id number of this memdev instance. 103 100 */ 104 101 struct cxl_memdev { 105 102 struct device dev; 106 103 struct cdev cdev; 107 104 struct cxl_mem *cxlm; 108 - struct percpu_ref ops_active; 109 - struct completion ops_dead; 110 105 int id; 111 106 }; 112 107 113 108 static int cxl_mem_major; 114 109 static DEFINE_IDA(cxl_memdev_ida); 110 + static DECLARE_RWSEM(cxl_memdev_rwsem); 115 111 static struct dentry *cxl_debugfs; 116 112 static bool cxl_raw_allow_all; 117 113 ··· 167 169 * table will be validated against the user's input. For example, if size_in is 168 170 * 0, and the user passed in 1, it is an error. 
169 171 */ 170 - static struct cxl_mem_command mem_commands[] = { 172 + static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = { 171 173 CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE), 172 174 #ifdef CONFIG_CXL_MEM_RAW_COMMANDS 173 175 CXL_CMD(RAW, ~0, ~0, 0), ··· 774 776 static long cxl_memdev_ioctl(struct file *file, unsigned int cmd, 775 777 unsigned long arg) 776 778 { 777 - struct cxl_memdev *cxlmd; 778 - struct inode *inode; 779 - int rc = -ENOTTY; 779 + struct cxl_memdev *cxlmd = file->private_data; 780 + int rc = -ENXIO; 780 781 781 - inode = file_inode(file); 782 - cxlmd = container_of(inode->i_cdev, typeof(*cxlmd), cdev); 783 - 784 - if (!percpu_ref_tryget_live(&cxlmd->ops_active)) 785 - return -ENXIO; 786 - 787 - rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); 788 - 789 - percpu_ref_put(&cxlmd->ops_active); 782 + down_read(&cxl_memdev_rwsem); 783 + if (cxlmd->cxlm) 784 + rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); 785 + up_read(&cxl_memdev_rwsem); 790 786 791 787 return rc; 788 + } 789 + 790 + static int cxl_memdev_open(struct inode *inode, struct file *file) 791 + { 792 + struct cxl_memdev *cxlmd = 793 + container_of(inode->i_cdev, typeof(*cxlmd), cdev); 794 + 795 + get_device(&cxlmd->dev); 796 + file->private_data = cxlmd; 797 + 798 + return 0; 799 + } 800 + 801 + static int cxl_memdev_release_file(struct inode *inode, struct file *file) 802 + { 803 + struct cxl_memdev *cxlmd = 804 + container_of(inode->i_cdev, typeof(*cxlmd), cdev); 805 + 806 + put_device(&cxlmd->dev); 807 + 808 + return 0; 792 809 } 793 810 794 811 static const struct file_operations cxl_memdev_fops = { 795 812 .owner = THIS_MODULE, 796 813 .unlocked_ioctl = cxl_memdev_ioctl, 814 + .open = cxl_memdev_open, 815 + .release = cxl_memdev_release_file, 797 816 .compat_ioctl = compat_ptr_ioctl, 798 817 .llseek = noop_llseek, 799 818 }; ··· 999 984 return NULL; 1000 985 } 1001 986 1002 - offset = ((u64)reg_hi << 32) | FIELD_GET(CXL_REGLOC_ADDR_MASK, reg_lo); 987 + offset = 
((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); 1003 988 bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); 1004 989 1005 990 /* Basic sanity check that BAR is big enough */ ··· 1064 1049 { 1065 1050 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 1066 1051 1067 - percpu_ref_exit(&cxlmd->ops_active); 1068 1052 ida_free(&cxl_memdev_ida, cxlmd->id); 1069 1053 kfree(cxlmd); 1070 1054 } ··· 1080 1066 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 1081 1067 struct cxl_mem *cxlm = cxlmd->cxlm; 1082 1068 1083 - return sprintf(buf, "%.16s\n", cxlm->firmware_version); 1069 + return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version); 1084 1070 } 1085 1071 static DEVICE_ATTR_RO(firmware_version); 1086 1072 ··· 1090 1076 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 1091 1077 struct cxl_mem *cxlm = cxlmd->cxlm; 1092 1078 1093 - return sprintf(buf, "%zu\n", cxlm->payload_size); 1079 + return sysfs_emit(buf, "%zu\n", cxlm->payload_size); 1094 1080 } 1095 1081 static DEVICE_ATTR_RO(payload_max); 1096 1082 ··· 1101 1087 struct cxl_mem *cxlm = cxlmd->cxlm; 1102 1088 unsigned long long len = range_len(&cxlm->ram_range); 1103 1089 1104 - return sprintf(buf, "%#llx\n", len); 1090 + return sysfs_emit(buf, "%#llx\n", len); 1105 1091 } 1106 1092 1107 1093 static struct device_attribute dev_attr_ram_size = ··· 1114 1100 struct cxl_mem *cxlm = cxlmd->cxlm; 1115 1101 unsigned long long len = range_len(&cxlm->pmem_range); 1116 1102 1117 - return sprintf(buf, "%#llx\n", len); 1103 + return sysfs_emit(buf, "%#llx\n", len); 1118 1104 } 1119 1105 1120 1106 static struct device_attribute dev_attr_pmem_size = ··· 1164 1150 .groups = cxl_memdev_attribute_groups, 1165 1151 }; 1166 1152 1167 - static void cxlmdev_unregister(void *_cxlmd) 1153 + static void cxl_memdev_shutdown(struct cxl_memdev *cxlmd) 1154 + { 1155 + down_write(&cxl_memdev_rwsem); 1156 + cxlmd->cxlm = NULL; 1157 + up_write(&cxl_memdev_rwsem); 1158 + } 1159 + 1160 + static void cxl_memdev_unregister(void *_cxlmd) 1168 1161 { 1169 
1162 struct cxl_memdev *cxlmd = _cxlmd; 1170 1163 struct device *dev = &cxlmd->dev; 1171 1164 1172 - percpu_ref_kill(&cxlmd->ops_active); 1173 1165 cdev_device_del(&cxlmd->cdev, dev); 1174 - wait_for_completion(&cxlmd->ops_dead); 1175 - cxlmd->cxlm = NULL; 1166 + cxl_memdev_shutdown(cxlmd); 1176 1167 put_device(dev); 1177 1168 } 1178 1169 1179 - static void cxlmdev_ops_active_release(struct percpu_ref *ref) 1180 - { 1181 - struct cxl_memdev *cxlmd = 1182 - container_of(ref, typeof(*cxlmd), ops_active); 1183 - 1184 - complete(&cxlmd->ops_dead); 1185 - } 1186 - 1187 - static int cxl_mem_add_memdev(struct cxl_mem *cxlm) 1170 + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm) 1188 1171 { 1189 1172 struct pci_dev *pdev = cxlm->pdev; 1190 1173 struct cxl_memdev *cxlmd; ··· 1191 1180 1192 1181 cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); 1193 1182 if (!cxlmd) 1194 - return -ENOMEM; 1195 - init_completion(&cxlmd->ops_dead); 1196 - 1197 - /* 1198 - * @cxlm is deallocated when the driver unbinds so operations 1199 - * that are using it need to hold a live reference. 
1200 - */ 1201 - cxlmd->cxlm = cxlm; 1202 - rc = percpu_ref_init(&cxlmd->ops_active, cxlmdev_ops_active_release, 0, 1203 - GFP_KERNEL); 1204 - if (rc) 1205 - goto err_ref; 1183 + return ERR_PTR(-ENOMEM); 1206 1184 1207 1185 rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL); 1208 1186 if (rc < 0) 1209 - goto err_id; 1187 + goto err; 1210 1188 cxlmd->id = rc; 1211 1189 1212 1190 dev = &cxlmd->dev; ··· 1204 1204 dev->bus = &cxl_bus_type; 1205 1205 dev->devt = MKDEV(cxl_mem_major, cxlmd->id); 1206 1206 dev->type = &cxl_memdev_type; 1207 - dev_set_name(dev, "mem%d", cxlmd->id); 1207 + device_set_pm_not_required(dev); 1208 1208 1209 1209 cdev = &cxlmd->cdev; 1210 1210 cdev_init(cdev, &cxl_memdev_fops); 1211 + return cxlmd; 1211 1212 1213 + err: 1214 + kfree(cxlmd); 1215 + return ERR_PTR(rc); 1216 + } 1217 + 1218 + static int cxl_mem_add_memdev(struct cxl_mem *cxlm) 1219 + { 1220 + struct cxl_memdev *cxlmd; 1221 + struct device *dev; 1222 + struct cdev *cdev; 1223 + int rc; 1224 + 1225 + cxlmd = cxl_memdev_alloc(cxlm); 1226 + if (IS_ERR(cxlmd)) 1227 + return PTR_ERR(cxlmd); 1228 + 1229 + dev = &cxlmd->dev; 1230 + rc = dev_set_name(dev, "mem%d", cxlmd->id); 1231 + if (rc) 1232 + goto err; 1233 + 1234 + /* 1235 + * Activate ioctl operations, no cxl_memdev_rwsem manipulation 1236 + * needed as this is ordered with cdev_add() publishing the device. 1237 + */ 1238 + cxlmd->cxlm = cxlm; 1239 + 1240 + cdev = &cxlmd->cdev; 1212 1241 rc = cdev_device_add(cdev, dev); 1213 1242 if (rc) 1214 - goto err_add; 1243 + goto err; 1215 1244 1216 - return devm_add_action_or_reset(dev->parent, cxlmdev_unregister, cxlmd); 1245 + return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister, 1246 + cxlmd); 1217 1247 1218 - err_add: 1219 - ida_free(&cxl_memdev_ida, cxlmd->id); 1220 - err_id: 1248 + err: 1221 1249 /* 1222 - * Theoretically userspace could have already entered the fops, 1223 - * so flush ops_active. 
1250 + * The cdev was briefly live, shutdown any ioctl operations that 1251 + * saw that state. 1224 1252 */ 1225 - percpu_ref_kill(&cxlmd->ops_active); 1226 - wait_for_completion(&cxlmd->ops_dead); 1227 - percpu_ref_exit(&cxlmd->ops_active); 1228 - err_ref: 1229 - kfree(cxlmd); 1230 - 1253 + cxl_memdev_shutdown(cxlmd); 1254 + put_device(dev); 1231 1255 return rc; 1232 1256 } 1233 1257 ··· 1420 1396 */ 1421 1397 static int cxl_mem_identify(struct cxl_mem *cxlm) 1422 1398 { 1399 + /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ 1423 1400 struct cxl_mbox_identify { 1424 1401 char fw_revision[0x10]; 1425 1402 __le64 total_capacity; ··· 1449 1424 * For now, only the capacity is exported in sysfs 1450 1425 */ 1451 1426 cxlm->ram_range.start = 0; 1452 - cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) - 1; 1427 + cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1; 1453 1428 1454 1429 cxlm->pmem_range.start = 0; 1455 - cxlm->pmem_range.end = le64_to_cpu(id.persistent_capacity) - 1; 1430 + cxlm->pmem_range.end = 1431 + le64_to_cpu(id.persistent_capacity) * SZ_256M - 1; 1456 1432 1457 1433 memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision)); 1458 1434