Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux

Pull turbostat tool fixes from Len Brown:
"Just one minor kernel dependency in this batch -- added a #define to
msr-index.h"

* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
tools/power turbostat: update version number to 4.7
tools/power turbostat: allow running without cpu0
tools/power turbostat: correctly decode of ENERGY_PERFORMANCE_BIAS
tools/power turbostat: enable turbostat to support Knights Landing (KNL)
tools/power turbostat: correctly display more than 2 threads/core

+185 -40
+1
arch/x86/include/uapi/asm/msr-index.h
··· 140 140 #define MSR_CORE_C3_RESIDENCY 0x000003fc 141 141 #define MSR_CORE_C6_RESIDENCY 0x000003fd 142 142 #define MSR_CORE_C7_RESIDENCY 0x000003fe 143 + #define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff 143 144 #define MSR_PKG_C2_RESIDENCY 0x0000060d 144 145 #define MSR_PKG_C8_RESIDENCY 0x00000630 145 146 #define MSR_PKG_C9_RESIDENCY 0x00000631
+184 -40
tools/power/x86/turbostat/turbostat.c
··· 52 52 unsigned int skip_c1; 53 53 unsigned int do_nhm_cstates; 54 54 unsigned int do_snb_cstates; 55 + unsigned int do_knl_cstates; 55 56 unsigned int do_pc2; 56 57 unsigned int do_pc3; 57 58 unsigned int do_pc6; ··· 92 91 unsigned int do_ring_perf_limit_reasons; 93 92 unsigned int crystal_hz; 94 93 unsigned long long tsc_hz; 94 + int base_cpu; 95 95 96 96 #define RAPL_PKG (1 << 0) 97 97 /* 0x610 MSR_PKG_POWER_LIMIT */ ··· 318 316 319 317 if (do_nhm_cstates) 320 318 outp += sprintf(outp, " CPU%%c1"); 321 - if (do_nhm_cstates && !do_slm_cstates) 319 + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) 322 320 outp += sprintf(outp, " CPU%%c3"); 323 321 if (do_nhm_cstates) 324 322 outp += sprintf(outp, " CPU%%c6"); ··· 548 546 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 549 547 goto done; 550 548 551 - if (do_nhm_cstates && !do_slm_cstates) 549 + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) 552 550 outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); 553 551 if (do_nhm_cstates) 554 552 outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); ··· 1020 1018 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 1021 1019 return 0; 1022 1020 1023 - if (do_nhm_cstates && !do_slm_cstates) { 1021 + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { 1024 1022 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) 1025 1023 return -6; 1026 1024 } 1027 1025 1028 - if (do_nhm_cstates) { 1026 + if (do_nhm_cstates && !do_knl_cstates) { 1029 1027 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) 1028 + return -7; 1029 + } else if (do_knl_cstates) { 1030 + if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) 1030 1031 return -7; 1031 1032 } 1032 1033 ··· 1155 1150 unsigned long long msr; 1156 1151 unsigned int ratio; 1157 1152 1158 - get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); 1153 + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); 1159 1154 1160 1155 fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); 1161 1156 ··· 1167 1162 fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", 1168 1163 ratio, bclk, ratio * bclk); 1169 1164 1170 - get_msr(0, MSR_IA32_POWER_CTL, &msr); 1165 + get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 1171 1166 fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 1172 1167 msr, msr & 0x2 ? "EN" : "DIS"); 1173 1168 ··· 1180 1175 unsigned long long msr; 1181 1176 unsigned int ratio; 1182 1177 1183 - get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr); 1178 + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 1184 1179 1185 1180 fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); 1186 1181 ··· 1202 1197 unsigned long long msr; 1203 1198 unsigned int ratio; 1204 1199 1205 - get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr); 1200 + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 1206 1201 1207 1202 fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); 1208 1203 ··· 1254 1249 unsigned long long msr; 1255 1250 unsigned int ratio; 1256 1251 1257 - get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr); 1252 + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 1258 1253 1259 1254 fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); 1260 1255 ··· 1301 1296 } 1302 1297 1303 1298 static void 1299 + dump_knl_turbo_ratio_limits(void) 1300 + { 1301 + int cores; 1302 + unsigned int ratio; 1303 + unsigned long long msr; 1304 + int delta_cores; 1305 + int delta_ratio; 1306 + int i; 1307 + 1308 + get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); 1309 + 1310 + fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", 1311 + msr); 1312 + 1313 + /** 1314 + * Turbo encoding in KNL is as follows: 1315 + * [7:0] -- Base value of number of active cores of bucket 1. 1316 + * [15:8] -- Base value of freq ratio of bucket 1. 1317 + * [20:16] -- +ve delta of number of active cores of bucket 2. 1318 + * i.e. active cores of bucket 2 = 1319 + * active cores of bucket 1 + delta 1320 + * [23:21] -- Negative delta of freq ratio of bucket 2. 1321 + * i.e. freq ratio of bucket 2 = 1322 + * freq ratio of bucket 1 - delta 1323 + * [28:24]-- +ve delta of number of active cores of bucket 3. 1324 + * [31:29]-- -ve delta of freq ratio of bucket 3. 1325 + * [36:32]-- +ve delta of number of active cores of bucket 4. 1326 + * [39:37]-- -ve delta of freq ratio of bucket 4. 1327 + * [44:40]-- +ve delta of number of active cores of bucket 5. 1328 + * [47:45]-- -ve delta of freq ratio of bucket 5. 1329 + * [52:48]-- +ve delta of number of active cores of bucket 6. 1330 + * [55:53]-- -ve delta of freq ratio of bucket 6. 1331 + * [60:56]-- +ve delta of number of active cores of bucket 7. 1332 + * [63:61]-- -ve delta of freq ratio of bucket 7. 1333 + */ 1334 + cores = msr & 0xFF; 1335 + ratio = (msr >> 8) && 0xFF; 1336 + if (ratio > 0) 1337 + fprintf(stderr, 1338 + "%d * %.0f = %.0f MHz max turbo %d active cores\n", 1339 + ratio, bclk, ratio * bclk, cores); 1340 + 1341 + for (i = 16; i < 64; i = i + 8) { 1342 + delta_cores = (msr >> i) & 0x1F; 1343 + delta_ratio = (msr >> (i + 5)) && 0x7; 1344 + if (!delta_cores || !delta_ratio) 1345 + return; 1346 + cores = cores + delta_cores; 1347 + ratio = ratio - delta_ratio; 1348 + 1349 + /** -ve ratios will make successive ratio calculations 1350 + * negative. Hence return instead of carrying on. 1351 + */ 1352 + if (ratio > 0) 1353 + fprintf(stderr, 1354 + "%d * %.0f = %.0f MHz max turbo %d active cores\n", 1355 + ratio, bclk, ratio * bclk, cores); 1356 + } 1357 + } 1358 + 1359 + static void 1304 1360 dump_nhm_cst_cfg(void) 1305 1361 { 1306 1362 unsigned long long msr; 1307 1363 1308 - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); 1364 + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); 1309 1365 1310 1366 #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) 1311 1367 #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) ··· 1447 1381 } 1448 1382 1449 1383 /* 1450 - * cpu_is_first_sibling_in_core(cpu) 1451 - * return 1 if given CPU is 1st HT sibling in the core 1384 + * get_cpu_position_in_core(cpu) 1385 + * return the position of the CPU among its HT siblings in the core 1386 + * return -1 if the sibling is not in list 1452 1387 */ 1453 - int cpu_is_first_sibling_in_core(int cpu) 1388 + int get_cpu_position_in_core(int cpu) 1454 1389 { 1455 - return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 1390 + char path[64]; 1391 + FILE *filep; 1392 + int this_cpu; 1393 + char character; 1394 + int i; 1395 + 1396 + sprintf(path, 1397 + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", 1398 + cpu); 1399 + filep = fopen(path, "r"); 1400 + if (filep == NULL) { 1401 + perror(path); 1402 + exit(1); 1403 + } 1404 + 1405 + for (i = 0; i < topo.num_threads_per_core; i++) { 1406 + fscanf(filep, "%d", &this_cpu); 1407 + if (this_cpu == cpu) { 1408 + fclose(filep); 1409 + return i; 1410 + } 1411 + 1412 + /* Account for no separator after last thread*/ 1413 + if (i != (topo.num_threads_per_core - 1)) 1414 + fscanf(filep, "%c", &character); 1415 + } 1416 + 1417 + fclose(filep); 1418 + return -1; 1456 1419 } 1457 1420 1458 1421 /* ··· 1507 1412 { 1508 1413 char path[80]; 1509 1414 FILE *filep; 1510 - int sib1, sib2; 1511 - int matches; 1415 + int sib1; 1416 + int matches = 0; 1512 1417 char character; 1418 + char str[100]; 1419 + char *ch; 1513 1420 1514 1421 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 1515 1422 filep = fopen_or_die(path, "r"); 1423 + 1516 1424 /* 1517 1425 * file format: 1518 - * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) 1519 - * otherwinse 1 sibling (self). 1426 + * A ',' separated or '-' separated set of numbers 1427 + * (eg 1-2 or 1,3,4,5) 1520 1428 */ 1521 - matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); 1429 + fscanf(filep, "%d%c\n", &sib1, &character); 1430 + fseek(filep, 0, SEEK_SET); 1431 + fgets(str, 100, filep); 1432 + ch = strchr(str, character); 1433 + while (ch != NULL) { 1434 + matches++; 1435 + ch = strchr(ch+1, character); 1436 + } 1522 1437 1523 1438 fclose(filep); 1524 - 1525 - if (matches == 3) 1526 - return 2; 1527 - else 1528 - return 1; 1439 + return matches+1; 1529 1440 } 1530 1441 1531 1442 /* ··· 1695 1594 void check_dev_msr() 1696 1595 { 1697 1596 struct stat sb; 1597 + char pathname[32]; 1698 1598 1699 - if (stat("/dev/cpu/0/msr", &sb)) 1599 + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 1600 + if (stat(pathname, &sb)) 1700 1601 if (system("/sbin/modprobe msr > /dev/null 2>&1")) 1701 1602 err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); 1702 1603 } ··· 1711 1608 cap_user_data_t cap_data = &cap_data_data; 1712 1609 extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); 1713 1610 int do_exit = 0; 1611 + char pathname[32]; 1714 1612 1715 1613 /* check for CAP_SYS_RAWIO */ 1716 1614 cap_header->pid = getpid(); ··· 1726 1622 } 1727 1623 1728 1624 /* test file permissions */ 1729 - if (euidaccess("/dev/cpu/0/msr", R_OK)) { 1625 + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 1626 + if (euidaccess(pathname, R_OK)) { 1730 1627 do_exit++; 1731 1628 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); 1732 1629 } ··· 1809 1704 default: 1810 1705 return 0; 1811 1706 } 1812 - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); 1707 + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); 1813 1708 1814 1709 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; 1815 1710 ··· 1858 1753 } 1859 1754 } 1860 1755 1756 + int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) 1757 + { 1758 + if (!genuine_intel) 1759 + return 0; 1760 + 1761 + if (family != 6) 1762 + return 0; 1763 + 1764 + switch (model) { 1765 + case 0x57: /* Knights Landing */ 1766 + return 1; 1767 + default: 1768 + return 0; 1769 + } 1770 + } 1861 1771 static void 1862 1772 dump_cstate_pstate_config_info(family, model) 1863 1773 { ··· 1889 1769 1890 1770 if (has_nhm_turbo_ratio_limit(family, model)) 1891 1771 dump_nhm_turbo_ratio_limits(); 1772 + 1773 + if (has_knl_turbo_ratio_limit(family, model)) 1774 + dump_knl_turbo_ratio_limits(); 1892 1775 1893 1776 dump_nhm_cst_cfg(); 1894 1777 } ··· 1924 1801 if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) 1925 1802 return 0; 1926 1803 1927 - switch (msr & 0x7) { 1804 + switch (msr & 0xF) { 1928 1805 case ENERGY_PERF_BIAS_PERFORMANCE: 1929 1806 epb_string = "performance"; 1930 1807 break; ··· 2048 1925 unsigned long long msr; 2049 1926 2050 1927 if (do_rapl & RAPL_PKG_POWER_INFO) 2051 - if (!get_msr(0, MSR_PKG_POWER_INFO, &msr)) 1928 + if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) 2052 1929 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 2053 1930 2054 1931 switch (model) { ··· 2073 1950 case 0x3F: /* HSX */ 2074 1951 case 0x4F: /* BDX */ 2075 1952 case 0x56: /* BDX-DE */ 1953 + case 0x57: /* KNL */ 2076 1954 return (rapl_dram_energy_units = 15.3 / 1000000); 2077 1955 default: 2078 1956 return (rapl_energy_units); ··· 2115 1991 case 0x3F: /* HSX */ 2116 1992 case 0x4F: /* BDX */ 2117 1993 case 0x56: /* BDX-DE */ 1994 + case 0x57: /* KNL */ 2118 1995 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; 2119 1996 break; 2120 1997 case 0x2D: ··· 2131 2006 } 2132 2007 2133 2008 /* units on package 0, verify later other packages match */ 2134 - if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) 2009 + if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) 2135 2010 return; 2136 2011 2137 2012 rapl_power_units = 1.0 / (1 << (msr & 0xF)); ··· 2456 2331 return 0; 2457 2332 } 2458 2333 2334 + int is_knl(unsigned int family, unsigned int model) 2335 + { 2336 + if (!genuine_intel) 2337 + return 0; 2338 + switch (model) { 2339 + case 0x57: /* KNL */ 2340 + return 1; 2341 + } 2342 + return 0; 2343 + } 2344 + 2459 2345 #define SLM_BCLK_FREQS 5 2460 2346 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; 2461 2347 ··· 2476 2340 unsigned int i; 2477 2341 double freq; 2478 2342 2479 - if (get_msr(0, MSR_FSB_FREQ, &msr)) 2343 + if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) 2480 2344 fprintf(stderr, "SLM BCLK: unknown\n"); 2481 2345 2482 2346 i = msr & 0xf; ··· 2544 2408 if (!do_nhm_platform_info) 2545 2409 goto guess; 2546 2410 2547 - if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) 2411 + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 2548 2412 goto guess; 2549 2413 2550 2414 target_c_local = (msr >> 16) & 0xFF; ··· 2677 2541 do_c8_c9_c10 = has_hsw_msrs(family, model); 2678 2542 do_skl_residency = has_skl_msrs(family, model); 2679 2543 do_slm_cstates = is_slm(family, model); 2544 + do_knl_cstates = is_knl(family, model); 2680 2545 bclk = discover_bclk(family, model); 2681 2546 2682 2547 rapl_probe(family, model); ··· 2892 2755 2893 2756 my_package_id = get_physical_package_id(cpu_id); 2894 2757 my_core_id = get_core_id(cpu_id); 2895 - 2896 - if (cpu_is_first_sibling_in_core(cpu_id)) { 2897 - my_thread_id = 0; 2758 + my_thread_id = get_cpu_position_in_core(cpu_id); 2759 + if (!my_thread_id) 2898 2760 topo.num_cores++; 2899 - } else { 2900 - my_thread_id = 1; 2901 - } 2902 2761 2903 2762 init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 2904 2763 init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); ··· 2918 2785 for_all_proc_cpus(initialize_counters); 2919 2786 } 2920 2787 2788 + void set_base_cpu(void) 2789 + { 2790 + base_cpu = sched_getcpu(); 2791 + if (base_cpu < 0) 2792 + err(-ENODEV, "No valid cpus found"); 2793 + 2794 + if (debug > 1) 2795 + fprintf(stderr, "base_cpu = %d\n", base_cpu); 2796 + } 2797 + 2921 2798 void turbostat_init() 2922 2799 { 2800 + setup_all_buffers(); 2801 + set_base_cpu(); 2923 2802 check_dev_msr(); 2924 2803 check_permissions(); 2925 2804 process_cpuid(); 2926 2805 2927 - setup_all_buffers(); 2928 2806 2929 2807 if (debug) 2930 2808 for_all_cpus(print_epb, ODD_COUNTERS); ··· 3014 2870 } 3015 2871 3016 2872 void print_version() { 3017 - fprintf(stderr, "turbostat version 4.5 2 Apr, 2015" 2873 + fprintf(stderr, "turbostat version 4.7 27-May, 2015" 3018 2874 " - Len Brown <lenb@kernel.org>\n"); 3019 2875 } 3020 2876