Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amd/pm: correct mem_busy_percent display due to calculation errors

PMFW may return invalid values due to internal calculation errors,
so the kernel-mode driver (KMD) must validate and sanitize the returned
values to prevent issues caused by these firmware errors.

For example, the u16 values 0xfffe and 0xffff read as -2 and -1 when
reinterpreted as s16; they are treated as invalid and clamped to 0.

This applies to devices with CAB (Cache As Buffer) functionality.

Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4905
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Yang Wang and committed by
Alex Deucher
592713a8 95a599c8

+32 -15
+17
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
··· 2164 2164 smu_feature_list_clear_all(smu, SMU_FEATURE_LIST_ALLOWED); 2165 2165 } 2166 2166 2167 + /* 2168 + * smu_safe_u16_nn - Make u16 safe by filtering negative overflow errors 2169 + * @val: Input u16 value, may contain invalid negative overflows 2170 + * 2171 + * Convert u16 to non-negative value. Cast to s16 to detect negative values 2172 + * caused by calculation errors. Return 0 for negative errors, return 2173 + * original value if valid. 2174 + * 2175 + * Return: Valid u16 value or 0 2176 + */ 2177 + static inline u16 smu_safe_u16_nn(u16 val) 2178 + { 2179 + s16 tmp = (s16)val; 2180 + 2181 + return tmp < 0 ? 0 : val; 2182 + } 2183 + 2167 2184 #endif
+5 -5
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
··· 773 773 *value = metrics->AverageGfxclkFrequencyPreDs; 774 774 break; 775 775 case METRICS_AVERAGE_FCLK: 776 - if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD) 776 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD) 777 777 *value = metrics->AverageFclkFrequencyPostDs; 778 778 else 779 779 *value = metrics->AverageFclkFrequencyPreDs; 780 780 break; 781 781 case METRICS_AVERAGE_UCLK: 782 - if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD) 782 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD) 783 783 *value = metrics->AverageMemclkFrequencyPostDs; 784 784 else 785 785 *value = metrics->AverageMemclkFrequencyPreDs; ··· 800 800 *value = metrics->AverageGfxActivity; 801 801 break; 802 802 case METRICS_AVERAGE_MEMACTIVITY: 803 - *value = metrics->AverageUclkActivity; 803 + *value = smu_safe_u16_nn(metrics->AverageUclkActivity); 804 804 break; 805 805 case METRICS_AVERAGE_VCNACTIVITY: 806 806 *value = max(metrics->Vcn0ActivityPercentage, ··· 2085 2085 metrics->AvgTemperature[TEMP_VR_MEM1]); 2086 2086 2087 2087 gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; 2088 - gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; 2088 + gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity); 2089 2089 gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, 2090 2090 metrics->Vcn1ActivityPercentage); 2091 2091 ··· 2102 2102 else 2103 2103 gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; 2104 2104 2105 - if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD) 2105 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD) 2106 2106 gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; 2107 2107 else 2108 2108 gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs;
+5 -5
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
··· 783 783 *value = metrics->AverageGfxclkFrequencyPreDs; 784 784 break; 785 785 case METRICS_AVERAGE_FCLK: 786 - if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD) 786 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD) 787 787 *value = metrics->AverageFclkFrequencyPostDs; 788 788 else 789 789 *value = metrics->AverageFclkFrequencyPreDs; 790 790 break; 791 791 case METRICS_AVERAGE_UCLK: 792 - if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD) 792 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD) 793 793 *value = metrics->AverageMemclkFrequencyPostDs; 794 794 else 795 795 *value = metrics->AverageMemclkFrequencyPreDs; ··· 814 814 *value = metrics->AverageGfxActivity; 815 815 break; 816 816 case METRICS_AVERAGE_MEMACTIVITY: 817 - *value = metrics->AverageUclkActivity; 817 + *value = smu_safe_u16_nn(metrics->AverageUclkActivity); 818 818 break; 819 819 case METRICS_AVERAGE_SOCKETPOWER: 820 820 *value = metrics->AverageSocketPower << 8; ··· 2091 2091 metrics->AvgTemperature[TEMP_VR_MEM1]); 2092 2092 2093 2093 gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; 2094 - gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; 2094 + gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity); 2095 2095 gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, 2096 2096 metrics->Vcn1ActivityPercentage); 2097 2097 ··· 2104 2104 else 2105 2105 gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; 2106 2106 2107 - if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD) 2107 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD) 2108 2108 gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; 2109 2109 else 2110 2110 gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs;
+5 -5
drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
··· 661 661 *value = metrics->AverageGfxclkFrequencyPreDs; 662 662 break; 663 663 case METRICS_AVERAGE_FCLK: 664 - if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) 664 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD) 665 665 *value = metrics->AverageFclkFrequencyPostDs; 666 666 else 667 667 *value = metrics->AverageFclkFrequencyPreDs; 668 668 break; 669 669 case METRICS_AVERAGE_UCLK: 670 - if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) 670 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD) 671 671 *value = metrics->AverageMemclkFrequencyPostDs; 672 672 else 673 673 *value = metrics->AverageMemclkFrequencyPreDs; ··· 688 688 *value = metrics->AverageGfxActivity; 689 689 break; 690 690 case METRICS_AVERAGE_MEMACTIVITY: 691 - *value = metrics->AverageUclkActivity; 691 + *value = smu_safe_u16_nn(metrics->AverageUclkActivity); 692 692 break; 693 693 case METRICS_AVERAGE_VCNACTIVITY: 694 694 *value = max(metrics->AverageVcn0ActivityPercentage, ··· 2147 2147 metrics->AvgTemperature[TEMP_VR_MEM1]); 2148 2148 2149 2149 gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; 2150 - gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; 2150 + gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity); 2151 2151 gpu_metrics->average_mm_activity = max(metrics->AverageVcn0ActivityPercentage, 2152 2152 metrics->Vcn1ActivityPercentage); 2153 2153 ··· 2159 2159 else 2160 2160 gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; 2161 2161 2162 - if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) 2162 + if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD) 2163 2163 gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; 2164 2164 else 2165 2165 gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs;