Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: support finding RAS bad pages via old TA

Old versions of the RAS TA do not support converting an MCA address
stored in the EEPROM to a physical address (PA). Add support for finding
all bad pages in one memory row by PA when an old RAS TA is in use. This
approach is only suitable for NPS1 mode.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Tao Zhou and committed by
Alex Deucher
07dd49e1 b02ef407

+25 -3
+25 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 2765 2765 struct ras_err_handler_data *data; 2766 2766 struct ras_err_data err_data; 2767 2767 struct eeprom_table_record *err_rec; 2768 + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; 2768 2769 int ret = 0; 2769 2770 uint32_t i, j, loop_cnt = 1; 2770 - bool is_mca_add = true; 2771 + bool is_mca_add = true, find_pages_per_pa = false; 2771 2772 2772 2773 if (!con || !con->eh_data || !bps || pages <= 0) 2773 2774 return 0; ··· 2798 2797 } 2799 2798 2800 2799 loop_cnt = adev->umc.retire_unit; 2800 + if (adev->gmc.gmc_funcs->query_mem_partition_mode) 2801 + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); 2801 2802 } 2802 2803 2803 2804 for (i = 0; i < pages; i++) { 2804 2805 if (is_mca_add) { 2805 - if (amdgpu_ras_mca2pa(adev, &bps[i], &err_data)) 2806 - goto free; 2806 + if (!find_pages_per_pa) { 2807 + if (amdgpu_ras_mca2pa(adev, &bps[i], &err_data)) { 2808 + if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) { 2809 + /* may use old RAS TA, use PA to find pages in 2810 + * one row 2811 + */ 2812 + if (amdgpu_umc_pages_in_a_row(adev, &err_data, 2813 + bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) 2814 + goto free; 2815 + else 2816 + find_pages_per_pa = true; 2817 + } else { 2818 + /* unsupported cases */ 2819 + goto free; 2820 + } 2821 + } 2822 + } else { 2823 + if (amdgpu_umc_pages_in_a_row(adev, &err_data, 2824 + bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) 2825 + goto free; 2826 + } 2807 2827 2808 2828 err_rec = err_data.err_addr; 2809 2829 } else {