Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

gendwarfksyms: Add a separate pass to resolve FQNs

Using dwarf_getscopes_die() to resolve fully-qualified names (FQNs) turns
out to be rather slow, and it also causes the same scopes to be processed
more than once. Simply adding an extra pass that resolves names for all
DIEs before processing exports is noticeably faster.

For the object files with the most exports in a defconfig+Rust build,
the performance improvement is consistently >50%:

rust/bindings.o: 1038 exports
before: 9.5980 +- 0.0183 seconds time elapsed ( +- 0.19% )
after: 4.3116 +- 0.0287 seconds time elapsed ( +- 0.67% )

rust/core.o: 424 exports
before: 5.3584 +- 0.0204 seconds time elapsed ( +- 0.38% )
after: 0.05348 +- 0.00129 seconds time elapsed ( +- 2.42% )
^ Not a mistake: this is roughly a 100x speedup.

net/core/dev.o: 190 exports
before: 9.0507 +- 0.0297 seconds time elapsed ( +- 0.33% )
after: 3.2882 +- 0.0165 seconds time elapsed ( +- 0.50% )

rust/kernel.o: 129 exports
before: 6.8571 +- 0.0317 seconds time elapsed ( +- 0.46% )
after: 2.9096 +- 0.0316 seconds time elapsed ( +- 1.09% )

net/core/skbuff.o: 120 exports
before: 5.4805 +- 0.0291 seconds time elapsed ( +- 0.53% )
after: 2.0339 +- 0.0231 seconds time elapsed ( +- 1.14% )

drivers/gpu/drm/display/drm_dp_helper.o: 101 exports
before: 1.7877 +- 0.0187 seconds time elapsed ( +- 1.05% )
after: 0.69245 +- 0.00994 seconds time elapsed ( +- 1.44% )

net/core/sock.o: 97 exports
before: 5.8327 +- 0.0653 seconds time elapsed ( +- 1.12% )
after: 2.0784 +- 0.0291 seconds time elapsed ( +- 1.40% )

drivers/net/phy/phy_device.o: 95 exports
before: 3.0671 +- 0.0371 seconds time elapsed ( +- 1.21% )
after: 1.2127 +- 0.0207 seconds time elapsed ( +- 1.70% )

drivers/pci/pci.o: 93 exports
before: 1.1130 +- 0.0113 seconds time elapsed ( +- 1.01% )
after: 0.4848 +- 0.0127 seconds time elapsed ( +- 2.63% )

kernel/sched/core.o: 83 exports
before: 3.5092 +- 0.0223 seconds time elapsed ( +- 0.64% )
after: 1.1231 +- 0.0145 seconds time elapsed ( +- 1.29% )

Overall, a defconfig+DWARF5 build with gendwarfksyms and Rust is 14.8%
faster with this patch applied on my test system. Without Rust, there's
still a 10.4% improvement in build time when gendwarfksyms is used.

Note that symbol versions are unchanged with this patch.

Suggested-by: Giuliano Procida <gprocida@google.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>

authored by

Sami Tolvanen and committed by
Masahiro Yamada
10e9510a 80e54e84

+87 -73
+1 -1
scripts/gendwarfksyms/die.c
··· 6 6 #include <string.h> 7 7 #include "gendwarfksyms.h" 8 8 9 - #define DIE_HASH_BITS 15 9 + #define DIE_HASH_BITS 16 10 10 11 11 /* {die->addr, state} -> struct die * */ 12 12 static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);
+83 -71
scripts/gendwarfksyms/dwarf.c
··· 3 3 * Copyright (C) 2024 Google LLC 4 4 */ 5 5 6 + #define _GNU_SOURCE 6 7 #include <assert.h> 7 8 #include <inttypes.h> 8 9 #include <stdarg.h> ··· 194 193 va_end(args); 195 194 } 196 195 197 - #define MAX_FQN_SIZE 64 198 - 199 - /* Get a fully qualified name from DWARF scopes */ 200 - static char *get_fqn(Dwarf_Die *die) 201 - { 202 - const char *list[MAX_FQN_SIZE]; 203 - Dwarf_Die *scopes = NULL; 204 - bool has_name = false; 205 - char *fqn = NULL; 206 - char *p; 207 - int count = 0; 208 - int len = 0; 209 - int res; 210 - int i; 211 - 212 - res = checkp(dwarf_getscopes_die(die, &scopes)); 213 - if (!res) { 214 - list[count] = get_name_attr(die); 215 - 216 - if (!list[count]) 217 - return NULL; 218 - 219 - len += strlen(list[count]); 220 - count++; 221 - 222 - goto done; 223 - } 224 - 225 - for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) { 226 - if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit) 227 - continue; 228 - 229 - list[count] = get_name_attr(&scopes[i]); 230 - 231 - if (list[count]) { 232 - has_name = true; 233 - } else { 234 - list[count] = "<anonymous>"; 235 - has_name = false; 236 - } 237 - 238 - len += strlen(list[count]); 239 - count++; 240 - 241 - if (i > 0) { 242 - list[count++] = "::"; 243 - len += 2; 244 - } 245 - } 246 - 247 - free(scopes); 248 - 249 - if (count == MAX_FQN_SIZE) 250 - warn("increase MAX_FQN_SIZE: reached the maximum"); 251 - 252 - /* Consider the DIE unnamed if the last scope doesn't have a name */ 253 - if (!has_name) 254 - return NULL; 255 - done: 256 - fqn = xmalloc(len + 1); 257 - *fqn = '\0'; 258 - 259 - p = fqn; 260 - for (i = 0; i < count; i++) 261 - p = stpcpy(p, list[i]); 262 - 263 - return fqn; 264 - } 265 - 266 196 static void update_fqn(struct die *cache, Dwarf_Die *die) 267 197 { 268 - if (!cache->fqn) 269 - cache->fqn = get_fqn(die) ?: ""; 198 + struct die *fqn; 199 + 200 + if (!cache->fqn) { 201 + if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &fqn) && 202 + *fqn->fqn) 203 + cache->fqn = 
xstrdup(fqn->fqn); 204 + else 205 + cache->fqn = ""; 206 + } 270 207 } 271 208 272 209 static void process_fqn(struct die *cache, Dwarf_Die *die) ··· 1087 1148 cache_free(&state.expansion_cache); 1088 1149 } 1089 1150 1151 + static int resolve_fqns(struct state *parent, struct die *unused, 1152 + Dwarf_Die *die) 1153 + { 1154 + struct state state; 1155 + struct die *cache; 1156 + const char *name; 1157 + bool use_prefix; 1158 + char *prefix = NULL; 1159 + char *fqn = ""; 1160 + int tag; 1161 + 1162 + if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &cache)) 1163 + return 0; 1164 + 1165 + tag = dwarf_tag(die); 1166 + 1167 + /* 1168 + * Only namespaces and structures need to pass a prefix to the next 1169 + * scope. 1170 + */ 1171 + use_prefix = tag == DW_TAG_namespace || tag == DW_TAG_class_type || 1172 + tag == DW_TAG_structure_type; 1173 + 1174 + state.expand.current_fqn = NULL; 1175 + name = get_name_attr(die); 1176 + 1177 + if (parent && parent->expand.current_fqn && (use_prefix || name)) { 1178 + /* 1179 + * The fqn for the current DIE, and if needed, a prefix for the 1180 + * next scope. 1181 + */ 1182 + if (asprintf(&prefix, "%s::%s", parent->expand.current_fqn, 1183 + name ? name : "<anonymous>") < 0) 1184 + error("asprintf failed"); 1185 + 1186 + if (use_prefix) 1187 + state.expand.current_fqn = prefix; 1188 + 1189 + /* 1190 + * Use fqn only if the DIE has a name. Otherwise fqn will 1191 + * remain empty. 1192 + */ 1193 + if (name) { 1194 + fqn = prefix; 1195 + /* prefix will be freed by die_map. */ 1196 + prefix = NULL; 1197 + } 1198 + } else if (name) { 1199 + /* No prefix from the previous scope. Use only the name. */ 1200 + fqn = xstrdup(name); 1201 + 1202 + if (use_prefix) 1203 + state.expand.current_fqn = fqn; 1204 + } 1205 + 1206 + /* If the DIE has a non-empty name, cache it. */ 1207 + if (*fqn) { 1208 + cache = die_map_get(die, DIE_FQN); 1209 + /* Move ownership of fqn to die_map. 
*/ 1210 + cache->fqn = fqn; 1211 + cache->state = DIE_FQN; 1212 + } 1213 + 1214 + check(process_die_container(&state, NULL, die, resolve_fqns, 1215 + match_all)); 1216 + 1217 + free(prefix); 1218 + return 0; 1219 + } 1220 + 1090 1221 void process_cu(Dwarf_Die *cudie) 1091 1222 { 1223 + check(process_die_container(NULL, NULL, cudie, resolve_fqns, 1224 + match_all)); 1225 + 1092 1226 check(process_die_container(NULL, NULL, cudie, process_exported_symbols, 1093 1227 match_all)); 1094 1228
+2
scripts/gendwarfksyms/gendwarfksyms.h
··· 139 139 140 140 enum die_state { 141 141 DIE_INCOMPLETE, 142 + DIE_FQN, 142 143 DIE_UNEXPANDED, 143 144 DIE_COMPLETE, 144 145 DIE_SYMBOL, ··· 171 170 { 172 171 switch (state) { 173 172 CASE_CONST_TO_STR(DIE_INCOMPLETE) 173 + CASE_CONST_TO_STR(DIE_FQN) 174 174 CASE_CONST_TO_STR(DIE_UNEXPANDED) 175 175 CASE_CONST_TO_STR(DIE_COMPLETE) 176 176 CASE_CONST_TO_STR(DIE_SYMBOL)
+1 -1
scripts/gendwarfksyms/types.c
··· 248 248 warn("found incomplete cache entry: %p", cache); 249 249 return NULL; 250 250 } 251 - if (cache->state == DIE_SYMBOL) 251 + if (cache->state == DIE_SYMBOL || cache->state == DIE_FQN) 252 252 return NULL; 253 253 if (!cache->fqn || !*cache->fqn) 254 254 return NULL;