Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf powerpc: Process the DTL entries in queue and deliver samples

Create samples from DTL entries for displaying in 'perf report'
and 'perf script'.

When the different PERF_RECORD_XX records are processed from the perf
session, powerpc_vpadtl_process_event() will be invoked.

For each PERF_RECORD_XX record, compare the timestamp of the perf
record with the timestamp of the top element in the auxtrace heap.

Process the auxtrace queue if the timestamp of the element from the heap
is lower than the timestamp of the entry in the perf record.

Sometimes it could happen that one buffer is only partially processed.

If the timestamp of another event is later than that of the element
currently being processed in the queue, processing will move on to the
next perf record.

So keep track of position of buffer to continue processing next time.

Update the timestamp of the auxtrace heap with the timestamp of last
processed entry from the auxtrace buffer.

Generate perf sample for each entry in the dispatch trace log.

Fill in the sample details:
- sample ip is picked from srr0 field of dtl_entry
- sample cpu is picked from processor_id of dtl_entry
- sample id is from sample_id of powerpc_vpadtl
- cpumode is set to PERF_RECORD_MISC_KERNEL
- Additionally save the details in raw_data of sample.

This is to print the relevant fields in perf_sample__fprintf_synth()
when it is called from builtin-script.

The sample is processed by calling perf_session__deliver_synth_event()
so that it gets included in perf report.

Sample Output:

./perf record -a -e sched:*,vpa_dtl/dtl_all/ -c 1000000000 sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.300 MB perf.data ]

./perf report

# Samples: 321 of event 'vpa-dtl'
# Event count (approx.): 321
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..............................
#
100.00% 100.00% swapper [kernel.kallsyms] [k] plpar_hcall_norets_notrace

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
Tested-by: Tejas Manhas <tejas05@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Cc: Aboorva Devarajan <aboorvad@linux.ibm.com>
Cc: Aditya Bodkhe <Aditya.Bodkhe1@ibm.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Shrikanth Hegde <sshegde@linux.ibm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Athira Rajeev and committed by
Arnaldo Carvalho de Melo
8644834a cd1c3b73

+175
+175
tools/perf/util/powerpc-vpadtl.c
··· 167 167 powerpc_vpadtl_dump(vpa, buf, len); 168 168 } 169 169 170 + /* 171 + * Generate perf sample for each entry in the dispatch trace log. 172 + * - sample ip is picked from srr0 field of powerpc_vpadtl_entry 173 + * - sample cpu is logical cpu. 174 + * - cpumode is set to PERF_RECORD_MISC_KERNEL 175 + * - Additionally save the details in raw_data of sample. This 176 + * is to print the relevant fields in perf_sample__fprintf_synth() 177 + * when called from builtin-script 178 + */ 179 + static int powerpc_vpadtl_sample(struct powerpc_vpadtl_entry *record, 180 + struct powerpc_vpadtl *vpa, u64 save, int cpu) 181 + { 182 + struct perf_sample sample; 183 + union perf_event event; 184 + 185 + sample.ip = be64_to_cpu(record->srr0); 186 + sample.period = 1; 187 + sample.cpu = cpu; 188 + sample.id = vpa->sample_id; 189 + sample.callchain = NULL; 190 + sample.branch_stack = NULL; 191 + memset(&event, 0, sizeof(event)); 192 + sample.cpumode = PERF_RECORD_MISC_KERNEL; 193 + sample.time = save; 194 + sample.raw_data = record; 195 + sample.raw_size = sizeof(record); 196 + event.sample.header.type = PERF_RECORD_SAMPLE; 197 + event.sample.header.misc = sample.cpumode; 198 + event.sample.header.size = sizeof(struct perf_event_header); 199 + 200 + if (perf_session__deliver_synth_event(vpa->session, &event, &sample)) { 201 + pr_debug("Failed to create sample for dtl entry\n"); 202 + return -1; 203 + } 204 + 205 + return 0; 206 + } 207 + 170 208 static int powerpc_vpadtl_get_buffer(struct powerpc_vpadtl_queue *vpaq) 171 209 { 172 210 struct auxtrace_buffer *buffer = vpaq->buffer; ··· 276 238 vpaq->buf_len = 0; 277 239 278 240 return 1; 241 + } 242 + 243 + static int powerpc_vpadtl_decode_all(struct powerpc_vpadtl_queue *vpaq) 244 + { 245 + int ret; 246 + unsigned char *buf; 247 + 248 + if (!vpaq->buf_len || vpaq->pkt_len == vpaq->size) { 249 + ret = powerpc_vpadtl_get_buffer(vpaq); 250 + if (ret <= 0) 251 + return ret; 252 + } 253 + 254 + if (vpaq->buffer) { 255 + buf = 
vpaq->buffer->data; 256 + buf += vpaq->pkt_len; 257 + vpaq->dtl = (struct powerpc_vpadtl_entry *)buf; 258 + if ((long long)be64_to_cpu(vpaq->dtl->timebase) <= 0) { 259 + if (vpaq->pkt_len != dtl_entry_size && vpaq->buf_len) { 260 + vpaq->pkt_len += dtl_entry_size; 261 + vpaq->buf_len -= dtl_entry_size; 262 + } 263 + return -1; 264 + } 265 + vpaq->pkt_len += dtl_entry_size; 266 + vpaq->buf_len -= dtl_entry_size; 267 + } else { 268 + return 0; 269 + } 270 + 271 + return 1; 272 + } 273 + 274 + static int powerpc_vpadtl_run_decoder(struct powerpc_vpadtl_queue *vpaq, u64 *timestamp) 275 + { 276 + struct powerpc_vpadtl *vpa = vpaq->vpa; 277 + struct powerpc_vpadtl_entry *record; 278 + int ret; 279 + unsigned long long vpaq_timestamp; 280 + 281 + while (1) { 282 + ret = powerpc_vpadtl_decode_all(vpaq); 283 + if (!ret) { 284 + pr_debug("All data in the queue has been processed.\n"); 285 + return 1; 286 + } 287 + 288 + /* 289 + * Error is detected when decoding VPA PMU trace. Continue to 290 + * the next trace data and find out more dtl entries. 291 + */ 292 + if (ret < 0) 293 + continue; 294 + 295 + record = vpaq->dtl; 296 + 297 + vpaq_timestamp = powerpc_vpadtl_timestamp(vpaq); 298 + 299 + /* Update timestamp for the last record */ 300 + if (vpaq_timestamp > vpaq->timestamp) 301 + vpaq->timestamp = vpaq_timestamp; 302 + 303 + /* 304 + * If the timestamp of the queue is later than timestamp of the 305 + * coming perf event, bail out so can allow the perf event to 306 + * be processed ahead. 
307 + */ 308 + if (vpaq->timestamp >= *timestamp) { 309 + *timestamp = vpaq->timestamp; 310 + vpaq->pkt_len -= dtl_entry_size; 311 + vpaq->buf_len += dtl_entry_size; 312 + return 0; 313 + } 314 + 315 + ret = powerpc_vpadtl_sample(record, vpa, vpaq_timestamp, vpaq->cpu); 316 + if (ret) 317 + continue; 318 + } 319 + return 0; 320 + } 321 + 322 + /* 323 + * For each of the PERF_RECORD_XX record, compare the timestamp 324 + * of perf record with timestamp of top element in the auxtrace heap. 325 + * Process the auxtrace queue if the timestamp of element from heap is 326 + * lower than timestamp from entry in perf record. 327 + * 328 + * Update the timestamp of the auxtrace heap with the timestamp 329 + * of last processed entry from the auxtrace buffer. 330 + */ 331 + static int powerpc_vpadtl_process_queues(struct powerpc_vpadtl *vpa, u64 timestamp) 332 + { 333 + unsigned int queue_nr; 334 + u64 ts; 335 + int ret; 336 + 337 + while (1) { 338 + struct auxtrace_queue *queue; 339 + struct powerpc_vpadtl_queue *vpaq; 340 + 341 + if (!vpa->heap.heap_cnt) 342 + return 0; 343 + 344 + if (vpa->heap.heap_array[0].ordinal >= timestamp) 345 + return 0; 346 + 347 + queue_nr = vpa->heap.heap_array[0].queue_nr; 348 + queue = &vpa->queues.queue_array[queue_nr]; 349 + vpaq = queue->priv; 350 + 351 + auxtrace_heap__pop(&vpa->heap); 352 + 353 + if (vpa->heap.heap_cnt) { 354 + ts = vpa->heap.heap_array[0].ordinal + 1; 355 + if (ts > timestamp) 356 + ts = timestamp; 357 + } else { 358 + ts = timestamp; 359 + } 360 + 361 + ret = powerpc_vpadtl_run_decoder(vpaq, &ts); 362 + if (ret < 0) { 363 + auxtrace_heap__add(&vpa->heap, queue_nr, ts); 364 + return ret; 365 + } 366 + 367 + if (!ret) { 368 + ret = auxtrace_heap__add(&vpa->heap, queue_nr, ts); 369 + if (ret < 0) 370 + return ret; 371 + } else { 372 + vpaq->on_heap = false; 373 + } 374 + } 375 + return 0; 279 376 } 280 377 281 378 static struct powerpc_vpadtl_queue *powerpc_vpadtl__alloc_queue(struct powerpc_vpadtl *vpa, ··· 526 353 err = 
powerpc_vpadtl__update_queues(vpa); 527 354 if (err) 528 355 return err; 356 + 357 + err = powerpc_vpadtl_process_queues(vpa, sample->time); 529 358 } 530 359 531 360 return err;