Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/radeon: add IB and fence dispatch functions for CIK gfx (v7)

For gfx ring only. Compute is still todo.

v2: add documentation
v3: update to latest reset changes, integrate emit update patch.
v4: fix count on wait_reg_mem for HDP flush
v5: use old hdp flush method for fence
v6: set valid bit for IB
v7: cleanup for release

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+251 -3
+134
drivers/gpu/drm/radeon/cik.c
··· 1493 1493 } 1494 1494 1495 1495 /* 1496 + * GPU scratch register helper functions. 1497 + */ 1498 + /** 1499 + * cik_scratch_init - setup driver info for CP scratch regs 1500 + * 1501 + * @rdev: radeon_device pointer 1502 + * 1503 + * Set up the number and offset of the CP scratch registers. 1504 + * NOTE: use of CP scratch registers is a legacy interface and 1505 + * is not used by default on newer asics (r6xx+). On newer asics, 1506 + * memory buffers are used for fences rather than scratch regs. 1507 + */ 1508 + static void cik_scratch_init(struct radeon_device *rdev) 1509 + { 1510 + int i; 1511 + 1512 + rdev->scratch.num_reg = 7; 1513 + rdev->scratch.reg_base = SCRATCH_REG0; 1514 + for (i = 0; i < rdev->scratch.num_reg; i++) { 1515 + rdev->scratch.free[i] = true; 1516 + rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4); /* one register every 4 bytes, starting at reg_base */ 1517 + } 1518 + } 1519 + 1520 + /** 1521 + * cik_fence_ring_emit - emit a fence on the gfx ring 1522 + * 1523 + * @rdev: radeon_device pointer 1524 + * @fence: radeon fence object 1525 + * 1526 + * Emits a fence sequence number on the gfx ring and flushes 1527 + * GPU caches. 
1528 + */ 1529 + void cik_fence_ring_emit(struct radeon_device *rdev, 1530 + struct radeon_fence *fence) 1531 + { 1532 + struct radeon_ring *ring = &rdev->ring[fence->ring]; 1533 + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 1534 + 1535 + /* EVENT_WRITE_EOP - flush caches, send int */ 1536 + radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 1537 + radeon_ring_write(ring, (EOP_TCL1_ACTION_EN | 1538 + EOP_TC_ACTION_EN | 1539 + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 1540 + EVENT_INDEX(5))); 1541 + radeon_ring_write(ring, addr & 0xfffffffc); 1542 + radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); 1543 + radeon_ring_write(ring, fence->seq); 1544 + radeon_ring_write(ring, 0); 1545 + /* HDP flush */ 1546 + /* We should be using the new WAIT_REG_MEM special op packet here 1547 + * but it causes the CP to hang 1548 + */ 1549 + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 1550 + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 1551 + WRITE_DATA_DST_SEL(0))); /* engine 0 = me, dst 0 = register, per the selector notes in cikd.h */ 1552 + radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); 1553 + radeon_ring_write(ring, 0); 1554 + radeon_ring_write(ring, 0); 1555 + } 1556 + 1557 + /* cik_semaphore_ring_emit - write a MEM_SEMAPHORE packet to @ring: the packet header, the low 32 bits of semaphore->gpu_addr, then the masked upper address bits OR'ed with the wait select (emit_wait true) or the signal select (emit_wait false). @rdev is not referenced in the body. */ void cik_semaphore_ring_emit(struct radeon_device *rdev, 1558 + struct radeon_ring *ring, 1559 + struct radeon_semaphore *semaphore, 1560 + bool emit_wait) 1561 + { 1562 + uint64_t addr = semaphore->gpu_addr; 1563 + unsigned sel = emit_wait ? 
PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; 1564 + 1565 + radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); 1566 + radeon_ring_write(ring, addr & 0xffffffff); 1567 + radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); 1568 + } 1569 + 1570 + /* 1571 + * IB stuff 1572 + */ 1573 + /** 1574 + * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring 1575 + * 1576 + * @rdev: radeon_device pointer 1577 + * @ib: radeon indirect buffer object 1578 + * 1579 + * Emits a DE (drawing engine) or CE (constant engine) IB 1580 + * on the gfx ring. IBs are usually generated by userspace 1581 + * acceleration drivers and submitted to the kernel for 1582 + * scheduling on the ring. This function schedules the IB 1583 + * on the gfx ring for execution by the GPU. 1584 + */ 1585 + void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 1586 + { 1587 + struct radeon_ring *ring = &rdev->ring[ib->ring]; 1588 + u32 header, control = INDIRECT_BUFFER_VALID; 1589 + 1590 + if (ib->is_const_ib) { 1591 + /* set switch buffer packet before const IB */ 1592 + radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 1593 + radeon_ring_write(ring, 0); 1594 + 1595 + header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 1596 + } else { 1597 + u32 next_rptr; 1598 + if (ring->rptr_save_reg) { 1599 + next_rptr = ring->wptr + 3 + 4; /* 3 matches the dwords written in this branch, 4 the IB packet dwords written at the end of the function */ 1600 + radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 1601 + radeon_ring_write(ring, ((ring->rptr_save_reg - 1602 + PACKET3_SET_UCONFIG_REG_START) >> 2)); 1603 + radeon_ring_write(ring, next_rptr); 1604 + } else if (rdev->wb.enabled) { 1605 + next_rptr = ring->wptr + 5 + 4; /* 5 matches the dwords written in this branch, 4 the IB packet dwords written at the end of the function */ 1606 + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 1607 + radeon_ring_write(ring, WRITE_DATA_DST_SEL(1)); 1608 + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 1609 + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 1610 + radeon_ring_write(ring, next_rptr); 1611 + } 1612 + 1613 
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 1614 + } 1615 + 1616 + /* control word: valid bit (set at declaration), IB length in dwords, and the VM id shifted to bit 24 when the IB has a VM */ control |= ib->length_dw | 1617 + (ib->vm ? (ib->vm->id << 24) : 0); 1618 + 1619 + radeon_ring_write(ring, header); 1620 + radeon_ring_write(ring, 1621 + #ifdef __BIG_ENDIAN 1622 + (2 << 0) | 1623 + #endif 1624 + (ib->gpu_addr & 0xFFFFFFFC)); 1625 + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 1626 + radeon_ring_write(ring, control); 1627 + } 1628 + 1629 + /* 1496 1630 * CP. 1497 1631 * On CIK, gfx and compute now have independant command processors. 1498 1632 *
+117 -3
drivers/gpu/drm/radeon/cikd.h
··· 188 188 189 189 #define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 190 190 191 + #define GPU_HDP_FLUSH_REQ 0x54DC 192 + #define GPU_HDP_FLUSH_DONE 0x54E0 193 + #define CP0 (1 << 0) 194 + #define CP1 (1 << 1) 195 + #define CP2 (1 << 2) 196 + #define CP3 (1 << 3) 197 + #define CP4 (1 << 4) 198 + #define CP5 (1 << 5) 199 + #define CP6 (1 << 6) 200 + #define CP7 (1 << 7) 201 + #define CP8 (1 << 8) 202 + #define CP9 (1 << 9) 203 + #define SDMA0 (1 << 10) 204 + #define SDMA1 (1 << 11) 205 + 191 206 #define GRBM_CNTL 0x8000 192 207 #define GRBM_READ_TIMEOUT(x) ((x) << 0) 193 208 ··· 507 492 # define RASTER_CONFIG_RB_MAP_2 2 508 493 # define RASTER_CONFIG_RB_MAP_3 3 509 494 495 + #define VGT_EVENT_INITIATOR 0x28a90 496 + # define SAMPLE_STREAMOUTSTATS1 (1 << 0) 497 + # define SAMPLE_STREAMOUTSTATS2 (2 << 0) 498 + # define SAMPLE_STREAMOUTSTATS3 (3 << 0) 499 + # define CACHE_FLUSH_TS (4 << 0) 500 + # define CACHE_FLUSH (6 << 0) 501 + # define CS_PARTIAL_FLUSH (7 << 0) 502 + # define VGT_STREAMOUT_RESET (10 << 0) 503 + # define END_OF_PIPE_INCR_DE (11 << 0) 504 + # define END_OF_PIPE_IB_END (12 << 0) 505 + # define RST_PIX_CNT (13 << 0) 506 + # define VS_PARTIAL_FLUSH (15 << 0) 507 + # define PS_PARTIAL_FLUSH (16 << 0) 508 + # define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0) 509 + # define ZPASS_DONE (21 << 0) 510 + # define CACHE_FLUSH_AND_INV_EVENT (22 << 0) 511 + # define PERFCOUNTER_START (23 << 0) 512 + # define PERFCOUNTER_STOP (24 << 0) 513 + # define PIPELINESTAT_START (25 << 0) 514 + # define PIPELINESTAT_STOP (26 << 0) 515 + # define PERFCOUNTER_SAMPLE (27 << 0) 516 + # define SAMPLE_PIPELINESTAT (30 << 0) 517 + # define SO_VGT_STREAMOUT_FLUSH (31 << 0) 518 + # define SAMPLE_STREAMOUTSTATS (32 << 0) 519 + # define RESET_VTX_CNT (33 << 0) 520 + # define VGT_FLUSH (36 << 0) 521 + # define BOTTOM_OF_PIPE_TS (40 << 0) 522 + # define DB_CACHE_FLUSH_AND_INV (42 << 0) 523 + # define FLUSH_AND_INV_DB_DATA_TS (43 << 0) 524 + # define FLUSH_AND_INV_DB_META (44 << 0) 525 + # define 
FLUSH_AND_INV_CB_DATA_TS (45 << 0) 526 + # define FLUSH_AND_INV_CB_META (46 << 0) 527 + # define CS_DONE (47 << 0) 528 + # define PS_DONE (48 << 0) 529 + # define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0) 530 + # define THREAD_TRACE_START (51 << 0) 531 + # define THREAD_TRACE_STOP (52 << 0) 532 + # define THREAD_TRACE_FLUSH (54 << 0) 533 + # define THREAD_TRACE_FINISH (55 << 0) 534 + # define PIXEL_PIPE_STAT_CONTROL (56 << 0) 535 + # define PIXEL_PIPE_STAT_DUMP (57 << 0) 536 + # define PIXEL_PIPE_STAT_RESET (58 << 0) 537 + 510 538 #define SCRATCH_REG0 0x30100 511 539 #define SCRATCH_REG1 0x30104 512 540 #define SCRATCH_REG2 0x30108 ··· 565 507 #define CP_SEM_WAIT_TIMER 0x301BC 566 508 567 509 #define CP_SEM_INCOMPLETE_TIMER_CNTL 0x301C8 510 + 511 + #define CP_WAIT_REG_MEM_TIMEOUT 0x301D0 568 512 569 513 #define GRBM_GFX_INDEX 0x30800 570 514 #define INSTANCE_INDEX(x) ((x) << 0) ··· 657 597 #define PACKET3_DRAW_INDEX_OFFSET_2 0x35 658 598 #define PACKET3_DRAW_PREAMBLE 0x36 659 599 #define PACKET3_WRITE_DATA 0x37 600 + #define WRITE_DATA_DST_SEL(x) ((x) << 8) 601 + /* 0 - register 602 + * 1 - memory (sync - via GRBM) 603 + * 2 - gl2 604 + * 3 - gds 605 + * 4 - reserved 606 + * 5 - memory (async - direct) 607 + */ 608 + #define WR_ONE_ADDR (1 << 16) 609 + #define WR_CONFIRM (1 << 20) 610 + #define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 611 + /* 0 - LRU 612 + * 1 - Stream 613 + */ 614 + #define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 615 + /* 0 - me 616 + * 1 - pfp 617 + * 2 - ce 618 + */ 660 619 #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 661 620 #define PACKET3_MEM_SEMAPHORE 0x39 621 + # define PACKET3_SEM_USE_MAILBOX (0x1 << 16) 622 + # define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */ 623 + # define PACKET3_SEM_CLIENT_CODE ((x) << 24) /* 0 = CP, 1 = CB, 2 = DB. NOTE(review): this macro takes no parameter yet its body uses x, so it only expands correctly where an x happens to be in scope; it probably should be declared as PACKET3_SEM_CLIENT_CODE(x) - confirm before first use. */ 624 + # define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) 625 + # define PACKET3_SEM_SEL_WAIT (0x7 << 29) 662 626 #define PACKET3_COPY_DW 0x3B 663 627 #define PACKET3_WAIT_REG_MEM 0x3C 
628 + #define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) 629 + /* 0 - always 630 + * 1 - < 631 + * 2 - <= 632 + * 3 - == 633 + * 4 - != 634 + * 5 - >= 635 + * 6 - > 636 + */ 637 + #define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) 638 + /* 0 - reg 639 + * 1 - mem 640 + */ 641 + #define WAIT_REG_MEM_OPERATION(x) ((x) << 6) 642 + /* 0 - wait_reg_mem 643 + * 1 - wr_wait_wr_reg 644 + */ 645 + #define WAIT_REG_MEM_ENGINE(x) ((x) << 8) 646 + /* 0 - me 647 + * 1 - pfp 648 + */ 664 649 #define PACKET3_INDIRECT_BUFFER 0x3F 650 + #define INDIRECT_BUFFER_TCL2_VOLATILE (1 << 22) 651 + #define INDIRECT_BUFFER_VALID (1 << 23) 652 + #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) 653 + /* 0 - LRU 654 + * 1 - Stream 655 + * 2 - Bypass 656 + */ 665 657 #define PACKET3_COPY_DATA 0x40 666 658 #define PACKET3_PFP_SYNC_ME 0x42 667 659 #define PACKET3_SURFACE_SYNC 0x43 ··· 758 646 #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ 759 647 #define EOP_TCL1_ACTION_EN (1 << 16) 760 648 #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ 761 - #define CACHE_POLICY(x) ((x) << 25) 649 + #define EOP_CACHE_POLICY(x) ((x) << 25) 762 650 /* 0 - LRU 763 651 * 1 - Stream 764 652 * 2 - Bypass 765 653 */ 766 - #define TCL2_VOLATILE (1 << 27) 654 + #define EOP_TCL2_VOLATILE (1 << 27) 767 655 #define DATA_SEL(x) ((x) << 29) 768 656 /* 0 - discard 769 657 * 1 - send low 32bit data ··· 805 693 #define PACKET3_SET_SH_REG_OFFSET 0x77 806 694 #define PACKET3_SET_QUEUE_REG 0x78 807 695 #define PACKET3_SET_UCONFIG_REG 0x79 696 + #define PACKET3_SET_UCONFIG_REG_START 0x00030000 697 + #define PACKET3_SET_UCONFIG_REG_END 0x00031000 808 698 #define PACKET3_SCRATCH_RAM_WRITE 0x7D 809 699 #define PACKET3_SCRATCH_RAM_READ 0x7E 810 700 #define PACKET3_LOAD_CONST_RAM 0x80 ··· 816 702 #define PACKET3_INCREMENT_DE_COUNTER 0x85 817 703 #define PACKET3_WAIT_ON_CE_COUNTER 0x86 818 704 #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 819 - 705 + #define PACKET3_SWITCH_BUFFER 0x8B 820 706 821 707 #endif