diff options
| author | Greg Kroah-Hartman <gregkh@google.com> | 2019-11-14 14:39:48 +0800 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@google.com> | 2019-11-14 14:39:48 +0800 |
| commit | ef0b39d33a38be556d95301c1b97e5d8afac1081 (patch) | |
| tree | 36ee8a8f25f1f294c7f3f747a3b8c1821d2decdc /drivers/gpu | |
| parent | e4575a2d22e1a6ff335ee354bc2a081639b5e99f (diff) | |
| parent | 6186d66524c25c70d634206dd460bd6388e7e9f9 (diff) | |
Merge 4.4.201 into android-4.4-p
Changes in 4.4.201
CDC-NCM: handle incomplete transfer of MTU
net: fix data-race in neigh_event_send()
NFC: fdp: fix incorrect free object
NFC: st21nfca: fix double free
qede: fix NULL pointer deref in __qede_remove()
nfc: netlink: fix double device reference drop
ALSA: bebob: fix to detect configured source of sampling clock for Focusrite Saffire Pro i/o series
ALSA: hda/ca0132 - Fix possible workqueue stall
mm, vmstat: hide /proc/pagetypeinfo from normal users
dump_stack: avoid the livelock of the dump_lock
perf tools: Fix time sorting
drm/radeon: fix si_enable_smc_cac() failed issue
ceph: fix use-after-free in __ceph_remove_cap()
iio: imu: adis16480: make sure provided frequency is positive
netfilter: nf_tables: Align nft_expr private data to 64-bit
netfilter: ipset: Fix an error code in ip_set_sockfn_get()
can: usb_8dev: fix use-after-free on disconnect
can: c_can: c_can_poll(): only read status register after status IRQ
can: peak_usb: fix a potential out-of-sync while decoding packets
can: gs_usb: gs_can_open(): prevent memory leak
can: peak_usb: fix slab info leak
drivers: usb: usbip: Add missing break statement to switch
configfs: fix a deadlock in configfs_symlink()
PCI: tegra: Enable Relaxed Ordering only for Tegra20 & Tegra30
scsi: qla2xxx: fixup incorrect usage of host_byte
scsi: lpfc: Honor module parameter lpfc_use_adisc
ipvs: move old_secure_tcp into struct netns_ipvs
bonding: fix unexpected IFF_BONDING bit unset
usb: fsl: Check memory resource before releasing it
usb: gadget: udc: atmel: Fix interrupt storm in FIFO mode.
usb: gadget: composite: Fix possible double free memory bug
usb: gadget: configfs: fix concurrent issue between composite APIs
perf/x86/amd/ibs: Fix reading of the IBS OpData register and thus precise RIP validity
USB: Skip endpoints with 0 maxpacket length
scsi: qla2xxx: stop timer in shutdown path
net: hisilicon: Fix "Trying to free already-free IRQ"
NFSv4: Don't allow a cached open with a revoked delegation
igb: Fix constant media auto sense switching when no cable is connected
e1000: fix memory leaks
can: flexcan: disable completely the ECC mechanism
mm/filemap.c: don't initiate writeback if mapping has no dirty pages
cgroup,writeback: don't switch wbs immediately on dead wbs if the memcg is dead
net: prevent load/store tearing on sk->sk_stamp
drm/i915/gtt: Add read only pages to gen8_pte_encode
drm/i915/gtt: Read-only pages for insert_entries on bdw+
drm/i915/gtt: Disable read-only support under GVT
drm/i915: Rename gen7 cmdparser tables
drm/i915: Disable Secure Batches for gen6+
drm/i915: Remove Master tables from cmdparser
drm/i915: Add support for mandatory cmdparsing
drm/i915: Support ro ppgtt mapped cmdparser shadow buffers
drm/i915: Allow parsing of unsized batches
drm/i915: Add gen9 BCS cmdparsing
drm/i915/cmdparser: Add support for backward jumps
drm/i915/cmdparser: Ignore Length operands during command matching
drm/i915: Lower RM timeout to avoid DSI hard hangs
drm/i915/gen8+: Add RC6 CTX corruption WA
drm/i915/cmdparser: Fix jump whitelist clearing
Linux 4.4.201
Change-Id: Ifc1fa5b9734f244745b862c6dbf7e34b73245806
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Diffstat (limited to 'drivers/gpu')
| -rw-r--r-- | drivers/gpu/drm/i915/i915_cmd_parser.c | 418 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_dma.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_drv.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 31 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_gem_context.c | 5 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 118 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.c | 53 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.h | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_reg.h | 11 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/intel_display.c | 9 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/intel_drv.h | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 173 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 10 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/radeon/si_dpm.c | 1 |
15 files changed, 638 insertions, 208 deletions
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index db58c8d664c2..6188b70c2790 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -50,13 +50,11 @@ * granting userspace undue privileges. There are three categories of privilege. * * First, commands which are explicitly defined as privileged or which should - * only be used by the kernel driver. The parser generally rejects such - * commands, though it may allow some from the drm master process. + * only be used by the kernel driver. The parser rejects such commands * * Second, commands which access registers. To support correct/enhanced * userspace functionality, particularly certain OpenGL extensions, the parser - * provides a whitelist of registers which userspace may safely access (for both - * normal and drm master processes). + * provides a whitelist of registers which userspace may safely access * * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). * The parser always rejects such commands. @@ -81,9 +79,9 @@ * in the per-ring command tables. * * Other command table entries map fairly directly to high level categories - * mentioned above: rejected, master-only, register whitelist. The parser - * implements a number of checks, including the privileged memory checks, via a - * general bitmasking mechanism. + * mentioned above: rejected, register whitelist. The parser implements a number + * of checks, including the privileged memory checks, via a general bitmasking + * mechanism. */ #define STD_MI_OPCODE_MASK 0xFF800000 @@ -94,7 +92,7 @@ #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), (opm) }, \ + .cmd = { (op) & (opm), (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } @@ -109,14 +107,13 @@ #define R CMD_DESC_REJECT #define W CMD_DESC_REGISTER #define B CMD_DESC_BITMASK -#define M CMD_DESC_MASTER /* Command Mask Fixed Len Action ---------------------------------------------------------- */ -static const struct drm_i915_cmd_descriptor common_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { CMD( MI_NOOP, SMI, F, 1, S ), CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), - CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), CMD( MI_ARB_CHECK, SMI, F, 1, S ), CMD( MI_REPORT_HEAD, SMI, F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), @@ -146,7 +143,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), }; -static const struct drm_i915_cmd_descriptor render_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { CMD( MI_FLUSH, SMI, F, 1, S ), CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_PREDICATE, SMI, F, 1, S ), @@ -213,7 +210,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_RS_CONTEXT, SMI, F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, R ), CMD( MI_RS_STORE_DATA_IMM, SMI, !F, 0xFF, S ), @@ -229,7 +226,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), }; -static const struct drm_i915_cmd_descriptor video_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -273,7 +270,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MFX_WAIT, SMFX, F, 1, S ), }; -static const struct drm_i915_cmd_descriptor vecs_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -311,7 +308,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { }}, ), }; -static const struct drm_i915_cmd_descriptor blt_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, .bits = {{ @@ -345,10 +342,62 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +/* + * For Gen9 we can still rely on the h/w to enforce cmd security, and only + * need to re-enforce the register access checks. We therefore only need to + * teach the cmdparser how to find the end of each command, and identify + * register accesses. The table doesn't need to reject any commands, and so + * the only commands listed here are: + * 1) Those that touch registers + * 2) Those that do not have the default 8-bit length + * + * Note that the default MI length mask chosen for this table is 0xFF, not + * the 0x3F used on older devices. This is because the vast majority of MI + * cmds on Gen9 use a standard 8-bit Length field. + * All the Gen9 blitter instructions are standard 0xFF length mask, and + * none allow access to non-general registers, so in fact no BLT cmds are + * included in the table at all. + * + */ +static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), + CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), + CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), + + /* + * We allow BB_START but apply further checks. We just sanitize the + * basic fields here. + */ + CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = ~SMI, + .expected = (MI_BATCH_PPGTT_HSW | 1), + }}, ), +}; + #undef CMD #undef SMI #undef S3D @@ -359,40 +408,44 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { #undef R #undef W #undef B -#undef M -static const struct drm_i915_cmd_table gen7_render_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, }; -static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, }; -static const struct drm_i915_cmd_table gen7_video_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { video_cmds, ARRAY_SIZE(video_cmds) }, +static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, }; -static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, +static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, }; -static const struct drm_i915_cmd_table gen7_blt_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, }; -static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { + { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, +}; + + /* * Register whitelists, sorted by increasing register offset. */ @@ -426,6 +479,10 @@ struct drm_i915_reg_descriptor { #define REG64(addr) \ REG32(addr), REG32(addr + sizeof(u32)) +#define REG64_IDX(_reg, idx) \ + { .addr = _reg(idx) }, \ + { .addr = _reg ## _UDW(idx) } + static const struct drm_i915_reg_descriptor gen7_render_regs[] = { REG64(GPGPU_THREADS_DISPATCHED), REG64(HS_INVOCATION_COUNT), @@ -479,17 +536,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { REG32(BCS_SWCTRL), }; -static const struct drm_i915_reg_descriptor ivb_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), -}; - -static const struct drm_i915_reg_descriptor hsw_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), +static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), + REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG64_IDX(BCS_GPR, 0), + REG64_IDX(BCS_GPR, 1), + REG64_IDX(BCS_GPR, 2), + REG64_IDX(BCS_GPR, 3), + REG64_IDX(BCS_GPR, 4), + REG64_IDX(BCS_GPR, 5), + REG64_IDX(BCS_GPR, 6), + REG64_IDX(BCS_GPR, 7), + REG64_IDX(BCS_GPR, 8), + REG64_IDX(BCS_GPR, 9), + REG64_IDX(BCS_GPR, 10), + REG64_IDX(BCS_GPR, 11), + REG64_IDX(BCS_GPR, 12), + REG64_IDX(BCS_GPR, 13), + REG64_IDX(BCS_GPR, 14), + REG64_IDX(BCS_GPR, 15), }; #undef REG64 @@ -550,6 +617,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } +static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + static bool validate_cmds_sorted(struct intel_engine_cs *ring, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) @@ -608,9 +686,7 @@ static bool check_sorted(int ring_id, static bool validate_regs_sorted(struct intel_engine_cs *ring) { - return check_sorted(ring->id, ring->reg_table, ring->reg_count) && - check_sorted(ring->id, ring->master_reg_table, - ring->master_reg_count); + return check_sorted(ring->id, ring->reg_table, ring->reg_count); } struct cmd_node { @@ -691,63 +767,61 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring) int cmd_table_count; int ret; - if (!IS_GEN7(ring->dev)) + if (!IS_GEN7(ring->dev) && !(IS_GEN9(ring->dev) && ring->id == BCS)) return 0; switch (ring->id) { case RCS: if (IS_HASWELL(ring->dev)) { - cmd_tables = hsw_render_ring_cmds; + cmd_tables = hsw_render_ring_cmd_table; cmd_table_count = - ARRAY_SIZE(hsw_render_ring_cmds); + ARRAY_SIZE(hsw_render_ring_cmd_table); } else { - cmd_tables = gen7_render_cmds; - cmd_table_count = ARRAY_SIZE(gen7_render_cmds); + cmd_tables = gen7_render_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); } ring->reg_table = gen7_render_regs; ring->reg_count = ARRAY_SIZE(gen7_render_regs); - if (IS_HASWELL(ring->dev)) { - ring->master_reg_table = hsw_master_regs; - ring->master_reg_count = ARRAY_SIZE(hsw_master_regs); - } else { - ring->master_reg_table = ivb_master_regs; - ring->master_reg_count = ARRAY_SIZE(ivb_master_regs); - } - ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: - cmd_tables = gen7_video_cmds; - cmd_table_count = ARRAY_SIZE(gen7_video_cmds); + cmd_tables = gen7_video_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case BCS: - if (IS_HASWELL(ring->dev)) { - cmd_tables = hsw_blt_ring_cmds; - cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); + ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + if (IS_GEN9(ring->dev)) { + cmd_tables = gen9_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); + ring->get_cmd_length_mask = + gen9_blt_get_cmd_length_mask; + + /* BCS Engine unsafe without parser */ + ring->requires_cmd_parser = 1; + } + else if (IS_HASWELL(ring->dev)) { + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); } else { - cmd_tables = gen7_blt_cmds; - cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); + cmd_tables = gen7_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); } - ring->reg_table = gen7_blt_regs; - ring->reg_count = ARRAY_SIZE(gen7_blt_regs); - - if (IS_HASWELL(ring->dev)) { - ring->master_reg_table = hsw_master_regs; - ring->master_reg_count = ARRAY_SIZE(hsw_master_regs); + if (IS_GEN9(ring->dev)) { + ring->reg_table = gen9_blt_regs; + ring->reg_count = ARRAY_SIZE(gen9_blt_regs); } else { - ring->master_reg_table = ivb_master_regs; - ring->master_reg_count = ARRAY_SIZE(ivb_master_regs); + ring->reg_table = gen7_blt_regs; + ring->reg_count = ARRAY_SIZE(gen7_blt_regs); } - ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: - cmd_tables = hsw_vebox_cmds; - cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); + cmd_tables = hsw_vebox_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); /* VECS can use the same length_mask function as VCS */ ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; @@ -769,7 +843,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring) return ret; } - ring->needs_cmd_parser = true; + ring->using_cmd_parser = true; return 0; } @@ -783,7 +857,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring) */ void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring) { - if (!ring->needs_cmd_parser) + if (!ring->using_cmd_parser) return; fini_hash_table(ring); @@ -949,30 +1023,9 @@ unpin_src: return ret ? ERR_PTR(ret) : dst; } -/** - * i915_needs_cmd_parser() - should a given ring use software command parsing? - * @ring: the ring in question - * - * Only certain platforms require software batch buffer command parsing, and - * only when enabled via module parameter. - * - * Return: true if the ring requires software command parsing - */ -bool i915_needs_cmd_parser(struct intel_engine_cs *ring) -{ - if (!ring->needs_cmd_parser) - return false; - - if (!USES_PPGTT(ring->dev)) - return false; - - return (i915.enable_cmd_parser == 1); -} - -static bool check_cmd(const struct intel_engine_cs *ring, +static int check_cmd(const struct intel_engine_cs *ring, const struct drm_i915_cmd_descriptor *desc, const u32 *cmd, u32 length, - const bool is_master, bool *oacontrol_set) { if (desc->flags & CMD_DESC_REJECT) { @@ -980,12 +1033,6 @@ static bool check_cmd(const struct intel_engine_cs *ring, return false; } - if ((desc->flags & CMD_DESC_MASTER) && !is_master) { - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", - *cmd); - return false; - } - if (desc->flags & CMD_DESC_REGISTER) { /* * Get the distance between individual register offset @@ -1002,11 +1049,6 @@ static bool check_cmd(const struct intel_engine_cs *ring, find_reg(ring->reg_table, ring->reg_count, reg_addr); - if (!reg && is_master) - reg = find_reg(ring->master_reg_table, - ring->master_reg_count, - reg_addr); - if (!reg) { DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", reg_addr, *cmd, ring->id); @@ -1091,16 +1133,113 @@ static bool check_cmd(const struct intel_engine_cs *ring, return true; } +static int check_bbstart(struct intel_context *ctx, + u32 *cmd, u64 offset, u32 length, + u32 batch_len, + u64 batch_start, + u64 shadow_batch_start) +{ + + u64 jump_offset, jump_target; + u32 target_cmd_offset, target_cmd_index; + + /* For igt compatibility on older platforms */ + if (CMDPARSER_USES_GGTT(ctx->i915)) { + DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); + return -EACCES; + } + + if (length != 3) { + DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", + length); + return -EINVAL; + } + + jump_target = *(u64*)(cmd+1); + jump_offset = jump_target - batch_start; + + /* + * Any underflow of jump_target is guaranteed to be outside the range + * of a u32, so >= test catches both too large and too small + */ + if (jump_offset >= batch_len) { + DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", + jump_target); + return -EINVAL; + } + + /* + * This cannot overflow a u32 because we already checked jump_offset + * is within the BB, and the batch_len is a u32 + */ + target_cmd_offset = lower_32_bits(jump_offset); + target_cmd_index = target_cmd_offset / sizeof(u32); + + *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; + + if (target_cmd_index == offset) + return 0; + + if (ctx->jump_whitelist_cmds <= target_cmd_index) { + DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); + return -EINVAL; + } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { + DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", + jump_target); + return -EINVAL; + } + + return 0; +} + +static void init_whitelist(struct intel_context *ctx, u32 batch_len) +{ + const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); + const u32 exact_size = BITS_TO_LONGS(batch_cmds); + u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); + unsigned long *next_whitelist; + + if (CMDPARSER_USES_GGTT(ctx->i915)) + return; + + if (batch_cmds <= ctx->jump_whitelist_cmds) { + bitmap_zero(ctx->jump_whitelist, batch_cmds); + return; + } + +again: + next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); + if (next_whitelist) { + kfree(ctx->jump_whitelist); + ctx->jump_whitelist = next_whitelist; + ctx->jump_whitelist_cmds = + next_size * BITS_PER_BYTE * sizeof(long); + return; + } + + if (next_size > exact_size) { + next_size = exact_size; + goto again; + } + + DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); + bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); + + return; +} + #define LENGTH_BIAS 2 /** * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * @ctx: the context in which the batch is to execute * @ring: the ring on which the batch is to execute * @batch_obj: the batch buffer in question - * @shadow_batch_obj: copy of the batch buffer in question + * @user_batch_start: Canonical base address of original user batch * @batch_start_offset: byte offset in the batch at which execution starts * @batch_len: length of the commands in batch_obj - * @is_master: is the submitting process the drm master? + * @shadow_batch_obj: copy of the batch buffer in question + * @shadow_batch_start: Canonical base address of shadow_batch_obj * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1108,14 +1247,16 @@ static bool check_cmd(const struct intel_engine_cs *ring, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int i915_parse_cmds(struct intel_engine_cs *ring, +int i915_parse_cmds(struct intel_context *ctx, + struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 user_batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master) + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start) { - u32 *cmd, *batch_base, *batch_end; + u32 *cmd, *batch_base, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = { 0 }; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ int ret = 0; @@ -1127,6 +1268,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring, return PTR_ERR(batch_base); } + init_whitelist(ctx, batch_len); + /* * We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra @@ -1150,16 +1293,6 @@ int i915_parse_cmds(struct intel_engine_cs *ring, break; } - /* - * If the batch buffer contains a chained batch, return an - * error that tells the caller to abort and dispatch the - * workload as a non-secure batch. - */ - if (desc->cmd.value == MI_BATCH_BUFFER_START) { - ret = -EACCES; - break; - } - if (desc->flags & CMD_DESC_FIXED) length = desc->length.fixed; else @@ -1174,13 +1307,23 @@ int i915_parse_cmds(struct intel_engine_cs *ring, break; } - if (!check_cmd(ring, desc, cmd, length, is_master, - &oacontrol_set)) { - ret = -EINVAL; + if (!check_cmd(ring, desc, cmd, length, &oacontrol_set)) { + ret = CMDPARSER_USES_GGTT(ring->dev) ? -EINVAL : -EACCES; break; } + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = check_bbstart(ctx, cmd, offset, length, + batch_len, user_batch_start, + shadow_batch_start); + break; + } + + if (ctx->jump_whitelist_cmds > offset) + set_bit(offset, ctx->jump_whitelist); + cmd += length; + offset += length; } if (oacontrol_set) { @@ -1206,7 +1349,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring, * * Return: the current version number of the cmd parser */ -int i915_cmd_parser_get_version(void) +int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) { /* * Command parser version history @@ -1218,6 +1361,7 @@ int i915_cmd_parser_get_version(void) * 3. Allow access to the GPGPU_THREADS_DISPATCHED register. * 4. L3 atomic chicken bits of HSW_SCRATCH1 and HSW_ROW_CHICKEN3. * 5. GPGPU dispatch compute indirect registers. + * 10. Gen9 only - Supports the new ppgtt based BLIT parser */ - return 5; + return CMDPARSER_USES_GGTT(dev_priv) ? 5 : 10; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 61fcb3b22297..7b61078c2330 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -133,7 +133,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = 1; break; case I915_PARAM_HAS_SECURE_BATCHES: - value = capable(CAP_SYS_ADMIN); + value = HAS_SECURE_BATCHES(dev_priv) && capable(CAP_SYS_ADMIN); break; case I915_PARAM_HAS_PINNED_BATCHES: value = 1; @@ -145,7 +145,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = 1; break; case I915_PARAM_CMD_PARSER_VERSION: - value = i915_cmd_parser_get_version(); + value = i915_cmd_parser_get_version(dev_priv); break; case I915_PARAM_HAS_COHERENT_PHYS_GTT: value = 1; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a6ad938f44a6..697b2499c7a1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -698,6 +698,8 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) return ret; } + i915_rc6_ctx_wa_suspend(dev_priv); + pci_disable_device(drm_dev->pdev); /* * During hibernation on some platforms the BIOS may try to access @@ -849,6 +851,8 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_sanitize(dev); intel_power_domains_init_hw(dev_priv); + i915_rc6_ctx_wa_resume(dev_priv); + return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6fca39e1c419..5799356f6b6b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -891,6 +891,12 @@ struct intel_context { int pin_count; } engine[I915_NUM_RINGS]; + /* jump_whitelist: Bit array for tracking cmds during cmdparsing */ + unsigned long *jump_whitelist; + + /* jump_whitelist_cmds: No of cmd slots available */ + uint32_t jump_whitelist_cmds; + struct list_head link; }; @@ -1153,6 +1159,7 @@ struct intel_gen6_power_mgmt { bool client_boost; bool enabled; + bool ctx_corrupted; struct delayed_work delayed_resume_work; unsigned boosts; @@ -2539,6 +2546,9 @@ struct drm_i915_cmd_table { #define HAS_BSD2(dev) (INTEL_INFO(dev)->ring_mask & BSD2_RING) #define HAS_BLT(dev) (INTEL_INFO(dev)->ring_mask & BLT_RING) #define HAS_VEBOX(dev) (INTEL_INFO(dev)->ring_mask & VEBOX_RING) + +#define HAS_SECURE_BATCHES(dev_priv) (INTEL_INFO(dev_priv)->gen < 6) + #define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) #define HAS_WT(dev) ((IS_HASWELL(dev) || IS_BROADWELL(dev)) && \ __I915__(dev)->ellc_size) @@ -2553,8 +2563,18 @@ struct drm_i915_cmd_table { #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) +/* + * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution + * All later gens can run the final buffer from the ppgtt + */ +#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN7(dev_priv) + /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev) (IS_I830(dev) || IS_845G(dev)) + +#define NEEDS_RC6_CTX_CORRUPTION_WA(dev) \ + (IS_BROADWELL(dev) || INTEL_INFO(dev)->gen == 9) + /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts * even when in MSI mode. This results in spurious interrupt warnings if the @@ -3276,16 +3296,19 @@ void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ -int i915_cmd_parser_get_version(void); +int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); int i915_cmd_parser_init_ring(struct intel_engine_cs *ring); void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring); bool i915_needs_cmd_parser(struct intel_engine_cs *ring); -int i915_parse_cmds(struct intel_engine_cs *ring, +int i915_parse_cmds(struct intel_context *cxt, + struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 user_batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master); + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start); + /* i915_suspend.c */ extern int i915_save_state(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0433d25f9d23..20fb0ee1df4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -157,6 +157,8 @@ void i915_gem_context_free(struct kref *ctx_ref) if (i915.enable_execlists) intel_lr_context_free(ctx); + kfree(ctx->jump_whitelist); + /* * This context is going away and we need to remove all VMAs still * around. This is to handle imported shared objects for which @@ -246,6 +248,9 @@ __create_hw_context(struct drm_device *dev, ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD; + ctx->jump_whitelist = NULL; + ctx->jump_whitelist_cmds = 0; + return ctx; err_out: diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c66307c58a7f..78cdcf0f987b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1123,17 +1123,52 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +static struct i915_vma* +shadow_batch_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_address_space *pin_vm = vm; + u64 flags; + int ret; + + /* + * PPGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (CMDPARSER_USES_GGTT(dev_priv)) { + flags = PIN_GLOBAL; + pin_vm = &dev_priv->gtt.base; + } else if (vm->has_read_only) { + flags = PIN_USER; + obj->gt_ro = 1; + } else { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return ERR_PTR(-EINVAL); + } + + ret = i915_gem_object_pin(obj, pin_vm, 0, flags); + if (ret) + return ERR_PTR(ret); + else + return i915_gem_obj_to_vma(obj, pin_vm); +} + static struct drm_i915_gem_object* -i915_gem_execbuffer_parse(struct intel_engine_cs *ring, +i915_gem_execbuffer_parse(struct intel_context *ctx, + struct intel_engine_cs *ring, struct drm_i915_gem_exec_object2 *shadow_exec_entry, struct eb_vmas *eb, + struct i915_address_space *vm, struct drm_i915_gem_object *batch_obj, u32 batch_start_offset, - u32 batch_len, - bool is_master) + u32 batch_len) { struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; + struct i915_vma *user_vma = list_entry(eb->vmas.prev, + typeof(*user_vma), exec_list); + u64 batch_start; + u64 shadow_batch_start; int ret; shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool, @@ -1141,24 +1176,34 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring, if (IS_ERR(shadow_batch_obj)) return shadow_batch_obj; - ret = i915_parse_cmds(ring, + vma = shadow_batch_pin(shadow_batch_obj, vm); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } + + batch_start = user_vma->node.start + batch_start_offset; + + shadow_batch_start = vma->node.start; + + ret = i915_parse_cmds(ctx, + ring, batch_obj, - shadow_batch_obj, + batch_start, batch_start_offset, batch_len, - is_master); - if (ret) - goto err; - - ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0); - if (ret) + shadow_batch_obj, + shadow_batch_start); + if (ret) { + WARN_ON(vma->pin_count == 0); + vma->pin_count--; goto err; + } i915_gem_object_unpin_pages(shadow_batch_obj); memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); - vma = i915_gem_obj_to_ggtt(shadow_batch_obj); vma->exec_entry = shadow_exec_entry; vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; drm_gem_object_reference(&shadow_batch_obj->base); @@ -1170,7 +1215,14 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring, err: i915_gem_object_unpin_pages(shadow_batch_obj); - if (ret == -EACCES) /* unhandled chained batch */ + + /* + * Unsafe GGTT-backed buffers can still be submitted safely + * as non-secure. + * For PPGTT backing however, we have no choice but to forcibly + * reject unsafe buffers + */ + if (CMDPARSER_USES_GGTT(batch_obj->base.dev) && (ret == -EACCES)) return batch_obj; else return ERR_PTR(ret); @@ -1322,6 +1374,13 @@ eb_get_batch(struct eb_vmas *eb) return vma->obj; } +static inline bool use_cmdparser(const struct intel_engine_cs *ring, + u32 batch_len) +{ + return ring->requires_cmd_parser || + (ring->using_cmd_parser && batch_len && USES_PPGTT(ring->dev)); +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1351,6 +1410,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags = 0; if (args->flags & I915_EXEC_SECURE) { + /* Return -EPERM to trigger fallback code on old binaries. */ + if (!HAS_SECURE_BATCHES(dev_priv)) + return -EPERM; + if (!file->is_master || !capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1489,16 +1552,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } params->args_batch_start_offset = args->batch_start_offset; - if (i915_needs_cmd_parser(ring) && args->batch_len) { + if (use_cmdparser(ring, args->batch_len)) { struct drm_i915_gem_object *parsed_batch_obj; - parsed_batch_obj = i915_gem_execbuffer_parse(ring, + u32 batch_off = args->batch_start_offset; + u32 batch_len = args->batch_len; + if (batch_len == 0) + batch_len = batch_obj->base.size - batch_off; + + parsed_batch_obj = i915_gem_execbuffer_parse(ctx, ring, &shadow_exec_entry, - eb, + eb, vm, batch_obj, - args->batch_start_offset, - args->batch_len, - file->is_master); + batch_off, + batch_len); if (IS_ERR(parsed_batch_obj)) { ret = PTR_ERR(parsed_batch_obj); goto err; @@ -1508,18 +1575,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * parsed_batch_obj == batch_obj means batch not fully parsed: * Accept, but don't promote to secure. */ - if (parsed_batch_obj != batch_obj) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - dispatch_flags |= I915_DISPATCH_SECURE; + if (CMDPARSER_USES_GGTT(dev_priv)) + dispatch_flags |= I915_DISPATCH_SECURE; params->args_batch_start_offset = 0; batch_obj = parsed_batch_obj; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b37fe0df743e..65a53ee398b8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -119,7 +119,8 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt) (enable_ppgtt == 0 || !has_aliasing_ppgtt)) return 0; - if (enable_ppgtt == 1) + /* Full PPGTT is required by the Gen9 cmdparser */ + if (enable_ppgtt == 1 && INTEL_INFO(dev)->gen != 9) return 1; if (enable_ppgtt == 2 && has_full_ppgtt) @@ -152,7 +153,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma, { u32 pte_flags = 0; - /* Currently applicable only to VLV */ + /* Applicable to VLV, and gen8+ */ + pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -172,11 +174,14 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) static gen8_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, - bool valid) + bool valid, u32 flags) { gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0; pte |= addr; + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~_PAGE_RW; + switch (level) { case I915_CACHE_NONE: pte |= PPAT_UNCACHED_INDEX; @@ -460,7 +465,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm, gen8_pte_t scratch_pte; scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), - I915_CACHE_LLC, true); + I915_CACHE_LLC, true, 0); fill_px(vm->dev, pt, scratch_pte); } @@ -757,8 +762,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), - I915_CACHE_LLC, use_scratch); + gen8_pte_t scratch_pte = + gen8_pte_encode(px_dma(vm->scratch_page), + I915_CACHE_LLC, use_scratch, 0); if (!USES_FULL_48BIT_PPGTT(vm->dev)) { gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length, @@ -779,7 +785,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp, struct sg_page_iter *sg_iter, uint64_t start, - enum i915_cache_level cache_level) + enum i915_cache_level cache_level, + u32 flags) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); @@ -799,7 +806,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, pt_vaddr[pte] = gen8_pte_encode(sg_page_iter_dma_address(sg_iter), - cache_level, true); + cache_level, true, flags); if (++pte == GEN8_PTES) { kunmap_px(ppgtt, pt_vaddr); pt_vaddr = NULL; @@ -820,7 +827,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, enum i915_cache_level cache_level, - u32 unused) + u32 flags) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); @@ -830,7 +837,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, if (!USES_FULL_48BIT_PPGTT(vm->dev)) { gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, - cache_level); + cache_level, flags); } else { struct i915_page_directory_pointer *pdp; uint64_t templ4, pml4e; @@ -838,7 +845,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) { gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, - start, cache_level); + start, cache_level, flags); } } } @@ -1447,7 +1454,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) uint64_t start = ppgtt->base.start; uint64_t length = ppgtt->base.total; gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), - I915_CACHE_LLC, true); + I915_CACHE_LLC, true, 0); if (!USES_FULL_48BIT_PPGTT(vm->dev)) { gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); @@ -1515,6 +1522,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + + /* + * From bdw, there is support for read-only pages in the PPGTT. + * + * XXX GVT is not honouring the lack of RW in the PTE bits. + */ + ppgtt->base.has_read_only = !intel_vgpu_active(ppgtt->base.dev); + ppgtt->debug_dump = gen8_dump_ppgtt; if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { @@ -2343,7 +2358,7 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, - enum i915_cache_level level, u32 unused) + enum i915_cache_level level, u32 flags) { struct drm_i915_private *dev_priv = vm->dev->dev_private; unsigned first_entry = start >> PAGE_SHIFT; @@ -2357,7 +2372,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, addr = sg_dma_address(sg_iter.sg) + (sg_iter.sg_pgoffset << PAGE_SHIFT); gen8_set_pte(>t_entries[i], - gen8_pte_encode(addr, level, true)); + gen8_pte_encode(addr, level, true, flags)); i++; } @@ -2370,7 +2385,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, */ if (i != 0) WARN_ON(readq(>t_entries[i-1]) - != gen8_pte_encode(addr, level, true)); + != gen8_pte_encode(addr, level, true, flags)); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -2444,7 +2459,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), I915_CACHE_LLC, - use_scratch); + use_scratch, 0); for (i = 0; i < num_entries; i++) gen8_set_pte(>t_base[i], scratch_pte); readl(gtt_base); @@ -2510,7 +2525,8 @@ static int ggtt_bind_vma(struct i915_vma *vma, if (ret) return ret; - /* Currently applicable only to VLV */ + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ + pte_flags = 0; if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -2653,6 +2669,9 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, i915_address_space_init(ggtt_vm, dev_priv); ggtt_vm->total += PAGE_SIZE; + /* Only VLV supports read-only GGTT mappings */ + ggtt_vm->has_read_only = IS_VALLEYVIEW(dev_priv); + if (intel_vgpu_active(dev)) { ret = intel_vgt_balloon(dev); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a216397ead52..d36f2d77576a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -307,6 +307,9 @@ struct i915_address_space { */ struct list_head inactive_list; + /* Some systems support read-only mappings for GGTT and/or PPGTT */ + bool has_read_only:1; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cace154bbdc0..603d8cdfc5f1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -170,6 +170,8 @@ #define ECOCHK_PPGTT_WT_HSW (0x2<<3) #define ECOCHK_PPGTT_WB_HSW (0x3<<3) +#define GEN8_RC6_CTX_INFO 0x8504 + #define GAC_ECO_BITS 0x14090 #define ECOBITS_SNB_BIT (1<<13) #define ECOBITS_PPGTT_CACHE64B (3<<8) @@ -511,6 +513,10 @@ */ #define BCS_SWCTRL 0x22200 +/* There are 16 GPR registers */ +#define BCS_GPR(n) (0x22600 + (n) * 8) +#define BCS_GPR_UDW(n) (0x22600 + (n) * 8 + 4) + #define GPGPU_THREADS_DISPATCHED 0x2290 #define HS_INVOCATION_COUNT 0x2300 #define DS_INVOCATION_COUNT 0x2308 @@ -1567,6 +1573,7 @@ enum skl_disp_power_wells { #define RING_IMR(base) ((base)+0xa8) #define RING_HWSTAM(base) ((base)+0x98) #define RING_TIMESTAMP(base) ((base)+0x358) +#define RING_TIMESTAMP_UDW(base) ((base) + 0x358 + 4) #define TAIL_ADDR 0x001FFFF8 #define HEAD_WRAP_COUNT 0xFFE00000 #define HEAD_WRAP_ONE 0x00200000 @@ -5704,6 +5711,10 @@ enum skl_disp_power_wells { #define GAMMA_MODE_MODE_12BIT (2 << 0) #define GAMMA_MODE_MODE_SPLIT (3 << 0) +/* Display Internal Timeout Register */ +#define RM_TIMEOUT 0x42060 +#define MMIO_TIMEOUT_US(us) ((us) << 0) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4f5d07bb3511..a9166ff48a26 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10747,6 +10747,10 @@ void intel_mark_busy(struct drm_device *dev) return; intel_runtime_pm_get(dev_priv); + + if (NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv)) + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + i915_update_gfx_val(dev_priv); if (INTEL_INFO(dev)->gen >= 6) gen6_rps_busy(dev_priv); @@ -10765,6 +10769,11 @@ void intel_mark_idle(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 6) gen6_rps_idle(dev->dev_private); + if (NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv)) { + i915_rc6_ctx_wa_check(dev_priv); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + } + intel_runtime_pm_put(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 722aa159cd28..78503e481313 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1410,6 +1410,9 @@ void intel_enable_gt_powersave(struct drm_device *dev); void intel_disable_gt_powersave(struct drm_device *dev); void intel_suspend_gt_powersave(struct drm_device *dev); void intel_reset_gt_powersave(struct drm_device *dev); +bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915); +void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915); +void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915); void gen6_update_ring_freq(struct drm_device *dev); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fd4690ed93c0..81bd84f9156b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -66,6 +66,14 @@ static void bxt_init_clock_gating(struct drm_device *dev) */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); + + /* + * Lower the display internal timeout. + * This is needed to avoid any hard hangs when DSI port PLL + * is off and a MMIO access is attempted by any privilege + * application, using batch buffers or any other means. + */ + I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); } static void i915_pineview_get_mem_freq(struct drm_device *dev) @@ -4591,30 +4599,42 @@ void intel_set_rps(struct drm_device *dev, u8 val) gen6_set_rps(dev, val); } -static void gen9_disable_rps(struct drm_device *dev) +static void gen9_disable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; I915_WRITE(GEN6_RC_CONTROL, 0); +} + +static void gen9_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + I915_WRITE(GEN9_PG_ENABLE, 0); } -static void gen6_disable_rps(struct drm_device *dev) +static void gen6_disable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; I915_WRITE(GEN6_RC_CONTROL, 0); +} + +static void gen6_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + I915_WRITE(GEN6_RPNSWREQ, 1 << 31); } -static void cherryview_disable_rps(struct drm_device *dev) +static void cherryview_disable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_disable_rps(struct drm_device *dev) +static void valleyview_disable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -4818,7 +4838,8 @@ static void gen9_enable_rc6(struct drm_device *dev) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + if (!dev_priv->rps.ctx_corrupted && + intel_enable_rc6(dev) & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off"); @@ -4841,7 +4862,7 @@ static void gen9_enable_rc6(struct drm_device *dev) * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. */ if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) || - ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_F0))) + INTEL_INFO(dev)->gen == 9) I915_WRITE(GEN9_PG_ENABLE, 0); else I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? @@ -4884,7 +4905,8 @@ static void gen8_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ /* 3: Enable RC6 */ - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + if (!dev_priv->rps.ctx_corrupted && + intel_enable_rc6(dev) & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; intel_print_rc6_info(dev, rc6_mask); if (IS_BROADWELL(dev)) @@ -6128,10 +6150,101 @@ static void intel_init_emon(struct drm_device *dev) dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); } +static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv) +{ + return !I915_READ(GEN8_RC6_CTX_INFO); +} + +static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915) +{ + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (i915_rc6_ctx_corrupted(i915)) { + DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); + i915->rps.ctx_corrupted = true; + intel_runtime_pm_get(i915); + } +} + +static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915) +{ + if (i915->rps.ctx_corrupted) { + intel_runtime_pm_put(i915); + i915->rps.ctx_corrupted = false; + } +} + +/** + * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA + * @i915: i915 device + * + * Perform any steps needed to clean up the RC6 CTX WA before system suspend. + */ +void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915) +{ + if (i915->rps.ctx_corrupted) + intel_runtime_pm_put(i915); +} + +/** + * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA + * @i915: i915 device + * + * Perform any steps needed to re-init the RC6 CTX WA after system resume. + */ +void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915) +{ + if (!i915->rps.ctx_corrupted) + return; + + if (i915_rc6_ctx_corrupted(i915)) { + intel_runtime_pm_get(i915); + return; + } + + DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); + i915->rps.ctx_corrupted = false; +} + +static void intel_disable_rc6(struct drm_device *dev); + +/** + * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption + * @i915: i915 device + * + * Check if an RC6 CTX corruption has happened since the last check and if so + * disable RC6 and runtime power management. + * + * Return false if no context corruption has happened since the last call of + * this function, true otherwise. +*/ +bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915) +{ + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return false; + + if (i915->rps.ctx_corrupted) + return false; + + if (!i915_rc6_ctx_corrupted(i915)) + return false; + + DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); + + intel_disable_rc6(i915->dev); + i915->rps.ctx_corrupted = true; + intel_runtime_pm_get_noresume(i915); + + return true; +} + void intel_init_gt_powersave(struct drm_device *dev) { i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6); + i915_rc6_ctx_wa_init(to_i915(dev)); + if (IS_CHERRYVIEW(dev)) cherryview_init_gt_powersave(dev); else if (IS_VALLEYVIEW(dev)) @@ -6144,6 +6257,8 @@ void intel_cleanup_gt_powersave(struct drm_device *dev) return; else if (IS_VALLEYVIEW(dev)) valleyview_cleanup_gt_powersave(dev); + + i915_rc6_ctx_wa_cleanup(to_i915(dev)); } static void gen6_suspend_rps(struct drm_device *dev) @@ -6176,6 +6291,38 @@ void intel_suspend_gt_powersave(struct drm_device *dev) gen6_rps_idle(dev_priv); } +static void __intel_disable_rc6(struct drm_device *dev) +{ + if (INTEL_INFO(dev)->gen >= 9) + gen9_disable_rc6(dev); + else if (IS_CHERRYVIEW(dev)) + cherryview_disable_rc6(dev); + else if (IS_VALLEYVIEW(dev)) + valleyview_disable_rc6(dev); + else + gen6_disable_rc6(dev); +} + +static void intel_disable_rc6(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + mutex_lock(&dev_priv->rps.hw_lock); + __intel_disable_rc6(dev); + mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void intel_disable_rps(struct drm_device *dev) +{ + if (IS_CHERRYVIEW(dev) || IS_VALLEYVIEW(dev)) + return; + + if (INTEL_INFO(dev)->gen >= 9) + gen9_disable_rps(dev); + else + gen6_disable_rps(dev); +} + void intel_disable_gt_powersave(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -6186,16 +6333,12 @@ void intel_disable_gt_powersave(struct drm_device *dev) intel_suspend_gt_powersave(dev); mutex_lock(&dev_priv->rps.hw_lock); - if (INTEL_INFO(dev)->gen >= 9) - gen9_disable_rps(dev); - else if (IS_CHERRYVIEW(dev)) - cherryview_disable_rps(dev); - else if (IS_VALLEYVIEW(dev)) - valleyview_disable_rps(dev); - else - gen6_disable_rps(dev); + + __intel_disable_rc6(dev); + intel_disable_rps(dev); dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9d48443bca2e..df6547f60a5c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2058,6 +2058,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) static int intel_alloc_ringbuffer_obj(struct drm_device *dev, struct intel_ringbuffer *ringbuf) { + struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_address_space *vm = &dev_priv->gtt.base; struct drm_i915_gem_object *obj; obj = NULL; @@ -2068,8 +2070,12 @@ static int intel_alloc_ringbuffer_obj(struct drm_device *dev, if (obj == NULL) return -ENOMEM; - /* mark ring buffers as read-only from GPU side by default */ - obj->gt_ro = 1; + /* + * Mark ring buffers as read-only from GPU side (so no stray overwrites) + * if supported by the platform's GGTT. + */ + if (vm->has_read_only) + obj->gt_ro = 1; ringbuf->obj = obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 49fa41dc0eb6..56c872b89a92 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -314,7 +314,8 @@ struct intel_engine_cs { volatile u32 *cpu_page; } scratch; - bool needs_cmd_parser; + bool using_cmd_parser; + bool requires_cmd_parser; /* * Table of commands the command parser needs to know about diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 892d0a71d766..57724a3afe78 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1956,6 +1956,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev) case 0x682C: si_pi->cac_weights = cac_weights_cape_verde_pro; si_pi->dte_data = dte_data_sun_xt; + update_dte_from_pl2 = true; break; case 0x6825: case 0x6827: |
