diff options
| author | Ingo Molnar <mingo@kernel.org> | 2012-04-14 13:18:27 +0200 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2012-04-14 13:19:04 +0200 |
| commit | 6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch) | |
| tree | 021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /drivers/gpu/drm/nouveau/nv50_pm.c | |
| parent | 682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff) | |
| parent | a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff) | |
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree),
to prepare for tooling changes, and also to pick up v3.4 MM
changes that the uprobes code needs to take care of.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/gpu/drm/nouveau/nv50_pm.c')
| -rw-r--r-- | drivers/gpu/drm/nouveau/nv50_pm.c | 397 |
1 files changed, 257 insertions, 140 deletions
diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c index ec5481dfcd82..d020ed4979b4 100644 --- a/drivers/gpu/drm/nouveau/nv50_pm.c +++ b/drivers/gpu/drm/nouveau/nv50_pm.c @@ -28,6 +28,7 @@ #include "nouveau_hw.h" #include "nouveau_pm.h" #include "nouveau_hwsq.h" +#include "nv50_display.h" enum clk_src { clk_src_crystal, @@ -352,17 +353,13 @@ nv50_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl) } struct nv50_pm_state { + struct nouveau_pm_level *perflvl; + struct hwsq_ucode eclk_hwsq; struct hwsq_ucode mclk_hwsq; u32 mscript; - - u32 emast; - u32 nctrl; - u32 ncoef; - u32 sctrl; - u32 scoef; - - u32 amast; - u32 pdivs; + u32 mmast; + u32 mctrl; + u32 mcoef; }; static u32 @@ -415,40 +412,153 @@ clk_same(u32 a, u32 b) return ((a / 1000) == (b / 1000)); } +static void +mclk_precharge(struct nouveau_mem_exec_func *exec) +{ + struct nv50_pm_state *info = exec->priv; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; + + hwsq_wr32(hwsq, 0x1002d4, 0x00000001); +} + +static void +mclk_refresh(struct nouveau_mem_exec_func *exec) +{ + struct nv50_pm_state *info = exec->priv; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; + + hwsq_wr32(hwsq, 0x1002d0, 0x00000001); +} + +static void +mclk_refresh_auto(struct nouveau_mem_exec_func *exec, bool enable) +{ + struct nv50_pm_state *info = exec->priv; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; + + hwsq_wr32(hwsq, 0x100210, enable ? 0x80000000 : 0x00000000); +} + +static void +mclk_refresh_self(struct nouveau_mem_exec_func *exec, bool enable) +{ + struct nv50_pm_state *info = exec->priv; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; + + hwsq_wr32(hwsq, 0x1002dc, enable ? 0x00000001 : 0x00000000); +} + +static void +mclk_wait(struct nouveau_mem_exec_func *exec, u32 nsec) +{ + struct nv50_pm_state *info = exec->priv; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; + + if (nsec > 1000) + hwsq_usec(hwsq, (nsec + 500) / 1000); +} + +static u32 +mclk_mrg(struct nouveau_mem_exec_func *exec, int mr) +{ + if (mr <= 1) + return nv_rd32(exec->dev, 0x1002c0 + ((mr - 0) * 4)); + if (mr <= 3) + return nv_rd32(exec->dev, 0x1002e0 + ((mr - 2) * 4)); + return 0; +} + +static void +mclk_mrs(struct nouveau_mem_exec_func *exec, int mr, u32 data) +{ + struct drm_nouveau_private *dev_priv = exec->dev->dev_private; + struct nv50_pm_state *info = exec->priv; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; + + if (mr <= 1) { + if (dev_priv->vram_rank_B) + hwsq_wr32(hwsq, 0x1002c8 + ((mr - 0) * 4), data); + hwsq_wr32(hwsq, 0x1002c0 + ((mr - 0) * 4), data); + } else + if (mr <= 3) { + if (dev_priv->vram_rank_B) + hwsq_wr32(hwsq, 0x1002e8 + ((mr - 2) * 4), data); + hwsq_wr32(hwsq, 0x1002e0 + ((mr - 2) * 4), data); + } +} + +static void +mclk_clock_set(struct nouveau_mem_exec_func *exec) +{ + struct nv50_pm_state *info = exec->priv; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; + u32 ctrl = nv_rd32(exec->dev, 0x004008); + + info->mmast = nv_rd32(exec->dev, 0x00c040); + info->mmast &= ~0xc0000000; /* get MCLK_2 from HREF */ + info->mmast |= 0x0000c000; /* use MCLK_2 as MPLL_BYPASS clock */ + + hwsq_wr32(hwsq, 0xc040, info->mmast); + hwsq_wr32(hwsq, 0x4008, ctrl | 0x00000200); /* bypass MPLL */ + if (info->mctrl & 0x80000000) + hwsq_wr32(hwsq, 0x400c, info->mcoef); + hwsq_wr32(hwsq, 0x4008, info->mctrl); +} + +static void +mclk_timing_set(struct nouveau_mem_exec_func *exec) +{ + struct drm_device *dev = exec->dev; + struct nv50_pm_state *info = exec->priv; + struct nouveau_pm_level *perflvl = info->perflvl; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; + int i; + + for (i = 0; i < 9; i++) { + u32 reg = 0x100220 + (i * 4); + u32 val = nv_rd32(dev, reg); + if (val != perflvl->timing.reg[i]) + hwsq_wr32(hwsq, reg, perflvl->timing.reg[i]); + } +} + static int -calc_mclk(struct drm_device *dev, u32 freq, struct hwsq_ucode *hwsq) +calc_mclk(struct drm_device *dev, struct nouveau_pm_level *perflvl, + struct nv50_pm_state *info) { struct drm_nouveau_private *dev_priv = dev->dev_private; + u32 crtc_mask = nv50_display_active_crtcs(dev); + struct nouveau_mem_exec_func exec = { + .dev = dev, + .precharge = mclk_precharge, + .refresh = mclk_refresh, + .refresh_auto = mclk_refresh_auto, + .refresh_self = mclk_refresh_self, + .wait = mclk_wait, + .mrg = mclk_mrg, + .mrs = mclk_mrs, + .clock_set = mclk_clock_set, + .timing_set = mclk_timing_set, + .priv = info + }; + struct hwsq_ucode *hwsq = &info->mclk_hwsq; struct pll_lims pll; - u32 mast = nv_rd32(dev, 0x00c040); - u32 ctrl = nv_rd32(dev, 0x004008); - u32 coef = nv_rd32(dev, 0x00400c); - u32 orig = ctrl; - u32 crtc_mask = 0; int N, M, P; - int ret, i; + int ret; /* use pcie refclock if possible, otherwise use mpll */ - ctrl &= ~0x81ff0200; - if (clk_same(freq, read_clk(dev, clk_src_href))) { - ctrl |= 0x00000200 | (pll.log2p_bias << 19); + info->mctrl = nv_rd32(dev, 0x004008); + info->mctrl &= ~0x81ff0200; + if (clk_same(perflvl->memory, read_clk(dev, clk_src_href))) { + info->mctrl |= 0x00000200 | (pll.log2p_bias << 19); } else { - ret = calc_pll(dev, 0x4008, &pll, freq, &N, &M, &P); + ret = calc_pll(dev, 0x4008, &pll, perflvl->memory, &N, &M, &P); if (ret == 0) return -EINVAL; - ctrl |= 0x80000000 | (P << 22) | (P << 16); - ctrl |= pll.log2p_bias << 19; - coef = (N << 8) | M; - } - - mast &= ~0xc0000000; /* get MCLK_2 from HREF */ - mast |= 0x0000c000; /* use MCLK_2 as MPLL_BYPASS clock */ - - /* determine active crtcs */ - for (i = 0; i < 2; i++) { - if (nv_rd32(dev, NV50_PDISPLAY_CRTC_C(i, CLOCK))) - crtc_mask |= (1 << i); + info->mctrl |= 0x80000000 | (P << 22) | (P << 16); + info->mctrl |= pll.log2p_bias << 19; + info->mcoef = (N << 8) | M; } /* build the ucode which will reclock the memory for us */ @@ -462,25 +572,10 @@ calc_mclk(struct drm_device *dev, u32 freq, struct hwsq_ucode *hwsq) hwsq_setf(hwsq, 0x10, 0); /* disable bus access */ hwsq_op5f(hwsq, 0x00, 0x01); /* no idea :s */ - /* prepare memory controller */ - hwsq_wr32(hwsq, 0x1002d4, 0x00000001); /* precharge banks and idle */ - hwsq_wr32(hwsq, 0x1002d0, 0x00000001); /* force refresh */ - hwsq_wr32(hwsq, 0x100210, 0x00000000); /* stop the automatic refresh */ - hwsq_wr32(hwsq, 0x1002dc, 0x00000001); /* start self refresh mode */ - - /* reclock memory */ - hwsq_wr32(hwsq, 0xc040, mast); - hwsq_wr32(hwsq, 0x4008, orig | 0x00000200); /* bypass MPLL */ - hwsq_wr32(hwsq, 0x400c, coef); - hwsq_wr32(hwsq, 0x4008, ctrl); - - /* restart memory controller */ - hwsq_wr32(hwsq, 0x1002d4, 0x00000001); /* precharge banks and idle */ - hwsq_wr32(hwsq, 0x1002dc, 0x00000000); /* stop self refresh mode */ - hwsq_wr32(hwsq, 0x100210, 0x80000000); /* restart automatic refresh */ - hwsq_usec(hwsq, 12); /* wait for the PLL to stabilize */ - - hwsq_usec(hwsq, 48); /* may be unnecessary: causes flickering */ + ret = nouveau_mem_exec(&exec, perflvl); + if (ret) + return ret; + hwsq_setf(hwsq, 0x10, 1); /* enable bus access */ hwsq_op5f(hwsq, 0x00, 0x00); /* no idea, reverse of 0x00, 0x01? */ if (dev_priv->chipset >= 0x92) @@ -494,10 +589,11 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) { struct drm_nouveau_private *dev_priv = dev->dev_private; struct nv50_pm_state *info; + struct hwsq_ucode *hwsq; struct pll_lims pll; + u32 out, mast, divs, ctrl; int clk, ret = -EINVAL; int N, M, P1, P2; - u32 out; if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac) @@ -506,54 +602,44 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); + info->perflvl = perflvl; - /* core: for the moment at least, always use nvpll */ - clk = calc_pll(dev, 0x4028, &pll, perflvl->core, &N, &M, &P1); - if (clk == 0) - goto error; + /* memory: build hwsq ucode which we'll use to reclock memory. + * use pcie refclock if possible, otherwise use mpll */ + info->mclk_hwsq.len = 0; + if (perflvl->memory) { + ret = calc_mclk(dev, perflvl, info); + if (ret) + goto error; + info->mscript = perflvl->memscript; + } - info->emast = 0x00000003; - info->nctrl = 0x80000000 | (P1 << 19) | (P1 << 16); - info->ncoef = (N << 8) | M; + divs = read_div(dev); + mast = info->mmast; - /* shader: tie to nvclk if possible, otherwise use spll. have to be - * very careful that the shader clock is at least twice the core, or - * some chipsets will be very unhappy. i expect most or all of these - * cases will be handled by tying to nvclk, but it's possible there's - * corners - */ - if (P1-- && perflvl->shader == (perflvl->core << 1)) { - info->emast |= 0x00000020; - info->sctrl = 0x00000000 | (P1 << 19) | (P1 << 16); - info->scoef = nv_rd32(dev, 0x004024); - } else { - clk = calc_pll(dev, 0x4020, &pll, perflvl->shader, &N, &M, &P1); - if (clk == 0) - goto error; + /* start building HWSQ script for engine reclocking */ + hwsq = &info->eclk_hwsq; + hwsq_init(hwsq); + hwsq_setf(hwsq, 0x10, 0); /* disable bus access */ + hwsq_op5f(hwsq, 0x00, 0x01); /* wait for access disabled? */ - info->emast |= 0x00000030; - info->sctrl = 0x80000000 | (P1 << 19) | (P1 << 16); - info->scoef = (N << 8) | M; + /* vdec/dom6: switch to "safe" clocks temporarily */ + if (perflvl->vdec) { + mast &= ~0x00000c00; + divs &= ~0x00000700; } - /* memory: build hwsq ucode which we'll use to reclock memory */ - info->mclk_hwsq.len = 0; - if (perflvl->memory) { - clk = calc_mclk(dev, perflvl->memory, &info->mclk_hwsq); - if (clk < 0) { - ret = clk; - goto error; - } - - info->mscript = perflvl->memscript; + if (perflvl->dom6) { + mast &= ~0x0c000000; + divs &= ~0x00000007; } + hwsq_wr32(hwsq, 0x00c040, mast); + /* vdec: avoid modifying xpll until we know exactly how the other * clock domains work, i suspect at least some of them can also be * tied to xpll... */ - info->amast = nv_rd32(dev, 0x00c040); - info->pdivs = read_div(dev); if (perflvl->vdec) { /* see how close we can get using nvclk as a source */ clk = calc_div(perflvl->core, perflvl->vdec, &P1); @@ -566,16 +652,14 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) out = calc_div(out, perflvl->vdec, &P2); /* select whichever gets us closest */ - info->amast &= ~0x00000c00; - info->pdivs &= ~0x00000700; if (abs((int)perflvl->vdec - clk) <= abs((int)perflvl->vdec - out)) { if (dev_priv->chipset != 0x98) - info->amast |= 0x00000c00; - info->pdivs |= P1 << 8; + mast |= 0x00000c00; + divs |= P1 << 8; } else { - info->amast |= 0x00000800; - info->pdivs |= P2 << 8; + mast |= 0x00000800; + divs |= P2 << 8; } } @@ -583,21 +667,82 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) * of the host clock frequency */ if (perflvl->dom6) { - info->amast &= ~0x0c000000; if (clk_same(perflvl->dom6, read_clk(dev, clk_src_href))) { - info->amast |= 0x00000000; + mast |= 0x00000000; } else if (clk_same(perflvl->dom6, read_clk(dev, clk_src_hclk))) { - info->amast |= 0x08000000; + mast |= 0x08000000; } else { clk = read_clk(dev, clk_src_hclk) * 3; clk = calc_div(clk, perflvl->dom6, &P1); - info->amast |= 0x0c000000; - info->pdivs = (info->pdivs & ~0x00000007) | P1; + mast |= 0x0c000000; + divs |= P1; } } + /* vdec/dom6: complete switch to new clocks */ + switch (dev_priv->chipset) { + case 0x92: + case 0x94: + case 0x96: + hwsq_wr32(hwsq, 0x004800, divs); + break; + default: + hwsq_wr32(hwsq, 0x004700, divs); + break; + } + + hwsq_wr32(hwsq, 0x00c040, mast); + + /* core/shader: make sure sclk/nvclk are disconnected from their + * PLLs (nvclk to dom6, sclk to hclk) + */ + if (dev_priv->chipset < 0x92) + mast = (mast & ~0x001000b0) | 0x00100080; + else + mast = (mast & ~0x000000b3) | 0x00000081; + + hwsq_wr32(hwsq, 0x00c040, mast); + + /* core: for the moment at least, always use nvpll */ + clk = calc_pll(dev, 0x4028, &pll, perflvl->core, &N, &M, &P1); + if (clk == 0) + goto error; + + ctrl = nv_rd32(dev, 0x004028) & ~0xc03f0100; + mast &= ~0x00100000; + mast |= 3; + + hwsq_wr32(hwsq, 0x004028, 0x80000000 | (P1 << 19) | (P1 << 16) | ctrl); + hwsq_wr32(hwsq, 0x00402c, (N << 8) | M); + + /* shader: tie to nvclk if possible, otherwise use spll. have to be + * very careful that the shader clock is at least twice the core, or + * some chipsets will be very unhappy. i expect most or all of these + * cases will be handled by tying to nvclk, but it's possible there's + * corners + */ + ctrl = nv_rd32(dev, 0x004020) & ~0xc03f0100; + + if (P1-- && perflvl->shader == (perflvl->core << 1)) { + hwsq_wr32(hwsq, 0x004020, (P1 << 19) | (P1 << 16) | ctrl); + hwsq_wr32(hwsq, 0x00c040, 0x00000020 | mast); + } else { + clk = calc_pll(dev, 0x4020, &pll, perflvl->shader, &N, &M, &P1); + if (clk == 0) + goto error; + ctrl |= 0x80000000; + + hwsq_wr32(hwsq, 0x004020, (P1 << 19) | (P1 << 16) | ctrl); + hwsq_wr32(hwsq, 0x004024, (N << 8) | M); + hwsq_wr32(hwsq, 0x00c040, 0x00000030 | mast); + } + + hwsq_setf(hwsq, 0x10, 1); /* enable bus access */ + hwsq_op5f(hwsq, 0x00, 0x00); /* wait for access enabled? */ + hwsq_fini(hwsq); + return info; error: kfree(info); @@ -605,23 +750,24 @@ error: } static int -prog_mclk(struct drm_device *dev, struct hwsq_ucode *hwsq) +prog_hwsq(struct drm_device *dev, struct hwsq_ucode *hwsq) { struct drm_nouveau_private *dev_priv = dev->dev_private; u32 hwsq_data, hwsq_kick; int i; - if (dev_priv->chipset < 0x90) { + if (dev_priv->chipset < 0x94) { hwsq_data = 0x001400; hwsq_kick = 0x00000003; } else { hwsq_data = 0x080000; hwsq_kick = 0x00000001; } - /* upload hwsq ucode */ nv_mask(dev, 0x001098, 0x00000008, 0x00000000); nv_wr32(dev, 0x001304, 0x00000000); + if (dev_priv->chipset >= 0x92) + nv_wr32(dev, 0x001318, 0x00000000); for (i = 0; i < hwsq->len / 4; i++) nv_wr32(dev, hwsq_data + (i * 4), hwsq->ptr.u32[i]); nv_mask(dev, 0x001098, 0x00000018, 0x00000018); @@ -645,20 +791,19 @@ prog_mclk(struct drm_device *dev, struct hwsq_ucode *hwsq) int nv50_pm_clocks_set(struct drm_device *dev, void *data) { - struct drm_nouveau_private *dev_priv = dev->dev_private; struct nv50_pm_state *info = data; struct bit_entry M; - int ret = 0; + int ret = -EBUSY; /* halt and idle execution engines */ nv_mask(dev, 0x002504, 0x00000001, 0x00000001); if (!nv_wait(dev, 0x002504, 0x00000010, 0x00000010)) - goto error; + goto resume; + if (!nv_wait(dev, 0x00251c, 0x0000003f, 0x0000003f)) + goto resume; - /* memory: it is *very* important we change this first, the ucode - * we build in pre() now has hardcoded 0xc040 values, which can't - * change before we execute it or the engine clocks may end up - * messed up. + /* program memory clock, if necessary - must come before engine clock + * reprogramming due to how we construct the hwsq scripts in pre() */ if (info->mclk_hwsq.len) { /* execute some scripts that do ??? from the vbios.. */ @@ -672,42 +817,14 @@ nv50_pm_clocks_set(struct drm_device *dev, void *data) nouveau_bios_init_exec(dev, info->mscript); } - ret = prog_mclk(dev, &info->mclk_hwsq); + ret = prog_hwsq(dev, &info->mclk_hwsq); if (ret) goto resume; } - /* reclock vdec/dom6 */ - nv_mask(dev, 0x00c040, 0x00000c00, 0x00000000); - switch (dev_priv->chipset) { - case 0x92: - case 0x94: - case 0x96: - nv_mask(dev, 0x004800, 0x00000707, info->pdivs); - break; - default: - nv_mask(dev, 0x004700, 0x00000707, info->pdivs); - break; - } - nv_mask(dev, 0x00c040, 0x0c000c00, info->amast); + /* program engine clocks */ + ret = prog_hwsq(dev, &info->eclk_hwsq); - /* core/shader: make sure sclk/nvclk are disconnected from their - * plls (nvclk to dom6, sclk to hclk), modify the plls, and - * reconnect sclk/nvclk to their new clock source - */ - if (dev_priv->chipset < 0x92) - nv_mask(dev, 0x00c040, 0x001000b0, 0x00100080); /* grrr! */ - else - nv_mask(dev, 0x00c040, 0x000000b3, 0x00000081); - nv_mask(dev, 0x004020, 0xc03f0100, info->sctrl); - nv_wr32(dev, 0x004024, info->scoef); - nv_mask(dev, 0x004028, 0xc03f0100, info->nctrl); - nv_wr32(dev, 0x00402c, info->ncoef); - nv_mask(dev, 0x00c040, 0x00100033, info->emast); - - goto resume; -error: - ret = -EBUSY; resume: nv_mask(dev, 0x002504, 0x00000001, 0x00000000); kfree(info); |
