diff --git a/radeon-git-upstream-fixes2.patch b/radeon-git-upstream-fixes2.patch index fb8953b..ff76c92 100644 --- a/radeon-git-upstream-fixes2.patch +++ b/radeon-git-upstream-fixes2.patch @@ -1,3 +1,319 @@ +commit c3532268875fd24e6519bea2fb1b814d612bbdb4 +Author: Dave Airlie +Date: Wed May 7 02:37:18 2008 +1000 + + radeon: fix zaphod EXA with texture video + +commit ffc437f3606ab8ceba1ff152e4bb08988a58b54c +Author: Dave Airlie +Date: Wed May 7 02:30:28 2008 +1000 + + avivo: fix zaphod cursor in theory + +commit e36ef14e3a1087e1fe41baa26ade2937f396001f +Author: Dave Airlie +Date: Wed May 7 01:39:28 2008 +1000 + + radeon: fix textured-xv on zaphod + +commit 8fc19bee27c0f151d2ab3354f6ac0992b358436d +Author: Dave Airlie +Date: Wed May 7 01:38:24 2008 +1000 + + radeon: zaphod: fix render accel for EXA + +commit fc41b9042a5220a8419cc7b69ca3850cae6b903c +Author: Dave Airlie +Date: Wed May 7 01:32:23 2008 +1000 + + radeon: fix EXA pixmap offset on zaphod + +commit 4568cb820d567c6909a4be956d7e79b91232535e +Author: Dave Airlie +Date: Wed May 7 01:19:39 2008 +1000 + + radeon: zaphod fixes for pciaccess not allowing multiple MMIO maps + +commit ca81fa086b21633a7fd926833fb6d1d4fa080646 +Author: Dave Airlie +Date: Wed May 7 01:12:01 2008 +1000 + + radeon: zaphod fix for cursor on second head + + We don't need to add fbOffset here as the mmap we have of the framebuffer + starts half way. + +commit 24b60c8965f6a0b3f0c2bb1e7236b4d6642c5918 +Author: Julien Cristau +Date: Fri May 2 15:30:45 2008 -0400 + + Add a test for __GLIBC__ to CD_Common_Types.h. + + Atombios redefines the standard types but the definitions conflict + with the ones from glibc (causes build failures on GNU/Hurd + and GNU/kFreeBSD). 
+ +commit f051359ac09c6b9416e39b9ca7d9dc0880aa1557 +Author: thegraze +Date: Fri May 2 14:02:16 2008 -0400 + + ATOM: add support for DragonFlyBSD + +commit 3d469cbc3225d890a895dac7cbc1ab7e08054b48 +Author: Alex Deucher +Date: Wed Apr 30 18:33:04 2008 -0400 + + RADEON: lock the cursors when updating + + this should fix occasional corruption seen when updating + the cursor. + +commit 445b71021843665ba32f37b2ce5c9d2857c07cc7 +Author: Alex Deucher +Date: Tue Apr 29 21:01:41 2008 -0400 + + RADEON: assorted fixes + + - free rotate pixmaps on VT switch + - save crtc/output status so we only turn on + crtcs/outputs if they are off + - show/hide cursors when changing modes + +commit 070cce5255a5c311f9d8b85ec54bd56655014933 +Author: Stephan Wolf +Date: Mon Apr 28 11:26:37 2008 -0400 + + R3xx+: further fix for IGP chips + + see bug 15538 + +commit 211e0041c7fc2df494b77428553943a2b526ee4e +Author: Alex Deucher +Date: Sun Apr 27 21:08:00 2008 -0400 + + IGP: fix EXA composite corruption + +commit 656b06bdde129ca4fc370f5a2cf7311c9179b0ff +Author: Alex Deucher +Date: Sun Apr 27 20:20:49 2008 -0400 + + RADEON: remove duplicate register define + + Also add more bit defs to wait_until register + +commit 8a9820a3aa49bc667f90ac291a27e4d7b4ae38b3 +Author: Alex Deucher +Date: Sun Apr 27 19:02:22 2008 -0400 + + RADEON: decrease crtc/output verbosity + +commit c5d62fa0e8f52c3264ff9db3ff10cdf5a806bfc0 +Author: Owen Taylor +Date: Thu Apr 17 13:14:53 2008 +0200 + + Emulate repeats by drawing in tiles + + When we can't turn on hardware repeats, because the texture + is non-power-of-two, or has padding at the ends of lines, + try to draw the image in multiple tiles rather than falling + back to software. (We can only do this when there is no + transform.) + +commit eeb7b74bb6c813b0e3afa4b704f6ffb0d0aab92b +Author: Owen Taylor +Date: Thu Apr 17 13:14:25 2008 +0200 + + Turn on wrapping when repeating on R100 + R200 + + Actually enable repeats for R100 and R200. 
This corresponds + to a R300 change made in the patch in: + http://bugs.freedesktop.org/show_bug.cgi?id=15333 + +commit e511f39dfef503006cf249b9f6934091eaade9b5 +Author: Alex Deucher +Date: Thu Apr 17 05:04:34 2008 -0400 + + R300+: move more common code into init3d() + + - pre-load r3xx tex instructions + - setup RS instructions in init3d() + +commit 99435b7c18d931ea620044d0fdb4cc93dfcc6331 +Author: Owen Taylor +Date: Thu Apr 17 02:46:11 2008 -0400 + + Radeon: Omit mask coordinates + + Adapted from Owen's patch on bug 15546 + This fixes the slowness with aatext on r300 + and may speed up other chips marginally. + +commit 37614e1db9a595fbe8a21d7a045895e11d272db9 +Author: Alex Deucher +Date: Tue Apr 15 09:48:16 2008 -0400 + + fix up some things from the last commit + +commit 1286fe5ce1c77453d57817b9b26b1bdb32ca7bc8 +Author: Alex Deucher +Date: Mon Apr 14 20:02:14 2008 -0400 + + R300+: properly setup vap_cntl + + this fixes tcl/pvs on RV515 among other things + +commit f72a4b805db26f10f69330b88459cbeae661189b +Author: Alex Deucher +Date: Mon Apr 14 14:10:40 2008 -0400 + + EXA: Don't wait for 3D idle after each Composite() + + wait in CompositeDone() instead + +commit 4cd4acf1092aeb696b086a382a033aee471d2de9 +Author: Alex Deucher +Date: Mon Apr 14 11:50:59 2008 -0400 + + R300: move more common code to init3d() + +commit 3c523c9a07402e17dff588fad842224c57e98223 +Author: Alex Deucher +Date: Mon Apr 14 11:21:42 2008 -0400 + + R3xx+: 3D engine documentation and minor cleanups + + - document the R300 exa/textured video code + - minor cleanups of textured video code to clarify meaning + +commit ce025bbb2496d4de94b8d4ac450c64441b64ee04 +Author: Alex Deucher +Date: Sat Apr 12 21:22:03 2008 -0400 + + R300+: consolidate some tcl/non-tcl paths + + - Move more code to init3d() + - MMIO textured video seems more reliable now on newer chips + +commit 11b54a319c7c9dd52e3fb13372697059dafe1cd3 +Author: Alex Deucher +Date: Sat Apr 12 16:50:22 2008 -0400 + + R3xx+: fix XAA + textured 
video on non-TCL path + +commit dd15a2f5906725116b8cd9954243099055e88e37 +Author: Alex Deucher +Date: Sat Apr 12 16:49:03 2008 -0400 + + R3xx+: more fixes to 2D/3D engine init + +commit f3e68d4b7afd2e23675bf6361c496814c9cb4b94 +Author: Alex Deucher +Date: Fri Apr 11 10:59:07 2008 -0400 + + Fix exa glyph corruption on newer chips + +commit b59686d6427cbf8b35e36b020cbbc6a0c5149b22 +Author: Alex Deucher +Date: Fri Apr 11 10:15:25 2008 -0400 + + R300+: pre-load vertex programs in init3D() + +commit acc5833a35ad6c29a57f659607afb27eebdc2ea5 +Author: Alex Deucher +Date: Thu Apr 10 17:52:52 2008 -0400 + + R3xx+: consolidate more tcl code + +commit 6f8f75bd19ef1919c0291141675be2d0e29b3251 +Author: Alex Deucher +Date: Thu Apr 10 17:08:50 2008 -0400 + + R3xx+: consolidate some common 3D code + +commit 4b9234e1c4f7c7f419cb4245d64f3f9756c98bb6 +Author: Alex Deucher +Date: Thu Apr 10 16:58:22 2008 -0400 + + R3xx+: tcl wip + +commit 865c463e3afb4759758f569132be8bf1386da5cc +Author: Alex Deucher +Date: Thu Apr 10 16:51:04 2008 -0400 + + R300+: textured video tcl cleanup + +commit 79c8d4ca36a1c3e5fe759d4ccc379c36af8f1676 +Author: Alex Deucher +Date: Thu Apr 10 16:28:18 2008 -0400 + + RADEON: cleanup + +commit c4821a287d29a65f3bcb7d60dc72ec13c0384008 +Author: Alex Deucher +Date: Thu Apr 10 16:20:17 2008 -0400 + + Revert "R3xx/R5xx: move more VAP, etc. state setup into common init3d() function" + + This reverts commit 305a3310963a5dd07b3495015b06aa8c7c4e6b02. 
+ + Conflicts: + + src/radeon_commonfuncs.c + src/radeon_exa_render.c + src/radeon_textured_videofuncs.c + +commit 0032c80bf30bab189204e3e6929e18a19d753138 +Author: Alex Deucher +Date: Thu Apr 10 14:35:00 2008 -0400 + + RADEON: store tcl status in driver rec + +commit 9e2ffe66d106abe34a670d2edc9905ed62c485e8 +Author: Alex Deucher +Date: Thu Apr 10 14:24:04 2008 -0400 + + R3xx+: use the right register for engine flush + +commit e1a9f26c2d2cbca9ad159e723ec95b95be1ef349 +Author: Alex Deucher +Date: Thu Apr 10 14:12:15 2008 -0400 + + R3xx+: minor textured video fixes + + - set shader output swizzling correctly + - flush the right cache register on r3xx+ + +commit d79040906cd25bd494feb5901f465bbd050aa923 +Author: Alex Deucher +Date: Thu Apr 10 13:59:58 2008 -0400 + + R3xx+: EXA/textured video fixes + + - get pipe config based on GB_PIPE_SELECT where applicable + (adapted from a similar patch from Dave) + - only flush the dst cache after submitting vertices, freeing + the cache lines stalls the pipe + - no need to wait for 3D idle after submitting vertices + - fix PURGE_CACHE() and PURGE_ZCACHE() for r3xx+ + - fix depth 16 with EXA composite + +commit 0a96173cc38e506728d4c3f2dd383ba56e856578 +Author: Michel Dänzer +Date: Mon Apr 7 18:15:34 2008 +0200 + + Increase default CP timeout. + + Helps avoid spurious timeouts causing problems, see + http://bugs.freedesktop.org/show_bug.cgi?id=15203 . + +commit 255fbf465f5e7db2609a5a151bfa810249db52a0 +Author: Owen W. Taylor +Date: Thu Apr 3 02:25:41 2008 -0400 + + Fix rendering of transformed sources for REPEAT_NONE with EXA on >= R300. + + Use the border color when possible, otherwise fall back to software. + commit bc0407e53237d7968808110bc0243076377acf6e Author: Alex Deucher Date: Fri Apr 4 18:40:16 2008 -0400 @@ -958,10 +1274,25 @@ Date: Wed Jan 16 14:55:05 2008 -0500 hardware limits, they're just heuristics until we can resize the front buffer properly. 
diff --git a/configure.ac b/configure.ac -index 2412d4f..b644348 100644 +index 2412d4f..ab8bd97 100644 --- a/configure.ac +++ b/configure.ac -@@ -232,6 +232,11 @@ if test "x$XSERVER_LIBPCIACCESS" = xyes; then +@@ -176,6 +176,14 @@ AC_CHECK_DECL(xf86_crtc_clip_video_helper, + #include "xf86i2c.h" + #include "xf86Crtc.h"]) + ++AC_CHECK_DECL(xf86RotateFreeShadow, ++ [AC_DEFINE(HAVE_FREE_SHADOW, 1, [have new FreeShadow API])], ++ [], ++ [#include ++ #include ++ #include ]) ++ ++ + AC_CHECK_DECL(XSERVER_LIBPCIACCESS, + [XSERVER_LIBPCIACCESS=yes],[XSERVER_LIBPCIACCESS=no], + [#include "xorg-server.h"]) +@@ -232,6 +240,11 @@ if test "x$XSERVER_LIBPCIACCESS" = xyes; then fi AM_CONDITIONAL(XSERVER_LIBPCIACCESS, test "x$XSERVER_LIBPCIACCESS" = xyes) @@ -1002,7 +1333,7 @@ index 86be965..ac6ea40 100644 Enable DRI support. This option allows you to enable to disable the DRI. The default is diff --git a/src/AtomBios/includes/CD_Common_Types.h b/src/AtomBios/includes/CD_Common_Types.h -index 44a0b35..bc18c42 100644 +index 44a0b35..c60b652 100644 --- a/src/AtomBios/includes/CD_Common_Types.h +++ b/src/AtomBios/includes/CD_Common_Types.h @@ -37,6 +37,10 @@ Revision History: @@ -1016,6 +1347,17 @@ index 44a0b35..bc18c42 100644 #ifndef LINUX #if _MSC_EXTENSIONS +@@ -47,7 +51,9 @@ Revision History: + // typedef __int64 int64_t; + typedef unsigned __int32 uint32_t; + typedef __int32 int32_t; +-#elif defined (__linux__) || defined (__NetBSD__) || defined(__sun) || defined(__OpenBSD__) || defined (__FreeBSD__) ++#elif defined (__linux__) || defined (__NetBSD__) \ ++ || defined(__sun) || defined(__OpenBSD__) \ ++ || defined (__FreeBSD__) || defined(__DragonFly__) || defined(__GLIBC__) + typedef unsigned int uint32_t; + typedef int int32_t; + #else diff --git a/src/Makefile.am b/src/Makefile.am index 70c05e5..5333495 100644 --- a/src/Makefile.am @@ -1108,7 +1450,7 @@ index c249333..f0eb147 100644 static XF86ModuleVersionInfo ATIVersionRec = diff --git a/src/atombios_crtc.c 
b/src/atombios_crtc.c -index bc2df18..51981a7 100644 +index bc2df18..bab56b2 100644 --- a/src/atombios_crtc.c +++ b/src/atombios_crtc.c @@ -1,10 +1,5 @@ @@ -1243,7 +1585,7 @@ index bc2df18..51981a7 100644 sclock = temp; /* disable spread spectrum clocking for now -- thanks Hedy Lamarr */ -@@ -193,6 +215,20 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) +@@ -193,25 +215,86 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) "crtc(%d) PLL : refdiv %u, fbdiv 0x%X(%u), pdiv %u\n", radeon_crtc->crtc_id, (unsigned int)ref_div, (unsigned int)fb_div, (unsigned int)fb_div, (unsigned int)post_div); @@ -1263,8 +1605,9 @@ index bc2df18..51981a7 100644 + atombios_get_command_table_version(info->atomBIOS, index, &major, &minor); - ErrorF("table is %d %d\n", major, minor); -@@ -200,18 +236,65 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) +- ErrorF("table is %d %d\n", major, minor); ++ /*ErrorF("table is %d %d\n", major, minor);*/ + switch(major) { case 1: switch(minor) { case 1: @@ -1445,7 +1788,7 @@ index bc2df18..51981a7 100644 /* if this is called during ScreenInit() we don't have pScrn->pScreen yet */ if (pScrn->pScreen) diff --git a/src/atombios_output.c b/src/atombios_output.c -index 07d212f..a00d87f 100644 +index 07d212f..d8e88ca 100644 --- a/src/atombios_output.c +++ b/src/atombios_output.c @@ -235,7 +235,7 @@ atombios_external_tmds_setup(xf86OutputPtr output, DisplayModePtr mode) @@ -1716,7 +2059,7 @@ index 07d212f..a00d87f 100644 static AtomBiosResult atombios_display_device_control(atomBiosHandlePtr atomBIOS, int device, Bool state) { -@@ -452,24 +697,78 @@ atombios_device_dpms(xf86OutputPtr output, int device, int mode) +@@ -452,40 +697,94 @@ atombios_device_dpms(xf86OutputPtr output, int device, int mode) } } @@ -1764,46 +2107,83 @@ index 07d212f..a00d87f 100644 { RADEONOutputPrivatePtr radeon_output = output->driver_private; + RADEONInfoPtr info = RADEONPTR(output->scrn); ++ ++ /*ErrorF("output dpms %d\n", mode);*/ - 
ErrorF("AGD: output dpms %d\n", mode); - - if (radeon_output->MonType == MT_LCD) { +- ErrorF("AGD: output dpms %d\n", mode); +- +- if (radeon_output->MonType == MT_LCD) { - if (radeon_output->devices & ATOM_DEVICE_LCD1_SUPPORT) - atombios_device_dpms(output, ATOM_DEVICE_LCD1_SUPPORT, mode); -+ if (radeon_output->devices & ATOM_DEVICE_LCD1_SUPPORT) { -+ if (IS_DCE3_VARIANT) -+ atombios_output_dig_dpms(output, mode, 2); -+ else -+ atombios_device_dpms(output, ATOM_DEVICE_LCD1_SUPPORT, mode); -+ } - } else if (radeon_output->MonType == MT_DFP) { - ErrorF("AGD: tmds dpms\n"); +- } else if (radeon_output->MonType == MT_DFP) { +- ErrorF("AGD: tmds dpms\n"); - if (radeon_output->devices & ATOM_DEVICE_DFP1_SUPPORT) - atombios_device_dpms(output, ATOM_DEVICE_DFP1_SUPPORT, mode); - else if (radeon_output->devices & ATOM_DEVICE_DFP2_SUPPORT) - atombios_device_dpms(output, ATOM_DEVICE_DFP2_SUPPORT, mode); - else if (radeon_output->devices & ATOM_DEVICE_DFP3_SUPPORT) - atombios_device_dpms(output, ATOM_DEVICE_DFP3_SUPPORT, mode); -+ if (radeon_output->devices & ATOM_DEVICE_DFP1_SUPPORT) { -+ if (IS_DCE3_VARIANT) -+ atombios_output_dig_dpms(output, mode, 1); -+ else -+ atombios_device_dpms(output, ATOM_DEVICE_DFP1_SUPPORT, mode); -+ } else if (radeon_output->devices & ATOM_DEVICE_DFP2_SUPPORT) { -+ if (IS_DCE3_VARIANT) -+ return; // fixme -+ else -+ atombios_device_dpms(output, ATOM_DEVICE_DFP2_SUPPORT, mode); -+ } else if (radeon_output->devices & ATOM_DEVICE_DFP3_SUPPORT) { -+ if (IS_DCE3_VARIANT) -+ atombios_output_dig_dpms(output, mode, 2); -+ else -+ atombios_device_dpms(output, ATOM_DEVICE_DFP3_SUPPORT, mode); -+ } - } else if (radeon_output->MonType == MT_CRT) { - ErrorF("AGD: dac dpms\n"); - if (radeon_output->devices & ATOM_DEVICE_CRT1_SUPPORT) -@@ -498,6 +797,7 @@ atombios_set_output_crtc_source(xf86OutputPtr output) +- } else if (radeon_output->MonType == MT_CRT) { +- ErrorF("AGD: dac dpms\n"); +- if (radeon_output->devices & ATOM_DEVICE_CRT1_SUPPORT) +- 
atombios_device_dpms(output, ATOM_DEVICE_CRT1_SUPPORT, mode); +- else if (radeon_output->devices & ATOM_DEVICE_CRT2_SUPPORT) +- atombios_device_dpms(output, ATOM_DEVICE_CRT2_SUPPORT, mode); +- } else if (radeon_output->MonType == MT_CV) { +- ErrorF("AGD: cv dpms\n"); +- if (radeon_output->devices & ATOM_DEVICE_CV_SUPPORT) +- atombios_device_dpms(output, ATOM_DEVICE_CV_SUPPORT, mode); +- } else if (0 /*radeon_output->MonType == MT_STV || +- radeon_output->MonType == MT_CTV*/) { +- ErrorF("AGD: tv dpms\n"); +- if (radeon_output->devices & ATOM_DEVICE_TV1_SUPPORT) +- atombios_device_dpms(output, ATOM_DEVICE_TV1_SUPPORT, mode); +- } ++ if (radeon_output->MonType == MT_LCD) { ++ if (radeon_output->devices & ATOM_DEVICE_LCD1_SUPPORT) { ++ if (IS_DCE3_VARIANT) ++ atombios_output_dig_dpms(output, mode, 2); ++ else ++ atombios_device_dpms(output, ATOM_DEVICE_LCD1_SUPPORT, mode); ++ } ++ } else if (radeon_output->MonType == MT_DFP) { ++ /*ErrorF("tmds dpms\n");*/ ++ if (radeon_output->devices & ATOM_DEVICE_DFP1_SUPPORT) { ++ if (IS_DCE3_VARIANT) ++ atombios_output_dig_dpms(output, mode, 1); ++ else ++ atombios_device_dpms(output, ATOM_DEVICE_DFP1_SUPPORT, mode); ++ } else if (radeon_output->devices & ATOM_DEVICE_DFP2_SUPPORT) { ++ if (IS_DCE3_VARIANT) ++ return; // fixme ++ else ++ atombios_device_dpms(output, ATOM_DEVICE_DFP2_SUPPORT, mode); ++ } else if (radeon_output->devices & ATOM_DEVICE_DFP3_SUPPORT) { ++ if (IS_DCE3_VARIANT) ++ atombios_output_dig_dpms(output, mode, 2); ++ else ++ atombios_device_dpms(output, ATOM_DEVICE_DFP3_SUPPORT, mode); ++ } ++ } else if (radeon_output->MonType == MT_CRT) { ++ /*ErrorF("AGD: dac dpms\n");*/ ++ if (radeon_output->devices & ATOM_DEVICE_CRT1_SUPPORT) ++ atombios_device_dpms(output, ATOM_DEVICE_CRT1_SUPPORT, mode); ++ else if (radeon_output->devices & ATOM_DEVICE_CRT2_SUPPORT) ++ atombios_device_dpms(output, ATOM_DEVICE_CRT2_SUPPORT, mode); ++ } else if (radeon_output->MonType == MT_CV) { ++ /*ErrorF("AGD: cv dpms\n");*/ ++ if 
(radeon_output->devices & ATOM_DEVICE_CV_SUPPORT) ++ atombios_device_dpms(output, ATOM_DEVICE_CV_SUPPORT, mode); ++ } else if (0 /*radeon_output->MonType == MT_STV || ++ radeon_output->MonType == MT_CTV*/) { ++ /*ErrorF("AGD: tv dpms\n");*/ ++ if (radeon_output->devices & ATOM_DEVICE_TV1_SUPPORT) ++ atombios_device_dpms(output, ATOM_DEVICE_TV1_SUPPORT, mode); ++ } + + } + +@@ -498,15 +797,13 @@ atombios_set_output_crtc_source(xf86OutputPtr output) AtomBiosArgRec data; unsigned char *space; SELECT_CRTC_SOURCE_PS_ALLOCATION crtc_src_param; @@ -1811,16 +2191,17 @@ index 07d212f..a00d87f 100644 int index = GetIndexIntoMasterTable(COMMAND, SelectCRTC_Source); int major, minor; -@@ -505,15 +805,14 @@ atombios_set_output_crtc_source(xf86OutputPtr output) - - ErrorF("select crtc source table is %d %d\n", major, minor); + atombios_get_command_table_version(info->atomBIOS, index, &major, &minor); +- ErrorF("select crtc source table is %d %d\n", major, minor); +- - crtc_src_param.ucCRTC = radeon_crtc->crtc_id; - crtc_src_param.ucDevice = 0; -- ++ /*ErrorF("select crtc source table is %d %d\n", major, minor);*/ + switch(major) { case 1: { - switch(minor) { +@@ -514,6 +811,8 @@ atombios_set_output_crtc_source(xf86OutputPtr output) case 0: case 1: default: @@ -1834,7 +2215,7 @@ index 07d212f..a00d87f 100644 crtc_src_param.ucDevice = ATOM_DEVICE_CV_INDEX; } + data.exec.pspace = &crtc_src_param; -+ ErrorF("device sourced: 0x%x\n", crtc_src_param.ucDevice); ++ /*ErrorF("device sourced: 0x%x\n", crtc_src_param.ucDevice);*/ + break; + case 2: + crtc_src_param2.ucCRTC = radeon_crtc->crtc_id; @@ -1872,7 +2253,7 @@ index 07d212f..a00d87f 100644 + } + + data.exec.pspace = &crtc_src_param2; -+ ErrorF("device sourced: 0x%x\n", crtc_src_param2.ucEncoderID); ++ /*ErrorF("device sourced: 0x%x\n", crtc_src_param2.ucEncoderID);*/ break; } break; @@ -1985,6 +2366,15 @@ index 07d212f..a00d87f 100644 data.exec.index = GetIndexIntoMasterTable(COMMAND, DAC_LoadDetection); data.exec.dataSpace = (void 
*)&space; +@@ -679,7 +1049,7 @@ atombios_dac_detect(ScrnInfoPtr pScrn, xf86OutputPtr output) + bios_0_scratch = INREG(R600_BIOS_0_SCRATCH); + else + bios_0_scratch = INREG(RADEON_BIOS_0_SCRATCH); +- ErrorF("DAC connect %08X\n", (unsigned int)bios_0_scratch); ++ /*ErrorF("DAC connect %08X\n", (unsigned int)bios_0_scratch);*/ + + if (radeon_output->devices & ATOM_DEVICE_CRT1_SUPPORT) { + if (bios_0_scratch & ATOM_S0_CRT1_MASK) diff --git a/src/legacy_crtc.c b/src/legacy_crtc.c index 06ad60c..5ef86ce 100644 --- a/src/legacy_crtc.c @@ -2167,7 +2557,7 @@ index 06ad60c..5ef86ce 100644 /* if this is called during ScreenInit() we don't have pScrn->pScreen yet */ if (pScrn->pScreen) diff --git a/src/legacy_output.c b/src/legacy_output.c -index 0de13df..0d6e4f1 100644 +index 0de13df..a65a41e 100644 --- a/src/legacy_output.c +++ b/src/legacy_output.c @@ -103,6 +103,12 @@ RADEONRestoreFPRegisters(ScrnInfoPtr pScrn, RADEONSavePtr restore) @@ -2218,6 +2608,15 @@ index 0de13df..0d6e4f1 100644 for (o = 0; o < xf86_config->num_output; o++) { if (output == xf86_config->output[o]) { break; +@@ -685,7 +701,7 @@ RADEONEnableDisplay(xf86OutputPtr output, BOOL bEnable) + } + + if (bEnable) { +- ErrorF("enable montype: %d\n", radeon_output->MonType); ++ /*ErrorF("enable montype: %d\n", radeon_output->MonType);*/ + if (radeon_output->MonType == MT_CRT) { + if (radeon_output->DACType == DAC_PRIMARY) { + info->output_crt1 |= (1 << o); @@ -716,6 +732,13 @@ RADEONEnableDisplay(xf86OutputPtr output, BOOL bEnable) tmp |= (RADEON_FP_FPON | RADEON_FP_TMDS_EN); OUTREG(RADEON_FP_GEN_CNTL, tmp); @@ -2247,6 +2646,15 @@ index 0de13df..0d6e4f1 100644 } } else if (radeon_output->MonType == MT_LCD) { info->output_lcd1 |= (1 << o); +@@ -744,7 +775,7 @@ RADEONEnableDisplay(xf86OutputPtr output, BOOL bEnable) + radeon_output->tv_on = TRUE; + } + } else { +- ErrorF("disable montype: %d\n", radeon_output->MonType); ++ /*ErrorF("disable montype: %d\n", radeon_output->MonType);*/ + if (radeon_output->MonType == 
MT_CRT) { + if (radeon_output->DACType == DAC_PRIMARY) { + info->output_crt1 &= ~(1 << o); @@ -780,6 +811,13 @@ RADEONEnableDisplay(xf86OutputPtr output, BOOL bEnable) tmp &= ~(RADEON_FP_FPON | RADEON_FP_TMDS_EN); OUTREG(RADEON_FP_GEN_CNTL, tmp); @@ -2422,7 +2830,7 @@ index 5a2191a..fc340e7 100644 +"0x9612","RS780_9612","RS780",,1,,,1,"ATI Radeon HD 3200 Graphics" +"0x9613","RS780_9613","RS780",,1,,,1,"ATI Radeon 3100 Graphics" diff --git a/src/radeon.h b/src/radeon.h -index aba3c0f..ef62883 100644 +index aba3c0f..feff48f 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -167,7 +167,8 @@ typedef enum { @@ -2487,7 +2895,17 @@ index aba3c0f..ef62883 100644 /* * Errata workarounds */ -@@ -745,152 +769,208 @@ do { \ +@@ -736,6 +760,9 @@ typedef struct { + + Bool r600_shadow_fb; + void *fb_shadow; ++ ++ int num_gb_pipes; ++ Bool has_tcl; + } RADEONInfoRec, *RADEONInfoPtr; + + #define RADEONWaitForFifo(pScrn, entries) \ +@@ -745,152 +772,208 @@ do { \ info->fifo_slots -= entries; \ } while (0) @@ -2830,7 +3248,7 @@ index aba3c0f..ef62883 100644 extern void RADEONAdjustCrtcRegistersForTV(ScrnInfoPtr pScrn, RADEONSavePtr save, DisplayModePtr mode, xf86OutputPtr output); extern void RADEONAdjustPLLRegistersForTV(ScrnInfoPtr pScrn, RADEONSavePtr save, -@@ -901,47 +981,18 @@ extern void RADEONAdjustPLL2RegistersForTV(ScrnInfoPtr pScrn, RADEONSavePtr save +@@ -901,47 +984,18 @@ extern void RADEONAdjustPLL2RegistersForTV(ScrnInfoPtr pScrn, RADEONSavePtr save DisplayModePtr mode, xf86OutputPtr output); extern void RADEONInitTVRegisters(xf86OutputPtr output, RADEONSavePtr save, DisplayModePtr mode, BOOL IsPrimary); @@ -2886,7 +3304,7 @@ index aba3c0f..ef62883 100644 #define RADEONCP_START(pScrn, info) \ do { \ -@@ -998,11 +1049,18 @@ do { \ +@@ -998,11 +1052,18 @@ do { \ info->needCacheFlush = FALSE; \ } \ RADEON_WAIT_UNTIL_IDLE(); \ @@ -2910,11 +3328,141 @@ index aba3c0f..ef62883 100644 info->CPInUse = TRUE; \ } \ } while (0) +@@ -1130,15 +1191,27 @@ do { \ + #define 
RADEON_PURGE_CACHE() \ + do { \ + BEGIN_RING(2); \ +- OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ +- OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ ++ if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ ++ OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ ++ OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ ++ } else { \ ++ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ ++ OUT_RING(R300_RB3D_DC_FLUSH_ALL); \ ++ } \ + ADVANCE_RING(); \ + } while (0) + + #define RADEON_PURGE_ZCACHE() \ + do { \ +- OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ +- OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ ++ BEGIN_RING(2); \ ++ if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ ++ OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ ++ OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ ++ } else { \ ++ OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \ ++ OUT_RING(R300_ZC_FLUSH_ALL); \ ++ } \ ++ ADVANCE_RING(); \ + } while (0) + + #endif /* XF86DRI */ diff --git a/src/radeon_accel.c b/src/radeon_accel.c -index 8b2f167..015d176 100644 +index 8b2f167..9e7ea7a 100644 --- a/src/radeon_accel.c +++ b/src/radeon_accel.c -@@ -390,8 +390,12 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) +@@ -158,17 +158,32 @@ void RADEONEngineFlush(ScrnInfoPtr pScrn) + unsigned char *RADEONMMIO = info->MMIO; + int i; + +- OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT, +- RADEON_RB3D_DC_FLUSH_ALL, +- ~RADEON_RB3D_DC_FLUSH_ALL); +- for (i = 0; i < RADEON_TIMEOUT; i++) { +- if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY)) +- break; +- } +- if (i == RADEON_TIMEOUT) { +- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, +- "DC flush timeout: %x\n", +- (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT)); ++ if (info->ChipFamily <= CHIP_FAMILY_RV280) { ++ OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT, ++ RADEON_RB3D_DC_FLUSH_ALL, ++ ~RADEON_RB3D_DC_FLUSH_ALL); ++ for (i = 0; i < RADEON_TIMEOUT; i++) { ++ if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY)) ++ break; ++ } ++ if (i == RADEON_TIMEOUT) { 
++ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, ++ "DC flush timeout: %x\n", ++ (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT)); ++ } ++ } else { ++ OUTREGP(R300_RB2D_DSTCACHE_CTLSTAT, ++ R300_RB2D_DC_FLUSH_ALL, ++ ~R300_RB2D_DC_FLUSH_ALL); ++ for (i = 0; i < RADEON_TIMEOUT; i++) { ++ if (!(INREG(R300_RB2D_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY)) ++ break; ++ } ++ if (i == RADEON_TIMEOUT) { ++ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, ++ "DC flush timeout: %x\n", ++ (unsigned int)INREG(R300_RB2D_DSTCACHE_CTLSTAT)); ++ } + } + } + +@@ -355,7 +370,52 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) + info->CurrentLayout.pixel_code, + info->CurrentLayout.bitsPerPixel); + +- OUTREG(RADEON_RB3D_CNTL, 0); ++ if ((info->ChipFamily == CHIP_FAMILY_RV410) || ++ (info->ChipFamily == CHIP_FAMILY_R420) || ++ (info->ChipFamily == CHIP_FAMILY_RS690) || ++ (info->ChipFamily == CHIP_FAMILY_RS740) || ++ (info->ChipFamily == CHIP_FAMILY_RS400) || ++ IS_R500_3D) { ++ uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT); ++ if (info->num_gb_pipes == 0) { ++ info->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1; ++ xf86DrvMsg(pScrn->scrnIndex, X_INFO, ++ "%s: num pipes is %d\n", __FUNCTION__, info->num_gb_pipes); ++ } ++ if (IS_R500_3D) ++ OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4)); ++ } else { ++ if (info->num_gb_pipes == 0) { ++ if ((info->ChipFamily == CHIP_FAMILY_R300) || ++ (info->ChipFamily == CHIP_FAMILY_R350)) { ++ /* R3xx chips */ ++ info->num_gb_pipes = 2; ++ } else { ++ /* RV3xx chips */ ++ info->num_gb_pipes = 1; ++ } ++ } ++ } ++ ++ if (IS_R300_3D | IS_R500_3D) { ++ CARD32 gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16); ++ ++ switch(info->num_gb_pipes) { ++ case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break; ++ case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break; ++ case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break; ++ default: ++ case 1: gb_tile_config |= 
R300_PIPE_COUNT_RV350; break; ++ } ++ ++ OUTREG(R300_GB_TILE_CONFIG, gb_tile_config); ++ OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); ++ OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG); ++ OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) | ++ R300_DC_AUTOFLUSH_ENABLE | ++ R300_DC_DC_DISABLE_IGNORE_PE)); ++ } else ++ OUTREG(RADEON_RB3D_CNTL, 0); + + RADEONEngineReset(pScrn); + +@@ -390,8 +450,12 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) info->sc_bottom = RADEON_DEFAULT_SC_BOTTOM_MAX; info->re_top_left = 0x00000000; @@ -2954,7 +3502,7 @@ index e3b37c1..bda15ff 100644 a->SetupForCPUToScreenTexture2 = diff --git a/src/radeon_atombios.c b/src/radeon_atombios.c -index 88c220b..224aae3 100644 +index 88c220b..b17b53c 100644 --- a/src/radeon_atombios.c +++ b/src/radeon_atombios.c @@ -35,6 +35,8 @@ @@ -3936,6 +4484,109 @@ index 88c220b..224aae3 100644 # ifdef ATOM_BIOS_PARSER static AtomBiosResult rhdAtomExec (atomBiosHandlePtr handle, +@@ -2673,7 +2071,7 @@ CailDelayMicroSeconds(VOID *CAIL, UINT32 delay) + + usleep(delay); + +- DEBUGP(xf86DrvMsg(((atomBiosHandlePtr)CAIL)->scrnIndex,X_INFO,"Delay %i usec\n",delay)); ++ /*DEBUGP(xf86DrvMsg(((atomBiosHandlePtr)CAIL)->scrnIndex,X_INFO,"Delay %i usec\n",delay));*/ + } + + UINT32 +@@ -2686,7 +2084,7 @@ CailReadATIRegister(VOID* CAIL, UINT32 idx) + CAILFUNC(CAIL); + + ret = INREG(idx << 2); +- DEBUGP(ErrorF("%s(%x) = %x\n",__func__,idx << 2,ret)); ++ /*DEBUGP(ErrorF("%s(%x) = %x\n",__func__,idx << 2,ret));*/ + return ret; + } + +@@ -2699,7 +2097,7 @@ CailWriteATIRegister(VOID *CAIL, UINT32 idx, UINT32 data) + CAILFUNC(CAIL); + + OUTREG(idx << 2,data); +- DEBUGP(ErrorF("%s(%x,%x)\n",__func__,idx << 2,data)); ++ /*DEBUGP(ErrorF("%s(%x,%x)\n",__func__,idx << 2,data));*/ + } + + UINT32 +@@ -2714,10 +2112,10 @@ CailReadFBData(VOID* CAIL, UINT32 idx) + if (((atomBiosHandlePtr)CAIL)->fbBase) { + CARD8 *FBBase = (CARD8*)info->FB; + ret = 
*((CARD32*)(FBBase + (((atomBiosHandlePtr)CAIL)->fbBase) + idx)); +- DEBUGP(ErrorF("%s(%x) = %x\n",__func__,idx,ret)); ++ /*DEBUGP(ErrorF("%s(%x) = %x\n",__func__,idx,ret));*/ + } else if (((atomBiosHandlePtr)CAIL)->scratchBase) { + ret = *(CARD32*)((CARD8*)(((atomBiosHandlePtr)CAIL)->scratchBase) + idx); +- DEBUGP(ErrorF("%s(%x) = %x\n",__func__,idx,ret)); ++ /*DEBUGP(ErrorF("%s(%x) = %x\n",__func__,idx,ret));*/ + } else { + xf86DrvMsg(((atomBiosHandlePtr)CAIL)->scrnIndex,X_ERROR, + "%s: no fbbase set\n",__func__); +@@ -2731,7 +2129,7 @@ CailWriteFBData(VOID *CAIL, UINT32 idx, UINT32 data) + { + CAILFUNC(CAIL); + +- DEBUGP(ErrorF("%s(%x,%x)\n",__func__,idx,data)); ++ /*DEBUGP(ErrorF("%s(%x,%x)\n",__func__,idx,data));*/ + if (((atomBiosHandlePtr)CAIL)->fbBase) { + CARD8 *FBBase = (CARD8*) + RADEONPTR(xf86Screens[((atomBiosHandlePtr)CAIL)->scrnIndex])->FB; +@@ -2752,7 +2150,7 @@ CailReadMC(VOID *CAIL, ULONG Address) + CAILFUNC(CAIL); + + ret = INMC(pScrn, Address); +- DEBUGP(ErrorF("%s(%x) = %x\n",__func__,Address,ret)); ++ /*DEBUGP(ErrorF("%s(%x) = %x\n",__func__,Address,ret));*/ + return ret; + } + +@@ -2762,7 +2160,7 @@ CailWriteMC(VOID *CAIL, ULONG Address, ULONG data) + ScrnInfoPtr pScrn = xf86Screens[((atomBiosHandlePtr)CAIL)->scrnIndex]; + + CAILFUNC(CAIL); +- DEBUGP(ErrorF("%s(%x,%x)\n",__func__,Address,data)); ++ /*DEBUGP(ErrorF("%s(%x,%x)\n",__func__,Address,data));*/ + OUTMC(pScrn, Address, data); + } + +@@ -2808,7 +2206,7 @@ CailReadPCIConfigData(VOID*CAIL, VOID* ret, UINT32 idx,UINT16 size) + return; + break; + } +- DEBUGP(ErrorF("%s(%x) = %x\n",__func__,idx,*(unsigned int*)ret)); ++ /*DEBUGP(ErrorF("%s(%x) = %x\n",__func__,idx,*(unsigned int*)ret));*/ + + } + +@@ -2818,7 +2216,7 @@ CailWritePCIConfigData(VOID*CAIL,VOID*src,UINT32 idx,UINT16 size) + PCITAG tag = ((atomBiosHandlePtr)CAIL)->PciTag; + + CAILFUNC(CAIL); +- DEBUGP(ErrorF("%s(%x,%x)\n",__func__,idx,(*(unsigned int*)src))); ++ /*DEBUGP(ErrorF("%s(%x,%x)\n",__func__,idx,(*(unsigned 
int*)src)));*/ + switch (size) { + case 8: + pciWriteByte(tag,idx << 2,*(CARD8*)src); +@@ -2846,7 +2244,7 @@ CailReadPLL(VOID *CAIL, ULONG Address) + CAILFUNC(CAIL); + + ret = RADEONINPLL(pScrn, Address); +- DEBUGP(ErrorF("%s(%x) = %x\n",__func__,Address,ret)); ++ /*DEBUGP(ErrorF("%s(%x) = %x\n",__func__,Address,ret));*/ + return ret; + } + +@@ -2856,7 +2254,7 @@ CailWritePLL(VOID *CAIL, ULONG Address,ULONG Data) + ScrnInfoPtr pScrn = xf86Screens[((atomBiosHandlePtr)CAIL)->scrnIndex]; + CAILFUNC(CAIL); + +- DEBUGP(ErrorF("%s(%x,%x)\n",__func__,Address,Data)); ++ /*DEBUGP(ErrorF("%s(%x,%x)\n",__func__,Address,Data));*/ + RADEONOUTPLL(pScrn, Address, Data); + } + diff --git a/src/radeon_atombios.h b/src/radeon_atombios.h index 9cb279e..955f2e4 100644 --- a/src/radeon_atombios.h @@ -4235,10 +4886,16 @@ index e6890be..b668823 100644 { -1, NULL } }; diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c -index 0250aef..5c9eae1 100644 +index 0250aef..9a450f9 100644 --- a/src/radeon_commonfuncs.c +++ b/src/radeon_commonfuncs.c -@@ -60,7 +60,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) +@@ -55,53 +55,49 @@ + static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) + { + RADEONInfoPtr info = RADEONPTR(pScrn); +- CARD32 gb_tile_config; ++ CARD32 gb_tile_config, su_reg_dest, vap_cntl; + ACCEL_PREAMBLE(); info->texW[0] = info->texH[0] = info->texW[1] = info->texH[1] = 1; @@ -4247,38 +4904,478 @@ index 0250aef..5c9eae1 100644 BEGIN_ACCEL(3); OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); -@@ -84,7 +84,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - /* R3xx chips */ - gb_tile_config |= R300_PIPE_COUNT_R300; - } else if ((info->ChipFamily == CHIP_FAMILY_RV410) || + OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE); +- OUT_ACCEL_REG(R300_WAIT_UNTIL, R300_WAIT_2D_IDLECLEAN | R300_WAIT_3D_IDLECLEAN); ++ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | 
RADEON_WAIT_3D_IDLECLEAN); + FINISH_ACCEL(); + + gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16); + +- if ((info->Chipset == PCI_CHIP_RV410_5E4C) || +- (info->Chipset == PCI_CHIP_RV410_5E4F)) { +- /* RV410 SE chips */ +- gb_tile_config |= R300_PIPE_COUNT_RV350; +- } else if ((info->ChipFamily == CHIP_FAMILY_RV350) || +- (info->ChipFamily == CHIP_FAMILY_RV380) || +- (info->ChipFamily == CHIP_FAMILY_RS400)) { +- /* RV3xx, RS4xx chips */ +- gb_tile_config |= R300_PIPE_COUNT_RV350; +- } else if ((info->ChipFamily == CHIP_FAMILY_R300) || +- (info->ChipFamily == CHIP_FAMILY_R350)) { +- /* R3xx chips */ +- gb_tile_config |= R300_PIPE_COUNT_R300; +- } else if ((info->ChipFamily == CHIP_FAMILY_RV410) || - (info->ChipFamily == CHIP_FAMILY_RS690)) { -+ (info->ChipFamily == CHIP_FAMILY_RS690) || -+ (info->ChipFamily == CHIP_FAMILY_RS740)) { - /* RV4xx, RS6xx chips */ - gb_tile_config |= R300_PIPE_COUNT_R420_3P; - } else { -@@ -148,7 +149,20 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - OUT_ACCEL_REG(R300_SU_DEPTH_OFFSET, 0); +- /* RV4xx, RS6xx chips */ +- gb_tile_config |= R300_PIPE_COUNT_R420_3P; +- } else { +- /* R4xx, R5xx chips */ +- gb_tile_config |= R300_PIPE_COUNT_R420; ++ switch(info->num_gb_pipes) { ++ case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break; ++ case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break; ++ case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break; ++ default: ++ case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break; + } + +- BEGIN_ACCEL(3); ++ BEGIN_ACCEL(5); + OUT_ACCEL_REG(R300_GB_TILE_CONFIG, gb_tile_config); ++ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); ++ OUT_ACCEL_REG(R300_DST_PIPE_CONFIG, R300_PIPE_AUTO_CONFIG); + OUT_ACCEL_REG(R300_GB_SELECT, 0); + OUT_ACCEL_REG(R300_GB_ENABLE, 0); FINISH_ACCEL(); -+ /* setup the VAP */ - BEGIN_ACCEL(5); -+ /* disable TCL/PVS */ -+ OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); -+ 
OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS); -+ OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) | -+ (5 << R300_PVS_NUM_CNTLRS_SHIFT) | -+ (4 << R300_PVS_NUM_FPUS_SHIFT) | -+ (5 << R300_VF_MAX_VTX_NUM_SHIFT))); -+ OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); -+ OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0); -+ FINISH_ACCEL(); ++ if (IS_R500_3D) { ++ su_reg_dest = ((1 << info->num_gb_pipes) - 1); ++ BEGIN_ACCEL(2); ++ OUT_ACCEL_REG(R500_SU_REG_DEST, su_reg_dest); ++ OUT_ACCEL_REG(R500_VAP_INDEX_OFFSET, 0); ++ FINISH_ACCEL(); ++ } + -+ BEGIN_ACCEL(4); - OUT_ACCEL_REG(R300_US_W_FMT, 0); - OUT_ACCEL_REG(R300_US_OUT_FMT_1, (R300_OUT_FMT_UNUSED | + BEGIN_ACCEL(3); + OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); + OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE); +- OUT_ACCEL_REG(R300_WAIT_UNTIL, R300_WAIT_2D_IDLECLEAN | R300_WAIT_3D_IDLECLEAN); ++ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); + FINISH_ACCEL(); + + BEGIN_ACCEL(5); +@@ -125,7 +121,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) + (8 << R300_MSBD1_SHIFT))); + FINISH_ACCEL(); + +- BEGIN_ACCEL(4); ++ BEGIN_ACCEL(5); ++ OUT_ACCEL_REG(R300_GA_ENHANCE, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL); + OUT_ACCEL_REG(R300_GA_POLY_MODE, R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE); + OUT_ACCEL_REG(R300_GA_ROUND_MODE, (R300_GEOMETRY_ROUND_NEAREST | + R300_COLOR_ROUND_NEAREST)); +@@ -148,7 +145,399 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) + OUT_ACCEL_REG(R300_SU_DEPTH_OFFSET, 0); + FINISH_ACCEL(); + +- BEGIN_ACCEL(5); ++ /* setup the VAP */ ++ if (info->has_tcl) ++ vap_cntl = ((5 << R300_PVS_NUM_SLOTS_SHIFT) | ++ (5 << R300_PVS_NUM_CNTLRS_SHIFT) | ++ (9 << R300_VF_MAX_VTX_NUM_SHIFT)); ++ else ++ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | ++ (5 << R300_PVS_NUM_CNTLRS_SHIFT) | ++ (5 << R300_VF_MAX_VTX_NUM_SHIFT)); ++ ++ if 
(info->ChipFamily == CHIP_FAMILY_RV515) ++ vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); ++ else if ((info->ChipFamily == CHIP_FAMILY_RV530) || ++ (info->ChipFamily == CHIP_FAMILY_RV560)) ++ vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); ++ else if (info->ChipFamily == CHIP_FAMILY_R420) ++ vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); ++ else if ((info->ChipFamily == CHIP_FAMILY_R520) || ++ (info->ChipFamily == CHIP_FAMILY_R580) || ++ (info->ChipFamily == CHIP_FAMILY_RV570)) ++ vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); ++ else ++ vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); ++ ++ if (info->has_tcl) ++ BEGIN_ACCEL(15); ++ else ++ BEGIN_ACCEL(9); ++ OUT_ACCEL_REG(R300_VAP_VTX_STATE_CNTL, 0); ++ OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); ++ ++ if (info->has_tcl) ++ OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0); ++ else ++ OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS); ++ OUT_ACCEL_REG(R300_VAP_CNTL, vap_cntl); ++ OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); ++ OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); ++ OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0); ++ ++ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, ++ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) | ++ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) | ++ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_0_SHIFT) | ++ (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_0_SHIFT) | ++ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) ++ << R300_WRITE_ENA_0_SHIFT) | ++ (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) | ++ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) | ++ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_1_SHIFT) | ++ (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_1_SHIFT) | ++ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) ++ << R300_WRITE_ENA_1_SHIFT))); ++ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1, ++ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) | ++ 
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) | ++ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_2_SHIFT) | ++ (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_2_SHIFT) | ++ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) ++ << R300_WRITE_ENA_2_SHIFT))); ++ ++ if (info->has_tcl) { ++ OUT_ACCEL_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0); ++ OUT_ACCEL_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000); ++ OUT_ACCEL_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000); ++ OUT_ACCEL_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000); ++ OUT_ACCEL_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000); ++ OUT_ACCEL_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); ++ } ++ FINISH_ACCEL(); ++ ++ /* pre-load the vertex shaders */ ++ if (info->has_tcl) { ++ /* exa mask shader program */ ++ BEGIN_ACCEL(13); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); ++ /* PVS inst 0 */ ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_DST_OPCODE(R300_VE_ADD) | ++ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | ++ R300_PVS_DST_OFFSET(0) | ++ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | ++ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ 
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ ++ /* PVS inst 1 */ ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_DST_OPCODE(R300_VE_ADD) | ++ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | ++ R300_PVS_DST_OFFSET(1) | ++ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | ++ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ ++ /* PVS inst 2 */ ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_DST_OPCODE(R300_VE_ADD) | ++ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | ++ R300_PVS_DST_OFFSET(2) | ++ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | ++ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(7) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | ++ 
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(7) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(7) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ FINISH_ACCEL(); ++ ++ BEGIN_ACCEL(9); ++ /* exa no mask instruction */ ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 3); ++ /* PVS inst 0 */ ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_DST_OPCODE(R300_VE_ADD) | ++ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | ++ R300_PVS_DST_OFFSET(0) | ++ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | ++ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ 
R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ ++ /* PVS inst 1 */ ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_DST_OPCODE(R300_VE_ADD) | ++ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | ++ R300_PVS_DST_OFFSET(1) | ++ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | ++ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ FINISH_ACCEL(); ++ ++ /* Xv shader program */ ++ BEGIN_ACCEL(9); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 5); ++ ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_DST_OPCODE(R300_VE_ADD) | ++ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | ++ R300_PVS_DST_OFFSET(0) | ++ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | ++ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ 
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(0) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_DST_OPCODE(R300_VE_ADD) | ++ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | ++ R300_PVS_DST_OFFSET(1) | ++ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | ++ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, ++ 
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | ++ R300_PVS_SRC_OFFSET(6) | ++ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | ++ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); ++ FINISH_ACCEL(); ++ } ++ ++ /* pre-load the RS instructions */ ++ BEGIN_ACCEL(4); ++ if (IS_R300_3D) { ++ /* rasterizer source table ++ * R300_RS_TEX_PTR is the offset into the input RS stream ++ * 0,1 are tex0 ++ * 2,3 are tex1 ++ */ ++ OUT_ACCEL_REG(R300_RS_IP_0, ++ (R300_RS_TEX_PTR(0) | ++ R300_RS_SEL_S(R300_RS_SEL_C0) | ++ R300_RS_SEL_T(R300_RS_SEL_C1) | ++ R300_RS_SEL_R(R300_RS_SEL_K0) | ++ R300_RS_SEL_Q(R300_RS_SEL_K1))); ++ OUT_ACCEL_REG(R300_RS_IP_1, ++ (R300_RS_TEX_PTR(2) | ++ R300_RS_SEL_S(R300_RS_SEL_C0) | ++ R300_RS_SEL_T(R300_RS_SEL_C1) | ++ R300_RS_SEL_R(R300_RS_SEL_K0) | ++ R300_RS_SEL_Q(R300_RS_SEL_K1))); ++ /* src tex */ ++ /* R300_INST_TEX_ID - select the RS source table entry ++ * R300_INST_TEX_ADDR - the FS temp register for the texture data ++ */ ++ OUT_ACCEL_REG(R300_RS_INST_0, (R300_INST_TEX_ID(0) | ++ R300_RS_INST_TEX_CN_WRITE | ++ R300_INST_TEX_ADDR(0))); ++ /* mask tex */ ++ OUT_ACCEL_REG(R300_RS_INST_1, (R300_INST_TEX_ID(1) | ++ R300_RS_INST_TEX_CN_WRITE | ++ R300_INST_TEX_ADDR(1))); ++ ++ } else { ++ /* rasterizer source table ++ * R300_RS_TEX_PTR is the offset into the input RS stream ++ * 0,1 are tex0 ++ * 2,3 are tex1 ++ */ ++ OUT_ACCEL_REG(R500_RS_IP_0, ((0 << R500_RS_IP_TEX_PTR_S_SHIFT) | ++ (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | ++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | ++ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT))); ++ ++ OUT_ACCEL_REG(R500_RS_IP_1, ((2 << R500_RS_IP_TEX_PTR_S_SHIFT) | ++ (3 << R500_RS_IP_TEX_PTR_T_SHIFT) | ++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | ++ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT))); ++ /* src tex */ ++ /* R500_RS_INST_TEX_ID_SHIFT - select the RS source table 
entry ++ * R500_RS_INST_TEX_ADDR_SHIFT - the FS temp register for the texture data ++ */ ++ OUT_ACCEL_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) | ++ R500_RS_INST_TEX_CN_WRITE | ++ (0 << R500_RS_INST_TEX_ADDR_SHIFT))); ++ /* mask tex */ ++ OUT_ACCEL_REG(R500_RS_INST_1, ((1 << R500_RS_INST_TEX_ID_SHIFT) | ++ R500_RS_INST_TEX_CN_WRITE | ++ (1 << R500_RS_INST_TEX_ADDR_SHIFT))); ++ } ++ FINISH_ACCEL(); ++ ++ /* pre-load FS tex instructions */ ++ if (IS_R300_3D) { ++ BEGIN_ACCEL(2); ++ /* tex inst for src texture */ ++ OUT_ACCEL_REG(R300_US_TEX_INST_0, ++ (R300_TEX_SRC_ADDR(0) | ++ R300_TEX_DST_ADDR(0) | ++ R300_TEX_ID(0) | ++ R300_TEX_INST(R300_TEX_INST_LD))); ++ ++ /* tex inst for mask texture */ ++ OUT_ACCEL_REG(R300_US_TEX_INST_1, ++ (R300_TEX_SRC_ADDR(1) | ++ R300_TEX_DST_ADDR(1) | ++ R300_TEX_ID(1) | ++ R300_TEX_INST(R300_TEX_INST_LD))); ++ FINISH_ACCEL(); ++ } ++ ++ if (IS_R300_3D) { ++ BEGIN_ACCEL(9); ++ OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX); ++ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ ++ OUT_ACCEL_REG(R300_US_CODE_ADDR_0, ++ (R300_ALU_START(0) | ++ R300_ALU_SIZE(0) | ++ R300_TEX_START(0) | ++ R300_TEX_SIZE(0))); ++ OUT_ACCEL_REG(R300_US_CODE_ADDR_1, ++ (R300_ALU_START(0) | ++ R300_ALU_SIZE(0) | ++ R300_TEX_START(0) | ++ R300_TEX_SIZE(0))); ++ OUT_ACCEL_REG(R300_US_CODE_ADDR_2, ++ (R300_ALU_START(0) | ++ R300_ALU_SIZE(0) | ++ R300_TEX_START(0) | ++ R300_TEX_SIZE(0))); ++ } else { ++ BEGIN_ACCEL(7); ++ OUT_ACCEL_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); ++ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ ++ OUT_ACCEL_REG(R500_US_FC_CTRL, 0); ++ } + OUT_ACCEL_REG(R300_US_W_FMT, 0); + OUT_ACCEL_REG(R300_US_OUT_FMT_1, (R300_OUT_FMT_UNUSED | R300_OUT_FMT_C0_SEL_BLUE | -@@ -165,11 +179,6 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) +@@ -165,11 +554,6 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) R300_OUT_FMT_C1_SEL_GREEN | 
R300_OUT_FMT_C2_SEL_RED | R300_OUT_FMT_C3_SEL_ALPHA)); @@ -4290,7 +5387,7 @@ index 0250aef..5c9eae1 100644 FINISH_ACCEL(); -@@ -179,7 +188,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) +@@ -179,7 +563,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) OUT_ACCEL_REG(R300_FG_ALPHA_FUNC, 0); FINISH_ACCEL(); @@ -4300,12 +5397,13 @@ index 0250aef..5c9eae1 100644 OUT_ACCEL_REG(R300_RB3D_ZSTENCILCNTL, 0); OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE); OUT_ACCEL_REG(R300_RB3D_BW_CNTL, 0); -@@ -205,12 +215,12 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) +@@ -205,12 +590,13 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) OUT_ACCEL_REG(R300_SC_SCISSOR1, ((8191 << R300_SCISSOR_X_SHIFT) | (8191 << R300_SCISSOR_Y_SHIFT))); - if (IS_R300_VARIANT || (info->ChipFamily == CHIP_FAMILY_RS690)) { -+ if (IS_R300_3D) { ++ if (IS_R300_VARIANT || (info->ChipFamily == CHIP_FAMILY_RS690) || ++ (info->ChipFamily == CHIP_FAMILY_RS740)) { /* clip has offset 1440 */ OUT_ACCEL_REG(R300_SC_CLIP_0_A, ((1088 << R300_CLIP_X_SHIFT) | (1088 << R300_CLIP_Y_SHIFT))); @@ -4316,7 +5414,7 @@ index 0250aef..5c9eae1 100644 } else { OUT_ACCEL_REG(R300_SC_CLIP_0_A, ((0 << R300_CLIP_X_SHIFT) | (0 << R300_CLIP_Y_SHIFT))); -@@ -239,6 +249,19 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) +@@ -239,6 +625,19 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) OUT_ACCEL_REG(R200_SE_VAP_CNTL, R200_VAP_FORCE_W_TO_ONE | R200_VAP_VF_MAX_VTX_NUM); FINISH_ACCEL(); @@ -4336,7 +5434,7 @@ index 0250aef..5c9eae1 100644 } else { BEGIN_ACCEL(2); if ((info->ChipFamily == CHIP_FAMILY_RADEON) || -@@ -252,20 +275,21 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) +@@ -252,20 +651,21 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) RADEON_VTX_ST1_NONPARAMETRIC | RADEON_TEX1_W_ROUTING_USE_W0); FINISH_ACCEL(); @@ -4371,7 +5469,7 @@ index 0250aef..5c9eae1 100644 diff --git 
a/src/radeon_crtc.c b/src/radeon_crtc.c -index 3524b75..e2d31eb 100644 +index 3524b75..6a9a76d 100644 --- a/src/radeon_crtc.c +++ b/src/radeon_crtc.c @@ -57,12 +57,7 @@ extern void atombios_crtc_mode_set(xf86CrtcPtr crtc, @@ -4387,7 +5485,40 @@ index 3524b75..e2d31eb 100644 static void radeon_crtc_dpms(xf86CrtcPtr crtc, int mode) -@@ -190,8 +185,10 @@ RADEONComputePLL(RADEONPLLPtr pll, +@@ -72,6 +67,9 @@ radeon_crtc_dpms(xf86CrtcPtr crtc, int mode) + RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; + xf86CrtcPtr crtc0 = pRADEONEnt->pCrtc[0]; + ++ if ((mode == DPMSModeOn) && radeon_crtc->enabled) ++ return; ++ + if (IS_AVIVO_VARIANT) { + atombios_crtc_dpms(crtc, mode); + } else { +@@ -91,6 +89,11 @@ radeon_crtc_dpms(xf86CrtcPtr crtc, int mode) + legacy_crtc_dpms(crtc0, mode); + } + } ++ ++ if (mode == DPMSModeOn) ++ radeon_crtc->enabled = TRUE; ++ else ++ radeon_crtc->enabled = FALSE; + } + + static Bool +@@ -103,6 +106,10 @@ radeon_crtc_mode_fixup(xf86CrtcPtr crtc, DisplayModePtr mode, + static void + radeon_crtc_mode_prepare(xf86CrtcPtr crtc) + { ++ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; ++ ++ if (radeon_crtc->enabled) ++ crtc->funcs->hide_cursor(crtc); + radeon_crtc_dpms(crtc, DPMSModeOff); + } + +@@ -190,8 +197,10 @@ RADEONComputePLL(RADEONPLLPtr pll, best_vco_diff = vco_diff; } } @@ -4400,7 +5531,18 @@ index 3524b75..e2d31eb 100644 } ErrorF("best_freq: %u\n", (unsigned int)best_freq); -@@ -275,6 +272,10 @@ radeon_crtc_load_lut(xf86CrtcPtr crtc) +@@ -238,6 +247,10 @@ radeon_crtc_mode_commit(xf86CrtcPtr crtc) + } + + radeon_crtc_dpms(crtc, DPMSModeOn); ++ ++ if (crtc->scrn->pScreen != NULL) ++ xf86_reload_cursors(crtc->scrn->pScreen); ++ + } + + void +@@ -275,6 +288,10 @@ radeon_crtc_load_lut(xf86CrtcPtr crtc) OUTPAL(i, radeon_crtc->lut_r[i], radeon_crtc->lut_g[i], radeon_crtc->lut_b[i]); } @@ -4411,7 +5553,7 @@ index 3524b75..e2d31eb 100644 } -@@ -533,11 +534,12 @@ static const xf86CrtcFuncsRec radeon_crtc_funcs = { +@@ -533,11 +550,12 
@@ static const xf86CrtcFuncsRec radeon_crtc_funcs = { Bool RADEONAllocateControllers(ScrnInfoPtr pScrn, int mask) { RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); @@ -4425,7 +5567,7 @@ index 3524b75..e2d31eb 100644 pRADEONEnt->pCrtc[0] = xf86CrtcCreate(pScrn, &radeon_crtc_funcs); if (!pRADEONEnt->pCrtc[0]) return FALSE; -@@ -549,16 +551,20 @@ Bool RADEONAllocateControllers(ScrnInfoPtr pScrn, int mask) +@@ -549,16 +567,20 @@ Bool RADEONAllocateControllers(ScrnInfoPtr pScrn, int mask) pRADEONEnt->pCrtc[0]->driver_private = pRADEONEnt->Controller[0]; pRADEONEnt->Controller[0]->crtc_id = 0; pRADEONEnt->Controller[0]->crtc_offset = 0; @@ -4448,7 +5590,7 @@ index 3524b75..e2d31eb 100644 pRADEONEnt->Controller[1] = xnfcalloc(sizeof(RADEONCrtcPrivateRec), 1); if (!pRADEONEnt->Controller[1]) { -@@ -569,6 +575,10 @@ Bool RADEONAllocateControllers(ScrnInfoPtr pScrn, int mask) +@@ -569,6 +591,10 @@ Bool RADEONAllocateControllers(ScrnInfoPtr pScrn, int mask) pRADEONEnt->pCrtc[1]->driver_private = pRADEONEnt->Controller[1]; pRADEONEnt->Controller[1]->crtc_id = 1; pRADEONEnt->Controller[1]->crtc_offset = AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL; @@ -4459,7 +5601,7 @@ index 3524b75..e2d31eb 100644 } return TRUE; -@@ -719,3 +729,41 @@ RADEONUnblank(ScrnInfoPtr pScrn) +@@ -719,3 +745,41 @@ RADEONUnblank(ScrnInfoPtr pScrn) } } @@ -4502,10 +5644,99 @@ index 3524b75..e2d31eb 100644 + return changed; +} diff --git a/src/radeon_cursor.c b/src/radeon_cursor.c -index 0f7e668..de64dee 100644 +index 0f7e668..42f9a85 100644 --- a/src/radeon_cursor.c +++ b/src/radeon_cursor.c -@@ -346,14 +346,6 @@ Bool RADEONCursorInit(ScreenPtr pScreen) +@@ -92,6 +92,7 @@ + static void + avivo_setup_cursor(xf86CrtcPtr crtc, Bool enable) + { ++ ScrnInfoPtr pScrn = crtc->scrn; + RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; + RADEONInfoPtr info = RADEONPTR(crtc->scrn); + unsigned char *RADEONMMIO = info->MMIO; +@@ -100,7 +101,7 @@ avivo_setup_cursor(xf86CrtcPtr crtc, Bool enable) + + if (enable) 
{ + OUTREG(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, +- info->fbLocation + radeon_crtc->cursor_offset); ++ info->fbLocation + radeon_crtc->cursor_offset + pScrn->fbOffset); + OUTREG(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset, + ((CURSOR_WIDTH - 1) << 16) | (CURSOR_HEIGHT - 1)); + OUTREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset, +@@ -108,6 +109,24 @@ avivo_setup_cursor(xf86CrtcPtr crtc, Bool enable) + } + } + ++static void ++avivo_lock_cursor(xf86CrtcPtr crtc, Bool lock) ++{ ++ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; ++ RADEONInfoPtr info = RADEONPTR(crtc->scrn); ++ unsigned char *RADEONMMIO = info->MMIO; ++ CARD32 tmp; ++ ++ tmp = INREG(AVIVO_D1CUR_UPDATE + radeon_crtc->crtc_offset); ++ ++ if (lock) ++ tmp |= AVIVO_D1CURSOR_UPDATE_LOCK; ++ else ++ tmp &= ~AVIVO_D1CURSOR_UPDATE_LOCK; ++ ++ OUTREG(AVIVO_D1CUR_UPDATE + radeon_crtc->crtc_offset, tmp); ++} ++ + void + radeon_crtc_show_cursor (xf86CrtcPtr crtc) + { +@@ -118,10 +137,12 @@ radeon_crtc_show_cursor (xf86CrtcPtr crtc) + unsigned char *RADEONMMIO = info->MMIO; + + if (IS_AVIVO_VARIANT) { ++ avivo_lock_cursor(crtc, TRUE); + OUTREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset, + INREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset) + | AVIVO_D1CURSOR_EN); + avivo_setup_cursor(crtc, TRUE); ++ avivo_lock_cursor(crtc, FALSE); + } else { + switch (crtc_id) { + case 0: +@@ -149,10 +170,12 @@ radeon_crtc_hide_cursor (xf86CrtcPtr crtc) + unsigned char *RADEONMMIO = info->MMIO; + + if (IS_AVIVO_VARIANT) { ++ avivo_lock_cursor(crtc, TRUE); + OUTREG(AVIVO_D1CUR_CONTROL+ radeon_crtc->crtc_offset, + INREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset) + & ~(AVIVO_D1CURSOR_EN)); + avivo_setup_cursor(crtc, FALSE); ++ avivo_lock_cursor(crtc, FALSE); + } else { + switch(crtc_id) { + case 0: +@@ -195,9 +218,11 @@ radeon_crtc_set_cursor_position (xf86CrtcPtr crtc, int x, int y) + /* avivo cursor spans the full fb width */ + x += crtc->x; + y += crtc->y; ++ avivo_lock_cursor(crtc, TRUE); + 
OUTREG(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, ((xorigin ? 0 : x) << 16) + | (yorigin ? 0 : y)); + OUTREG(AVIVO_D1CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); ++ avivo_lock_cursor(crtc, FALSE); + } else { + if (crtc_id == 0) { + OUTREG(RADEON_CUR_HORZ_VERT_OFF, (RADEON_CUR_LOCK +@@ -274,7 +299,7 @@ radeon_crtc_load_cursor_argb (xf86CrtcPtr crtc, CARD32 *image) + RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; + RADEONInfoPtr info = RADEONPTR(pScrn); + CURSOR_SWAPPING_DECL_MMIO +- CARD32 *d = (CARD32 *)(pointer)(info->FB + radeon_crtc->cursor_offset + pScrn->fbOffset); ++ CARD32 *d = (CARD32 *)(pointer)(info->FB + radeon_crtc->cursor_offset); + + RADEONCTRACE(("RADEONLoadCursorARGB\n")); + +@@ -346,14 +371,6 @@ Bool RADEONCursorInit(ScreenPtr pScreen) return xf86_cursors_init (pScreen, CURSOR_WIDTH, CURSOR_HEIGHT, (HARDWARE_CURSOR_TRUECOLOR_AT_8BPP | @@ -4520,8 +5751,21 @@ index 0f7e668..de64dee 100644 HARDWARE_CURSOR_AND_SOURCE_WITH_MASK | HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_1 | HARDWARE_CURSOR_ARGB)); +diff --git a/src/radeon_dri.h b/src/radeon_dri.h +index 3b54626..67892a6 100644 +--- a/src/radeon_dri.h ++++ b/src/radeon_dri.h +@@ -46,7 +46,7 @@ + #define RADEON_DEFAULT_BUFFER_SIZE 2 /* MB (must be page aligned) */ + #define RADEON_DEFAULT_GART_TEX_SIZE 1 /* MB (must be page aligned) */ + +-#define RADEON_DEFAULT_CP_TIMEOUT 10000 /* usecs */ ++#define RADEON_DEFAULT_CP_TIMEOUT 100000 /* usecs */ + + #define RADEON_DEFAULT_PCI_APER_SIZE 32 /* in MB */ + diff --git a/src/radeon_driver.c b/src/radeon_driver.c -index 5cf8d51..d5595ea 100644 +index 5cf8d51..2701f57 100644 --- a/src/radeon_driver.c +++ b/src/radeon_driver.c @@ -126,35 +126,6 @@ static void RADEONSaveMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save); @@ -4583,7 +5827,58 @@ index 5cf8d51..d5595ea 100644 } struct RADEONInt10Save { -@@ -439,6 +409,9 @@ static Bool RADEONUnmapMMIO(ScrnInfoPtr pScrn) +@@ -387,6 +357,12 @@ static void RADEONFreeRec(ScrnInfoPtr 
pScrn) + static Bool RADEONMapMMIO(ScrnInfoPtr pScrn) + { + RADEONInfoPtr info = RADEONPTR(pScrn); ++ RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); ++ ++ if (pRADEONEnt->MMIO) { ++ info->MMIO = pRADEONEnt->MMIO; ++ return TRUE; ++ } + + #ifndef XSERVER_LIBPCIACCESS + +@@ -397,7 +373,6 @@ static Bool RADEONMapMMIO(ScrnInfoPtr pScrn) + info->MMIOSize); + + if (!info->MMIO) return FALSE; +- + #else + + void** result = (void**)&info->MMIO; +@@ -416,6 +391,7 @@ static Bool RADEONMapMMIO(ScrnInfoPtr pScrn) + + #endif + ++ pRADEONEnt->MMIO = info->MMIO; + return TRUE; + } + +@@ -425,6 +401,13 @@ static Bool RADEONMapMMIO(ScrnInfoPtr pScrn) + static Bool RADEONUnmapMMIO(ScrnInfoPtr pScrn) + { + RADEONInfoPtr info = RADEONPTR(pScrn); ++ RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); ++ ++ if (info->IsPrimary || info->IsSecondary) { ++ /* never unmap on zaphod */ ++ info->MMIO = NULL; ++ return TRUE; ++ } + + #ifndef XSERVER_LIBPCIACCESS + xf86UnMapVidMem(pScrn->scrnIndex, info->MMIO, info->MMIOSize); +@@ -432,6 +415,7 @@ static Bool RADEONUnmapMMIO(ScrnInfoPtr pScrn) + pci_device_unmap_range(info->PciInfo, info->MMIO, info->MMIOSize); + #endif + ++ pRADEONEnt->MMIO = NULL; + info->MMIO = NULL; + return TRUE; + } +@@ -439,6 +423,9 @@ static Bool RADEONUnmapMMIO(ScrnInfoPtr pScrn) /* Memory map the frame buffer. Used by RADEONMapMem, below. 
*/ static Bool RADEONMapFB(ScrnInfoPtr pScrn) { @@ -4593,7 +5888,7 @@ index 5cf8d51..d5595ea 100644 RADEONInfoPtr info = RADEONPTR(pScrn); xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -@@ -456,7 +429,7 @@ static Bool RADEONMapFB(ScrnInfoPtr pScrn) +@@ -456,7 +443,7 @@ static Bool RADEONMapFB(ScrnInfoPtr pScrn) #else @@ -4602,7 +5897,7 @@ index 5cf8d51..d5595ea 100644 info->LinearAddr, info->FbMapSize, PCI_DEV_MAP_FLAG_WRITABLE | -@@ -585,10 +558,10 @@ unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr) +@@ -585,10 +572,10 @@ unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr) unsigned char *RADEONMMIO = info->MMIO; CARD32 data; @@ -4617,7 +5912,7 @@ index 5cf8d51..d5595ea 100644 } else if (IS_AVIVO_VARIANT) { OUTREG(AVIVO_MC_INDEX, (addr & 0xff) | 0x7f0000); (void)INREG(AVIVO_MC_INDEX); -@@ -614,12 +587,12 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, CARD32 data) +@@ -614,12 +601,12 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, CARD32 data) RADEONInfoPtr info = RADEONPTR(pScrn); unsigned char *RADEONMMIO = info->MMIO; @@ -4636,7 +5931,7 @@ index 5cf8d51..d5595ea 100644 } else if (IS_AVIVO_VARIANT) { OUTREG(AVIVO_MC_INDEX, (addr & 0xff) | 0xff0000); (void)INREG(AVIVO_MC_INDEX); -@@ -636,7 +609,7 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, CARD32 data) +@@ -636,7 +623,7 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, CARD32 data) } } @@ -4645,7 +5940,7 @@ index 5cf8d51..d5595ea 100644 { RADEONInfoPtr info = RADEONPTR(pScrn); -@@ -648,7 +621,8 @@ Bool avivo_get_mc_idle(ScrnInfoPtr pScrn) +@@ -648,7 +635,8 @@ Bool avivo_get_mc_idle(ScrnInfoPtr pScrn) return TRUE; else return FALSE; @@ -4655,7 +5950,7 @@ index 5cf8d51..d5595ea 100644 if (INMC(pScrn, RS690_MC_STATUS) & RS690_MC_STATUS_IDLE) return TRUE; else -@@ -663,7 +637,7 @@ Bool avivo_get_mc_idle(ScrnInfoPtr pScrn) +@@ -663,7 +651,7 @@ Bool avivo_get_mc_idle(ScrnInfoPtr pScrn) #define LOC_FB 0x1 #define LOC_AGP 0x2 @@ -4664,7 +5959,7 @@ index 5cf8d51..d5595ea 100644 { RADEONInfoPtr info = 
RADEONPTR(pScrn); unsigned char *RADEONMMIO = info->MMIO; -@@ -681,12 +655,13 @@ void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, CARD32 fb_loc, +@@ -681,12 +669,13 @@ void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, CARD32 fb_loc, if (mask & LOC_AGP) OUTMC(pScrn, RV515_MC_AGP_LOCATION, agp_loc); (void)INMC(pScrn, RV515_MC_AGP_LOCATION); @@ -4680,7 +5975,7 @@ index 5cf8d51..d5595ea 100644 if (mask & LOC_FB) OUTMC(pScrn, R520_MC_FB_LOCATION, fb_loc); if (mask & LOC_AGP) -@@ -700,7 +675,7 @@ void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, CARD32 fb_loc, +@@ -700,7 +689,7 @@ void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, CARD32 fb_loc, } } @@ -4689,7 +5984,7 @@ index 5cf8d51..d5595ea 100644 { RADEONInfoPtr info = RADEONPTR(pScrn); unsigned char *RADEONMMIO = info->MMIO; -@@ -719,7 +694,8 @@ void radeon_read_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, CARD32 *fb_loc, +@@ -719,7 +708,8 @@ void radeon_read_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, CARD32 *fb_loc, *agp_loc = INMC(pScrn, RV515_MC_AGP_LOCATION); *agp_loc_hi = 0; } @@ -4699,7 +5994,7 @@ index 5cf8d51..d5595ea 100644 if (mask & LOC_FB) *fb_loc = INMC(pScrn, RS690_MC_FB_LOCATION); if (mask & LOC_AGP) { -@@ -1259,7 +1235,8 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) +@@ -1259,7 +1249,8 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) } #endif @@ -4709,7 +6004,7 @@ index 5cf8d51..d5595ea 100644 if (info->IsIGP) info->mc_fb_location = INREG(RADEON_NB_TOM); else -@@ -1458,23 +1435,20 @@ static Bool RADEONPreInitVRAM(ScrnInfoPtr pScrn) +@@ -1458,23 +1449,20 @@ static Bool RADEONPreInitVRAM(ScrnInfoPtr pScrn) MessageType from = X_PROBED; CARD32 accessible, bar_size; @@ -4736,7 +6031,27 @@ index 5cf8d51..d5595ea 100644 /* Some production boards of m6 will return 0 if it's 8 MB */ if (pScrn->videoRam == 0) { pScrn->videoRam = 8192; -@@ -2163,7 +2137,14 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) +@@ -1811,6 +1799,19 
@@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) + if (!xf86LoadSubModule(pScrn, "shadow")) + return FALSE; + } ++ ++ ++ if ((info->ChipFamily == CHIP_FAMILY_RS100) || ++ (info->ChipFamily == CHIP_FAMILY_RS200) || ++ (info->ChipFamily == CHIP_FAMILY_RS300) || ++ (info->ChipFamily == CHIP_FAMILY_RS400) || ++ (info->ChipFamily == CHIP_FAMILY_RS690) || ++ (info->ChipFamily == CHIP_FAMILY_RS740)) ++ info->has_tcl = FALSE; ++ else { ++ info->has_tcl = TRUE; ++ } ++ + return TRUE; + } + +@@ -2163,7 +2164,14 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) } else { from = xf86GetOptValBool(info->Options, OPTION_PAGE_FLIP, &info->allowPageFlip) ? X_CONFIG : X_DEFAULT; @@ -4752,7 +6067,7 @@ index 5cf8d51..d5595ea 100644 } #else from = X_DEFAULT; -@@ -2221,7 +2202,7 @@ static void RADEONPreInitColorTiling(ScrnInfoPtr pScrn) +@@ -2221,7 +2229,7 @@ static void RADEONPreInitColorTiling(ScrnInfoPtr pScrn) info->pKernelDRMVersion->version_minor, info->pKernelDRMVersion->version_patchlevel); info->allowColorTiling = FALSE; @@ -4761,7 +6076,7 @@ index 5cf8d51..d5595ea 100644 } #endif /* XF86DRI */ -@@ -2366,7 +2347,7 @@ static Bool RADEONPreInitXv(ScrnInfoPtr pScrn) +@@ -2366,7 +2374,7 @@ static Bool RADEONPreInitXv(ScrnInfoPtr pScrn) } bios_header=info->VBIOS[0x48]; @@ -4770,7 +6085,7 @@ index 5cf8d51..d5595ea 100644 mm_table=info->VBIOS[bios_header+0x38]; if(mm_table==0) -@@ -2636,8 +2617,7 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2636,8 +2644,7 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) if (xf86RegisterResources(info->pEnt->index, 0, ResExclusive)) goto fail; @@ -4780,7 +6095,7 @@ index 5cf8d51..d5595ea 100644 pScrn->racMemFlags = RAC_FB | RAC_COLORMAP | RAC_VIEWPORT | RAC_CURSOR; pScrn->monitor = pScrn->confScreen->monitor; -@@ -2689,7 +2669,7 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2689,7 +2696,7 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) if (!RADEONPreInitWeight(pScrn)) goto fail; @@ -4789,7 +6104,7 @@ index 
5cf8d51..d5595ea 100644 if ((s = xf86GetOptValString(info->Options, OPTION_DISP_PRIORITY))) { if (strcmp(s, "AUTO") == 0) { info->DispPriority = 1; -@@ -2698,7 +2678,7 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2698,7 +2705,7 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) } else if (strcmp(s, "HIGH") == 0) { info->DispPriority = 2; } else @@ -4798,7 +6113,7 @@ index 5cf8d51..d5595ea 100644 } if (!RADEONPreInitInt10(pScrn, &pInt10)) -@@ -2739,17 +2719,22 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2739,17 +2746,22 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) if (crtc_max_Y > 8192) crtc_max_Y = 8192; } else { @@ -4828,7 +6143,7 @@ index 5cf8d51..d5595ea 100644 } } xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Max desktop size set to %dx%d\n", -@@ -2793,14 +2778,16 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2793,14 +2805,16 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) if (!RADEONPreInitAccel(pScrn)) goto fail; @@ -4848,7 +6163,7 @@ index 5cf8d51..d5595ea 100644 if (pScrn->modes == NULL) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "No modes.\n"); goto fail; -@@ -3001,7 +2988,8 @@ RADEONInitBIOSRegisters(ScrnInfoPtr pScrn) +@@ -3001,7 +3015,8 @@ RADEONInitBIOSRegisters(ScrnInfoPtr pScrn) /* let the bios control the backlight */ save->bios_2_scratch &= ~ATOM_S2_VRI_BRIGHT_ENABLE; /* tell the bios not to handle mode switching */ @@ -4858,7 +6173,7 @@ index 5cf8d51..d5595ea 100644 if (info->ChipFamily >= CHIP_FAMILY_R600) { OUTREG(R600_BIOS_2_SCRATCH, save->bios_2_scratch); -@@ -3014,7 +3002,8 @@ RADEONInitBIOSRegisters(ScrnInfoPtr pScrn) +@@ -3014,7 +3029,8 @@ RADEONInitBIOSRegisters(ScrnInfoPtr pScrn) /* let the bios control the backlight */ save->bios_0_scratch &= ~RADEON_DRIVER_BRIGHTNESS_EN; /* tell the bios not to handle mode switching */ @@ -4868,7 +6183,7 @@ index 5cf8d51..d5595ea 100644 /* tell the bios a driver is loaded */ save->bios_7_scratch |= RADEON_DRV_LOADED; -@@ -3032,9 +3021,7 @@ Bool RADEONScreenInit(int 
scrnIndex, ScreenPtr pScreen, +@@ -3032,9 +3048,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, { ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); @@ -4878,7 +6193,7 @@ index 5cf8d51..d5595ea 100644 #ifdef RENDER int subPixelOrder = SubPixelUnknown; char* s; -@@ -3080,11 +3067,16 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3080,11 +3094,16 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, RADEONBlank(pScrn); if (info->IsMobility && !IS_AVIVO_VARIANT) { @@ -4898,7 +6213,7 @@ index 5cf8d51..d5595ea 100644 } if (IS_R300_VARIANT || IS_RV100_VARIANT) -@@ -3139,12 +3131,14 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3139,12 +3158,14 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, RADEONInitMemoryMap(pScrn); /* empty the surfaces */ @@ -4919,7 +6234,7 @@ index 5cf8d51..d5595ea 100644 } #ifdef XF86DRI -@@ -3340,28 +3334,8 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3340,28 +3361,8 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, /* xf86CrtcRotate() accesses pScrn->pScreen */ pScrn->pScreen = pScreen; @@ -4948,7 +6263,7 @@ index 5cf8d51..d5595ea 100644 RADEONSaveScreen(pScreen, SCREEN_SAVER_ON); -@@ -3760,15 +3734,15 @@ static void RADEONAdjustMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save) +@@ -3760,15 +3761,15 @@ static void RADEONAdjustMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save) { RADEONInfoPtr info = RADEONPTR(pScrn); CARD32 fb, agp, agp_hi; @@ -4968,7 +6283,7 @@ index 5cf8d51..d5595ea 100644 changed = 1; if (changed) { -@@ -4039,12 +4013,13 @@ static void RADEONSavePalette(ScrnInfoPtr pScrn, RADEONSavePtr save) +@@ -4039,12 +4040,13 @@ static void RADEONSavePalette(ScrnInfoPtr pScrn, RADEONSavePtr save) } #endif @@ -4983,7 +6298,7 @@ index 5cf8d51..d5595ea 100644 // state->vga_memory_base = INREG(AVIVO_VGA_MEMORY_BASE); // state->vga_fb_start = INREG(AVIVO_VGA_FB_START); -@@ -4110,8 +4085,6 @@ 
avivo_save(ScrnInfoPtr pScrn, RADEONSavePtr save) +@@ -4110,8 +4112,6 @@ avivo_save(ScrnInfoPtr pScrn, RADEONSavePtr save) state->grph1.viewport_start = INREG(AVIVO_D1MODE_VIEWPORT_START); state->grph1.viewport_size = INREG(AVIVO_D1MODE_VIEWPORT_SIZE); @@ -4992,7 +6307,7 @@ index 5cf8d51..d5595ea 100644 state->crtc2.pll_source = INREG(AVIVO_PCLK_CRTC2_CNTL); -@@ -4151,57 +4124,207 @@ avivo_save(ScrnInfoPtr pScrn, RADEONSavePtr save) +@@ -4151,57 +4151,207 @@ avivo_save(ScrnInfoPtr pScrn, RADEONSavePtr save) state->grph2.viewport_start = INREG(AVIVO_D2MODE_VIEWPORT_START); state->grph2.viewport_size = INREG(AVIVO_D2MODE_VIEWPORT_SIZE); @@ -5235,7 +6550,7 @@ index 5cf8d51..d5595ea 100644 // OUTMC(pScrn, AVIVO_MC_MEMORY_MAP, state->mc_memory_map); // OUTREG(AVIVO_VGA_MEMORY_BASE, state->vga_memory_base); -@@ -4266,8 +4389,6 @@ avivo_restore(ScrnInfoPtr pScrn, RADEONSavePtr restore) +@@ -4266,8 +4416,6 @@ avivo_restore(ScrnInfoPtr pScrn, RADEONSavePtr restore) OUTREG(AVIVO_D1MODE_VIEWPORT_START, state->grph1.viewport_start); OUTREG(AVIVO_D1MODE_VIEWPORT_SIZE, state->grph1.viewport_size); @@ -5244,7 +6559,7 @@ index 5cf8d51..d5595ea 100644 OUTREG(AVIVO_PCLK_CRTC2_CNTL, state->crtc2.pll_source); -@@ -4306,49 +4427,199 @@ avivo_restore(ScrnInfoPtr pScrn, RADEONSavePtr restore) +@@ -4306,49 +4454,199 @@ avivo_restore(ScrnInfoPtr pScrn, RADEONSavePtr restore) OUTREG(AVIVO_D2MODE_VIEWPORT_START, state->grph2.viewport_start); OUTREG(AVIVO_D2MODE_VIEWPORT_SIZE, state->grph2.viewport_size); @@ -5446,8 +6761,8 @@ index 5cf8d51..d5595ea 100644 + j++; + } + } - } - ++ } ++ + /* scalers */ + j = 0; + for (i = 0x6578; i <= 0x65e4; i += 4) { @@ -5464,10 +6779,10 @@ index 5cf8d51..d5595ea 100644 + for (i = 0x66e8; i <= 0x66fc; i += 4) { + OUTREG(i, state->dxscl[j]); + j++; -+ } + } + OUTREG(0x6e30, state->dxscl[6]); + OUTREG(0x6e34, state->dxscl[7]); -+ + OUTREG(AVIVO_D1VGA_CONTROL, state->vga1_cntl); OUTREG(AVIVO_D2VGA_CONTROL, state->vga2_cntl); } @@ -5479,7 +6794,7 @@ index 
5cf8d51..d5595ea 100644 RADEONInfoPtr info = RADEONPTR(pScrn); unsigned char *RADEONMMIO = info->MMIO; struct avivo_state *state = &restore->avivo; -@@ -4471,7 +4742,7 @@ static void RADEONSave(ScrnInfoPtr pScrn) +@@ -4471,7 +4769,7 @@ static void RADEONSave(ScrnInfoPtr pScrn) } /* Restore the original (text) mode */ @@ -5488,7 +6803,7 @@ index 5cf8d51..d5595ea 100644 { RADEONInfoPtr info = RADEONPTR(pScrn); RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); -@@ -4567,7 +4838,9 @@ void RADEONRestore(ScrnInfoPtr pScrn) +@@ -4567,7 +4865,9 @@ void RADEONRestore(ScrnInfoPtr pScrn) */ if (IS_AVIVO_VARIANT) avivo_restore_vga_regs(pScrn, restore); @@ -5499,7 +6814,7 @@ index 5cf8d51..d5595ea 100644 #if 0 RADEONWaitForVerticalSync(pScrn); -@@ -4885,8 +5158,6 @@ Bool RADEONEnterVT(int scrnIndex, int flags) +@@ -4885,8 +5185,6 @@ Bool RADEONEnterVT(int scrnIndex, int flags) ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; RADEONInfoPtr info = RADEONPTR(pScrn); unsigned char *RADEONMMIO = info->MMIO; @@ -5508,7 +6823,7 @@ index 5cf8d51..d5595ea 100644 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "RADEONEnterVT\n"); -@@ -4907,41 +5178,34 @@ Bool RADEONEnterVT(int scrnIndex, int flags) +@@ -4907,41 +5205,34 @@ Bool RADEONEnterVT(int scrnIndex, int flags) RADEONWaitForIdleMMIO(pScrn); if (info->IsMobility && !IS_AVIVO_VARIANT) { @@ -5541,11 +6856,11 @@ index 5cf8d51..d5595ea 100644 - crtc->desiredX = 0; - crtc->desiredY = 0; - } -- + - if (!xf86CrtcSetMode (crtc, &crtc->desiredMode, crtc->desiredRotation, - crtc->desiredX, crtc->desiredY)) - return FALSE; - +- - } + if (!xf86SetDesiredModes(pScrn)) + return FALSE; @@ -5565,7 +6880,7 @@ index 5cf8d51..d5595ea 100644 } /* get the DRI back into shape after resume */ -@@ -4966,8 +5230,6 @@ Bool RADEONEnterVT(int scrnIndex, int flags) +@@ -4966,8 +5257,6 @@ Bool RADEONEnterVT(int scrnIndex, int flags) } #endif @@ -5574,7 +6889,18 @@ index 5cf8d51..d5595ea 100644 return TRUE; } -@@ -4988,8 +5250,9 @@ void RADEONLeaveVT(int 
scrnIndex, int flags) +@@ -4978,6 +5267,10 @@ void RADEONLeaveVT(int scrnIndex, int flags) + { + ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; + RADEONInfoPtr info = RADEONPTR(pScrn); ++#ifndef HAVE_FREE_SHADOW ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn); ++ int o; ++#endif + + xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, + "RADEONLeaveVT\n"); +@@ -4988,8 +5281,9 @@ void RADEONLeaveVT(int scrnIndex, int flags) DRILock(pScrn->pScreen, 0); RADEONCP_STOP(pScrn, info); @@ -5586,8 +6912,32 @@ index 5cf8d51..d5595ea 100644 /* we need to backup the PCIE GART TABLE from fb memory */ memcpy(info->pciGartBackup, (info->FB + info->pciGartOffset), info->pciGartSize); } +@@ -5009,6 +5303,23 @@ void RADEONLeaveVT(int scrnIndex, int flags) + } + #endif + ++#ifndef HAVE_FREE_SHADOW ++ for (o = 0; o < config->num_crtc; o++) { ++ xf86CrtcPtr crtc = config->crtc[o]; ++ ++ if (crtc->rotatedPixmap || crtc->rotatedData) { ++ crtc->funcs->shadow_destroy(crtc, crtc->rotatedPixmap, ++ crtc->rotatedData); ++ crtc->rotatedPixmap = NULL; ++ crtc->rotatedData = NULL; ++ } ++ } ++#else ++ xf86RotateFreeShadow(pScrn); ++#endif ++ ++ xf86_hide_cursors (pScrn); ++ + RADEONRestore(pScrn); + + xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, diff --git a/src/radeon_exa.c b/src/radeon_exa.c -index 4da4841..a6ededa 100644 +index 4da4841..fa6ac0d 100644 --- a/src/radeon_exa.c +++ b/src/radeon_exa.c @@ -99,10 +99,17 @@ static __inline__ int @@ -5609,11 +6959,29 @@ index 4da4841..a6ededa 100644 } static __inline__ CARD32 F_TO_DW(float val) +@@ -182,7 +189,7 @@ Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, CARD32 *pitch_offset) + if (bpp == 24) + bpp = 8; + +- offset = exaGetPixmapOffset(pPix) + info->fbLocation; ++ offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; + pitch = exaGetPixmapPitch(pPix); + + return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch); +@@ -395,7 +402,7 @@ Bool RADEONSetupMemEXA (ScreenPtr pScreen) + 
else + screen_size = pScrn->virtualY * byteStride; + +- info->exa->memoryBase = info->FB + pScrn->fbOffset; ++ info->exa->memoryBase = info->FB; + info->exa->memorySize = info->FbMapSize - info->FbSecureSize; + info->exa->offScreenBase = screen_size; + diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c -index 10221c0..d5ee5a6 100644 +index 10221c0..272ffa9 100644 --- a/src/radeon_exa_funcs.c +++ b/src/radeon_exa_funcs.c -@@ -533,13 +533,12 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) +@@ -533,18 +533,17 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) #ifdef RENDER if (info->RenderAccel) { @@ -5631,6 +6999,30 @@ index 10221c0..d5ee5a6 100644 info->exa->CheckComposite = R300CheckComposite; info->exa->PrepareComposite = FUNC_NAME(R300PrepareComposite); + info->exa->Composite = FUNC_NAME(RadeonComposite); +- info->exa->DoneComposite = RadeonDoneComposite; ++ info->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite); + } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || + (info->ChipFamily == CHIP_FAMILY_RV280) || + (info->ChipFamily == CHIP_FAMILY_RS300) || +@@ -555,7 +554,7 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) + info->exa->PrepareComposite = + FUNC_NAME(R200PrepareComposite); + info->exa->Composite = FUNC_NAME(RadeonComposite); +- info->exa->DoneComposite = RadeonDoneComposite; ++ info->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite); + } else { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " + "enabled for R100 type cards.\n"); +@@ -563,7 +562,7 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) + info->exa->PrepareComposite = + FUNC_NAME(R100PrepareComposite); + info->exa->Composite = FUNC_NAME(RadeonComposite); +- info->exa->DoneComposite = RadeonDoneComposite; ++ info->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite); + } + } + #endif @@ -572,11 +571,11 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); @@ -5647,10 +7039,32 @@ 
index 10221c0..d5ee5a6 100644 RADEONEngineInit(pScrn); diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c -index 9bbccb5..4e5ab81 100644 +index 9bbccb5..54b0272 100644 --- a/src/radeon_exa_render.c +++ b/src/radeon_exa_render.c -@@ -177,9 +177,8 @@ static Bool R300GetDestFormat(PicturePtr pDstPicture, CARD32 *dst_format) +@@ -26,6 +26,7 @@ + * Eric Anholt + * Zack Rusin + * Benjamin Herrenschmidt ++ * Alex Deucher + * + */ + +@@ -57,6 +58,13 @@ + #ifdef ONLY_ONCE + static Bool is_transform[2]; + static PictTransform *transform[2]; ++static Bool has_mask; ++/* Whether we are tiling horizontally and vertically */ ++static Bool need_src_tile_x; ++static Bool need_src_tile_y; ++/* Size of tiles ... set to 65536x65536 if not tiling in that direction */ ++static Bool src_tile_width; ++static Bool src_tile_height; + + struct blendinfo { + Bool dst_alpha; +@@ -177,9 +185,8 @@ static Bool R300GetDestFormat(PicturePtr pDstPicture, CARD32 *dst_format) *dst_format = R300_COLORFORMAT_I8; break; default: @@ -5662,7 +7076,150 @@ index 9bbccb5..4e5ab81 100644 } return TRUE; } -@@ -338,41 +337,76 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -221,6 +228,95 @@ union intfloat { + CARD32 i; + }; + ++/* Check if we need a software-fallback because of a repeating ++ * non-power-of-two texture. ++ * ++ * canTile: whether we can emulate a repeat by drawing in tiles: ++ * possible for the source, but not for the mask. (Actually ++ * we could do tiling for the mask too, but dealing with the ++ * combination of a tiled mask and a tiled source would be ++ * a lot of complexity, so we handle only the most common ++ * case of a repeating mask.) 
++ */ ++static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile) ++{ ++ int w = pPict->pDrawable->width; ++ int h = pPict->pDrawable->height; ++ ++ if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0) && ++ !(!pPict->transform && canTile)) ++ RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n", ++ canTile ? "source" : "mask", w, h, pPict->transform != 0)); ++ ++ return TRUE; ++} ++ ++/* Determine if the pitch of the pixmap meets the criteria for being ++ * used as a repeating texture: no padding or only a single line texture. ++ */ ++static Bool RADEONPitchMatches(PixmapPtr pPix) ++{ ++ int w = pPix->drawable.width; ++ int h = pPix->drawable.height; ++ CARD32 txpitch = exaGetPixmapPitch(pPix); ++ ++ if (h > 1 && ((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch) ++ return FALSE; ++ ++ return TRUE; ++} ++ ++/* We can't turn on repeats normally for a non-power-of-two dimension, ++ * but if the source isn't transformed, we can get the same effect ++ * by drawing the image in multiple tiles. (A common case that it's ++ * important to get right is drawing a strip of a NPOTxPOT texture ++ * repeating in the POT direction. With tiling, this ends up as a ++ * a single tile on R300 and newer, which is perfect.) ++ * ++ * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in ++ * one direction and POT in the other in the POT direction; on ++ * older chips we can only repeat at all if the texture is POT in ++ * both directions. ++ * ++ * needMatchingPitch: On R100/R200, we can only repeat horizontally if ++ * there is no padding in the texture. Textures with small POT widths ++ * (1,2,4,8) thus can't be tiled. 
++ */ ++static Bool RADEONSetupSourceTile(PicturePtr pPict, ++ PixmapPtr pPix, ++ Bool canTile1d, ++ Bool needMatchingPitch) ++{ ++ need_src_tile_x = need_src_tile_y = FALSE; ++ src_tile_width = src_tile_height = 65536; /* "infinite" */ ++ ++ if (pPict->repeat) { ++ Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix); ++ ++ int w = pPict->pDrawable->width; ++ int h = pPict->pDrawable->height; ++ ++ if (pPict->transform) { ++ if (badPitch) ++ RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n", ++ w, (unsigned)exaGetPixmapPitch(pPix))); ++ } else { ++ need_src_tile_x = (w & (w - 1)) != 0 || badPitch; ++ need_src_tile_y = (h & (h - 1)) != 0; ++ ++ if (!canTile1d) ++ need_src_tile_x = need_src_tile_y = need_src_tile_x || need_src_tile_y; ++ } ++ ++ if (need_src_tile_x) ++ src_tile_width = w; ++ if (need_src_tile_y) ++ src_tile_height = h; ++ } ++ ++ return TRUE; ++} ++ + /* R100-specific code */ + + static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit) +@@ -240,8 +336,8 @@ static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit) + RADEON_FALLBACK(("Unsupported picture format 0x%x\n", + (int)pPict->format)); + +- if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0)) +- RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h)); ++ if (!RADEONCheckTexturePOT(pPict, unit == 0)) ++ return FALSE; + + if (pPict->filter != PictFilterNearest && + pPict->filter != PictFilterBilinear) +@@ -261,11 +357,12 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + CARD32 txfilter, txformat, txoffset, txpitch; + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; ++ Bool repeat = pPict->repeat && !(unit == 0 && (need_src_tile_x || need_src_tile_y)); + int i; + ACCEL_PREAMBLE(); + + txpitch = exaGetPixmapPitch(pPix); +- txoffset = exaGetPixmapOffset(pPix) + info->fbLocation; ++ txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; + + if ((txoffset & 0x1f) != 0) + 
RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); +@@ -281,9 +378,8 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + if (RADEONPixmapIsColortiled(pPix)) + txoffset |= RADEON_TXO_MACRO_TILE; + +- if (pPict->repeat) { +- if ((h != 1) && +- (((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch)) ++ if (repeat) { ++ if (!RADEONPitchMatches(pPix)) + RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n", + w, (unsigned)txpitch)); + +@@ -307,6 +403,9 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); + } + ++ if (repeat) ++ txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP; ++ + BEGIN_ACCEL(5); + if (unit == 0) { + OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter); +@@ -338,41 +437,76 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, } #ifdef ONLY_ONCE @@ -5758,7 +7315,7 @@ index 9bbccb5..4e5ab81 100644 if (!RADEONGetDestFormat(pDstPicture, &tmp1)) return FALSE; -@@ -400,7 +434,9 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, +@@ -400,22 +534,32 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, if (!info->XInited3D) RADEONInit3DEngine(pScrn); @@ -5766,10 +7323,75 @@ index 9bbccb5..4e5ab81 100644 + if (!RADEONGetDestFormat(pDstPicture, &dst_format)) + return FALSE; + ++ if (pMask) ++ has_mask = TRUE; ++ else ++ has_mask = FALSE; ++ pixel_shift = pDst->drawable.bitsPerPixel >> 4; - dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; -@@ -532,7 +568,7 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +- dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; ++ dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + dst_pitch = exaGetPixmapPitch(pDst); + colorpitch = dst_pitch >> pixel_shift; + if (RADEONPixmapIsColortiled(pDst)) + colorpitch |= RADEON_COLOR_TILE_ENABLE; + +- dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; 
++ dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + dst_pitch = exaGetPixmapPitch(pDst); + if ((dst_offset & 0x0f) != 0) + RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset)); + if (((dst_pitch >> pixel_shift) & 0x7) != 0) + RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); + ++ if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE)) ++ return FALSE; ++ + if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0)) + return FALSE; + pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE; +@@ -471,9 +615,13 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, + + OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend); + OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend); +- OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | +- RADEON_SE_VTX_FMT_ST0 | +- RADEON_SE_VTX_FMT_ST1); ++ if (pMask) ++ OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | ++ RADEON_SE_VTX_FMT_ST0 | ++ RADEON_SE_VTX_FMT_ST1)); ++ else ++ OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | ++ RADEON_SE_VTX_FMT_ST0)); + /* Op operator. 
*/ + blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format); + +@@ -503,8 +651,8 @@ static Bool R200CheckCompositeTexture(PicturePtr pPict, int unit) + RADEON_FALLBACK(("Unsupported picture format 0x%x\n", + (int)pPict->format)); + +- if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0)) +- RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h)); ++ if (!RADEONCheckTexturePOT(pPict, unit == 0)) ++ return FALSE; + + if (pPict->filter != PictFilterNearest && + pPict->filter != PictFilterBilinear) +@@ -522,17 +670,18 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + CARD32 txfilter, txformat, txoffset, txpitch; + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; ++ Bool repeat = pPict->repeat && !(unit == 0 && (need_src_tile_x || need_src_tile_y)); + int i; + ACCEL_PREAMBLE(); + + txpitch = exaGetPixmapPitch(pPix); +- txoffset = exaGetPixmapOffset(pPix) + info->fbLocation; ++ txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; + + if ((txoffset & 0x1f) != 0) RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); if ((txpitch & 0x1f) != 0) RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); @@ -5778,7 +7400,29 @@ index 9bbccb5..4e5ab81 100644 for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++) { if (R200TexFormats[i].fmt == pPict->format) -@@ -604,32 +640,61 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -542,9 +691,8 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + if (RADEONPixmapIsColortiled(pPix)) + txoffset |= R200_TXO_MACRO_TILE; + +- if (pPict->repeat) { +- if ((h != 1) && +- (((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch)) ++ if (repeat) { ++ if (!RADEONPitchMatches(pPix)) + RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n", + w, (unsigned)txpitch)); + +@@ -570,6 +718,9 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr 
pPict, PixmapPtr pPix, + RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); + } + ++ if (repeat) ++ txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP; ++ + BEGIN_ACCEL(6); + if (unit == 0) { + OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); +@@ -604,32 +755,61 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture) { @@ -5792,7 +7436,18 @@ index 9bbccb5..4e5ab81 100644 - RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); + if (!pSrcPicture->pDrawable) + return FALSE; -+ + +- if (pMaskPicture != NULL && pMaskPicture->componentAlpha) { +- /* Check if it's component alpha that relies on a source alpha and on +- * the source value. We can only get one of those into the single +- * source value that we get to blend with. +- */ +- if (RadeonBlendOp[op].src_alpha && +- (RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) != +- RADEON_SRC_BLEND_GL_ZERO) +- { +- RADEON_FALLBACK(("Component alpha not supported with source " +- "alpha and source value blending.\n")); + pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); + + if (pSrcPixmap->drawable.width >= 2048 || @@ -5819,19 +7474,8 @@ index 9bbccb5..4e5ab81 100644 + RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", + pMaskPixmap->drawable.width, + pMaskPixmap->drawable.height)); -+ } - -- if (pMaskPicture != NULL && pMaskPicture->componentAlpha) { -- /* Check if it's component alpha that relies on a source alpha and on -- * the source value. We can only get one of those into the single -- * source value that we get to blend with. 
-- */ -- if (RadeonBlendOp[op].src_alpha && -- (RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) != -- RADEON_SRC_BLEND_GL_ZERO) -- { -- RADEON_FALLBACK(("Component alpha not supported with source " -- "alpha and source value blending.\n")); + } ++ + if (pMaskPicture->componentAlpha) { + /* Check if it's component alpha that relies on a source alpha and + * on the source value. We can only get one of those into the @@ -5843,7 +7487,7 @@ index 9bbccb5..4e5ab81 100644 + RADEON_FALLBACK(("Component alpha not supported with source " + "alpha and source value blending.\n")); + } - } ++ } + + if (!R200CheckCompositeTexture(pMaskPicture, 1)) + return FALSE; @@ -5856,7 +7500,7 @@ index 9bbccb5..4e5ab81 100644 if (!RADEONGetDestFormat(pDstPicture, &tmp1)) return FALSE; -@@ -653,7 +718,9 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -653,10 +833,17 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, if (!info->XInited3D) RADEONInit3DEngine(pScrn); @@ -5864,10 +7508,46 @@ index 9bbccb5..4e5ab81 100644 + if (!RADEONGetDestFormat(pDstPicture, &dst_format)) + return FALSE; + ++ if (pMask) ++ has_mask = TRUE; ++ else ++ has_mask = FALSE; ++ pixel_shift = pDst->drawable.bitsPerPixel >> 4; - dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; -@@ -744,13 +811,22 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, +- dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; ++ dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + dst_pitch = exaGetPixmapPitch(pDst); + colorpitch = dst_pitch >> pixel_shift; + if (RADEONPixmapIsColortiled(pDst)) +@@ -667,6 +854,9 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, + if (((dst_pitch >> pixel_shift) & 0x7) != 0) + RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); + ++ if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE)) ++ return FALSE; ++ + if 
(!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0)) + return FALSE; + pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE; +@@ -688,9 +878,13 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, + OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset); + + OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); +- OUT_ACCEL_REG(R200_SE_VTX_FMT_1, +- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | +- (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); ++ if (pMask) ++ OUT_ACCEL_REG(R200_SE_VTX_FMT_1, ++ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | ++ (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); ++ else ++ OUT_ACCEL_REG(R200_SE_VTX_FMT_1, ++ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); + + OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); + +@@ -744,13 +938,22 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, #ifdef ONLY_ONCE @@ -5892,7 +7572,42 @@ index 9bbccb5..4e5ab81 100644 RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++) -@@ -794,6 +870,7 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -762,13 +965,24 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict, int unit) + RADEON_FALLBACK(("Unsupported picture format 0x%x\n", + (int)pPict->format)); + +- if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0)) +- RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h)); ++ if (!RADEONCheckTexturePOT(pPict, unit == 0)) ++ return FALSE; + + if (pPict->filter != PictFilterNearest && + pPict->filter != PictFilterBilinear) + RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); + ++ /* for REPEAT_NONE, Render semantics are that sampling outside the source ++ * picture results in alpha=0 pixels. We can implement this with a border color ++ * *if* our source texture has an alpha channel, otherwise we need to fall ++ * back. 
If we're not transformed then we hope that upper layers have clipped ++ * rendering to the bounds of the source drawable, in which case it doesn't ++ * matter. I have not, however, verified that the X server always does such ++ * clipping. ++ */ ++ if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) ++ RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); ++ + return TRUE; + } + +@@ -787,13 +1001,14 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + TRACE; + + txpitch = exaGetPixmapPitch(pPix); +- txoffset = exaGetPixmapOffset(pPix) + info->fbLocation; ++ txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; + + if ((txoffset & 0x1f) != 0) + RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); if ((txpitch & 0x1f) != 0) RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); @@ -5900,7 +7615,7 @@ index 9bbccb5..4e5ab81 100644 pixel_shift = pPix->drawable.bitsPerPixel >> 4; txpitch >>= pixel_shift; txpitch -= 1; -@@ -809,24 +886,32 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -809,24 +1024,35 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, txformat1 = R300TexFormats[i].card_fmt; @@ -5934,27 +7649,39 @@ index 9bbccb5..4e5ab81 100644 - txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | - R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST)); -+ if (pPict->repeat) -+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | -+ R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP)); ++ if (pPict->repeat && !(unit == 0 && need_src_tile_x)) ++ txfilter = R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP); + else -+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST)); ++ txfilter = R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL); + ++ if (pPict->repeat && !(unit == 0 && need_src_tile_y)) ++ txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP); ++ else ++ txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL); 
++ + txfilter |= (unit << R300_TX_ID_SHIFT); switch (pPict->filter) { case PictFilterNearest: -@@ -841,7 +926,7 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -839,13 +1065,15 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); + } - BEGIN_ACCEL(6); +- BEGIN_ACCEL(6); ++ BEGIN_ACCEL(pPict->repeat ? 6 : 7); OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter); - OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0x0); + OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0); OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0); OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1); OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch); -@@ -867,8 +952,8 @@ static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP + OUT_ACCEL_REG(R300_TX_OFFSET_0 + (unit * 4), txoffset); ++ if (!pPict->repeat) ++ OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0); + FINISH_ACCEL(); + + if (pPict->transform != 0) { +@@ -867,8 +1095,8 @@ static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP ScreenPtr pScreen = pDstPicture->pDrawable->pScreen; PixmapPtr pSrcPixmap, pDstPixmap; ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; @@ -5965,7 +7692,7 @@ index 9bbccb5..4e5ab81 100644 TRACE; -@@ -876,51 +961,64 @@ static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP +@@ -876,51 +1104,64 @@ static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0])) RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); @@ -6063,7 +7790,7 @@ index 9bbccb5..4e5ab81 100644 return FALSE; if (!R300GetDestFormat(pDstPicture, &tmp1)) -@@ -940,7 +1038,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -940,7 +1181,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, CARD32 txenable, 
colorpitch; CARD32 blendcntl; int pixel_shift; @@ -6071,7 +7798,7 @@ index 9bbccb5..4e5ab81 100644 ACCEL_PREAMBLE(); TRACE; -@@ -948,7 +1045,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -948,10 +1188,17 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, if (!info->XInited3D) RADEONInit3DEngine(pScrn); @@ -6079,14 +7806,32 @@ index 9bbccb5..4e5ab81 100644 + if (!R300GetDestFormat(pDstPicture, &dst_format)) + return FALSE; + ++ if (pMask) ++ has_mask = TRUE; ++ else ++ has_mask = FALSE; ++ pixel_shift = pDst->drawable.bitsPerPixel >> 4; - dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; -@@ -979,279 +1078,643 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +- dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; ++ dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + dst_pitch = exaGetPixmapPitch(pDst); + colorpitch = dst_pitch >> pixel_shift; + +@@ -965,6 +1212,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, + if (((dst_pitch >> pixel_shift) & 0x7) != 0) + RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); ++ if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE)) ++ return FALSE; ++ + if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0)) + return FALSE; + txenable = R300_TEX_0_ENABLE; +@@ -980,28 +1230,32 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, RADEON_SWITCH_TO_3D(); -- /* setup the VAP */ + /* setup the VAP */ - - if (has_tcl) { - BEGIN_ACCEL(28); @@ -6096,34 +7841,59 @@ index 9bbccb5..4e5ab81 100644 - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (4 << R300_PVS_NUM_FPUS_SHIFT) | - (12 << R300_VF_MAX_VTX_NUM_SHIFT))); -- } else { ++ if (info->has_tcl) { ++ if (pMask) ++ BEGIN_ACCEL(8); ++ else ++ BEGIN_ACCEL(7); + } else { - BEGIN_ACCEL(10); - OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS); - OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << 
R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (4 << R300_PVS_NUM_FPUS_SHIFT) | - (5 << R300_VF_MAX_VTX_NUM_SHIFT))); -- } -- ++ if (pMask) ++ BEGIN_ACCEL(6); ++ else ++ BEGIN_ACCEL(5); + } + - OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); - OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0); - - if (has_tcl) { -- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -- (0 << R300_SKIP_DWORDS_0_SHIFT) | -- (0 << R300_DST_VEC_LOC_0_SHIFT) | -- R300_SIGNED_0 | -- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -- (0 << R300_SKIP_DWORDS_1_SHIFT) | ++ /* These registers define the number, type, and location of data submitted ++ * to the PVS unit of GA input (when PVS is disabled) ++ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is ++ * enabled. This memory provides the inputs to the vertex shader program ++ * and ordering is not important. When PVS/TCL is disabled, this field maps ++ * directly to the GA input memory and the order is significant.
In ++ * PVS_BYPASS mode the order is as follows: ++ * Position ++ * Point Size ++ * Color 0-3 ++ * Textures 0-7 ++ * Fog ++ */ ++ if (pMask) { + OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, + ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | + (0 << R300_SKIP_DWORDS_0_SHIFT) | +@@ -1009,35 +1263,15 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, + R300_SIGNED_0 | + (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | + (0 << R300_SKIP_DWORDS_1_SHIFT) | - (10 << R300_DST_VEC_LOC_1_SHIFT) | -- R300_SIGNED_1)); -- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, -- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | -- (0 << R300_SKIP_DWORDS_2_SHIFT) | ++ (6 << R300_DST_VEC_LOC_1_SHIFT) | + R300_SIGNED_1)); + OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, + ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | + (0 << R300_SKIP_DWORDS_2_SHIFT) | - (11 << R300_DST_VEC_LOC_2_SHIFT) | -- R300_LAST_VEC_2 | -- R300_SIGNED_2)); ++ (7 << R300_DST_VEC_LOC_2_SHIFT) | + R300_LAST_VEC_2 | + R300_SIGNED_2)); - OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, - ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) | - (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) | @@ -6145,15 +7915,16 @@ index 9bbccb5..4e5ab81 100644 - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) - << R300_WRITE_ENA_2_SHIFT))); - } else { -- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -- (0 << R300_SKIP_DWORDS_0_SHIFT) | -- (0 << R300_DST_VEC_LOC_0_SHIFT) | -- R300_SIGNED_0 | -- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -- (0 << R300_SKIP_DWORDS_1_SHIFT) | -- (6 << R300_DST_VEC_LOC_1_SHIFT) | -- R300_SIGNED_1)); ++ } else + OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, + ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | + (0 << R300_SKIP_DWORDS_0_SHIFT) | +@@ -1046,223 +1280,569 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, + 
(R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | + (0 << R300_SKIP_DWORDS_1_SHIFT) | + (6 << R300_DST_VEC_LOC_1_SHIFT) | ++ R300_LAST_VEC_1 | + R300_SIGNED_1)); - OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, - ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | - (0 << R300_SKIP_DWORDS_2_SHIFT) | @@ -6181,42 +7952,6 @@ index 9bbccb5..4e5ab81 100644 - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y) - << R300_WRITE_ENA_2_SHIFT))); - } -+ BEGIN_ACCEL(8); -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -+ (0 << R300_SKIP_DWORDS_0_SHIFT) | -+ (0 << R300_DST_VEC_LOC_0_SHIFT) | -+ R300_SIGNED_0 | -+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -+ (0 << R300_SKIP_DWORDS_1_SHIFT) | -+ (6 << R300_DST_VEC_LOC_1_SHIFT) | -+ R300_SIGNED_1)); -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, -+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | -+ (0 << R300_SKIP_DWORDS_2_SHIFT) | -+ (7 << R300_DST_VEC_LOC_2_SHIFT) | -+ R300_LAST_VEC_2 | -+ R300_SIGNED_2)); -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, -+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) | -+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) | -+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) -+ << R300_WRITE_ENA_0_SHIFT) | -+ (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) | -+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) | -+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) -+ << R300_WRITE_ENA_1_SHIFT))); -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1, -+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) | -+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) | -+ 
(R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_2_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_2_SHIFT) | -+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) -+ << R300_WRITE_ENA_2_SHIFT))); - /* setup the vertex shader */ - if (has_tcl) { @@ -6243,13 +7978,45 @@ index 9bbccb5..4e5ab81 100644 - OUT_ACCEL_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000); - OUT_ACCEL_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000); - OUT_ACCEL_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); -- } ++ /* load the vertex shader ++ * We pre-load vertex programs in RADEONInit3DEngine(): ++ * - exa no mask ++ * - exa mask ++ * - Xv ++ * Here we select the offset of the vertex program we want to use ++ */ ++ if (info->has_tcl) { ++ if (pMask) { ++ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, ++ ((0 << R300_PVS_FIRST_INST_SHIFT) | ++ (2 << R300_PVS_XYZW_VALID_INST_SHIFT) | ++ (2 << R300_PVS_LAST_INST_SHIFT))); ++ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, ++ (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); ++ } else { ++ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, ++ ((3 << R300_PVS_FIRST_INST_SHIFT) | ++ (4 << R300_PVS_XYZW_VALID_INST_SHIFT) | ++ (4 << R300_PVS_LAST_INST_SHIFT))); ++ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, ++ (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); ++ } + } ++ ++ /* Position and one or two sets of 2 texture coordinates */ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); - OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, - ((2 << R300_TEX_0_COMP_CNT_SHIFT) | - (2 << R300_TEX_1_COMP_CNT_SHIFT))); +- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, +- ((2 << R300_TEX_0_COMP_CNT_SHIFT) | +- (2 << R300_TEX_1_COMP_CNT_SHIFT))); ++ if (pMask) ++ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ++ ((2 << R300_TEX_0_COMP_CNT_SHIFT) | ++ (2 << R300_TEX_1_COMP_CNT_SHIFT))); ++ else ++ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ++ (2 << R300_TEX_0_COMP_CNT_SHIFT)); -+ OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); ++ OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0); + OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 
FINISH_ACCEL(); @@ -6430,16 +8197,6 @@ index 9bbccb5..4e5ab81 100644 + R300_OUT_FMT_C2_SEL_BLUE | + R300_OUT_FMT_C3_SEL_ALPHA); + break; -+ case PICT_a1r5g5b5: -+ case PICT_x1r5g5b5: -+ /* fix me */ -+ case PICT_r5g6b5: -+ output_fmt = (R300_OUT_FMT_C_5_6_5 | -+ R300_OUT_FMT_C0_SEL_BLUE | -+ R300_OUT_FMT_C1_SEL_GREEN | -+ R300_OUT_FMT_C2_SEL_RED | -+ R300_OUT_FMT_C3_SEL_ALPHA); -+ break; + case PICT_a8: + output_fmt = (R300_OUT_FMT_C4_8 | + R300_OUT_FMT_C0_SEL_ALPHA); @@ -6467,95 +8224,22 @@ index 9bbccb5..4e5ab81 100644 - else - cblend |= R200_TXC_ARG_A_R0_COLOR; - ablend |= R200_TXA_ARG_A_R0_ALPHA; -+ /* setup the rasterizer */ ++ /* setup the rasterizer, load FS */ ++ BEGIN_ACCEL(9); + if (pMask) { -+ BEGIN_ACCEL(20); + /* 4 components: 2 for tex0, 2 for tex1 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); -+ /* rasterizer source table */ -+ OUT_ACCEL_REG(R300_RS_IP_0, -+ (R300_RS_TEX_PTR(0) | -+ R300_RS_SEL_S(R300_RS_SEL_C0) | -+ R300_RS_SEL_T(R300_RS_SEL_C1) | -+ R300_RS_SEL_R(R300_RS_SEL_K0) | -+ R300_RS_SEL_Q(R300_RS_SEL_K1))); -+ OUT_ACCEL_REG(R300_RS_IP_1, -+ (R300_RS_TEX_PTR(2) | -+ R300_RS_SEL_S(R300_RS_SEL_C0) | -+ R300_RS_SEL_T(R300_RS_SEL_C1) | -+ R300_RS_SEL_R(R300_RS_SEL_K0) | -+ R300_RS_SEL_Q(R300_RS_SEL_K1))); + ++ /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); -+ /* src tex */ -+ OUT_ACCEL_REG(R300_RS_INST_0, (R300_INST_TEX_ID(0) | -+ R300_RS_INST_TEX_CN_WRITE | -+ R300_INST_TEX_ADDR(0))); -+ /* mask tex */ -+ OUT_ACCEL_REG(R300_RS_INST_1, (R300_INST_TEX_ID(1) | -+ R300_RS_INST_TEX_CN_WRITE | -+ R300_INST_TEX_ADDR(1))); + -+ OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX); -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* max num of temps used */ + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -+ R300_ALU_CODE_SIZE(0) | -+ R300_TEX_CODE_OFFSET(0) | -+ 
R300_TEX_CODE_SIZE(1))); -+ -+ } else { -+ BEGIN_ACCEL(17); -+ /* 2 components: 2 for tex0 */ -+ OUT_ACCEL_REG(R300_RS_COUNT, -+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ OUT_ACCEL_REG(R300_RS_IP_0, -+ (R300_RS_TEX_PTR(0) | -+ R300_RS_SEL_S(R300_RS_SEL_C0) | -+ R300_RS_SEL_T(R300_RS_SEL_C1) | -+ R300_RS_SEL_R(R300_RS_SEL_K0) | -+ R300_RS_SEL_Q(R300_RS_SEL_K1))); -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); -+ /* src tex */ -+ OUT_ACCEL_REG(R300_RS_INST_0, (R300_INST_TEX_ID(0) | -+ R300_RS_INST_TEX_CN_WRITE | -+ R300_INST_TEX_ADDR(0))); -+ -+ OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX); -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* max num of temps used */ -+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -+ R300_ALU_CODE_SIZE(0) | -+ R300_TEX_CODE_OFFSET(0) | -+ R300_TEX_CODE_SIZE(0))); - -- if (pMask) { -- if (pMaskPicture->componentAlpha && -- pDstPicture->format != PICT_a8) -- cblend |= R200_TXC_ARG_B_R1_COLOR; -- else -- cblend |= R200_TXC_ARG_B_R1_ALPHA; -- ablend |= R200_TXA_ARG_B_R1_ALPHA; -+ } -+ -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_0, -+ (R300_ALU_START(0) | -+ R300_ALU_SIZE(0) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0))); -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_1, -+ (R300_ALU_START(0) | -+ R300_ALU_SIZE(0) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0))); -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_2, -+ (R300_ALU_START(0) | -+ R300_ALU_SIZE(0) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0))); ++ R300_ALU_CODE_SIZE(0) | ++ R300_TEX_CODE_OFFSET(0) | ++ R300_TEX_CODE_SIZE(1))); + -+ if (pMask) { + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, + (R300_ALU_START(0) | + R300_ALU_SIZE(0) | @@ -6563,6 +8247,18 @@ index 9bbccb5..4e5ab81 100644 + R300_TEX_SIZE(1) | + R300_RGBA_OUT)); + } else { ++ /* 2 components: 2 for tex0 */ ++ OUT_ACCEL_REG(R300_RS_COUNT, ++ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | ++ R300_RS_COUNT_HIRES_EN)); ++ ++ OUT_ACCEL_REG(R300_RS_INST_COUNT, 
R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); ++ ++ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | ++ R300_ALU_CODE_SIZE(0) | ++ R300_TEX_CODE_OFFSET(0) | ++ R300_TEX_CODE_SIZE(0))); ++ + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, + (R300_ALU_START(0) | + R300_ALU_SIZE(0) | @@ -6570,23 +8266,27 @@ index 9bbccb5..4e5ab81 100644 + R300_TEX_SIZE(0) | + R300_RGBA_OUT)); + } -+ + +- if (pMask) { +- if (pMaskPicture->componentAlpha && +- pDstPicture->format != PICT_a8) +- cblend |= R200_TXC_ARG_B_R1_COLOR; +- else +- cblend |= R200_TXC_ARG_B_R1_ALPHA; +- ablend |= R200_TXA_ARG_B_R1_ALPHA; ++ /* shader output swizzling */ + OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); + -+ OUT_ACCEL_REG(R300_US_TEX_INST_0, -+ (R300_TEX_SRC_ADDR(0) | -+ R300_TEX_DST_ADDR(0) | -+ R300_TEX_ID(0) | -+ R300_TEX_INST(R300_TEX_INST_LD))); -+ -+ if (pMask) { -+ OUT_ACCEL_REG(R300_US_TEX_INST_1, -+ (R300_TEX_SRC_ADDR(1) | -+ R300_TEX_DST_ADDR(1) | -+ R300_TEX_ID(1) | -+ R300_TEX_INST(R300_TEX_INST_LD))); -+ } ++ /* tex inst for src texture is pre-loaded in RADEONInit3DEngine() */ ++ /* tex inst for mask texture is pre-loaded in RADEONInit3DEngine() */ + ++ /* RGB inst ++ * temp addresses for texture inputs ++ * ALU_RGB_ADDR0 is src tex (temp 0) ++ * ALU_RGB_ADDR1 is mask tex (temp 1) ++ * R300_ALU_RGB_OMASK - output components to write ++ * R300_ALU_RGB_TARGET_A - render target ++ */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, + (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(1) | @@ -6596,6 +8296,9 @@ index 9bbccb5..4e5ab81 100644 + R300_ALU_RGB_MASK_G | + R300_ALU_RGB_MASK_B)) | + R300_ALU_RGB_TARGET_A)); ++ /* RGB inst ++ * ALU operation ++ */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, + (R300_ALU_RGB_SEL_A(src_color) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | @@ -6606,6 +8309,13 @@ index 9bbccb5..4e5ab81 100644 + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); ++ /* Alpha inst ++ * temp addresses for texture inputs ++ * 
ALU_ALPHA_ADDR0 is src tex (0) ++ * ALU_ALPHA_ADDR1 is mask tex (1) ++ * R300_ALU_ALPHA_OMASK - output components to write ++ * R300_ALU_ALPHA_TARGET_A - render target ++ */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, + (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(1) | @@ -6614,6 +8324,9 @@ index 9bbccb5..4e5ab81 100644 + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); ++ /* Alpha inst ++ * ALU operation ++ */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, + (R300_ALU_ALPHA_SEL_A(src_alpha) | + R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | @@ -6729,77 +8442,34 @@ index 9bbccb5..4e5ab81 100644 + R300_OUT_FMT_C2_SEL_BLUE | + R300_OUT_FMT_C3_SEL_ALPHA); + break; -+ case PICT_a1r5g5b5: -+ case PICT_x1r5g5b5: -+ /* fix me */ -+ case PICT_r5g6b5: -+ output_fmt = (R300_OUT_FMT_C_5_6_5 | -+ R300_OUT_FMT_C0_SEL_BLUE | -+ R300_OUT_FMT_C1_SEL_GREEN | -+ R300_OUT_FMT_C2_SEL_RED | -+ R300_OUT_FMT_C3_SEL_ALPHA); -+ break; + case PICT_a8: + output_fmt = (R300_OUT_FMT_C4_8 | + R300_OUT_FMT_C0_SEL_ALPHA); + break; + } + ++ BEGIN_ACCEL(6); + if (pMask) { -+ BEGIN_ACCEL(13); ++ /* 4 components: 2 for tex0, 2 for tex1 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); -+ OUT_ACCEL_REG(R500_RS_IP_0, ((0 << R500_RS_IP_TEX_PTR_S_SHIFT) | -+ (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | -+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | -+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT))); -+ -+ OUT_ACCEL_REG(R500_RS_IP_1, ((2 << R500_RS_IP_TEX_PTR_S_SHIFT) | -+ (3 << R500_RS_IP_TEX_PTR_T_SHIFT) | -+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | -+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT))); + ++ /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); + -+ /* src tex */ -+ OUT_ACCEL_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) | -+ 
R500_RS_INST_TEX_CN_WRITE | -+ (0 << R500_RS_INST_TEX_ADDR_SHIFT))); -+ /* mask tex */ -+ OUT_ACCEL_REG(R500_RS_INST_1, ((1 << R500_RS_INST_TEX_ID_SHIFT) | -+ R500_RS_INST_TEX_CN_WRITE | -+ (1 << R500_RS_INST_TEX_ADDR_SHIFT))); -+ -+ OUT_ACCEL_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); -+ OUT_ACCEL_REG(R500_US_FC_CTRL, 0); + OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(2))); + OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | + R500_US_CODE_RANGE_SIZE(2))); + OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); + } else { -+ BEGIN_ACCEL(11); + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + -+ OUT_ACCEL_REG(R500_RS_IP_0, ((0 << R500_RS_IP_TEX_PTR_S_SHIFT) | -+ (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | -+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | -+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT))); -+ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); + -+ /* src tex */ -+ OUT_ACCEL_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) | -+ R500_RS_INST_TEX_CN_WRITE | -+ (0 << R500_RS_INST_TEX_ADDR_SHIFT))); -+ -+ OUT_ACCEL_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); -+ OUT_ACCEL_REG(R500_US_FC_CTRL, 0); + OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(1))); + OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | @@ -6813,6 +8483,7 @@ index 9bbccb5..4e5ab81 100644 + if (pMask) { + BEGIN_ACCEL(19); + OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, 0); ++ /* tex inst for src texture */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | @@ -6846,6 +8517,7 @@ index 9bbccb5..4e5ab81 100644 + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + ++ /* tex inst for mask texture */ + 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | @@ -6883,6 +8555,7 @@ index 9bbccb5..4e5ab81 100644 + } else { + BEGIN_ACCEL(13); + OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, 0); ++ /* tex inst for src texture */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | @@ -6919,6 +8592,8 @@ index 9bbccb5..4e5ab81 100644 + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + } + ++ /* ALU inst */ ++ /* *_OMASK* - output component write mask */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | @@ -6928,21 +8603,31 @@ index 9bbccb5..4e5ab81 100644 + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); -+ ++ /* ALU inst ++ * temp addresses for texture inputs ++ * RGB_ADDR0 is src tex (temp 0) ++ * RGB_ADDR1 is mask tex (temp 1) ++ */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(1) | + R500_RGB_ADDR2(0))); -+ ++ /* ALU inst ++ * temp addresses for texture inputs ++ * ALPHA_ADDR0 is src tex (temp 0) ++ * ALPHA_ADDR1 is mask tex (temp 1) ++ */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(1) | + R500_ALPHA_ADDR2(0))); + ++ /* R500_ALU_RGB_TARGET - RGB render target */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + src_color | + R500_ALU_RGB_SEL_B_SRC1 | + mask_color | + R500_ALU_RGB_TARGET(0))); + ++ /* R500_ALPHA_RGB_TARGET - alpha render target */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_ADDRD(0) | + R500_ALPHA_SEL_A_SRC0 | @@ -6979,8 +8664,269 @@ index 9bbccb5..4e5ab81 100644 FINISH_ACCEL(); + return TRUE; + } + +-#define VTX_COUNT 6 ++#define VTX_COUNT_MASK 6 ++#define VTX_COUNT 4 + + #ifdef ACCEL_CP + +-#define VTX_OUT(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ ++#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ + do { \ + 
OUT_RING_F(_dstX); \ + OUT_RING_F(_dstY); \ +@@ -1272,9 +1852,17 @@ do { \ + OUT_RING_F(_maskY); \ + } while (0) + ++#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \ ++do { \ ++ OUT_RING_F(_dstX); \ ++ OUT_RING_F(_dstY); \ ++ OUT_RING_F(_srcX); \ ++ OUT_RING_F(_srcY); \ ++} while (0) ++ + #else /* ACCEL_CP */ + +-#define VTX_OUT(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ ++#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ + do { \ + OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ + OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ +@@ -1284,6 +1872,14 @@ do { \ + OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY); \ + } while (0) + ++#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \ ++do { \ ++ OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ ++ OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ ++ OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ ++ OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ ++} while (0) ++ + #endif /* !ACCEL_CP */ + + #ifdef ONLY_ONCE +@@ -1299,11 +1895,11 @@ static inline void transformPoint(PictTransform *transform, xPointFixed *point) + } + #endif + +-static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, +- int srcX, int srcY, +- int maskX, int maskY, +- int dstX, int dstY, +- int w, int h) ++static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, ++ int srcX, int srcY, ++ int maskX, int maskY, ++ int dstX, int dstY, ++ int w, int h) + { + RINFO_FROM_SCREEN(pDst->drawable.pScreen); + int vtx_count; +@@ -1347,9 +1943,12 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, + transformPoint(transform[1], &maskBottomRight); + } + +- vtx_count = VTX_COUNT; ++ if (has_mask) ++ vtx_count = VTX_COUNT_MASK; ++ else ++ vtx_count = VTX_COUNT; + +- if (IS_R300_VARIANT || IS_AVIVO_VARIANT) { ++ if (IS_R300_3D || IS_R500_3D) { + BEGIN_ACCEL(1); + OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count); + FINISH_ACCEL(); +@@ -1360,17 +1959,21 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, + BEGIN_RING(4 * vtx_count + 3); + 
OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, + 4 * vtx_count + 1)); +- OUT_RING(RADEON_CP_VC_FRMT_XY | +- RADEON_CP_VC_FRMT_ST0 | +- RADEON_CP_VC_FRMT_ST1); ++ if (has_mask) ++ OUT_RING(RADEON_CP_VC_FRMT_XY | ++ RADEON_CP_VC_FRMT_ST0 | ++ RADEON_CP_VC_FRMT_ST1); ++ else ++ OUT_RING(RADEON_CP_VC_FRMT_XY | ++ RADEON_CP_VC_FRMT_ST0); + OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN | + RADEON_CP_VC_CNTL_PRIM_WALK_RING | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); + } else { +- if (IS_R300_VARIANT || IS_AVIVO_VARIANT) +- BEGIN_RING(4 * vtx_count + 6); ++ if (IS_R300_3D | IS_R500_3D) ++ BEGIN_RING(4 * vtx_count + 4); + else + BEGIN_RING(4 * vtx_count + 2); + +@@ -1382,8 +1985,8 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, + } + + #else /* ACCEL_CP */ +- if (IS_R300_VARIANT || IS_AVIVO_VARIANT) +- BEGIN_ACCEL(3 + vtx_count * 4); ++ if (IS_R300_3D | IS_R500_3D) ++ BEGIN_ACCEL(2 + vtx_count * 4); + else + BEGIN_ACCEL(1 + vtx_count * 4); + +@@ -1399,24 +2002,34 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, + } + #endif + +- VTX_OUT((float)dstX, (float)dstY, +- xFixedToFloat(srcTopLeft.x) / info->texW[0], xFixedToFloat(srcTopLeft.y) / info->texH[0], +- xFixedToFloat(maskTopLeft.x) / info->texW[1], xFixedToFloat(maskTopLeft.y) / info->texH[1]); +- VTX_OUT((float)dstX, (float)(dstY + h), +- xFixedToFloat(srcBottomLeft.x) / info->texW[0], xFixedToFloat(srcBottomLeft.y) / info->texH[0], +- xFixedToFloat(maskBottomLeft.x) / info->texW[1], xFixedToFloat(maskBottomLeft.y) / info->texH[1]); +- VTX_OUT((float)(dstX + w), (float)(dstY + h), +- xFixedToFloat(srcBottomRight.x) / info->texW[0], xFixedToFloat(srcBottomRight.y) / info->texH[0], +- xFixedToFloat(maskBottomRight.x) / info->texW[1], xFixedToFloat(maskBottomRight.y) / info->texH[1]); +- VTX_OUT((float)(dstX + w), (float)dstY, +- xFixedToFloat(srcTopRight.x) / info->texW[0], xFixedToFloat(srcTopRight.y) / info->texH[0], +- 
xFixedToFloat(maskTopRight.x) / info->texW[1], xFixedToFloat(maskTopRight.y) / info->texH[1]); +- +- if (IS_R300_VARIANT || IS_AVIVO_VARIANT) { +- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); +- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); ++ if (has_mask) { ++ VTX_OUT_MASK((float)dstX, (float)dstY, ++ xFixedToFloat(srcTopLeft.x) / info->texW[0], xFixedToFloat(srcTopLeft.y) / info->texH[0], ++ xFixedToFloat(maskTopLeft.x) / info->texW[1], xFixedToFloat(maskTopLeft.y) / info->texH[1]); ++ VTX_OUT_MASK((float)dstX, (float)(dstY + h), ++ xFixedToFloat(srcBottomLeft.x) / info->texW[0], xFixedToFloat(srcBottomLeft.y) / info->texH[0], ++ xFixedToFloat(maskBottomLeft.x) / info->texW[1], xFixedToFloat(maskBottomLeft.y) / info->texH[1]); ++ VTX_OUT_MASK((float)(dstX + w), (float)(dstY + h), ++ xFixedToFloat(srcBottomRight.x) / info->texW[0], xFixedToFloat(srcBottomRight.y) / info->texH[0], ++ xFixedToFloat(maskBottomRight.x) / info->texW[1], xFixedToFloat(maskBottomRight.y) / info->texH[1]); ++ VTX_OUT_MASK((float)(dstX + w), (float)dstY, ++ xFixedToFloat(srcTopRight.x) / info->texW[0], xFixedToFloat(srcTopRight.y) / info->texH[0], ++ xFixedToFloat(maskTopRight.x) / info->texW[1], xFixedToFloat(maskTopRight.y) / info->texH[1]); ++ } else { ++ VTX_OUT((float)dstX, (float)dstY, ++ xFixedToFloat(srcTopLeft.x) / info->texW[0], xFixedToFloat(srcTopLeft.y) / info->texH[0]); ++ VTX_OUT((float)dstX, (float)(dstY + h), ++ xFixedToFloat(srcBottomLeft.x) / info->texW[0], xFixedToFloat(srcBottomLeft.y) / info->texH[0]); ++ VTX_OUT((float)(dstX + w), (float)(dstY + h), ++ xFixedToFloat(srcBottomRight.x) / info->texW[0], xFixedToFloat(srcBottomRight.y) / info->texH[0]); ++ VTX_OUT((float)(dstX + w), (float)dstY, ++ xFixedToFloat(srcTopRight.x) / info->texW[0], xFixedToFloat(srcTopRight.y) / info->texH[0]); + } + ++ if (IS_R300_3D | IS_R500_3D) ++ /* flushing is pipelined, free/finish is not */ ++ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, 
R300_DC_FLUSH_3D); ++ + #ifdef ACCEL_CP + ADVANCE_RING(); + #else +@@ -1426,14 +2039,88 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, + LEAVE_DRAW(0); + } + #undef VTX_OUT ++#undef VTX_OUT_MASK + +-#ifdef ONLY_ONCE +-static void RadeonDoneComposite(PixmapPtr pDst) ++static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, ++ int srcX, int srcY, ++ int maskX, int maskY, ++ int dstX, int dstY, ++ int width, int height) ++{ ++ int tileSrcY, tileMaskY, tileDstY; ++ int remainingHeight; ++ ++ if (!need_src_tile_x && !need_src_tile_y) { ++ FUNC_NAME(RadeonCompositeTile)(pDst, ++ srcX, srcY, ++ maskX, maskY, ++ dstX, dstY, ++ width, height); ++ return; ++ } ++ ++ /* Tiling logic borrowed from exaFillRegionTiled */ ++ ++ modulus(srcY, src_tile_height, tileSrcY); ++ tileMaskY = maskY; ++ tileDstY = dstY; ++ ++ remainingHeight = height; ++ while (remainingHeight > 0) { ++ int remainingWidth = width; ++ int tileSrcX, tileMaskX, tileDstX; ++ int h = src_tile_height - tileSrcY; ++ ++ if (h > remainingHeight) ++ h = remainingHeight; ++ remainingHeight -= h; ++ ++ modulus(srcX, src_tile_width, tileSrcX); ++ tileMaskX = maskX; ++ tileDstX = dstX; ++ ++ while (remainingWidth > 0) { ++ int w = src_tile_width - tileSrcX; ++ if (w > remainingWidth) ++ w = remainingWidth; ++ remainingWidth -= w; ++ ++ FUNC_NAME(RadeonCompositeTile)(pDst, ++ tileSrcX, tileSrcY, ++ tileMaskX, tileMaskY, ++ tileDstX, tileDstY, ++ w, h); ++ ++ tileSrcX = 0; ++ tileMaskX += w; ++ tileDstX += w; ++ } ++ tileSrcY = 0; ++ tileMaskY += h; ++ tileDstY += h; ++ } ++} ++ ++static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst) + { ++ RINFO_FROM_SCREEN(pDst->drawable.pScreen); ++ ACCEL_PREAMBLE(); ++ + ENTER_DRAW(0); ++ ++ if (IS_R500_3D || ((info->ChipFamily == CHIP_FAMILY_RS400) || ++ (info->ChipFamily == CHIP_FAMILY_RS690) || ++ (info->ChipFamily == CHIP_FAMILY_RS740))) { ++ /* r500 shows corruption on small things like glyphs without a 3D idle ++ * IGP shows more substantial corruption ++ */ ++ 
BEGIN_ACCEL(1); ++ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); ++ FINISH_ACCEL(); ++ } ++ + LEAVE_DRAW(0); + } +-#endif /* ONLY_ONCE */ + + #undef ONLY_ONCE + #undef FUNC_NAME diff --git a/src/radeon_output.c b/src/radeon_output.c -index 62cc5d4..28539d4 100644 +index 62cc5d4..907d824 100644 --- a/src/radeon_output.c +++ b/src/radeon_output.c @@ -178,15 +178,9 @@ static Bool AVIVOI2CDoLock(xf86OutputPtr output, int lock_state); @@ -7026,7 +8972,30 @@ index 62cc5d4..28539d4 100644 } else { xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "DDC2/I2C is not properly initialized\n"); MonType = MT_NONE; -@@ -673,15 +669,15 @@ radeon_bios_output_lock(xf86OutputPtr output, Bool lock) +@@ -485,6 +481,10 @@ static void + radeon_dpms(xf86OutputPtr output, int mode) + { + RADEONInfoPtr info = RADEONPTR(output->scrn); ++ RADEONOutputPrivatePtr radeon_output = output->driver_private; ++ ++ if ((mode == DPMSModeOn) && radeon_output->enabled) ++ return; + + if (IS_AVIVO_VARIANT) { + atombios_output_dpms(output, mode); +@@ -493,6 +493,11 @@ radeon_dpms(xf86OutputPtr output, int mode) + } + radeon_bios_output_dpms(output, mode); + ++ if (mode == DPMSModeOn) ++ radeon_output->enabled = TRUE; ++ else ++ radeon_output->enabled = FALSE; ++ + } + + static void +@@ -673,15 +678,15 @@ radeon_bios_output_lock(xf86OutputPtr output, Bool lock) if (info->IsAtomBios) { if (lock) { @@ -7046,7 +9015,7 @@ index 62cc5d4..28539d4 100644 } } if (info->ChipFamily >= CHIP_FAMILY_R600) -@@ -1168,6 +1164,7 @@ static Atom tmds_pll_atom; +@@ -1168,6 +1173,7 @@ static Atom tmds_pll_atom; static Atom rmx_atom; static Atom monitor_type_atom; static Atom load_detection_atom; @@ -7054,7 +9023,7 @@ index 62cc5d4..28539d4 100644 static Atom tv_hsize_atom; static Atom tv_hpos_atom; static Atom tv_vpos_atom; -@@ -1235,6 +1232,30 @@ radeon_create_resources(xf86OutputPtr output) +@@ -1235,6 +1241,30 @@ radeon_create_resources(xf86OutputPtr output) } } @@ -7085,7 +9054,7 @@ index 62cc5d4..28539d4 100644 if 
(OUTPUT_IS_DVI && radeon_output->TMDSType == TMDS_INT) { tmds_pll_atom = MAKE_ATOM("tmds_pll"); -@@ -1413,6 +1434,26 @@ radeon_create_resources(xf86OutputPtr output) +@@ -1413,6 +1443,26 @@ radeon_create_resources(xf86OutputPtr output) } static Bool @@ -7112,7 +9081,7 @@ index 62cc5d4..28539d4 100644 radeon_set_property(xf86OutputPtr output, Atom property, RRPropertyValuePtr value) { -@@ -1451,22 +1492,47 @@ radeon_set_property(xf86OutputPtr output, Atom property, +@@ -1451,22 +1501,47 @@ radeon_set_property(xf86OutputPtr output, Atom property, radeon_output->load_detection = val; @@ -7164,7 +9133,7 @@ index 62cc5d4..28539d4 100644 } else if (property == tmds_pll_atom) { const char *s; if (value->type != XA_STRING || value->format != 8) -@@ -1475,12 +1541,12 @@ radeon_set_property(xf86OutputPtr output, Atom property, +@@ -1475,12 +1550,12 @@ radeon_set_property(xf86OutputPtr output, Atom property, if (value->size == strlen("bios") && !strncmp("bios", s, strlen("bios"))) { if (!RADEONGetTMDSInfoFromBIOS(output)) RADEONGetTMDSInfoFromTable(output); @@ -7181,7 +9150,7 @@ index 62cc5d4..28539d4 100644 } else if (property == monitor_type_atom) { const char *s; if (value->type != XA_STRING || value->format != 8) -@@ -1495,8 +1561,8 @@ radeon_set_property(xf86OutputPtr output, Atom property, +@@ -1495,8 +1570,8 @@ radeon_set_property(xf86OutputPtr output, Atom property, } else if (value->size == strlen("digital") && !strncmp("digital", s, strlen("digital"))) { radeon_output->DVIType = DVI_DIGITAL; return TRUE; @@ -7192,7 +9161,7 @@ index 62cc5d4..28539d4 100644 } else if (property == tv_hsize_atom) { if (value->type != XA_INTEGER || value->format != 32 || -@@ -1511,7 +1577,7 @@ radeon_set_property(xf86OutputPtr output, Atom property, +@@ -1511,7 +1586,7 @@ radeon_set_property(xf86OutputPtr output, Atom property, radeon_output->hSize = val; if (radeon_output->tv_on && !IS_AVIVO_VARIANT) RADEONUpdateHVPosition(output, &output->crtc->mode); @@ -7201,7 +9170,7 @@ index 
62cc5d4..28539d4 100644 } else if (property == tv_hpos_atom) { if (value->type != XA_INTEGER || value->format != 32 || -@@ -1526,7 +1592,7 @@ radeon_set_property(xf86OutputPtr output, Atom property, +@@ -1526,7 +1601,7 @@ radeon_set_property(xf86OutputPtr output, Atom property, radeon_output->hPos = val; if (radeon_output->tv_on && !IS_AVIVO_VARIANT) RADEONUpdateHVPosition(output, &output->crtc->mode); @@ -7210,7 +9179,7 @@ index 62cc5d4..28539d4 100644 } else if (property == tv_vpos_atom) { if (value->type != XA_INTEGER || value->format != 32 || -@@ -1541,38 +1607,38 @@ radeon_set_property(xf86OutputPtr output, Atom property, +@@ -1541,38 +1616,38 @@ radeon_set_property(xf86OutputPtr output, Atom property, radeon_output->vPos = val; if (radeon_output->tv_on && !IS_AVIVO_VARIANT) RADEONUpdateHVPosition(output, &output->crtc->mode); @@ -7259,7 +9228,7 @@ index 62cc5d4..28539d4 100644 } return TRUE; -@@ -1622,6 +1688,8 @@ void RADEONSetOutputType(ScrnInfoPtr pScrn, RADEONOutputPrivatePtr radeon_output +@@ -1622,6 +1697,8 @@ void RADEONSetOutputType(ScrnInfoPtr pScrn, RADEONOutputPrivatePtr radeon_output case CONNECTOR_HDMI_TYPE_A: case CONNECTOR_HDMI_TYPE_B: output = OUTPUT_HDMI; break; @@ -7268,7 +9237,7 @@ index 62cc5d4..28539d4 100644 case CONNECTOR_DIGITAL: case CONNECTOR_NONE: case CONNECTOR_UNSUPPORTED: -@@ -2139,16 +2207,17 @@ void RADEONInitConnector(xf86OutputPtr output) +@@ -2139,16 +2216,17 @@ void RADEONInitConnector(xf86OutputPtr output) RADEONInfoPtr info = RADEONPTR(pScrn); RADEONOutputPrivatePtr radeon_output = output->driver_private; @@ -7291,7 +9260,7 @@ index 62cc5d4..28539d4 100644 if (radeon_output->type == OUTPUT_LVDS) { radeon_output->rmx_type = RMX_FULL; -@@ -2189,6 +2258,9 @@ void RADEONInitConnector(xf86OutputPtr output) +@@ -2189,6 +2267,9 @@ void RADEONInitConnector(xf86OutputPtr output) RADEONGetTVDacAdjInfo(output); } @@ -7301,7 +9270,7 @@ index 62cc5d4..28539d4 100644 if (radeon_output->ddc_i2c.valid) RADEONI2CInit(output, 
&radeon_output->pI2CBus, output->name, FALSE); -@@ -2729,11 +2801,12 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) +@@ -2729,11 +2810,12 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) for (i = 0 ; i < RADEON_MAX_BIOS_CONNECTOR; i++) { if (info->BiosConnector[i].valid) { @@ -7315,7 +9284,7 @@ index 62cc5d4..28539d4 100644 if (!radeon_output) { return FALSE; } -@@ -2742,6 +2815,7 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) +@@ -2742,6 +2824,7 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) radeon_output->devices = info->BiosConnector[i].devices; radeon_output->output_id = info->BiosConnector[i].output_id; radeon_output->ddc_i2c = info->BiosConnector[i].ddc_i2c; @@ -7421,7 +9390,7 @@ index 4ec7485..5d2eb43 100644 /* Return the options for supported chipset 'n'; NULL otherwise */ static const OptionInfoRec * diff --git a/src/radeon_probe.h b/src/radeon_probe.h -index 9c1bdc5..0096ce0 100644 +index 9c1bdc5..f03e997 100644 --- a/src/radeon_probe.h +++ b/src/radeon_probe.h @@ -104,7 +104,8 @@ typedef enum @@ -7434,7 +9403,7 @@ index 9c1bdc5..0096ce0 100644 } RADEONTmdsType; typedef enum -@@ -191,12 +192,7 @@ typedef struct _RADEONCrtcPrivateRec { +@@ -191,12 +192,8 @@ typedef struct _RADEONCrtcPrivateRec { CARD8 lut_r[256], lut_g[256], lut_b[256]; uint32_t crtc_offset; @@ -7445,10 +9414,11 @@ index 9c1bdc5..0096ce0 100644 - INT16 cursor_x; - INT16 cursor_y; + int can_tile; ++ Bool enabled; } RADEONCrtcPrivateRec, *RADEONCrtcPrivatePtr; typedef struct { -@@ -208,6 +204,7 @@ typedef struct { +@@ -208,6 +205,7 @@ typedef struct { int devices; int hpd_mask; RADEONI2CBusRec ddc_i2c; @@ -7456,7 +9426,7 @@ index 9c1bdc5..0096ce0 100644 } RADEONBIOSConnector; typedef struct _RADEONOutputPrivateRec { -@@ -256,6 +253,10 @@ typedef struct _RADEONOutputPrivateRec { +@@ -256,10 +254,15 @@ typedef struct _RADEONOutputPrivateRec { int SupportedTVStds; Bool tv_on; int load_detection; @@ -7467,7 +9437,20 @@ index 9c1bdc5..0096ce0 100644 char *name; int output_id; -@@ -310,24 
+311,6 @@ struct avivo_grph_state { + int devices; ++ Bool enabled; + } RADEONOutputPrivateRec, *RADEONOutputPrivatePtr; + + struct avivo_pll_state { +@@ -273,7 +276,6 @@ struct avivo_pll_state { + CARD32 int_ss_cntl; + }; + +- + struct avivo_crtc_state { + CARD32 pll_source; + CARD32 h_total; +@@ -310,24 +312,6 @@ struct avivo_grph_state { CARD32 viewport_start; CARD32 viewport_size; @@ -7492,7 +9475,7 @@ index 9c1bdc5..0096ce0 100644 }; struct avivo_state -@@ -343,9 +326,6 @@ struct avivo_state +@@ -343,9 +327,6 @@ struct avivo_state CARD32 crtc_master_en; CARD32 crtc_tv_control; @@ -7502,7 +9485,7 @@ index 9c1bdc5..0096ce0 100644 struct avivo_pll_state pll1; struct avivo_pll_state pll2; -@@ -355,11 +335,41 @@ struct avivo_state +@@ -355,11 +336,41 @@ struct avivo_state struct avivo_grph_state grph1; struct avivo_grph_state grph2; @@ -7549,7 +9532,7 @@ index 9c1bdc5..0096ce0 100644 }; -@@ -495,10 +505,16 @@ typedef struct { +@@ -495,10 +506,16 @@ typedef struct { CARD32 palette[256]; CARD32 palette2[256]; @@ -7570,8 +9553,16 @@ index 9c1bdc5..0096ce0 100644 /* TV out registers */ CARD32 tv_master_cntl; +@@ -571,6 +588,7 @@ typedef struct + RADEONSaveRec ModeReg; /* Current mode */ + RADEONSaveRec SavedReg; /* Original (text) mode */ + ++ void *MMIO; /* Map of MMIO region */ + } RADEONEntRec, *RADEONEntPtr; + + /* radeon_probe.c */ diff --git a/src/radeon_reg.h b/src/radeon_reg.h -index 046c52b..dcfdbac 100644 +index 046c52b..815bcaa 100644 --- a/src/radeon_reg.h +++ b/src/radeon_reg.h @@ -887,6 +887,33 @@ @@ -7608,7 +9599,33 @@ index 046c52b..dcfdbac 100644 #define RADEON_GEN_INT_CNTL 0x0040 #define RADEON_GEN_INT_STATUS 0x0044 -@@ -3328,10 +3355,32 @@ +@@ -1634,9 +1661,25 @@ + + #define RADEON_WAIT_UNTIL 0x1720 + # define RADEON_WAIT_CRTC_PFLIP (1 << 0) ++# define RADEON_WAIT_RE_CRTC_VLINE (1 << 1) ++# define RADEON_WAIT_FE_CRTC_VLINE (1 << 2) ++# define RADEON_WAIT_CRTC_VLINE (1 << 3) ++# define RADEON_WAIT_DMA_VID_IDLE (1 << 8) ++# define 
RADEON_WAIT_DMA_GUI_IDLE (1 << 9) ++# define RADEON_WAIT_CMDFIFO (1 << 10) /* wait for CMDFIFO_ENTRIES */ ++# define RADEON_WAIT_OV0_FLIP (1 << 11) ++# define RADEON_WAIT_AGP_FLUSH (1 << 13) ++# define RADEON_WAIT_2D_IDLE (1 << 14) ++# define RADEON_WAIT_3D_IDLE (1 << 15) + # define RADEON_WAIT_2D_IDLECLEAN (1 << 16) + # define RADEON_WAIT_3D_IDLECLEAN (1 << 17) + # define RADEON_WAIT_HOST_IDLECLEAN (1 << 18) ++# define RADEON_CMDFIFO_ENTRIES_SHIFT 10 ++# define RADEON_CMDFIFO_ENTRIES_MASK 0x7f ++# define RADEON_WAIT_VAP_IDLE (1 << 28) ++# define RADEON_WAIT_BOTH_CRTC_PFLIP (1 << 30) ++# define RADEON_ENG_DISPLAY_SELECT_CRTC0 (0 << 31) ++# define RADEON_ENG_DISPLAY_SELECT_CRTC1 (1 << 31) + + #define RADEON_X_MPLL_REF_FB_DIV 0x000a /* PLL */ + #define RADEON_XCLK_CNTL 0x000d /* PLL */ +@@ -3328,10 +3371,32 @@ # define RADEON_TVPLL_TEST_DIS (1 << 31) # define RADEON_TVCLK_SRC_SEL_TVPLL (1 << 30) @@ -7645,7 +9662,16 @@ index 046c52b..dcfdbac 100644 #define RS690_MC_INDEX 0x78 # define RS690_MC_INDEX_MASK 0x1ff -@@ -3555,6 +3604,8 @@ +@@ -3475,6 +3540,8 @@ + #define AVIVO_D1CUR_SIZE 0x6410 + #define AVIVO_D1CUR_POSITION 0x6414 + #define AVIVO_D1CUR_HOT_SPOT 0x6418 ++#define AVIVO_D1CUR_UPDATE 0x6424 ++# define AVIVO_D1CURSOR_UPDATE_LOCK (1 << 16) + + #define AVIVO_DC_LUT_RW_SELECT 0x6480 + #define AVIVO_DC_LUT_RW_MODE 0x6484 +@@ -3555,6 +3622,8 @@ #define AVIVO_D2SCL_SCALER_ENABLE 0x6d90 #define AVIVO_D2SCL_SCALER_TAP_CONTROL 0x6d94 @@ -7654,7 +9680,7 @@ index 046c52b..dcfdbac 100644 #define AVIVO_DACA_ENABLE 0x7800 # define AVIVO_DAC_ENABLE (1 << 0) #define AVIVO_DACA_SOURCE_SELECT 0x7804 -@@ -3745,6 +3796,8 @@ +@@ -3745,6 +3814,8 @@ # define AVIVO_LVDS_BACKLIGHT_LEVEL_MASK 0x0000ff00 # define AVIVO_LVDS_BACKLIGHT_LEVEL_SHIFT 8 @@ -7663,7 +9689,51 @@ index 046c52b..dcfdbac 100644 #define AVIVO_GPIO_0 0x7e30 #define AVIVO_GPIO_1 0x7e40 #define AVIVO_GPIO_2 0x7e50 -@@ -3986,6 +4039,123 @@ +@@ -3832,6 +3903,7 @@ + #define R300_GB_SELECT 0x401c + #define R300_GB_ENABLE 
0x4008 + #define R300_GB_AA_CONFIG 0x4020 ++#define R400_GB_PIPE_SELECT 0x402c + #define R300_GB_MSPOS0 0x4010 + # define R300_MS_X0_SHIFT 0 + # define R300_MS_Y0_SHIFT 4 +@@ -3850,6 +3922,10 @@ + # define R300_MS_Y5_SHIFT 20 + # define R300_MSBD1_SHIFT 24 + ++#define R300_GA_ENHANCE 0x4274 ++# define R300_GA_DEADLOCK_CNTL (1 << 0) ++# define R300_GA_FASTSYNC_CNTL (1 << 1) ++ + #define R300_GA_POLY_MODE 0x4288 + # define R300_FRONT_PTYPE_POINT (0 << 4) + # define R300_FRONT_PTYPE_LINE (1 << 4) +@@ -3889,6 +3965,8 @@ + # define R300_ALPHA3_SHADING_GOURAUD (2 << 14) + #define R300_GA_OFFSET 0x4290 + ++#define R500_SU_REG_DEST 0x42c8 ++ + #define R300_VAP_CNTL_STATUS 0x2140 + # define R300_PVS_BYPASS (1 << 8) + #define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 +@@ -3899,6 +3977,7 @@ + # define R300_VF_MAX_VTX_NUM_SHIFT 18 + # define R300_GL_CLIP_SPACE_DEF (0 << 22) + # define R300_DX_CLIP_SPACE_DEF (1 << 22) ++# define R500_TCL_STATE_OPTIMIZATION (1 << 23) + #define R300_VAP_VTE_CNTL 0x20B0 + # define R300_VPORT_X_SCALE_ENA (1 << 0) + # define R300_VPORT_X_OFFSET_ENA (1 << 1) +@@ -3909,6 +3988,7 @@ + # define R300_VTX_XY_FMT (1 << 8) + # define R300_VTX_Z_FMT (1 << 9) + # define R300_VTX_W0_FMT (1 << 10) ++#define R300_VAP_VTX_STATE_CNTL 0x2180 + #define R300_VAP_PSC_SGN_NORM_CNTL 0x21DC + #define R300_VAP_PROG_STREAM_CNTL_0 0x2150 + # define R300_DATA_TYPE_0_SHIFT 0 +@@ -3986,6 +4066,123 @@ # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 #define R300_VAP_PVS_VECTOR_DATA_REG 0x2204 @@ -7787,15 +9857,17 @@ index 046c52b..dcfdbac 100644 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC #define R300_VAP_OUT_VTX_FMT_0 0x2090 # define R300_VTX_POS_PRESENT (1 << 0) -@@ -4019,6 +4189,7 @@ +@@ -4019,6 +4216,9 @@ # define R300_CLIP_DISABLE (1 << 16) # define R300_UCP_CULL_ONLY_ENA (1 << 17) # define R300_BOUNDARY_EDGE_FLAG_ENA (1 << 18) +#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 ++ ++#define R500_VAP_INDEX_OFFSET 0x208c #define R300_SU_TEX_WRAP 
0x42a0 #define R300_SU_POLY_OFFSET_ENABLE 0x42b4 -@@ -4036,6 +4207,7 @@ +@@ -4036,6 +4236,7 @@ # define R300_RS_COUNT_HIRES_EN (1 << 18) #define R300_RS_IP_0 0x4310 @@ -7803,7 +9875,7 @@ index 046c52b..dcfdbac 100644 # define R300_RS_TEX_PTR(x) (x << 0) # define R300_RS_COL_PTR(x) (x << 6) # define R300_RS_COL_FMT(x) (x << 9) -@@ -4063,7 +4235,10 @@ +@@ -4063,7 +4264,10 @@ # define R300_RS_W_EN (1 << 4) # define R300_TX_OFFSET_RS(x) (x << 5) #define R300_RS_INST_0 0x4330 @@ -7814,7 +9886,7 @@ index 046c52b..dcfdbac 100644 #define R300_TX_INVALTAGS 0x4100 #define R300_TX_FILTER0_0 0x4400 -@@ -4082,6 +4257,7 @@ +@@ -4082,6 +4286,7 @@ # define R300_TX_MIN_FILTER_NEAREST (1 << 11) # define R300_TX_MAG_FILTER_LINEAR (2 << 9) # define R300_TX_MIN_FILTER_LINEAR (2 << 11) @@ -7822,7 +9894,7 @@ index 046c52b..dcfdbac 100644 #define R300_TX_FILTER1_0 0x4440 #define R300_TX_FORMAT0_0 0x4480 # define R300_TXWIDTH_SHIFT 0 -@@ -4164,11 +4340,14 @@ +@@ -4164,11 +4369,16 @@ # define R300_TX_FORMAT_SWAP_YUV (1 << 24) #define R300_TX_FORMAT2_0 0x4500 @@ -7835,10 +9907,12 @@ index 046c52b..dcfdbac 100644 # define R300_ENDIAN_SWAP_HALF_DWORD (3 << 0) -# define R300_MACRO_TILE (1 << 2); +# define R300_MACRO_TILE (1 << 2) ++ ++#define R300_TX_BORDER_COLOR_0 0x45c0 #define R300_TX_ENABLE 0x4104 # define R300_TEX_0_ENABLE (1 << 0) -@@ -4189,7 +4368,7 @@ +@@ -4189,7 +4399,7 @@ # define R300_OUT_FMT_C2_16_MPEG (7 << 0) # define R300_OUT_FMT_C2_4 (8 << 0) # define R300_OUT_FMT_C_3_3_2 (9 << 0) @@ -7847,7 +9921,7 @@ index 046c52b..dcfdbac 100644 # define R300_OUT_FMT_C_11_11_10 (11 << 0) # define R300_OUT_FMT_C_10_11_11 (12 << 0) # define R300_OUT_FMT_C_2_10_10_10 (13 << 0) -@@ -4227,14 +4406,197 @@ +@@ -4227,28 +4437,221 @@ # define R300_TEX_CODE_OFFSET(x) (x << 13) # define R300_TEX_CODE_SIZE(x) (x << 18) #define R300_US_CODE_ADDR_0 0x4610 @@ -8045,7 +10119,34 @@ index 046c52b..dcfdbac 100644 #define R300_FG_DEPTH_SRC 0x4bd8 #define R300_FG_FOG_BLEND 0x4bc0 -@@ -4256,6 +4618,9 @@ + #define 
R300_FG_ALPHA_FUNC 0x4bd4 + ++#define R300_DST_PIPE_CONFIG 0x170c ++# define R300_PIPE_AUTO_CONFIG (1 << 31) ++#define R300_RB2D_DSTCACHE_MODE 0x3428 ++# define R300_DC_AUTOFLUSH_ENABLE (1 << 8) ++# define R300_DC_DC_DISABLE_IGNORE_PE (1 << 17) ++#define R300_RB2D_DSTCACHE_CTLSTAT 0x342c ++# define R300_DC_FLUSH_2D (1 << 0) ++# define R300_DC_FREE_2D (1 << 2) ++# define R300_RB2D_DC_FLUSH_ALL (R300_DC_FLUSH_2D | R300_DC_FREE_2D) ++# define R300_RB2D_DC_BUSY (1 << 31) + #define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c + # define R300_DC_FLUSH_3D (2 << 0) + # define R300_DC_FREE_3D (2 << 2) ++# define R300_RB3D_DC_FLUSH_ALL (R300_DC_FLUSH_3D | R300_DC_FREE_3D) ++# define R300_DC_FINISH_3D (1 << 4) + #define R300_RB3D_ZCACHE_CTLSTAT 0x4f18 + # define R300_ZC_FLUSH (1 << 0) + # define R300_ZC_FREE (1 << 1) +-#define R300_WAIT_UNTIL 0x1720 +-# define R300_WAIT_2D_IDLECLEAN (1 << 16) +-# define R300_WAIT_3D_IDLECLEAN (1 << 17) ++# define R300_ZC_FLUSH_ALL 0x3 + #define R300_RB3D_ZSTENCILCNTL 0x4f04 + #define R300_RB3D_ZCACHE_CTLSTAT 0x4f18 + #define R300_RB3D_BW_CNTL 0x4f1c +@@ -4256,6 +4659,9 @@ #define R300_RB3D_ZTOP 0x4f14 #define R300_RB3D_ROPCNTL 0x4e18 #define R300_RB3D_BLENDCNTL 0x4e04 @@ -8055,7 +10156,7 @@ index 046c52b..dcfdbac 100644 #define R300_RB3D_ABLENDCNTL 0x4e08 #define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c #define R300_RB3D_COLOROFFSET0 0x4e28 -@@ -4387,7 +4752,7 @@ +@@ -4387,7 +4793,7 @@ # define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) # define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) # define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) @@ -8064,7 +10165,7 @@ index 046c52b..dcfdbac 100644 #define R500_US_ALU_RGBA_INST_0 0xb000 # define R500_ALU_RGBA_OP_MAD (0 << 0) # define R500_ALU_RGBA_OP_DP3 (1 << 0) -@@ -4540,7 +4905,7 @@ +@@ -4540,7 +4946,7 @@ # define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) # define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) # define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) @@ -8073,7 +10174,7 @@ index 046c52b..dcfdbac 100644 #define 
R500_US_CMN_INST_0 0xb800 # define R500_INST_TYPE_ALU (0 << 0) # define R500_INST_TYPE_OUT (1 << 0) -@@ -4779,17 +5144,18 @@ +@@ -4779,17 +5185,18 @@ #define R500_GA_US_VECTOR_DATA 0x4254 #define R500_RS_INST_0 0x4320 @@ -8103,7 +10204,7 @@ index 046c52b..dcfdbac 100644 #define R500_US_FC_CTRL 0x4624 #define R500_US_CODE_ADDR 0x4630 -@@ -4797,16 +5163,17 @@ +@@ -4797,16 +5204,18 @@ #define R500_US_CODE_OFFSET 0x4638 #define R500_RS_IP_0 0x4074 @@ -8117,6 +10218,7 @@ index 046c52b..dcfdbac 100644 -#define R500_RS_IP_COL_FMT_SHIFT 27 -#define R500_RS_IP_COL_FMT_RGBA (0<<27) -#define R500_RS_IP_OFFSET_EN (1 << 31) +- +#define R500_RS_IP_1 0x4078 +# define R500_RS_IP_PTR_K0 62 +# define R500_RS_IP_PTR_K1 63 @@ -8128,7 +10230,8 @@ index 046c52b..dcfdbac 100644 +# define R500_RS_IP_COL_FMT_SHIFT 27 +# define R500_RS_IP_COL_FMT_RGBA (0 << 27) +# define R500_RS_IP_OFFSET_EN (1 << 31) - ++ ++#define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */ #endif diff --git a/src/radeon_render.c b/src/radeon_render.c @@ -8155,7 +10258,7 @@ index a80d136..950753c 100644 static void diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c -index 329a834..0a6598d 100644 +index 329a834..5d153e7 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -46,6 +46,9 @@ @@ -8177,6 +10280,15 @@ index 329a834..0a6598d 100644 return TRUE; else return FALSE; +@@ -223,7 +226,7 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, + left = (x1 >> 16) & ~1; + npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left; + +- pPriv->src_offset = pPriv->video_offset + info->fbLocation; ++ pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset; + pPriv->src_addr = (CARD8 *)(info->FB + pPriv->video_offset + (top * dstPitch)); + pPriv->src_pitch = dstPitch; + pPriv->size = size; @@ -300,6 +303,16 @@ static XF86VideoEncodingRec DummyEncoding[1] = } }; @@ -8216,10 +10328,10 @@ index 329a834..0a6598d 100644 adapt->pFormats = Formats; adapt->nPorts = num_texture_ports; diff 
--git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c -index e0f3bba..45dc0c9 100644 +index e0f3bba..b0286a6 100644 --- a/src/radeon_textured_videofuncs.c +++ b/src/radeon_textured_videofuncs.c -@@ -80,10 +80,9 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv +@@ -80,16 +80,15 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv CARD32 txenable, colorpitch; CARD32 blendcntl; int dstxoff, dstyoff, pixel_shift; @@ -8231,7 +10343,14 @@ index e0f3bba..45dc0c9 100644 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; -@@ -107,15 +106,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv + #ifdef USE_EXA + if (info->useEXA) { +- dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation; ++ dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; + dst_pitch = exaGetPixmapPitch(pPixmap); + } else + #endif +@@ -107,28 +106,22 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv dstyoff = 0; #endif @@ -8247,17 +10366,39 @@ index e0f3bba..45dc0c9 100644 if (!info->XInited3D) RADEONInit3DEngine(pScrn); -@@ -127,8 +117,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv + /* we can probably improve this */ + BEGIN_VIDEO(2); +- OUT_VIDEO_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); ++ if (IS_R300_3D || IS_R500_3D) ++ OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); ++ else ++ OUT_VIDEO_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); + /* We must wait for 3d to idle, in case source was just written as a dest. 
*/ + OUT_VIDEO_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); FINISH_VIDEO(); - if (IS_R300_VARIANT || IS_AVIVO_VARIANT) { - int has_tcl = (info->ChipFamily != CHIP_FAMILY_RS690 && info->ChipFamily != CHIP_FAMILY_RS400); + if (IS_R300_3D || IS_R500_3D) { ++ CARD32 output_fmt; switch (pPixmap->drawable.bitsPerPixel) { case 16: -@@ -157,8 +146,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv +@@ -144,6 +137,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv + return; + } + ++ output_fmt = (R300_OUT_FMT_C4_8 | ++ R300_OUT_FMT_C0_SEL_BLUE | ++ R300_OUT_FMT_C1_SEL_GREEN | ++ R300_OUT_FMT_C2_SEL_RED | ++ R300_OUT_FMT_C3_SEL_ALPHA); ++ + colorpitch = dst_pitch >> pixel_shift; + colorpitch |= dst_format; + +@@ -157,8 +156,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; @@ -8268,7 +10409,7 @@ index e0f3bba..45dc0c9 100644 txformat0 |= R300_TXPITCH_EN; -@@ -173,6 +162,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv +@@ -173,6 +172,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv txpitch = pPriv->src_pitch / 2; txpitch -= 1; @@ -8281,11 +10422,10 @@ index e0f3bba..45dc0c9 100644 txoffset = pPriv->src_offset; BEGIN_VIDEO(6); -@@ -186,110 +181,40 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - +@@ -187,173 +192,220 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv txenable = R300_TEX_0_ENABLE; -- /* setup the VAP */ + /* setup the VAP */ - if (has_tcl) { - BEGIN_VIDEO(26); - OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, 0); @@ -8358,11 +10498,51 @@ index e0f3bba..45dc0c9 100644 - - /* setup vertex shader */ - if (has_tcl) { -- OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_0, ++ if (info->has_tcl) ++ BEGIN_VIDEO(6); ++ else ++ 
BEGIN_VIDEO(4); ++ ++ /* These registers define the number, type, and location of data submitted ++ * to the PVS unit of GA input (when PVS is disabled) ++ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is ++ * enabled. This memory provides the imputs to the vertex shader program ++ * and ordering is not important. When PVS/TCL is disabled, this field maps ++ * directly to the GA input memory and the order is signifigant. In ++ * PVS_BYPASS mode the order is as follows: ++ * Position ++ * Point Size ++ * Color 0-3 ++ * Textures 0-7 ++ * Fog ++ */ ++ OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0, ++ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | ++ (0 << R300_SKIP_DWORDS_0_SHIFT) | ++ (0 << R300_DST_VEC_LOC_0_SHIFT) | ++ R300_SIGNED_0 | ++ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | ++ (0 << R300_SKIP_DWORDS_1_SHIFT) | ++ (6 << R300_DST_VEC_LOC_1_SHIFT) | ++ R300_LAST_VEC_1 | ++ R300_SIGNED_1)); ++ ++ /* load the vertex shader ++ * We pre-load vertex programs in RADEONInit3DEngine(): ++ * - exa no mask ++ * - exa mask ++ * - Xv ++ * Here we select the offset of the vertex program we want to use ++ */ ++ if (info->has_tcl) { + OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_0, - ((0 << R300_PVS_FIRST_INST_SHIFT) | - (1 << R300_PVS_XYZW_VALID_INST_SHIFT) | - (1 << R300_PVS_LAST_INST_SHIFT))); -- OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_1, ++ ((5 << R300_PVS_FIRST_INST_SHIFT) | ++ (6 << R300_PVS_XYZW_VALID_INST_SHIFT) | ++ (6 << R300_PVS_LAST_INST_SHIFT))); + OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_1, - (1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); - OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); - OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00f00203); @@ -8381,55 +10561,41 @@ index e0f3bba..45dc0c9 100644 - OUT_VIDEO_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000); - OUT_VIDEO_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000); - OUT_VIDEO_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); -- } -+ BEGIN_VIDEO(6); -+ OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0, -+ 
((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -+ (0 << R300_SKIP_DWORDS_0_SHIFT) | -+ (0 << R300_DST_VEC_LOC_0_SHIFT) | -+ R300_SIGNED_0 | -+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -+ (0 << R300_SKIP_DWORDS_1_SHIFT) | -+ (6 << R300_DST_VEC_LOC_1_SHIFT) | -+ R300_LAST_VEC_1 | -+ R300_SIGNED_1)); -+ OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, -+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) | -+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) | -+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y) -+ << R300_WRITE_ENA_0_SHIFT) | -+ (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) | -+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) | -+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) | -+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y) -+ << R300_WRITE_ENA_1_SHIFT))); ++ (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); + } ++ /* Position and one set of 2 texture coordinates */ OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); -+ -+ OUT_VIDEO_REG(R300_TX_INVALTAGS, 0); -+ OUT_VIDEO_REG(R300_TX_ENABLE, txenable); ++ OUT_VIDEO_REG(R300_US_OUT_FMT_0, output_fmt); FINISH_VIDEO(); /* setup pixel shader */ - if (IS_R300_VARIANT || info->ChipFamily == CHIP_FAMILY_RS690) { +- BEGIN_VIDEO(16); + if (IS_R300_3D) { - BEGIN_VIDEO(16); ++ BEGIN_VIDEO(8); ++ /* 2 components: 2 for tex0 */ OUT_VIDEO_REG(R300_RS_COUNT, ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -@@ -302,7 +227,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - R300_RS_SEL_T(R300_RS_SEL_C1) | - R300_RS_SEL_R(R300_RS_SEL_K0) | - R300_RS_SEL_Q(R300_RS_SEL_K1))); + R300_RS_COUNT_HIRES_EN)); +- OUT_VIDEO_REG(R300_RS_IP_0, +- (R300_RS_TEX_PTR(0) | +- 
R300_RS_COL_PTR(0) | +- R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA) | +- R300_RS_SEL_S(R300_RS_SEL_C0) | +- R300_RS_SEL_T(R300_RS_SEL_C1) | +- R300_RS_SEL_R(R300_RS_SEL_K0) | +- R300_RS_SEL_Q(R300_RS_SEL_K1))); - OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_TX_OFFSET_RS(6)); +- OUT_VIDEO_REG(R300_RS_INST_0, R300_RS_INST_TEX_CN_WRITE); +- OUT_VIDEO_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX); +- OUT_VIDEO_REG(R300_US_PIXSIZE, 0); ++ /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); - OUT_VIDEO_REG(R300_RS_INST_0, R300_RS_INST_TEX_CN_WRITE); - OUT_VIDEO_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX); - OUT_VIDEO_REG(R300_US_PIXSIZE, 0); -@@ -311,61 +236,186 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv ++ + OUT_VIDEO_REG(R300_US_CODE_OFFSET, + (R300_ALU_CODE_OFFSET(0) | R300_ALU_CODE_SIZE(1) | R300_TEX_CODE_OFFSET(0) | R300_TEX_CODE_SIZE(1))); @@ -8442,40 +10608,23 @@ index e0f3bba..45dc0c9 100644 - OUT_VIDEO_REG(R300_US_ALU_RGB_INST_0, 0x50a80); - OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR_0, 0x1800000); - OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST_0, 0x00040889); -+ OUT_VIDEO_REG(R300_US_CODE_ADDR_0, -+ (R300_ALU_START(0) | -+ R300_ALU_SIZE(0) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0))); -+ OUT_VIDEO_REG(R300_US_CODE_ADDR_1, -+ (R300_ALU_START(0) | -+ R300_ALU_SIZE(0) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0))); -+ OUT_VIDEO_REG(R300_US_CODE_ADDR_2, -+ (R300_ALU_START(0) | -+ R300_ALU_SIZE(0) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0))); ++ + OUT_VIDEO_REG(R300_US_CODE_ADDR_3, + (R300_ALU_START(0) | + R300_ALU_SIZE(0) | + R300_TEX_START(0) | + R300_TEX_SIZE(0) | + R300_RGBA_OUT)); -+ OUT_VIDEO_REG(R300_US_TEX_INST_0, -+ (R300_TEX_SRC_ADDR(0) | -+ R300_TEX_DST_ADDR(0) | -+ R300_TEX_ID(0) | -+ R300_TEX_INST(R300_TEX_INST_LD))); ++ ++ /* tex inst is preloaded in RADEONInit3DEngine() */ ++ ++ /* ALU inst */ ++ /* RGB */ + 
OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR_0, + (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | -+ R300_ALU_RGB_WMASK((R300_ALU_RGB_MASK_R | -+ R300_ALU_RGB_MASK_G | -+ R300_ALU_RGB_MASK_B)) | + R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | + R300_ALU_RGB_MASK_G | + R300_ALU_RGB_MASK_B)) | @@ -8490,12 +10639,12 @@ index e0f3bba..45dc0c9 100644 + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); ++ /* Alpha */ + OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR_0, + (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); @@ -8512,29 +10661,25 @@ index e0f3bba..45dc0c9 100644 FINISH_VIDEO(); } else { - BEGIN_VIDEO(22); -+ BEGIN_VIDEO(23); ++ BEGIN_VIDEO(18); ++ /* 2 components: 2 for tex0 */ OUT_VIDEO_REG(R300_RS_COUNT, ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | R300_RS_COUNT_HIRES_EN)); - OUT_VIDEO_REG(R500_RS_IP_0, (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); -+ OUT_VIDEO_REG(R500_RS_IP_0, ((0 << R500_RS_IP_TEX_PTR_S_SHIFT) | -+ (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | -+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | -+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT))); -+ -+ OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); - +- - OUT_VIDEO_REG(R300_RS_INST_COUNT, 0); - OUT_VIDEO_REG(R500_RS_INST_0, R500_RS_INST_TEX_CN_WRITE); -+ OUT_VIDEO_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) | -+ R500_RS_INST_TEX_CN_WRITE | -+ (0 << R500_RS_INST_TEX_ADDR_SHIFT))); - OUT_VIDEO_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - OUT_VIDEO_REG(R300_US_PIXSIZE, 0); - 
OUT_VIDEO_REG(R500_US_FC_CTRL, 0); +- OUT_VIDEO_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); +- OUT_VIDEO_REG(R300_US_PIXSIZE, 0); +- OUT_VIDEO_REG(R500_US_FC_CTRL, 0); - OUT_VIDEO_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); - OUT_VIDEO_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); ++ ++ /* R300_INST_COUNT_RS - highest RS instruction used */ ++ OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); ++ + OUT_VIDEO_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(1))); + OUT_VIDEO_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | @@ -8546,6 +10691,7 @@ index e0f3bba..45dc0c9 100644 - OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0xe4000400); - OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + ++ /* tex inst */ + OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | @@ -8587,6 +10733,7 @@ index e0f3bba..45dc0c9 100644 - OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00c0c000); - OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x20490000); + ++ /* ALU inst */ + OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | @@ -8601,14 +10748,12 @@ index e0f3bba..45dc0c9 100644 + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | -+ R500_RGB_ADDR2_CONST | -+ R500_RGB_SRCP_OP_1_MINUS_2RGB0)); ++ R500_RGB_ADDR2_CONST)); + OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | -+ R500_ALPHA_ADDR2_CONST | -+ R500_ALPHA_SRCP_OP_1_MINUS_2A0)); ++ R500_ALPHA_ADDR2_CONST)); + + OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | @@ -8632,50 +10777,56 @@ index e0f3bba..45dc0c9 100644 } - BEGIN_VIDEO(6); -- OUT_VIDEO_REG(R300_TX_INVALTAGS, 0); -- OUT_VIDEO_REG(R300_TX_ENABLE, txenable); -+ BEGIN_VIDEO(4); ++ BEGIN_VIDEO(5); + 
OUT_VIDEO_REG(R300_TX_INVALTAGS, 0); + OUT_VIDEO_REG(R300_TX_ENABLE, txenable); - OUT_VIDEO_REG(R300_RB3D_COLOROFFSET0, dst_offset); +@@ -361,8 +413,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv OUT_VIDEO_REG(R300_RB3D_COLORPITCH0, colorpitch); blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO; -- OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, blendcntl); ++ /* no need to enable blending */ + OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, blendcntl); - OUT_VIDEO_REG(R300_RB3D_ABLENDCNTL, 0); -- FINISH_VIDEO(); -+ OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE); - -- BEGIN_VIDEO(1); - OUT_VIDEO_REG(R300_VAP_VTX_SIZE, VTX_DWORD_COUNT); FINISH_VIDEO(); -@@ -538,7 +588,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv + BEGIN_VIDEO(1); +@@ -538,8 +590,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); } else { - if (IS_R300_VARIANT || IS_AVIVO_VARIANT) +- BEGIN_RING(4 * VTX_DWORD_COUNT + 6); + if (IS_R300_3D || IS_R500_3D) - BEGIN_RING(4 * VTX_DWORD_COUNT + 6); ++ BEGIN_RING(4 * VTX_DWORD_COUNT + 4); else BEGIN_RING(4 * VTX_DWORD_COUNT + 2); -@@ -549,7 +599,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv + OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, +@@ -549,8 +601,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); } #else /* ACCEL_CP */ - if (IS_R300_VARIANT || IS_AVIVO_VARIANT) +- BEGIN_VIDEO(3 + VTX_DWORD_COUNT * 4); + if (IS_R300_3D || IS_R500_3D) - BEGIN_VIDEO(3 + VTX_DWORD_COUNT * 4); ++ BEGIN_VIDEO(2 + VTX_DWORD_COUNT * 4); else BEGIN_VIDEO(1 + VTX_DWORD_COUNT * 4); -@@ -575,7 +625,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv + +@@ -575,10 +627,9 @@ 
FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv VTX_OUT((float)(dstX + dstw), (float)dstY, xFixedToFloat(srcTopRight.x) / info->texW[0], xFixedToFloat(srcTopRight.y) / info->texH[0]); - if (IS_R300_VARIANT || IS_AVIVO_VARIANT) { -+ if (IS_R300_3D || IS_R500_3D) { - OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); - OUT_VIDEO_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); - } +- OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); +- OUT_VIDEO_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); +- } ++ if (IS_R300_3D || IS_R500_3D) ++ /* flushing is pipelined, free/finish is not */ ++ OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); + + #ifdef ACCEL_CP + ADVANCE_RING(); diff --git a/src/radeon_video.c b/src/radeon_video.c index 7502e1e..216cd65 100644 --- a/src/radeon_video.c diff --git a/xorg-x11-drv-ati.spec b/xorg-x11-drv-ati.spec index 1e12d12..34c4869 100644 --- a/xorg-x11-drv-ati.spec +++ b/xorg-x11-drv-ati.spec @@ -5,7 +5,7 @@ Summary: Xorg X11 ati video driver Name: xorg-x11-drv-ati Version: 6.8.0 -Release: 12%{?dist} +Release: 13%{?dist} URL: http://www.x.org License: MIT Group: User Interface/X Hardware Support @@ -91,6 +91,9 @@ rm -rf $RPM_BUILD_ROOT %{_mandir}/man4/radeon.4* %changelog +* Wed May 07 2008 Dave Airlie 6.8.0-13 +- more upstream fixes for EXA accel + zaphod mode + * Thu Apr 24 2008 Dave Airlie 6.8.0-12 - not so much faster as kill my Apple MAC DDC - next time do this upstream first