From 3b99e16f2b686ee528dd06c8de810d9432a57e19 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mar 14 2009 08:42:44 +0000 Subject: - rebase to latest -ati upstream release --- diff --git a/.cvsignore b/.cvsignore index 7590a06..976083d 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1 +1 @@ -xf86-video-ati-6.11.0.tar.bz2 +xf86-video-ati-6.12.0.tar.bz2 diff --git a/radeon-6.11.0-git.patch b/radeon-6.11.0-git.patch deleted file mode 100644 index 441cbb8..0000000 --- a/radeon-6.11.0-git.patch +++ /dev/null @@ -1,13286 +0,0 @@ -diff --git a/configure.ac b/configure.ac -index 0523cc0..b094a50 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -22,7 +22,7 @@ - - AC_PREREQ(2.57) - AC_INIT([xf86-video-ati], -- 6.11.0, -+ 6.11.0.99, - [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], - xf86-video-ati) - -diff --git a/man/radeon.man b/man/radeon.man -index b9686b5..a067981 100644 ---- a/man/radeon.man -+++ b/man/radeon.man -@@ -567,6 +567,11 @@ atombios chips. TV-out is experimental and may not function on these chips as - well as hoped for. - The default is - .B off. -+.TP -+.BI "Option \*qR4xxATOM\*q \*q" boolean \*q -+This option enables modesetting on R/RV4xx chips using atombios. -+The default is -+.B off. - - .SH SEE ALSO - __xservername__(__appmansuffix__), __xconfigfile__(__filemansuffix__), Xserver(__appmansuffix__), X(__miscmansuffix__) -diff --git a/src/Makefile.am b/src/Makefile.am -index c15cc30..7cc2a6f 100644 ---- a/src/Makefile.am -+++ b/src/Makefile.am -@@ -66,7 +66,7 @@ XMODE_SRCS=\ - modes/xf86DiDGA.c - - if USE_EXA --RADEON_EXA_SOURCES = radeon_exa.c -+RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c - endif - - AM_CFLAGS = @XORG_CFLAGS@ @DRI_CFLAGS@ @XMODES_CFLAGS@ -DDISABLE_EASF -DENABLE_ALL_SERVICE_FUNCTIONS -DATOM_BIOS -DATOM_BIOS_PARSER -DDRIVER_PARSER -@@ -128,6 +128,12 @@ EXTRA_DIST = \ - radeon_render.c \ - radeon_accelfuncs.c \ - radeon_textured_videofuncs.c \ -+ r600_reg.h \ -+ r600_reg_auto_r6xx.h \ -+ r600_reg_r6xx.h \ -+ r600_reg_r7xx.h \ -+ r600_shader.h \ -+ r600_state.h \ - ati.h \ - ativersion.h \ - bicubic_table.h \ -diff --git a/src/atipciids.h b/src/atipciids.h -index 7735e93..47c03c7 100644 ---- a/src/atipciids.h -+++ b/src/atipciids.h -@@ -34,6 +34,7 @@ - #define _ATIPCIIDS_H - - /* PCI Vendor */ -+#define PCI_VENDOR_ASUS 0x1043 - #define PCI_VENDOR_ATI 0x1002 - #define PCI_VENDOR_IBM 0x1014 - #define PCI_VENDOR_AMD 0x1022 -diff --git a/src/atombios_crtc.c b/src/atombios_crtc.c -index 5c26ef8..50db578 100644 ---- a/src/atombios_crtc.c -+++ b/src/atombios_crtc.c -@@ -1,4 +1,4 @@ -- /* -+/* - * Copyright © 2007 Red Hat, Inc. - * Copyright 2007 Advanced Micro Devices, Inc. - * -@@ -50,6 +50,23 @@ - extern int - atombios_get_encoder_mode(xf86OutputPtr output); - -+extern void -+RADEONInitCrtcBase(xf86CrtcPtr crtc, RADEONSavePtr save, -+ int x, int y); -+extern void -+RADEONInitCrtc2Base(xf86CrtcPtr crtc, RADEONSavePtr save, -+ int x, int y); -+extern void -+RADEONRestoreCrtcBase(ScrnInfoPtr pScrn, -+ RADEONSavePtr restore); -+extern void -+RADEONRestoreCrtc2Base(ScrnInfoPtr pScrn, -+ RADEONSavePtr restore); -+extern void -+RADEONInitCommonRegisters(RADEONSavePtr save, RADEONInfoPtr info); -+extern void -+RADEONInitSurfaceCntl(xf86CrtcPtr crtc, RADEONSavePtr save); -+ - AtomBiosResult - atombios_lock_crtc(atomBiosHandlePtr atomBIOS, int crtc, int lock) - { -@@ -186,7 +203,7 @@ atombios_set_crtc_timing(atomBiosHandlePtr atomBIOS, SET_CRTC_TIMING_PARAMETERS_ - conv_param.ucOverscanRight = crtc_param->ucOverscanRight; - conv_param.ucOverscanLeft = crtc_param->ucOverscanLeft; - conv_param.ucOverscanBottom = crtc_param->ucOverscanBottom; -- conv_param.ucOverscanTop = crtc_param->ucOverscanTop; -+ conv_param.ucOverscanTop = crtc_param->ucOverscanTop; - conv_param.ucReserved = crtc_param->ucReserved; - - data.exec.index = GetIndexIntoMasterTable(COMMAND, SetCRTC_Timing); -@@ -202,8 +219,39 @@ atombios_set_crtc_timing(atomBiosHandlePtr atomBIOS, SET_CRTC_TIMING_PARAMETERS_ - return ATOM_NOT_IMPLEMENTED; - } - -+static AtomBiosResult -+atombios_set_crtc_dtd_timing(atomBiosHandlePtr atomBIOS, SET_CRTC_USING_DTD_TIMING_PARAMETERS *crtc_param) -+{ -+ AtomBiosArgRec data; -+ unsigned char *space; -+ SET_CRTC_USING_DTD_TIMING_PARAMETERS conv_param; -+ -+ conv_param.usH_Size = cpu_to_le16(crtc_param->usH_Size); -+ conv_param.usH_Blanking_Time= cpu_to_le16(crtc_param->usH_Blanking_Time); -+ conv_param.usV_Size = cpu_to_le16(crtc_param->usV_Size); -+ conv_param.usV_Blanking_Time= cpu_to_le16(crtc_param->usV_Blanking_Time); -+ conv_param.usH_SyncOffset= cpu_to_le16(crtc_param->usH_SyncOffset); -+ conv_param.usH_SyncWidth= cpu_to_le16(crtc_param->usH_SyncWidth); -+ conv_param.usV_SyncOffset= cpu_to_le16(crtc_param->usV_SyncOffset); -+ conv_param.usV_SyncWidth= cpu_to_le16(crtc_param->usV_SyncWidth); -+ conv_param.susModeMiscInfo.usAccess = cpu_to_le16(crtc_param->susModeMiscInfo.usAccess); -+ conv_param.ucCRTC= crtc_param->ucCRTC; -+ -+ data.exec.index = GetIndexIntoMasterTable(COMMAND, SetCRTC_UsingDTDTiming); -+ data.exec.dataSpace = (void *)&space; -+ data.exec.pspace = &conv_param; -+ -+ if (RHDAtomBiosFunc(atomBIOS->scrnIndex, atomBIOS, ATOMBIOS_EXEC, &data) == ATOM_SUCCESS) { -+ ErrorF("Set DTD CRTC Timing success\n"); -+ return ATOM_SUCCESS ; -+ } -+ -+ ErrorF("Set DTD CRTC Timing failed\n"); -+ return ATOM_NOT_IMPLEMENTED; -+} -+ - void --atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode, int pll_flags) -+atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) - { - RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; - RADEONInfoPtr info = RADEONPTR(crtc->scrn); -@@ -219,25 +267,20 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode, int pll_flags) - xf86OutputPtr output; - RADEONOutputPrivatePtr radeon_output = NULL; - radeon_encoder_ptr radeon_encoder = NULL; -+ int pll_flags = 0; -+ uint32_t temp; - - void *ptr; - AtomBiosArgRec data; - unsigned char *space; -- RADEONSavePtr save = info->ModeReg; - - memset(&spc_param, 0, sizeof(spc_param)); - if (IS_AVIVO_VARIANT) { -- uint32_t temp; -- - if (IS_DCE3_VARIANT && mode->Clock > 200000) /* range limits??? */ - pll_flags |= RADEON_PLL_PREFER_HIGH_FB_DIV; - else - pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV; - -- -- RADEONComputePLL(&info->pll, mode->Clock, &temp, &fb_div, &ref_div, &post_div, pll_flags); -- sclock = temp; -- - /* disable spread spectrum clocking for now -- thanks Hedy Lamarr */ - if (radeon_crtc->crtc_id == 0) { - temp = INREG(AVIVO_P1PLL_INT_SS_CNTL); -@@ -247,12 +290,30 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode, int pll_flags) - OUTREG(AVIVO_P2PLL_INT_SS_CNTL, temp & ~1); - } - } else { -- sclock = save->dot_clock_freq; -- fb_div = save->feedback_div; -- post_div = save->post_div; -- ref_div = save->ppll_ref_div; -+ pll_flags |= RADEON_PLL_LEGACY; -+ -+ for (i = 0; i < xf86_config->num_output; i++) { -+ xf86OutputPtr output = xf86_config->output[i]; -+ RADEONOutputPrivatePtr radeon_output = output->driver_private; -+ -+ if (output->crtc == crtc) { -+ if (radeon_output->active_device & (ATOM_DEVICE_LCD_SUPPORT | -+ ATOM_DEVICE_DFP_SUPPORT)) -+ pll_flags |= RADEON_PLL_NO_ODD_POST_DIV; -+ if (radeon_output->active_device & (ATOM_DEVICE_LCD_SUPPORT)) -+ pll_flags |= (RADEON_PLL_USE_BIOS_DIVS | RADEON_PLL_USE_REF_DIV); -+ } -+ } -+ -+ if (mode->Clock > 200000) /* range limits??? */ -+ pll_flags |= RADEON_PLL_PREFER_HIGH_FB_DIV; -+ else -+ pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV; - } - -+ RADEONComputePLL(&info->pll, mode->Clock, &temp, &fb_div, &ref_div, &post_div, pll_flags); -+ sclock = temp; -+ - xf86DrvMsg(crtc->scrn->scrnIndex, X_INFO, - "crtc(%d) Clock: mode %d, PLL %lu\n", - radeon_crtc->crtc_id, mode->Clock, (long unsigned int)sclock * 10); -@@ -349,9 +410,10 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc, - int need_tv_timings = 0; - int i, ret; - SET_CRTC_TIMING_PARAMETERS_PS_ALLOCATION crtc_timing; -+ SET_CRTC_USING_DTD_TIMING_PARAMETERS crtc_dtd_timing; - Bool tilingChanged = FALSE; -- int pll_flags = 0; - memset(&crtc_timing, 0, sizeof(crtc_timing)); -+ memset(&crtc_dtd_timing, 0, sizeof(crtc_dtd_timing)); - - if (info->allowColorTiling) { - radeon_crtc->can_tile = (adjusted_mode->Flags & (V_DBLSCAN | V_INTERLACE)) ? FALSE : TRUE; -@@ -373,9 +435,6 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc, - need_tv_timings = 2; - - } -- -- if (radeon_output->MonType == MT_LCD) -- pll_flags |= RADEON_PLL_USE_REF_DIV; - } - } - -@@ -413,6 +472,36 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc, - if (adjusted_mode->Flags & V_DBLSCAN) - crtc_timing.susModeMiscInfo.usAccess |= ATOM_DOUBLE_CLOCK_MODE; - -+ if (!IS_AVIVO_VARIANT && (radeon_crtc->crtc_id == 0)) { -+ crtc_dtd_timing.ucCRTC = radeon_crtc->crtc_id; -+ crtc_dtd_timing.usH_Size = adjusted_mode->CrtcHDisplay; -+ crtc_dtd_timing.usV_Size = adjusted_mode->CrtcVDisplay; -+ crtc_dtd_timing.usH_Blanking_Time = adjusted_mode->CrtcHBlankEnd - adjusted_mode->CrtcHDisplay; -+ crtc_dtd_timing.usV_Blanking_Time = adjusted_mode->CrtcVBlankEnd - adjusted_mode->CrtcVDisplay; -+ crtc_dtd_timing.usH_SyncOffset = adjusted_mode->CrtcHSyncStart - adjusted_mode->CrtcHDisplay; -+ crtc_dtd_timing.usV_SyncOffset = adjusted_mode->CrtcVSyncStart - adjusted_mode->CrtcVDisplay; -+ crtc_dtd_timing.usH_SyncWidth = adjusted_mode->CrtcHSyncEnd - adjusted_mode->CrtcHSyncStart; -+ crtc_dtd_timing.usV_SyncWidth = adjusted_mode->CrtcVSyncEnd - adjusted_mode->CrtcVSyncStart; -+ ErrorF("%d %d %d %d %d %d %d %d\n", crtc_dtd_timing.usH_Size, crtc_dtd_timing.usH_SyncOffset, -+ crtc_dtd_timing.usH_SyncWidth, crtc_dtd_timing.usH_Blanking_Time, -+ crtc_dtd_timing.usV_Size, crtc_dtd_timing.usV_SyncOffset, -+ crtc_dtd_timing.usV_SyncWidth, crtc_dtd_timing.usV_Blanking_Time); -+ -+ if (adjusted_mode->Flags & V_NVSYNC) -+ crtc_dtd_timing.susModeMiscInfo.usAccess |= ATOM_VSYNC_POLARITY; -+ -+ if (adjusted_mode->Flags & V_NHSYNC) -+ crtc_dtd_timing.susModeMiscInfo.usAccess |= ATOM_HSYNC_POLARITY; -+ -+ if (adjusted_mode->Flags & V_CSYNC) -+ crtc_dtd_timing.susModeMiscInfo.usAccess |= ATOM_COMPOSITESYNC; -+ -+ if (adjusted_mode->Flags & V_INTERLACE) -+ crtc_dtd_timing.susModeMiscInfo.usAccess |= ATOM_INTERLACE; -+ -+ if (adjusted_mode->Flags & V_DBLSCAN) -+ crtc_dtd_timing.susModeMiscInfo.usAccess |= ATOM_DOUBLE_CLOCK_MODE; -+ } - } - - ErrorF("Mode %dx%d - %d %d %d\n", adjusted_mode->CrtcHDisplay, adjusted_mode->CrtcVDisplay, -@@ -421,6 +510,11 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc, - RADEONInitMemMapRegisters(pScrn, info->ModeReg, info); - RADEONRestoreMemMapRegisters(pScrn, info->ModeReg); - -+ atombios_crtc_set_pll(crtc, adjusted_mode); -+ atombios_set_crtc_timing(info->atomBIOS, &crtc_timing); -+ if (!IS_AVIVO_VARIANT && (radeon_crtc->crtc_id == 0)) -+ atombios_set_crtc_dtd_timing(info->atomBIOS, &crtc_dtd_timing); -+ - if (IS_AVIVO_VARIANT) { - uint32_t fb_format; - -@@ -472,6 +566,8 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc, - OUTREG(AVIVO_D1GRPH_ENABLE + radeon_crtc->crtc_offset, 1); - - OUTREG(AVIVO_D1MODE_DESKTOP_HEIGHT + radeon_crtc->crtc_offset, mode->VDisplay); -+ x &= ~3; -+ y &= ~1; - OUTREG(AVIVO_D1MODE_VIEWPORT_START + radeon_crtc->crtc_offset, (x << 16) | y); - OUTREG(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset, - (mode->HDisplay << 16) | mode->VDisplay); -@@ -482,11 +578,56 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc, - else - OUTREG(AVIVO_D1MODE_DATA_FORMAT + radeon_crtc->crtc_offset, - 0); -- } -- -- atombios_crtc_set_pll(crtc, adjusted_mode, pll_flags); -+ } else { -+ int format = 0; -+ uint32_t crtc_gen_cntl, crtc2_gen_cntl, crtc_pitch; -+ -+ -+ RADEONInitCommonRegisters(info->ModeReg, info); -+ RADEONInitSurfaceCntl(crtc, info->ModeReg); -+ ErrorF("restore common\n"); -+ RADEONRestoreCommonRegisters(pScrn, info->ModeReg); -+ -+ switch (info->CurrentLayout.pixel_code) { -+ case 4: format = 1; break; -+ case 8: format = 2; break; -+ case 15: format = 3; break; /* 555 */ -+ case 16: format = 4; break; /* 565 */ -+ case 24: format = 5; break; /* RGB */ -+ case 32: format = 6; break; /* xRGB */ -+ default: -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Unsupported pixel depth (%d)\n", -+ info->CurrentLayout.bitsPerPixel); -+ } - -- atombios_set_crtc_timing(info->atomBIOS, &crtc_timing); -+ crtc_pitch = (((pScrn->displayWidth * pScrn->bitsPerPixel) + -+ ((pScrn->bitsPerPixel * 8) -1)) / -+ (pScrn->bitsPerPixel * 8)); -+ crtc_pitch |= crtc_pitch << 16; -+ -+ switch (radeon_crtc->crtc_id) { -+ case 0: -+ ErrorF("init crtc1\n"); -+ crtc_gen_cntl = INREG(RADEON_CRTC_GEN_CNTL) & 0xfffff0ff; -+ crtc_gen_cntl |= (format << 8); -+ OUTREG(RADEON_CRTC_GEN_CNTL, crtc_gen_cntl); -+ OUTREG(RADEON_CRTC_PITCH, crtc_pitch); -+ RADEONInitCrtcBase(crtc, info->ModeReg, x, y); -+ RADEONRestoreCrtcBase(pScrn, info->ModeReg); -+ break; -+ case 1: -+ crtc2_gen_cntl = INREG(RADEON_CRTC2_GEN_CNTL) & 0xfffff0ff; -+ crtc2_gen_cntl |= (format << 8); -+ OUTREG(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl); -+ OUTREG(RADEON_CRTC2_PITCH, crtc_pitch); -+ RADEONInitCrtc2Base(crtc, info->ModeReg, x, y); -+ RADEONRestoreCrtc2Base(pScrn, info->ModeReg); -+ OUTREG(RADEON_FP_H2_SYNC_STRT_WID, INREG(RADEON_CRTC2_H_SYNC_STRT_WID)); -+ OUTREG(RADEON_FP_V2_SYNC_STRT_WID, INREG(RADEON_CRTC2_V_SYNC_STRT_WID)); -+ break; -+ } -+ } - - if (info->DispPriority) - RADEONInitDispBandwidth(pScrn); -diff --git a/src/atombios_output.c b/src/atombios_output.c -index c4baa13..4f62b67 100644 ---- a/src/atombios_output.c -+++ b/src/atombios_output.c -@@ -430,7 +430,10 @@ atombios_get_encoder_mode(xf86OutputPtr output) - return ATOM_ENCODER_MODE_LVDS; - break; - case CONNECTOR_DISPLAY_PORT: -- return ATOM_ENCODER_MODE_DP; -+ if (radeon_output->MonType == MT_DP) -+ return ATOM_ENCODER_MODE_DP; -+ else -+ return atombios_maybe_hdmi_mode(output); - break; - case CONNECTOR_DVI_A: - case CONNECTOR_VGA: -@@ -995,6 +998,61 @@ atombios_output_yuv_setup(xf86OutputPtr output, Bool enable) - } - - static int -+atombios_output_overscan_setup(xf86OutputPtr output, DisplayModePtr mode, DisplayModePtr adjusted_mode) -+{ -+ RADEONOutputPrivatePtr radeon_output = output->driver_private; -+ RADEONCrtcPrivatePtr radeon_crtc = output->crtc->driver_private; -+ RADEONInfoPtr info = RADEONPTR(output->scrn); -+ SET_CRTC_OVERSCAN_PS_ALLOCATION overscan_param; -+ AtomBiosArgRec data; -+ unsigned char *space; -+ memset(&overscan_param, 0, sizeof(overscan_param)); -+ -+ overscan_param.usOverscanRight = 0; -+ overscan_param.usOverscanLeft = 0; -+ overscan_param.usOverscanBottom = 0; -+ overscan_param.usOverscanTop = 0; -+ overscan_param.ucCRTC = radeon_crtc->crtc_id; -+ -+ if (radeon_output->Flags & RADEON_USE_RMX) { -+ if (radeon_output->rmx_type == RMX_FULL) { -+ overscan_param.usOverscanRight = 0; -+ overscan_param.usOverscanLeft = 0; -+ overscan_param.usOverscanBottom = 0; -+ overscan_param.usOverscanTop = 0; -+ } else if (radeon_output->rmx_type == RMX_CENTER) { -+ overscan_param.usOverscanTop = (adjusted_mode->CrtcVDisplay - mode->CrtcVDisplay) / 2; -+ overscan_param.usOverscanBottom = (adjusted_mode->CrtcVDisplay - mode->CrtcVDisplay) / 2; -+ overscan_param.usOverscanLeft = (adjusted_mode->CrtcHDisplay - mode->CrtcHDisplay) / 2; -+ overscan_param.usOverscanRight = (adjusted_mode->CrtcHDisplay - mode->CrtcHDisplay) / 2; -+ } else if (radeon_output->rmx_type == RMX_ASPECT) { -+ int a1 = mode->CrtcVDisplay * adjusted_mode->CrtcHDisplay; -+ int a2 = adjusted_mode->CrtcVDisplay * mode->CrtcHDisplay; -+ -+ if (a1 > a2) { -+ overscan_param.usOverscanLeft = (adjusted_mode->CrtcHDisplay - (a2 / mode->CrtcVDisplay)) / 2; -+ overscan_param.usOverscanRight = (adjusted_mode->CrtcHDisplay - (a2 / mode->CrtcVDisplay)) / 2; -+ } else if (a2 > a1) { -+ overscan_param.usOverscanLeft = (adjusted_mode->CrtcVDisplay - (a1 / mode->CrtcHDisplay)) / 2; -+ overscan_param.usOverscanRight = (adjusted_mode->CrtcVDisplay - (a1 / mode->CrtcHDisplay)) / 2; -+ } -+ } -+ } -+ -+ data.exec.index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan); -+ data.exec.dataSpace = (void *)&space; -+ data.exec.pspace = &overscan_param; -+ -+ if (RHDAtomBiosFunc(info->atomBIOS->scrnIndex, info->atomBIOS, ATOMBIOS_EXEC, &data) == ATOM_SUCCESS) { -+ ErrorF("Set CRTC %d Overscan success\n", radeon_crtc->crtc_id); -+ return ATOM_SUCCESS ; -+ } -+ -+ ErrorF("Set CRTC %d Overscan failed\n", radeon_crtc->crtc_id); -+ return ATOM_NOT_IMPLEMENTED; -+} -+ -+static int - atombios_output_scaler_setup(xf86OutputPtr output, DisplayModePtr mode) - { - RADEONInfoPtr info = RADEONPTR(output->scrn); -@@ -1005,6 +1063,9 @@ atombios_output_scaler_setup(xf86OutputPtr output, DisplayModePtr mode) - AtomBiosArgRec data; - unsigned char *space; - -+ if (!IS_AVIVO_VARIANT && radeon_crtc->crtc_id) -+ return ATOM_SUCCESS; -+ - memset(&disp_data, 0, sizeof(disp_data)); - - disp_data.ucScaler = radeon_crtc->crtc_id; -@@ -1051,9 +1112,14 @@ atombios_output_scaler_setup(xf86OutputPtr output, DisplayModePtr mode) - disp_data.ucEnable = ATOM_SCALER_EXPANSION; - else if (radeon_output->rmx_type == RMX_CENTER) - disp_data.ucEnable = ATOM_SCALER_CENTER; -+ else if (radeon_output->rmx_type == RMX_ASPECT) -+ disp_data.ucEnable = ATOM_SCALER_EXPANSION; - } else { - ErrorF("Not using RMX\n"); -- disp_data.ucEnable = ATOM_SCALER_DISABLE; -+ if (IS_AVIVO_VARIANT) -+ disp_data.ucEnable = ATOM_SCALER_DISABLE; -+ else -+ disp_data.ucEnable = ATOM_SCALER_CENTER; - } - - data.exec.index = GetIndexIntoMasterTable(COMMAND, EnableScaler); -@@ -1266,7 +1332,10 @@ atombios_set_output_crtc_source(xf86OutputPtr output) - case 0: - case 1: - default: -- crtc_src_param.ucCRTC = radeon_crtc->crtc_id; -+ if (IS_AVIVO_VARIANT) -+ crtc_src_param.ucCRTC = radeon_crtc->crtc_id; -+ else -+ crtc_src_param.ucCRTC = radeon_crtc->crtc_id << 2; - switch (radeon_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_TMDS1: - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1: -@@ -1408,7 +1477,7 @@ atombios_apply_output_quirks(xf86OutputPtr output, DisplayModePtr mode) - } - - /* set scaler clears this on some chips */ -- if (mode->Flags & V_INTERLACE) -+ if (IS_AVIVO_VARIANT && (mode->Flags & V_INTERLACE)) - OUTREG(AVIVO_D1MODE_DATA_FORMAT + radeon_crtc->crtc_offset, AVIVO_D1MODE_INTERLEAVE_EN); - } - -@@ -1419,16 +1488,20 @@ atombios_output_mode_set(xf86OutputPtr output, - { - RADEONOutputPrivatePtr radeon_output = output->driver_private; - radeon_encoder_ptr radeon_encoder = radeon_get_encoder(output); -- -+ RADEONInfoPtr info = RADEONPTR(output->scrn); - if (radeon_encoder == NULL) - return; - -- atombios_output_scaler_setup(output, mode); -+ atombios_output_overscan_setup(output, mode, adjusted_mode); -+ atombios_output_scaler_setup(output, adjusted_mode); - atombios_set_output_crtc_source(output); -- if (radeon_output->active_device & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT)) -- atombios_output_yuv_setup(output, TRUE); -- else -- atombios_output_yuv_setup(output, FALSE); -+ -+ if (IS_AVIVO_VARIANT) { -+ if (radeon_output->active_device & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT)) -+ atombios_output_yuv_setup(output, TRUE); -+ else -+ atombios_output_yuv_setup(output, FALSE); -+ } - - switch (radeon_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_TMDS1: -diff --git a/src/legacy_crtc.c b/src/legacy_crtc.c -index b8c5380..829b453 100644 ---- a/src/legacy_crtc.c -+++ b/src/legacy_crtc.c -@@ -714,7 +714,7 @@ legacy_crtc_dpms(xf86CrtcPtr crtc, int mode) - - - /* Define common registers for requested video mode */ --static void -+void - RADEONInitCommonRegisters(RADEONSavePtr save, RADEONInfoPtr info) - { - save->ovr_clr = 0; -@@ -745,7 +745,7 @@ RADEONInitCommonRegisters(RADEONSavePtr save, RADEONInfoPtr info) - save->bus_cntl |= RADEON_BUS_RD_DISCARD_EN; - } - --static void -+void - RADEONInitSurfaceCntl(xf86CrtcPtr crtc, RADEONSavePtr save) - { - save->surface_cntl = 0; -diff --git a/src/r600_exa.c b/src/r600_exa.c -new file mode 100644 -index 0000000..f16e040 ---- /dev/null -+++ b/src/r600_exa.c -@@ -0,0 +1,2140 @@ -+/* -+ * Copyright 2008 Advanced Micro Devices, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ * Author: Alex Deucher -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include "xf86.h" -+ -+#include "exa.h" -+ -+#include "radeon.h" -+#include "radeon_macros.h" -+#include "radeon_reg.h" -+#include "r600_shader.h" -+#include "r600_reg.h" -+#include "r600_state.h" -+ -+extern PixmapPtr -+RADEONGetDrawablePixmap(DrawablePtr pDrawable); -+ -+/* #define SHOW_VERTEXES */ -+ -+# define RADEON_ROP3_ZERO 0x00000000 -+# define RADEON_ROP3_DSa 0x00880000 -+# define RADEON_ROP3_SDna 0x00440000 -+# define RADEON_ROP3_S 0x00cc0000 -+# define RADEON_ROP3_DSna 0x00220000 -+# define RADEON_ROP3_D 0x00aa0000 -+# define RADEON_ROP3_DSx 0x00660000 -+# define RADEON_ROP3_DSo 0x00ee0000 -+# define RADEON_ROP3_DSon 0x00110000 -+# define RADEON_ROP3_DSxn 0x00990000 -+# define RADEON_ROP3_Dn 0x00550000 -+# define RADEON_ROP3_SDno 0x00dd0000 -+# define RADEON_ROP3_Sn 0x00330000 -+# define RADEON_ROP3_DSno 0x00bb0000 -+# define RADEON_ROP3_DSan 0x00770000 -+# define RADEON_ROP3_ONE 0x00ff0000 -+ -+uint32_t RADEON_ROP[16] = { -+ RADEON_ROP3_ZERO, /* GXclear */ -+ RADEON_ROP3_DSa, /* Gxand */ -+ RADEON_ROP3_SDna, /* GXandReverse */ -+ RADEON_ROP3_S, /* GXcopy */ -+ RADEON_ROP3_DSna, /* GXandInverted */ -+ RADEON_ROP3_D, /* GXnoop */ -+ RADEON_ROP3_DSx, /* GXxor */ -+ RADEON_ROP3_DSo, /* GXor */ -+ RADEON_ROP3_DSon, /* GXnor */ -+ RADEON_ROP3_DSxn, /* GXequiv */ -+ RADEON_ROP3_Dn, /* GXinvert */ -+ RADEON_ROP3_SDno, /* GXorReverse */ -+ RADEON_ROP3_Sn, /* GXcopyInverted */ -+ RADEON_ROP3_DSno, /* GXorInverted */ -+ RADEON_ROP3_DSan, /* GXnand */ -+ RADEON_ROP3_ONE, /* GXset */ -+}; -+ -+static void -+R600DoneSolid(PixmapPtr pPix); -+ -+static void -+R600DoneComposite(PixmapPtr pDst); -+ -+ -+static Bool -+R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ cb_config_t cb_conf; -+ shader_config_t vs_conf, ps_conf; -+ int pmask = 0; -+ uint32_t a, r, g, b; -+ float ps_alu_consts[4]; -+ -+ accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; -+ accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height; -+ accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); -+ -+ /* bad pitch */ -+ if (accel_state->dst_pitch & 7) -+ return FALSE; -+ -+ /* bad offset */ -+ if (accel_state->dst_mc_addr & 0xff) -+ return FALSE; -+ -+ if (pPix->drawable.bitsPerPixel == 24) -+ return FALSE; -+ -+ CLEAR (cb_conf); -+ CLEAR (vs_conf); -+ CLEAR (ps_conf); -+ -+ /* return FALSE; */ -+ -+#ifdef SHOW_VERTEXES -+ ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height, -+ pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix)); -+#endif -+ -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ -+ /* Init */ -+ start_3d(pScrn, accel_state->ib); -+ -+ set_default_state(pScrn, accel_state->ib); -+ -+ /* Scissor / viewport */ -+ EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); -+ EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); -+ -+ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -+ accel_state->solid_vs_offset; -+ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -+ accel_state->solid_ps_offset; -+ accel_state->vs_size = 512; -+ accel_state->ps_size = 512; -+ -+ /* Shader */ -+ -+ /* flush SQ cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, -+ accel_state->vs_size, accel_state->vs_mc_addr); -+ -+ vs_conf.shader_addr = accel_state->vs_mc_addr; -+ vs_conf.num_gprs = 2; -+ vs_conf.stack_size = 0; -+ vs_setup (pScrn, accel_state->ib, &vs_conf); -+ -+ /* flush SQ cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, -+ accel_state->ps_size, accel_state->ps_mc_addr); -+ -+ ps_conf.shader_addr = accel_state->ps_mc_addr; -+ ps_conf.num_gprs = 1; -+ ps_conf.stack_size = 0; -+ ps_conf.uncached_first_inst = 1; -+ ps_conf.clamp_consts = 0; -+ ps_conf.export_mode = 2; -+ ps_setup (pScrn, accel_state->ib, &ps_conf); -+ -+ /* Render setup */ -+ if (pm & 0x000000ff) -+ pmask |= 4; /* B */ -+ if (pm & 0x0000ff00) -+ pmask |= 2; /* G */ -+ if (pm & 0x00ff0000) -+ pmask |= 1; /* R */ -+ if (pm & 0xff000000) -+ pmask |= 8; /* A */ -+ EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); -+ EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); -+ EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]); -+ -+ cb_conf.id = 0; -+ cb_conf.w = accel_state->dst_pitch; -+ cb_conf.h = pPix->drawable.height; -+ cb_conf.base = accel_state->dst_mc_addr; -+ -+ if (pPix->drawable.bitsPerPixel == 8) { -+ cb_conf.format = COLOR_8; -+ cb_conf.comp_swap = 3; /* A */ -+ } else if (pPix->drawable.bitsPerPixel == 16) { -+ cb_conf.format = COLOR_5_6_5; -+ cb_conf.comp_swap = 2; /* RGB */ -+ } else { -+ cb_conf.format = COLOR_8_8_8_8; -+ cb_conf.comp_swap = 1; /* ARGB */ -+ } -+ cb_conf.source_format = 1; -+ cb_conf.blend_clamp = 1; -+ set_render_target(pScrn, accel_state->ib, &cb_conf); -+ -+ EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | -+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | -+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); -+ EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ -+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ -+ -+ /* Interpolator setup */ -+ /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */ -+ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); -+ EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); -+ -+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x -+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ -+ /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */ -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (0 << NUM_INTERP_shift)); -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); -+ /* color semantic id 0 -> GPR[0] */ -+ EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | -+ (0x03 << DEFAULT_VAL_shift) | -+ FLAT_SHADE_bit | -+ SEL_CENTROID_bit)); -+ EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit | 0); -+ -+ /* PS alu constants */ -+ if (pPix->drawable.bitsPerPixel == 16) { -+ r = (fg >> 11) & 0x1f; -+ g = (fg >> 5) & 0x3f; -+ b = (fg >> 0) & 0x1f; -+ ps_alu_consts[0] = (float)r / 31; /* R */ -+ ps_alu_consts[1] = (float)g / 63; /* G */ -+ ps_alu_consts[2] = (float)b / 31; /* B */ -+ ps_alu_consts[3] = 1.0; /* A */ -+ } else if (pPix->drawable.bitsPerPixel == 8) { -+ a = (fg >> 0) & 0xff; -+ ps_alu_consts[0] = 0.0; /* R */ -+ ps_alu_consts[1] = 0.0; /* G */ -+ ps_alu_consts[2] = 0.0; /* B */ -+ ps_alu_consts[3] = (float)a / 255; /* A */ -+ } else { -+ a = (fg >> 24) & 0xff; -+ r = (fg >> 16) & 0xff; -+ g = (fg >> 8) & 0xff; -+ b = (fg >> 0) & 0xff; -+ ps_alu_consts[0] = (float)r / 255; /* R */ -+ ps_alu_consts[1] = (float)g / 255; /* G */ -+ ps_alu_consts[2] = (float)b / 255; /* B */ -+ ps_alu_consts[3] = (float)a / 255; /* A */ -+ } -+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, -+ sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); -+ -+ accel_state->vb_index = 0; -+ -+#ifdef SHOW_VERTEXES -+ ErrorF("PM: 0x%08x\n", pm); -+#endif -+ -+ return TRUE; -+} -+ -+ -+static void -+R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ float *vb; -+ -+ if (((accel_state->vb_index + 3) * 8) > (accel_state->ib->total / 2)) { -+ R600DoneSolid(pPix); -+ accel_state->vb_index = 0; -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ } -+ -+ vb = (pointer)((char*)accel_state->ib->address + -+ (accel_state->ib->total / 2) + -+ accel_state->vb_index * 8); -+ -+ vb[0] = (float)x1; -+ vb[1] = (float)y1; -+ -+ vb[2] = (float)x1; -+ vb[3] = (float)y2; -+ -+ vb[4] = (float)x2; -+ vb[5] = (float)y2; -+ -+ accel_state->vb_index += 3; -+ -+} -+ -+static void -+R600DoneSolid(PixmapPtr pPix) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ draw_config_t draw_conf; -+ vtx_resource_t vtx_res; -+ -+ CLEAR (draw_conf); -+ CLEAR (vtx_res); -+ -+ if (accel_state->vb_index == 0) { -+ R600IBDiscard(pScrn, accel_state->ib); -+ return; -+ } -+ -+ accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + -+ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); -+ accel_state->vb_size = accel_state->vb_index * 8; -+ -+ /* flush vertex cache */ -+ if ((info->ChipFamily == CHIP_FAMILY_RV610) || -+ (info->ChipFamily == CHIP_FAMILY_RV620) || -+ (info->ChipFamily == CHIP_FAMILY_RS780) || -+ (info->ChipFamily == CHIP_FAMILY_RV710)) -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, -+ accel_state->vb_size, accel_state->vb_mc_addr); -+ else -+ cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, -+ accel_state->vb_size, accel_state->vb_mc_addr); -+ -+ /* Vertex buffer setup */ -+ vtx_res.id = SQ_VTX_RESOURCE_vs; -+ vtx_res.vtx_size_dw = 8 / 4; -+ vtx_res.vtx_num_entries = accel_state->vb_size / 4; -+ vtx_res.mem_req_size = 1; -+ vtx_res.vb_addr = accel_state->vb_mc_addr; -+ set_vtx_resource (pScrn, accel_state->ib, &vtx_res); -+ -+ /* Draw */ -+ draw_conf.prim_type = DI_PT_RECTLIST; -+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; -+ draw_conf.num_instances = 1; -+ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; -+ draw_conf.index_type = DI_INDEX_SIZE_16_BIT; -+ -+ draw_auto(pScrn, accel_state->ib, &draw_conf); -+ -+ wait_3d_idle_clean(pScrn, accel_state->ib); -+ -+ /* sync dst surface */ -+ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), -+ accel_state->dst_size, accel_state->dst_mc_addr); -+ -+ R600CPFlushIndirect(pScrn, accel_state->ib); -+} -+ -+static void -+R600DoPrepareCopy(ScrnInfoPtr pScrn, -+ int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, -+ int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp, -+ int rop, Pixel planemask) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ int pmask = 0; -+ cb_config_t cb_conf; -+ tex_resource_t tex_res; -+ tex_sampler_t tex_samp; -+ shader_config_t vs_conf, ps_conf; -+ -+ CLEAR (cb_conf); -+ CLEAR (tex_res); -+ CLEAR (tex_samp); -+ CLEAR (vs_conf); -+ CLEAR (ps_conf); -+ -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ -+ /* Init */ -+ start_3d(pScrn, accel_state->ib); -+ -+ set_default_state(pScrn, accel_state->ib); -+ -+ /* Scissor / viewport */ -+ EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); -+ EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); -+ -+ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -+ accel_state->copy_vs_offset; -+ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -+ accel_state->copy_ps_offset; -+ accel_state->vs_size = 512; -+ accel_state->ps_size = 512; -+ -+ /* Shader */ -+ -+ /* flush SQ cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, -+ accel_state->vs_size, accel_state->vs_mc_addr); -+ -+ vs_conf.shader_addr = accel_state->vs_mc_addr; -+ vs_conf.num_gprs = 2; -+ vs_conf.stack_size = 0; -+ vs_setup (pScrn, accel_state->ib, &vs_conf); -+ -+ /* flush SQ cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, -+ accel_state->ps_size, accel_state->ps_mc_addr); -+ -+ ps_conf.shader_addr = accel_state->ps_mc_addr; -+ ps_conf.num_gprs = 1; -+ ps_conf.stack_size = 0; -+ ps_conf.uncached_first_inst = 1; -+ ps_conf.clamp_consts = 0; -+ ps_conf.export_mode = 2; -+ ps_setup (pScrn, accel_state->ib, &ps_conf); -+ -+ accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); -+ accel_state->src_mc_addr[0] = src_offset; -+ accel_state->src_pitch[0] = src_pitch; -+ accel_state->src_width[0] = src_width; -+ accel_state->src_height[0] = src_height; -+ accel_state->src_bpp[0] = src_bpp; -+ -+ /* flush texture cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, -+ accel_state->src_size[0], accel_state->src_mc_addr[0]); -+ -+ /* Texture */ -+ tex_res.id = 0; -+ tex_res.w = src_width; -+ tex_res.h = src_height; -+ tex_res.pitch = accel_state->src_pitch[0]; -+ tex_res.depth = 0; -+ tex_res.dim = SQ_TEX_DIM_2D; -+ tex_res.base = accel_state->src_mc_addr[0]; -+ tex_res.mip_base = accel_state->src_mc_addr[0]; -+ if (src_bpp == 8) { -+ tex_res.format = FMT_8; -+ tex_res.dst_sel_x = SQ_SEL_1; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_1; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_1; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_X; /* A */ -+ } else if (src_bpp == 16) { -+ tex_res.format = FMT_5_6_5; -+ tex_res.dst_sel_x = SQ_SEL_Z; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_X; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_1; /* A */ -+ } else { -+ tex_res.format = FMT_8_8_8_8; -+ tex_res.dst_sel_x = SQ_SEL_Z; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_X; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_W; /* A */ -+ } -+ -+ tex_res.request_size = 1; -+ tex_res.base_level = 0; -+ tex_res.last_level = 0; -+ tex_res.perf_modulation = 0; -+ set_tex_resource (pScrn, accel_state->ib, &tex_res); -+ -+ tex_samp.id = 0; -+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; -+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; -+ tex_samp.clamp_z = SQ_TEX_WRAP; -+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; -+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; -+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; -+ tex_samp.mip_filter = 0; /* no mipmap */ -+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); -+ -+ -+ /* Render setup */ -+ if (planemask & 0x000000ff) -+ pmask |= 4; /* B */ -+ if (planemask & 0x0000ff00) -+ pmask |= 2; /* G */ -+ if (planemask & 0x00ff0000) -+ pmask |= 1; /* R */ -+ if (planemask & 0xff000000) -+ pmask |= 8; /* A */ -+ EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); -+ EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); -+ EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); -+ -+ accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); -+ accel_state->dst_mc_addr = dst_offset; -+ accel_state->dst_pitch = dst_pitch; -+ accel_state->dst_height = dst_height; -+ accel_state->dst_bpp = dst_bpp; -+ -+ cb_conf.id = 0; -+ cb_conf.w = accel_state->dst_pitch; -+ cb_conf.h = dst_height; -+ cb_conf.base = accel_state->dst_mc_addr; -+ if (dst_bpp == 8) { -+ cb_conf.format = COLOR_8; -+ cb_conf.comp_swap = 3; /* A */ -+ } else if (dst_bpp == 16) { -+ cb_conf.format = COLOR_5_6_5; -+ cb_conf.comp_swap = 2; /* RGB */ -+ } else { -+ cb_conf.format = COLOR_8_8_8_8; -+ cb_conf.comp_swap = 1; /* ARGB */ -+ } -+ cb_conf.source_format = 1; -+ cb_conf.blend_clamp = 1; -+ set_render_target(pScrn, accel_state->ib, &cb_conf); -+ -+ EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | -+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | -+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); -+ EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ -+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ -+ -+ /* Interpolator setup */ -+ /* export tex coord from VS */ -+ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); -+ EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); -+ -+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x -+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ -+ /* input tex coord from VS */ -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); -+ /* color semantic id 0 -> GPR[0] */ -+ EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | -+ (0x01 << DEFAULT_VAL_shift) | -+ SEL_CENTROID_bit)); -+ EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); -+ -+ accel_state->vb_index = 0; -+ -+} -+ -+static void -+R600DoCopy(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ draw_config_t draw_conf; -+ vtx_resource_t vtx_res; -+ -+ CLEAR (draw_conf); -+ CLEAR (vtx_res); -+ -+ if (accel_state->vb_index == 0) { -+ R600IBDiscard(pScrn, accel_state->ib); -+ return; -+ } -+ -+ accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + -+ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); -+ accel_state->vb_size = accel_state->vb_index * 16; -+ -+ /* flush vertex cache */ -+ if ((info->ChipFamily == CHIP_FAMILY_RV610) || -+ (info->ChipFamily == CHIP_FAMILY_RV620) || -+ (info->ChipFamily == CHIP_FAMILY_RS780) || -+ (info->ChipFamily == CHIP_FAMILY_RV710)) -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, -+ accel_state->vb_size, accel_state->vb_mc_addr); -+ else -+ cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, -+ accel_state->vb_size, accel_state->vb_mc_addr); -+ -+ /* Vertex buffer setup */ -+ vtx_res.id = SQ_VTX_RESOURCE_vs; -+ vtx_res.vtx_size_dw = 16 / 4; -+ vtx_res.vtx_num_entries = accel_state->vb_size / 4; -+ vtx_res.mem_req_size = 1; -+ vtx_res.vb_addr = accel_state->vb_mc_addr; -+ set_vtx_resource (pScrn, accel_state->ib, &vtx_res); -+ -+ draw_conf.prim_type = DI_PT_RECTLIST; -+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; -+ draw_conf.num_instances = 1; -+ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; -+ draw_conf.index_type = DI_INDEX_SIZE_16_BIT; -+ -+ draw_auto(pScrn, accel_state->ib, &draw_conf); -+ -+ wait_3d_idle_clean(pScrn, accel_state->ib); -+ -+ /* sync dst surface */ -+ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), -+ accel_state->dst_size, accel_state->dst_mc_addr); -+ -+ R600CPFlushIndirect(pScrn, accel_state->ib); -+} -+ -+static void -+R600AppendCopyVertex(ScrnInfoPtr pScrn, -+ int srcX, int srcY, -+ int dstX, int dstY, -+ int w, int h) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ float *vb; -+ -+ if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { -+ R600DoCopy(pScrn); -+ accel_state->vb_index = 0; -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ } -+ -+ vb = (pointer)((char*)accel_state->ib->address + -+ (accel_state->ib->total / 2) + -+ accel_state->vb_index * 16); -+ -+ vb[0] = (float)dstX; -+ vb[1] = (float)dstY; -+ vb[2] = (float)srcX; -+ vb[3] = (float)srcY; -+ -+ vb[4] = (float)dstX; -+ vb[5] = (float)(dstY + h); -+ vb[6] = (float)srcX; -+ vb[7] = (float)(srcY + h); -+ -+ vb[8] = (float)(dstX + w); -+ vb[9] = (float)(dstY + h); -+ vb[10] = (float)(srcX + w); -+ vb[11] = (float)(srcY + h); -+ -+ accel_state->vb_index += 3; -+} -+ -+static Bool -+R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, -+ int xdir, int ydir, -+ int rop, -+ Pixel planemask) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ -+ accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); -+ accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); -+ -+ accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; -+ accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; -+ -+ accel_state->src_width[0] = pSrc->drawable.width; -+ accel_state->src_height[0] = pSrc->drawable.height; -+ accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel; -+ accel_state->dst_height = pDst->drawable.height; -+ accel_state->dst_bpp = pDst->drawable.bitsPerPixel; -+ -+ /* bad pitch */ -+ if (accel_state->src_pitch[0] & 7) -+ return FALSE; -+ if (accel_state->dst_pitch & 7) -+ return FALSE; -+ -+ /* bad offset */ -+ if (accel_state->src_mc_addr[0] & 0xff) -+ return FALSE; -+ if (accel_state->dst_mc_addr & 0xff) -+ return FALSE; -+ -+ if (pSrc->drawable.bitsPerPixel == 24) -+ return FALSE; -+ if (pDst->drawable.bitsPerPixel == 24) -+ return FALSE; -+ -+ /* return FALSE; */ -+ -+#ifdef SHOW_VERTEXES -+ ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height, -+ pSrc->drawable.bitsPerPixel, exaGetPixmapPitch(pSrc)); -+ ErrorF("dst: %dx%d @ %dbpp, 0x%08x\n", pDst->drawable.width, pDst->drawable.height, -+ pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst)); -+#endif -+ -+ accel_state->rop = rop; -+ accel_state->planemask = planemask; -+ -+ if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) { -+ unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8; -+ accel_state->same_surface = TRUE; -+ -+ if (accel_state->copy_area) { -+ exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); -+ accel_state->copy_area = NULL; -+ } -+ accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); -+ } else { -+ accel_state->same_surface = FALSE; -+ -+ R600DoPrepareCopy(pScrn, -+ accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, -+ accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, -+ accel_state->dst_pitch, pDst->drawable.height, -+ accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, -+ rop, planemask); -+ -+ } -+ -+ return TRUE; -+} -+ -+static Bool -+is_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2) -+{ -+ if (((sx1 >= dx1) && (sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TL x1, y1 */ -+ ((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TR x2, y1 */ -+ ((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || /* BL x1, y2 */ -+ ((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2))) /* BR x2, y2 */ -+ return TRUE; -+ else -+ return FALSE; -+} -+ -+static void -+R600OverlapCopy(PixmapPtr pDst, -+ int srcX, int srcY, -+ int dstX, int dstY, -+ int w, int h) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); -+ uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; -+ int i, hchunk, vchunk; -+ -+ if (is_overlap(srcX, srcX + w, srcY, srcY + h, -+ dstX, dstX + w, dstY, dstY + h)) { -+ /* Calculate height/width of non-overlapping area */ -+ hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX); -+ vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY); -+ -+ /* Diagonally offset overlap is reduced to either horizontal or vertical offset-only -+ * by copying a part of the non-overlapping portion, then adjusting coordinates -+ * Choose horizontal vs vertical to minimize the total number of copy operations -+ */ -+ if (vchunk != 0 && hchunk != 0) { /* diagonal */ -+ if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal */ -+ if (srcY > dstY ) { /* diagonal up */ -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk); -+ R600DoCopy(pScrn); -+ -+ srcY = srcY + vchunk; -+ dstY = dstY + vchunk; -+ } else { /* diagonal down */ -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk); -+ R600DoCopy(pScrn); -+ } -+ h = h - vchunk; -+ vchunk = 0; -+ } else { /* reduce to vertical */ -+ if (srcX > dstX ) { /* diagonal left */ -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h); -+ R600DoCopy(pScrn); -+ -+ srcX = srcX + hchunk; -+ dstX = dstX + hchunk; -+ } else { /* diagonal right */ -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h); -+ R600DoCopy(pScrn); -+ } -+ w = w - hchunk; -+ hchunk = 0; -+ } -+ } -+ -+ if (vchunk == 0) { /* left/right */ -+ if (srcX < dstX) { /* right */ -+ /* copy right to left */ -+ for (i = w; i > 0; i -= hchunk) { -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h); -+ R600DoCopy(pScrn); -+ } -+ } else { /* left */ -+ /* copy left to right */ -+ for (i = 0; i < w; i += hchunk) { -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ -+ R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h); -+ R600DoCopy(pScrn); -+ } -+ } -+ } else { /* up/down */ -+ if (srcY > dstY) { /* up */ -+ /* copy top to bottom */ -+ for (i = 0; i < h; i += vchunk) { -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ -+ if (vchunk > h - i) vchunk = h - i; -+ R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk); -+ R600DoCopy(pScrn); -+ } -+ } else { /* down */ -+ /* copy bottom to top */ -+ for (i = h; i > 0; i -= vchunk) { -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ -+ if (vchunk > i) vchunk = i; -+ R600AppendCopyVertex(pScrn, srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk); -+ R600DoCopy(pScrn); -+ } -+ } -+ } -+ } else { -+ R600DoPrepareCopy(pScrn, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ -+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); -+ R600DoCopy(pScrn); -+ } -+} -+ -+static void -+R600Copy(PixmapPtr pDst, -+ int srcX, int srcY, -+ int dstX, int dstY, -+ int w, int h) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ -+ if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) -+ return; -+ -+ if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { -+ if (accel_state->copy_area) { -+ uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); -+ uint32_t orig_offset, tmp_offset; -+ -+ tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; -+ orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; -+ -+ R600DoPrepareCopy(pScrn, -+ pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, -+ pitch, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); -+ R600DoCopy(pScrn); -+ R600DoPrepareCopy(pScrn, -+ pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, -+ pitch, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); -+ R600DoCopy(pScrn); -+ } else -+ R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h); -+ } else if (accel_state->same_surface) { -+ uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); -+ uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; -+ -+ R600DoPrepareCopy(pScrn, -+ pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, -+ pitch, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, -+ accel_state->rop, accel_state->planemask); -+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); -+ R600DoCopy(pScrn); -+ } else { -+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); -+ } -+ -+} -+ -+static void -+R600DoneCopy(PixmapPtr pDst) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ -+ if (!accel_state->same_surface) -+ R600DoCopy(pScrn); -+ -+ if (accel_state->copy_area) { -+ exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); -+ accel_state->copy_area = NULL; -+ } -+ -+} -+ -+#define RADEON_TRACE_FALL 0 -+#define RADEON_TRACE_DRAW 0 -+ -+#if RADEON_TRACE_FALL -+#define RADEON_FALLBACK(x) \ -+do { \ -+ ErrorF("%s: ", __FUNCTION__); \ -+ ErrorF x; \ -+ return FALSE; \ -+} while (0) -+#else -+#define RADEON_FALLBACK(x) return FALSE -+#endif -+ -+#define xFixedToFloat(f) (((float) (f)) / 65536) -+ -+static inline void transformPoint(PictTransform *transform, xPointFixed *point) -+{ -+ PictVector v; -+ v.vector[0] = point->x; -+ v.vector[1] = point->y; -+ v.vector[2] = xFixed1; -+ PictureTransformPoint(transform, &v); -+ point->x = v.vector[0]; -+ point->y = v.vector[1]; -+} -+ -+struct blendinfo { -+ Bool dst_alpha; -+ Bool src_alpha; -+ uint32_t blend_cntl; -+}; -+ -+static struct blendinfo R600BlendOp[] = { -+ /* Clear */ -+ {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, -+ /* Src */ -+ {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, -+ /* Dst */ -+ {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, -+ /* Over */ -+ {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, -+ /* OverReverse */ -+ {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, -+ /* In */ -+ {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, -+ /* InReverse */ -+ {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, -+ /* Out */ -+ {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, -+ /* OutReverse */ -+ {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, -+ /* Atop */ -+ {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, -+ /* AtopReverse */ -+ {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, -+ /* Xor */ -+ {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, -+ /* Add */ -+ {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, -+}; -+ -+struct formatinfo { -+ unsigned int fmt; -+ uint32_t card_fmt; -+}; -+ -+static struct formatinfo R600TexFormats[] = { -+ {PICT_a8r8g8b8, FMT_8_8_8_8}, -+ {PICT_x8r8g8b8, FMT_8_8_8_8}, -+ {PICT_a8b8g8r8, FMT_8_8_8_8}, -+ {PICT_x8b8g8r8, FMT_8_8_8_8}, -+ {PICT_r5g6b5, FMT_5_6_5}, -+ {PICT_a1r5g5b5, FMT_1_5_5_5}, -+ {PICT_x1r5g5b5, FMT_1_5_5_5}, -+ {PICT_a8, FMT_8}, -+}; -+ -+static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) -+{ -+ uint32_t sblend, dblend; -+ -+ sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; -+ dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; -+ -+ /* If there's no dst alpha channel, adjust the blend op so that we'll treat -+ * it as always 1. -+ */ -+ if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) { -+ if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) -+ sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); -+ else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) -+ sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); -+ } -+ -+ /* If the source alpha is being used, then we should only be in a case where -+ * the source blend factor is 0, and the source blend value is the mask -+ * channels multiplied by the source picture's alpha. -+ */ -+ if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) { -+ if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { -+ dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); -+ } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { -+ dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); -+ } -+ } -+ -+ return sblend | dblend; -+} -+ -+static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) -+{ -+ switch (pDstPicture->format) { -+ case PICT_a8r8g8b8: -+ case PICT_x8r8g8b8: -+ *dst_format = COLOR_8_8_8_8; -+ break; -+ case PICT_r5g6b5: -+ *dst_format = COLOR_5_6_5; -+ break; -+ case PICT_a1r5g5b5: -+ case PICT_x1r5g5b5: -+ *dst_format = COLOR_1_5_5_5; -+ break; -+ case PICT_a8: -+ *dst_format = COLOR_8; -+ break; -+ default: -+ RADEON_FALLBACK(("Unsupported dest format 0x%x\n", -+ (int)pDstPicture->format)); -+ } -+ return TRUE; -+} -+ -+static Bool R600CheckCompositeTexture(PicturePtr pPict, -+ PicturePtr pDstPict, -+ int op, -+ int unit) -+{ -+ int w = pPict->pDrawable->width; -+ int h = pPict->pDrawable->height; -+ unsigned int i; -+ int max_tex_w, max_tex_h; -+ -+ max_tex_w = 8192; -+ max_tex_h = 8192; -+ -+ if ((w > max_tex_w) || (h > max_tex_h)) -+ RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); -+ -+ for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { -+ if (R600TexFormats[i].fmt == pPict->format) -+ break; -+ } -+ if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0])) -+ RADEON_FALLBACK(("Unsupported picture format 0x%x\n", -+ (int)pPict->format)); -+ -+ if (pPict->filter != PictFilterNearest && -+ pPict->filter != PictFilterBilinear) -+ RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); -+ -+ /* for REPEAT_NONE, Render semantics are that sampling outside the source -+ * picture results in alpha=0 pixels. We can implement this with a border color -+ * *if* our source texture has an alpha channel, otherwise we need to fall -+ * back. If we're not transformed then we hope that upper layers have clipped -+ * rendering to the bounds of the source drawable, in which case it doesn't -+ * matter. I have not, however, verified that the X server always does such -+ * clipping. -+ */ -+ /* FIXME R6xx */ -+ if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) { -+ if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) -+ RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); -+ } -+ -+ return TRUE; -+} -+ -+static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, -+ int unit) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ int w = pPict->pDrawable->width; -+ int h = pPict->pDrawable->height; -+ unsigned int i; -+ tex_resource_t tex_res; -+ tex_sampler_t tex_samp; -+ -+ CLEAR (tex_res); -+ CLEAR (tex_samp); -+ -+ accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; -+ accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); -+ accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * pPix->drawable.height; -+ -+ if (accel_state->src_pitch[1] & 7) -+ RADEON_FALLBACK(("Bad pitch %d 0x%x\n", (int)accel_state->src_pitch[unit], unit)); -+ -+ if (accel_state->src_mc_addr[1] & 0xff) -+ RADEON_FALLBACK(("Bad offset %d 0x%x\n", (int)accel_state->src_mc_addr[unit], unit)); -+ -+ for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { -+ if (R600TexFormats[i].fmt == pPict->format) -+ break; -+ } -+ -+ accel_state->texW[unit] = w; -+ accel_state->texH[unit] = h; -+ -+ /* ErrorF("Tex %d setup %dx%d\n", unit, w, h); */ -+ -+ /* flush texture cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, -+ accel_state->src_size[unit], accel_state->src_mc_addr[unit]); -+ -+ /* Texture */ -+ tex_res.id = unit; -+ tex_res.w = w; -+ tex_res.h = h; -+ tex_res.pitch = accel_state->src_pitch[unit]; -+ tex_res.depth = 0; -+ tex_res.dim = SQ_TEX_DIM_2D; -+ tex_res.base = accel_state->src_mc_addr[unit]; -+ tex_res.mip_base = accel_state->src_mc_addr[unit]; -+ tex_res.format = R600TexFormats[i].card_fmt; -+ tex_res.request_size = 1; -+ -+ /* component swizzles */ -+ switch (pPict->format) { -+ case PICT_a1r5g5b5: -+ case PICT_a8r8g8b8: -+ tex_res.dst_sel_x = SQ_SEL_Z; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_X; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_W; /* A */ -+ break; -+ case PICT_a8b8g8r8: -+ tex_res.dst_sel_x = SQ_SEL_X; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_Z; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_W; /* A */ -+ break; -+ case PICT_x8b8g8r8: -+ tex_res.dst_sel_x = SQ_SEL_X; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_Z; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_1; /* A */ -+ break; -+ case PICT_x1r5g5b5: -+ case PICT_x8r8g8b8: -+ tex_res.dst_sel_x = SQ_SEL_Z; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_X; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_1; /* A */ -+ break; -+ case PICT_r5g6b5: -+ tex_res.dst_sel_x = SQ_SEL_Z; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_X; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_1; /* A */ -+ break; -+ case PICT_a8: -+ tex_res.dst_sel_x = SQ_SEL_0; /* R */ -+ tex_res.dst_sel_y = SQ_SEL_0; /* G */ -+ tex_res.dst_sel_z = SQ_SEL_0; /* B */ -+ tex_res.dst_sel_w = SQ_SEL_X; /* A */ -+ break; -+ default: -+ RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); -+ } -+ -+ tex_res.base_level = 0; -+ tex_res.last_level = 0; -+ tex_res.perf_modulation = 0; -+ set_tex_resource (pScrn, accel_state->ib, &tex_res); -+ -+ tex_samp.id = unit; -+ tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; -+ -+ if (pPict->repeat) { -+ switch (pPict->repeatType) { -+ case RepeatNormal: -+ tex_samp.clamp_x = SQ_TEX_WRAP; -+ tex_samp.clamp_y = SQ_TEX_WRAP; -+ break; -+ case RepeatPad: -+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; -+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; -+ break; -+ case RepeatReflect: -+ tex_samp.clamp_x = SQ_TEX_MIRROR; -+ tex_samp.clamp_y = SQ_TEX_MIRROR; -+ break; -+ case RepeatNone: -+ tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; -+ tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; -+ break; -+ default: -+ RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType)); -+ } -+ } else { -+ tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; -+ tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; -+ } -+ -+ switch (pPict->filter) { -+ case PictFilterNearest: -+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; -+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; -+ break; -+ case PictFilterBilinear: -+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; -+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; -+ break; -+ default: -+ RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); -+ } -+ -+ tex_samp.clamp_z = SQ_TEX_WRAP; -+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; -+ tex_samp.mip_filter = 0; /* no mipmap */ -+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); -+ -+ if (pPict->transform != 0) { -+ accel_state->is_transform[unit] = TRUE; -+ accel_state->transform[unit] = pPict->transform; -+ } else -+ accel_state->is_transform[unit] = FALSE; -+ -+ return TRUE; -+} -+ -+static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, -+ PicturePtr pDstPicture) -+{ -+ uint32_t tmp1; -+ PixmapPtr pSrcPixmap, pDstPixmap; -+ int max_tex_w, max_tex_h, max_dst_w, max_dst_h; -+ -+ /* Check for unsupported compositing operations. */ -+ if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0]))) -+ RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); -+ -+ pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); -+ -+ max_tex_w = 8192; -+ max_tex_h = 8192; -+ max_dst_w = 8192; -+ max_dst_h = 8192; -+ -+ if (pSrcPixmap->drawable.width >= max_tex_w || -+ pSrcPixmap->drawable.height >= max_tex_h) { -+ RADEON_FALLBACK(("Source w/h too large (%d,%d).\n", -+ pSrcPixmap->drawable.width, -+ pSrcPixmap->drawable.height)); -+ } -+ -+ pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable); -+ -+ if (pDstPixmap->drawable.width >= max_dst_w || -+ pDstPixmap->drawable.height >= max_dst_h) { -+ RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", -+ pDstPixmap->drawable.width, -+ pDstPixmap->drawable.height)); -+ } -+ -+ if (pMaskPicture) { -+ PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable); -+ -+ if (pMaskPixmap->drawable.width >= max_tex_w || -+ pMaskPixmap->drawable.height >= max_tex_h) { -+ RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", -+ pMaskPixmap->drawable.width, -+ pMaskPixmap->drawable.height)); -+ } -+ -+ if (pMaskPicture->componentAlpha) { -+ /* Check if it's component alpha that relies on a source alpha and -+ * on the source value. We can only get one of those into the -+ * single source value that we get to blend with. -+ */ -+ if (R600BlendOp[op].src_alpha && -+ (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != -+ (BLEND_ZERO << COLOR_SRCBLEND_shift)) { -+ RADEON_FALLBACK(("Component alpha not supported with source " -+ "alpha and source value blending.\n")); -+ } -+ } -+ -+ if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1)) -+ return FALSE; -+ } -+ -+ if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0)) -+ return FALSE; -+ -+ if (!R600GetDestFormat(pDstPicture, &tmp1)) -+ return FALSE; -+ -+ return TRUE; -+ -+} -+ -+static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, -+ PicturePtr pMaskPicture, PicturePtr pDstPicture, -+ PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ uint32_t blendcntl, dst_format; -+ cb_config_t cb_conf; -+ shader_config_t vs_conf, ps_conf; -+ uint32_t ps[24]; -+ -+ /* return FALSE; */ -+ -+ if (pMask) -+ accel_state->has_mask = TRUE; -+ else -+ accel_state->has_mask = FALSE; -+ -+ accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; -+ accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); -+ accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height; -+ -+ if (accel_state->dst_pitch & 7) -+ RADEON_FALLBACK(("Bad dst pitch 0x%x\n", (int)accel_state->dst_pitch)); -+ -+ if (accel_state->dst_mc_addr & 0xff) -+ RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)accel_state->dst_mc_addr)); -+ -+ if (!R600GetDestFormat(pDstPicture, &dst_format)) -+ return FALSE; -+ -+ if (pMask) { -+ int src_a, src_r, src_g, src_b; -+ int mask_a, mask_r, mask_g, mask_b; -+ -+ /* setup pixel shader */ -+ if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) { -+ src_r = SQ_SEL_0; -+ src_g = SQ_SEL_0; -+ src_b = SQ_SEL_0; -+ } else { -+ src_r = SQ_SEL_X; -+ src_g = SQ_SEL_Y; -+ src_b = SQ_SEL_Z; -+ } -+ -+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) { -+ src_a = SQ_SEL_1; -+ } else { -+ src_a = SQ_SEL_W; -+ } -+ -+ if (pMaskPicture->componentAlpha) { -+ if (R600BlendOp[op].src_alpha) { -+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) { -+ src_r = SQ_SEL_1; -+ src_g = SQ_SEL_1; -+ src_b = SQ_SEL_1; -+ src_a = SQ_SEL_1; -+ } else { -+ src_r = SQ_SEL_W; -+ src_g = SQ_SEL_W; -+ src_b = SQ_SEL_W; -+ src_a = SQ_SEL_W; -+ } -+ -+ mask_r = SQ_SEL_X; -+ mask_g = SQ_SEL_Y; -+ mask_b = SQ_SEL_Z; -+ -+ if (PICT_FORMAT_A(pMaskPicture->format) == 0) { -+ mask_a = SQ_SEL_1; -+ } else { -+ mask_a = SQ_SEL_W; -+ } -+ } else { -+ src_r = SQ_SEL_X; -+ src_g = SQ_SEL_Y; -+ src_b = SQ_SEL_Z; -+ -+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) { -+ src_a = SQ_SEL_1; -+ } else { -+ src_a = SQ_SEL_W; -+ } -+ -+ mask_r = SQ_SEL_X; -+ mask_g = SQ_SEL_Y; -+ mask_b = SQ_SEL_Z; -+ -+ if (PICT_FORMAT_A(pMaskPicture->format) == 0) { -+ mask_a = SQ_SEL_1; -+ } else { -+ mask_a = SQ_SEL_W; -+ } -+ } -+ } else { -+ if (PICT_FORMAT_A(pMaskPicture->format) == 0) { -+ mask_r = SQ_SEL_1; -+ mask_g = SQ_SEL_1; -+ mask_b = SQ_SEL_1; -+ } else { -+ mask_r = SQ_SEL_W; -+ mask_g = SQ_SEL_W; -+ mask_b = SQ_SEL_W; -+ } -+ if (PICT_FORMAT_A(pMaskPicture->format) == 0) { -+ mask_a = SQ_SEL_1; -+ } else { -+ mask_a = SQ_SEL_W; -+ } -+ } -+ -+ R600_comp_mask_ps(info->ChipFamily, ps, -+ src_a, src_r, src_g, src_b, -+ mask_a, mask_r, mask_g, mask_b); -+ -+ } else { -+ int src_a, src_r, src_g, src_b; -+ /* setup pixel shader */ -+ if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) { -+ src_r = SQ_SEL_0; -+ src_g = SQ_SEL_0; -+ src_b = SQ_SEL_0; -+ } else { -+ src_r = SQ_SEL_X; -+ src_g = SQ_SEL_Y; -+ src_b = SQ_SEL_Z; -+ } -+ -+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) { -+ src_a = SQ_SEL_1; -+ } else { -+ src_a = SQ_SEL_W; -+ } -+ -+ R600_comp_ps(info->ChipFamily, ps, -+ src_a, src_r, src_g, src_b); -+ -+ } -+ -+ CLEAR (cb_conf); -+ CLEAR (vs_conf); -+ CLEAR (ps_conf); -+ -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ -+ /* Init */ -+ start_3d(pScrn, accel_state->ib); -+ -+ set_default_state(pScrn, accel_state->ib); -+ -+ /* Scissor / viewport */ -+ EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); -+ EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); -+ -+ if (!R600TextureSetup(pSrcPicture, pSrc, 0)) { -+ R600IBDiscard(pScrn, accel_state->ib); -+ return FALSE; -+ } -+ -+ if (pMask) { -+ if (!R600TextureSetup(pMaskPicture, pMask, 1)) { -+ R600IBDiscard(pScrn, accel_state->ib); -+ return FALSE; -+ } -+ } else -+ accel_state->is_transform[1] = FALSE; -+ -+ /* VS bool constant */ -+ if (pMask) -+ set_bool_const(pScrn, accel_state->ib, 1, 1); -+ else -+ set_bool_const(pScrn, accel_state->ib, 1, 0); -+ -+ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -+ accel_state->comp_vs_offset; -+ -+ memcpy ((char *)accel_state->ib->address + (accel_state->ib->total / 2) - 256, ps, sizeof(ps)); -+ accel_state->ps_mc_addr = info->gartLocation + info->dri->bufStart + -+ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2) - 256; -+ -+ accel_state->vs_size = 512; -+ accel_state->ps_size = 512; -+ -+ /* Shader */ -+ -+ /* flush SQ cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, -+ accel_state->vs_size, accel_state->vs_mc_addr); -+ -+ vs_conf.shader_addr = accel_state->vs_mc_addr; -+ vs_conf.num_gprs = 3; -+ vs_conf.stack_size = 1; -+ vs_setup (pScrn, accel_state->ib, &vs_conf); -+ -+ /* flush SQ cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, -+ accel_state->ps_size, accel_state->ps_mc_addr); -+ -+ ps_conf.shader_addr = accel_state->ps_mc_addr; -+ ps_conf.num_gprs = 3; -+ ps_conf.stack_size = 0; -+ ps_conf.uncached_first_inst = 1; -+ ps_conf.clamp_consts = 0; -+ ps_conf.export_mode = 2; -+ ps_setup (pScrn, accel_state->ib, &ps_conf); -+ -+ EREG(accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); -+ EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); -+ -+ blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); -+ -+ if (info->ChipFamily == CHIP_FAMILY_R600) { -+ /* no per-MRT blend on R600 */ -+ EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift)); -+ EREG(accel_state->ib, CB_BLEND_CONTROL, blendcntl); -+ } else { -+ EREG(accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] | -+ (1 << TARGET_BLEND_ENABLE_shift) | -+ PER_MRT_BLEND_bit)); -+ EREG(accel_state->ib, CB_BLEND0_CONTROL, blendcntl); -+ } -+ -+ cb_conf.id = 0; -+ cb_conf.w = accel_state->dst_pitch; -+ cb_conf.h = pDst->drawable.height; -+ cb_conf.base = accel_state->dst_mc_addr; -+ cb_conf.format = dst_format; -+ -+ switch (pDstPicture->format) { -+ case PICT_a8r8g8b8: -+ case PICT_x8r8g8b8: -+ case PICT_a1r5g5b5: -+ case PICT_x1r5g5b5: -+ default: -+ cb_conf.comp_swap = 1; /* ARGB */ -+ break; -+ case PICT_r5g6b5: -+ cb_conf.comp_swap = 2; /* RGB */ -+ break; -+ case PICT_a8: -+ cb_conf.comp_swap = 3; /* A */ -+ break; -+ } -+ cb_conf.source_format = 1; -+ cb_conf.blend_clamp = 1; -+ set_render_target(pScrn, accel_state->ib, &cb_conf); -+ -+ EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | -+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | -+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); -+ EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ -+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ -+ -+ /* Interpolator setup */ -+ if (pMask) { -+ /* export 2 tex coords from VS */ -+ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); -+ /* src = semantic id 0; mask = semantic id 1 */ -+ EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | -+ (1 << SEMANTIC_1_shift))); -+ /* input 2 tex coords from VS */ -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift)); -+ } else { -+ /* export 1 tex coords from VS */ -+ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); -+ /* src = semantic id 0 */ -+ EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); -+ /* input 1 tex coords from VS */ -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift)); -+ } -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); -+ /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ -+ EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | -+ (0x01 << DEFAULT_VAL_shift) | -+ SEL_CENTROID_bit)); -+ /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ -+ EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2), ((1 << SEMANTIC_shift) | -+ (0x01 << DEFAULT_VAL_shift) | -+ SEL_CENTROID_bit)); -+ EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); -+ -+ accel_state->vb_index = 0; -+ -+ return TRUE; -+} -+ -+static void R600Composite(PixmapPtr pDst, -+ int srcX, int srcY, -+ int maskX, int maskY, -+ int dstX, int dstY, -+ int w, int h) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ float *vb; -+ xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight; -+ -+ /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", -+ srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ -+ -+ srcTopLeft.x = IntToxFixed(srcX); -+ srcTopLeft.y = IntToxFixed(srcY); -+ srcTopRight.x = IntToxFixed(srcX + w); -+ srcTopRight.y = IntToxFixed(srcY); -+ srcBottomLeft.x = IntToxFixed(srcX); -+ srcBottomLeft.y = IntToxFixed(srcY + h); -+ srcBottomRight.x = IntToxFixed(srcX + w); -+ srcBottomRight.y = IntToxFixed(srcY + h); -+ -+ /* XXX do transform in vertex shader */ -+ if (accel_state->is_transform[0]) { -+ transformPoint(accel_state->transform[0], &srcTopLeft); -+ transformPoint(accel_state->transform[0], &srcTopRight); -+ transformPoint(accel_state->transform[0], &srcBottomLeft); -+ transformPoint(accel_state->transform[0], &srcBottomRight); -+ } -+ -+ if (accel_state->has_mask) { -+ xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; -+ -+ if (((accel_state->vb_index + 3) * 24) > (accel_state->ib->total / 2)) { -+ R600DoneComposite(pDst); -+ accel_state->vb_index = 0; -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ } -+ -+ vb = (pointer)((char*)accel_state->ib->address + -+ (accel_state->ib->total / 2) + -+ accel_state->vb_index * 24); -+ -+ maskTopLeft.x = IntToxFixed(maskX); -+ maskTopLeft.y = IntToxFixed(maskY); -+ maskTopRight.x = IntToxFixed(maskX + w); -+ maskTopRight.y = IntToxFixed(maskY); -+ maskBottomLeft.x = IntToxFixed(maskX); -+ maskBottomLeft.y = IntToxFixed(maskY + h); -+ maskBottomRight.x = IntToxFixed(maskX + w); -+ maskBottomRight.y = IntToxFixed(maskY + h); -+ -+ if (accel_state->is_transform[1]) { -+ transformPoint(accel_state->transform[1], &maskTopLeft); -+ transformPoint(accel_state->transform[1], &maskTopRight); -+ transformPoint(accel_state->transform[1], &maskBottomLeft); -+ transformPoint(accel_state->transform[1], &maskBottomRight); -+ } -+ -+ vb[0] = (float)dstX; -+ vb[1] = (float)dstY; -+ vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; -+ vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; -+ vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1]; -+ vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1]; -+ -+ vb[6] = (float)dstX; -+ vb[7] = (float)(dstY + h); -+ vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; -+ vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; -+ vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1]; -+ vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1]; -+ -+ vb[12] = (float)(dstX + w); -+ vb[13] = (float)(dstY + h); -+ vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; -+ vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; -+ vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1]; -+ vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1]; -+ -+ } else { -+ if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { -+ R600DoneComposite(pDst); -+ accel_state->vb_index = 0; -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ } -+ -+ vb = (pointer)((char*)accel_state->ib->address + -+ (accel_state->ib->total / 2) + -+ accel_state->vb_index * 16); -+ -+ vb[0] = (float)dstX; -+ vb[1] = (float)dstY; -+ vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; -+ vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; -+ -+ vb[4] = (float)dstX; -+ vb[5] = (float)(dstY + h); -+ vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; -+ vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; -+ -+ vb[8] = (float)(dstX + w); -+ vb[9] = (float)(dstY + h); -+ vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; -+ vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; -+ } -+ -+ accel_state->vb_index += 3; -+ -+} -+ -+static void R600DoneComposite(PixmapPtr pDst) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ draw_config_t draw_conf; -+ vtx_resource_t vtx_res; -+ -+ CLEAR (draw_conf); -+ CLEAR (vtx_res); -+ -+ if (accel_state->vb_index == 0) { -+ R600IBDiscard(pScrn, accel_state->ib); -+ return; -+ } -+ -+ accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + -+ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); -+ -+ -+ /* Vertex buffer setup */ -+ if (accel_state->has_mask) { -+ accel_state->vb_size = accel_state->vb_index * 24; -+ vtx_res.id = SQ_VTX_RESOURCE_vs; -+ vtx_res.vtx_size_dw = 24 / 4; -+ vtx_res.vtx_num_entries = accel_state->vb_size / 4; -+ vtx_res.mem_req_size = 1; -+ vtx_res.vb_addr = accel_state->vb_mc_addr; -+ } else { -+ accel_state->vb_size = accel_state->vb_index * 16; -+ vtx_res.id = SQ_VTX_RESOURCE_vs; -+ vtx_res.vtx_size_dw = 16 / 4; -+ vtx_res.vtx_num_entries = accel_state->vb_size / 4; -+ vtx_res.mem_req_size = 1; -+ vtx_res.vb_addr = accel_state->vb_mc_addr; -+ } -+ /* flush vertex cache */ -+ if ((info->ChipFamily == CHIP_FAMILY_RV610) || -+ (info->ChipFamily == CHIP_FAMILY_RV620) || -+ (info->ChipFamily == CHIP_FAMILY_RS780) || -+ (info->ChipFamily == CHIP_FAMILY_RV710)) -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, -+ accel_state->vb_size, accel_state->vb_mc_addr); -+ else -+ cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, -+ accel_state->vb_size, accel_state->vb_mc_addr); -+ -+ set_vtx_resource (pScrn, accel_state->ib, &vtx_res); -+ -+ draw_conf.prim_type = DI_PT_RECTLIST; -+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; -+ draw_conf.num_instances = 1; -+ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; -+ draw_conf.index_type = DI_INDEX_SIZE_16_BIT; -+ -+ draw_auto(pScrn, accel_state->ib, &draw_conf); -+ -+ wait_3d_idle_clean(pScrn, accel_state->ib); -+ -+ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), -+ accel_state->dst_size, accel_state->dst_mc_addr); -+ -+ R600CPFlushIndirect(pScrn, accel_state->ib); -+} -+ -+Bool -+R600CopyToVRAM(ScrnInfoPtr pScrn, -+ char *src, int src_pitch, -+ uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp, -+ int x, int y, int w, int h) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ uint32_t scratch_mc_addr; -+ int wpass = w * (bpp/8); -+ int scratch_pitch_bytes = (wpass + 255) & ~255; -+ uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); -+ int scratch_offset = 0, hpass, temph; -+ char *dst; -+ drmBufPtr scratch; -+ -+ if (dst_pitch & 7) -+ return FALSE; -+ -+ if (dst_mc_addr & 0xff) -+ return FALSE; -+ -+ scratch = RADEONCPGetBuffer(pScrn); -+ if (scratch == NULL) -+ return FALSE; -+ -+ scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); -+ temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); -+ dst = (char *)scratch->address; -+ -+ /* memcopy from sys to scratch */ -+ while (temph--) { -+ memcpy (dst, src, wpass); -+ src += src_pitch; -+ dst += scratch_pitch_bytes; -+ } -+ -+ while (h) { -+ uint32_t offset = scratch_mc_addr + scratch_offset; -+ int oldhpass = hpass; -+ h -= oldhpass; -+ temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); -+ -+ if (hpass) { -+ scratch_offset = scratch->total/2 - scratch_offset; -+ dst = (char *)scratch->address + scratch_offset; -+ /* wait for the engine to be idle */ -+ RADEONWaitForIdleCP(pScrn); -+ //memcopy from sys to scratch -+ while (temph--) { -+ memcpy (dst, src, wpass); -+ src += src_pitch; -+ dst += scratch_pitch_bytes; -+ } -+ } -+ /* blit from scratch to vram */ -+ R600DoPrepareCopy(pScrn, -+ scratch_pitch, w, oldhpass, offset, bpp, -+ dst_pitch, dst_height, dst_mc_addr, bpp, -+ 3, 0xffffffff); -+ R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); -+ R600DoCopy(pScrn); -+ y += oldhpass; -+ } -+ -+ R600IBDiscard(pScrn, scratch); -+ -+ return TRUE; -+} -+ -+static Bool -+R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, -+ char *src, int src_pitch) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); -+ uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; -+ uint32_t dst_height = pDst->drawable.height; -+ int bpp = pDst->drawable.bitsPerPixel; -+ -+ return R600CopyToVRAM(pScrn, -+ src, src_pitch, -+ dst_pitch, dst_mc_addr, dst_height, bpp, -+ x, y, w, h); -+} -+ -+static Bool -+R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, -+ char *dst, int dst_pitch) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); -+ uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; -+ uint32_t src_width = pSrc->drawable.width; -+ uint32_t src_height = pSrc->drawable.height; -+ int bpp = pSrc->drawable.bitsPerPixel; -+ uint32_t scratch_mc_addr; -+ int scratch_pitch_bytes = (dst_pitch + 255) & ~255; -+ int scratch_offset = 0, hpass; -+ uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); -+ int wpass = w * (bpp/8); -+ drmBufPtr scratch; -+ -+ if (src_pitch & 7) -+ return FALSE; -+ -+ scratch = RADEONCPGetBuffer(pScrn); -+ if (scratch == NULL) -+ return FALSE; -+ -+ scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); -+ hpass = min(h, scratch->total/2 / scratch_pitch_bytes); -+ -+ /* blit from vram to scratch */ -+ R600DoPrepareCopy(pScrn, -+ src_pitch, src_width, src_height, src_mc_addr, bpp, -+ scratch_pitch, hpass, scratch_mc_addr, bpp, -+ 3, 0xffffffff); -+ R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); -+ R600DoCopy(pScrn); -+ -+ while (h) { -+ char *src = (char *)scratch->address + scratch_offset; -+ int oldhpass = hpass; -+ h -= oldhpass; -+ y += oldhpass; -+ hpass = min(h, scratch->total/2 / scratch_pitch_bytes); -+ -+ if (hpass) { -+ scratch_offset = scratch->total/2 - scratch_offset; -+ /* blit from vram to scratch */ -+ R600DoPrepareCopy(pScrn, -+ src_pitch, src_width, src_height, src_mc_addr, bpp, -+ scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp, -+ 3, 0xffffffff); -+ R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); -+ R600DoCopy(pScrn); -+ } -+ -+ /* wait for the engine to be idle */ -+ RADEONWaitForIdleCP(pScrn); -+ /* memcopy from scratch to sys */ -+ while (oldhpass--) { -+ memcpy (dst, src, wpass); -+ dst += dst_pitch; -+ src += scratch_pitch_bytes; -+ } -+ } -+ -+ R600IBDiscard(pScrn, scratch); -+ -+ return TRUE; -+ -+} -+ -+static int -+R600MarkSync(ScreenPtr pScreen) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ -+ return ++accel_state->exaSyncMarker; -+ -+} -+ -+static void -+R600Sync(ScreenPtr pScreen, int marker) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ -+ if (accel_state->exaMarkerSynced != marker) { -+ RADEONWaitForIdleCP(pScrn); -+ accel_state->exaMarkerSynced = marker; -+ } -+ -+} -+ -+static Bool -+R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ -+ /* 512 bytes per shader for now */ -+ int size = 512 * 9; -+ -+ accel_state->shaders = NULL; -+ -+ accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, -+ TRUE, NULL, NULL); -+ -+ if (accel_state->shaders == NULL) -+ return FALSE; -+ return TRUE; -+} -+ -+Bool -+R600LoadShaders(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ RADEONChipFamily ChipSet = info->ChipFamily; -+ uint32_t *shader; -+ -+ shader = (pointer)((char *)info->FB + accel_state->shaders->offset); -+ -+ /* solid vs --------------------------------------- */ -+ accel_state->solid_vs_offset = 0; -+ R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4); -+ -+ /* solid ps --------------------------------------- */ -+ accel_state->solid_ps_offset = 512; -+ R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4); -+ -+ /* copy vs --------------------------------------- */ -+ accel_state->copy_vs_offset = 1024; -+ R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4); -+ -+ /* copy ps --------------------------------------- */ -+ accel_state->copy_ps_offset = 1536; -+ R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4); -+ -+ /* comp vs --------------------------------------- */ -+ accel_state->comp_vs_offset = 2048; -+ R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4); -+ -+ /* comp ps --------------------------------------- */ -+ accel_state->comp_ps_offset = 2560; -+ /* not yet */ -+ -+ /* comp mask ps --------------------------------------- */ -+ accel_state->comp_mask_ps_offset = 3072; -+ /* not yet */ -+ -+ /* xv vs --------------------------------------- */ -+ accel_state->xv_vs_offset = 3584; -+ R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4); -+ -+ /* xv ps --------------------------------------- */ -+ accel_state->xv_ps_offset = 4096; -+ R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); -+ -+ return TRUE; -+} -+ -+static Bool -+R600PrepareAccess(PixmapPtr pPix, int index) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; -+ -+ /* flush HDP read/write caches */ -+ OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); -+ -+ return TRUE; -+} -+ -+static void -+R600FinishAccess(PixmapPtr pPix, int index) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; -+ -+ /* flush HDP read/write caches */ -+ OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); -+ -+} -+ -+ -+Bool -+R600DrawInit(ScreenPtr pScreen) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (info->accel_state->exa == NULL) { -+ xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); -+ return FALSE; -+ } -+ -+ info->accel_state->exa->exa_major = EXA_VERSION_MAJOR; -+ info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; -+ -+ info->accel_state->exa->PrepareSolid = R600PrepareSolid; -+ info->accel_state->exa->Solid = R600Solid; -+ info->accel_state->exa->DoneSolid = R600DoneSolid; -+ -+ info->accel_state->exa->PrepareCopy = R600PrepareCopy; -+ info->accel_state->exa->Copy = R600Copy; -+ info->accel_state->exa->DoneCopy = R600DoneCopy; -+ -+ info->accel_state->exa->MarkSync = R600MarkSync; -+ info->accel_state->exa->WaitMarker = R600Sync; -+ -+ info->accel_state->exa->PrepareAccess = R600PrepareAccess; -+ info->accel_state->exa->FinishAccess = R600FinishAccess; -+ -+ info->accel_state->exa->UploadToScreen = R600UploadToScreen; -+ info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; -+ -+ info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; -+#ifdef EXA_SUPPORTS_PREPARE_AUX -+ info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; -+#endif -+ info->accel_state->exa->pixmapOffsetAlign = 256; -+ info->accel_state->exa->pixmapPitchAlign = 256; -+ -+ info->accel_state->exa->CheckComposite = R600CheckComposite; -+ info->accel_state->exa->PrepareComposite = R600PrepareComposite; -+ info->accel_state->exa->Composite = R600Composite; -+ info->accel_state->exa->DoneComposite = R600DoneComposite; -+ -+#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3) -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); -+ -+ info->accel_state->exa->maxPitchBytes = 16320; -+ info->accel_state->exa->maxX = 8192; -+#else -+ info->accel_state->exa->maxX = 16320 / 4; -+#endif -+ info->accel_state->exa->maxY = 8192; -+ -+ if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); -+ info->accel_state->vsync = TRUE; -+ } else -+ info->accel_state->vsync = FALSE; -+ -+ if (!exaDriverInit(pScreen, info->accel_state->exa)) { -+ xfree(info->accel_state->exa); -+ return FALSE; -+ } -+ -+ if (!info->gartLocation) -+ return FALSE; -+ -+ info->accel_state->XInited3D = FALSE; -+ info->accel_state->copy_area = NULL; -+ -+ if (!R600AllocShaders(pScrn, pScreen)) -+ return FALSE; -+ -+ if (!R600LoadShaders(pScrn)) -+ return FALSE; -+ -+ exaMarkSync(pScreen); -+ -+ return TRUE; -+ -+} -+ -diff --git a/src/r600_reg.h b/src/r600_reg.h -new file mode 100644 -index 0000000..9036e2a ---- /dev/null -+++ b/src/r600_reg.h -@@ -0,0 +1,132 @@ -+/* -+ * RadeonHD R6xx, R7xx Register documentation -+ * -+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. -+ * Copyright (C) 2008-2009 Matthias Hopf -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included -+ * in all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ */ -+ -+#ifndef _R600_REG_H_ -+#define _R600_REG_H_ -+ -+/* -+ * Register definitions -+ */ -+ -+#include "r600_reg_auto_r6xx.h" -+#include "r600_reg_r6xx.h" -+#include "r600_reg_r7xx.h" -+ -+ -+/* SET_*_REG offsets + ends */ -+enum { -+ SET_CONFIG_REG_offset = 0x00008000, -+ SET_CONFIG_REG_end = 0x0000ac00, -+ SET_CONTEXT_REG_offset = 0x00028000, -+ SET_CONTEXT_REG_end = 0x00029000, -+ SET_ALU_CONST_offset = 0x00030000, -+ SET_ALU_CONST_end = 0x00032000, -+ SET_RESOURCE_offset = 0x00038000, -+ SET_RESOURCE_end = 0x0003c000, -+ SET_SAMPLER_offset = 0x0003c000, -+ SET_SAMPLER_end = 0x0003cff0, -+ SET_CTL_CONST_offset = 0x0003cff0, -+ SET_CTL_CONST_end = 0x0003e200, -+ SET_LOOP_CONST_offset = 0x0003e200, -+ SET_LOOP_CONST_end = 0x0003e380, -+ SET_BOOL_CONST_offset = 0x0003e380, -+ SET_BOOL_CONST_end = 0x00040000, -+} ; -+ -+/* packet3 IT_SURFACE_BASE_UPDATE bits */ -+enum { -+ DEPTH_BASE = (1 << 0), -+ COLOR0_BASE = (1 << 1), -+ COLOR1_BASE = (1 << 2), -+ COLOR2_BASE = (1 << 3), -+ COLOR3_BASE = (1 << 4), -+ COLOR4_BASE = (1 << 5), -+ COLOR5_BASE = (1 << 6), -+ COLOR6_BASE = (1 << 7), -+ COLOR7_BASE = (1 << 8), -+ STRMOUT_BASE0 = (1 << 9), -+ STRMOUT_BASE1 = (1 << 10), -+ STRMOUT_BASE2 = (1 << 11), -+ STRMOUT_BASE3 = (1 << 12), -+ COHER_BASE0 = (1 << 13), -+ COHER_BASE1 = (1 << 14), -+}; -+ -+/* Packet3 commands */ -+enum { -+ IT_NOP = 0x10, -+ IT_INDIRECT_BUFFER_END = 0x17, -+ IT_SET_PREDICATION = 0x20, -+ IT_REG_RMW = 0x21, -+ IT_COND_EXEC = 0x22, -+ IT_PRED_EXEC = 0x23, -+ IT_START_3D_CMDBUF = 0x24, -+ IT_DRAW_INDEX_2 = 0x27, -+ IT_CONTEXT_CONTROL = 0x28, -+ IT_DRAW_INDEX_IMMD_BE = 0x29, -+ IT_INDEX_TYPE = 0x2A, -+ IT_DRAW_INDEX = 0x2B, -+ IT_DRAW_INDEX_AUTO = 0x2D, -+ IT_DRAW_INDEX_IMMD = 0x2E, -+ IT_NUM_INSTANCES = 0x2F, -+ IT_STRMOUT_BUFFER_UPDATE = 0x34, -+ IT_INDIRECT_BUFFER_MP = 0x38, -+ IT_MEM_SEMAPHORE = 0x39, -+ IT_MPEG_INDEX = 0x3A, -+ IT_WAIT_REG_MEM = 0x3C, -+ IT_MEM_WRITE = 0x3D, -+ IT_INDIRECT_BUFFER = 0x32, -+ IT_CP_INTERRUPT = 0x40, -+ IT_SURFACE_SYNC = 0x43, -+ IT_ME_INITIALIZE = 0x44, -+ IT_COND_WRITE = 0x45, -+ IT_EVENT_WRITE = 0x46, -+ IT_EVENT_WRITE_EOP = 0x47, -+ IT_ONE_REG_WRITE = 0x57, -+ IT_SET_CONFIG_REG = 0x68, -+ IT_SET_CONTEXT_REG = 0x69, -+ IT_SET_ALU_CONST = 0x6A, -+ IT_SET_BOOL_CONST = 0x6B, -+ IT_SET_LOOP_CONST = 0x6C, -+ IT_SET_RESOURCE = 0x6D, -+ IT_SET_SAMPLER = 0x6E, -+ IT_SET_CTL_CONST = 0x6F, -+ IT_SURFACE_BASE_UPDATE = 0x73, -+} ; -+ -+/* IT_WAIT_REG_MEM operation encoding */ -+ -+#define IT_WAIT_ALWAYS (0<<0) -+#define IT_WAIT_LT (1<<0) -+#define IT_WAIT_LE (2<<0) -+#define IT_WAIT_EQ (3<<0) -+#define IT_WAIT_NE (4<<0) -+#define IT_WAIT_GE (5<<0) -+#define IT_WAIT_GT (6<<0) -+#define IT_WAIT_REG (0<<4) -+#define IT_WAIT_MEM (1<<4) -+ -+#define IT_WAIT_ADDR(x) ((x) >> 2) -+ -+#endif -diff --git a/src/r600_reg_auto_r6xx.h b/src/r600_reg_auto_r6xx.h -new file mode 100644 -index 0000000..9d5aa3c ---- /dev/null -+++ b/src/r600_reg_auto_r6xx.h -@@ -0,0 +1,3087 @@ -+/* -+ * RadeonHD R6xx, R7xx Register documentation -+ * -+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. -+ * Copyright (C) 2008-2009 Matthias Hopf -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included -+ * in all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ */ -+ -+#ifndef _AUTOREGS -+#define _AUTOREGS -+ -+enum { -+ -+ VGT_VTX_VECT_EJECT_REG = 0x000088b0, -+ PRIM_COUNT_mask = 0x3ff << 0, -+ PRIM_COUNT_shift = 0, -+ VGT_LAST_COPY_STATE = 0x000088c0, -+ SRC_STATE_ID_mask = 0x07 << 0, -+ SRC_STATE_ID_shift = 0, -+ DST_STATE_ID_mask = 0x07 << 16, -+ DST_STATE_ID_shift = 16, -+ VGT_CACHE_INVALIDATION = 0x000088c4, -+ CACHE_INVALIDATION_mask = 0x03 << 0, -+ CACHE_INVALIDATION_shift = 0, -+ VC_ONLY = 0x00, -+ TC_ONLY = 0x01, -+ VC_AND_TC = 0x02, -+ VS_NO_EXTRA_BUFFER_bit = 1 << 5, -+ VGT_GS_PER_ES = 0x000088c8, -+ VGT_ES_PER_GS = 0x000088cc, -+ VGT_GS_VERTEX_REUSE = 0x000088d4, -+ VERT_REUSE_mask = 0x1f << 0, -+ VERT_REUSE_shift = 0, -+ VGT_MC_LAT_CNTL = 0x000088d8, -+ MC_TIME_STAMP_RES_mask = 0x03 << 0, -+ MC_TIME_STAMP_RES_shift = 0, -+ X_0_992_MAX_LATENCY = 0x00, -+ X_0_496_MAX_LATENCY = 0x01, -+ X_0_248_MAX_LATENCY = 0x02, -+ X_0_124_MAX_LATENCY = 0x03, -+ VGT_GS_PER_VS = 0x000088e8, -+ GS_PER_VS_mask = 0x0f << 0, -+ GS_PER_VS_shift = 0, -+ VGT_CNTL_STATUS = 0x000088f0, -+ VGT_OUT_INDX_BUSY_bit = 1 << 0, -+ VGT_OUT_BUSY_bit = 1 << 1, -+ VGT_PT_BUSY_bit = 1 << 2, -+ VGT_TE_BUSY_bit = 1 << 3, -+ VGT_VR_BUSY_bit = 1 << 4, -+ VGT_GRP_BUSY_bit = 1 << 5, -+ VGT_DMA_REQ_BUSY_bit = 1 << 6, -+ VGT_DMA_BUSY_bit = 1 << 7, -+ VGT_GS_BUSY_bit = 1 << 8, -+ VGT_BUSY_bit = 1 << 9, -+ VGT_PRIMITIVE_TYPE = 0x00008958, -+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0, -+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0, -+ DI_PT_NONE = 0x00, -+ DI_PT_POINTLIST = 0x01, -+ DI_PT_LINELIST = 0x02, -+ DI_PT_LINESTRIP = 0x03, -+ DI_PT_TRILIST = 0x04, -+ DI_PT_TRIFAN = 0x05, -+ DI_PT_TRISTRIP = 0x06, -+ DI_PT_UNUSED_0 = 0x07, -+ DI_PT_UNUSED_1 = 0x08, -+ DI_PT_UNUSED_2 = 0x09, -+ DI_PT_LINELIST_ADJ = 0x0a, -+ DI_PT_LINESTRIP_ADJ = 0x0b, -+ DI_PT_TRILIST_ADJ = 0x0c, -+ DI_PT_TRISTRIP_ADJ = 0x0d, -+ DI_PT_UNUSED_3 = 0x0e, -+ DI_PT_UNUSED_4 = 0x0f, -+ DI_PT_TRI_WITH_WFLAGS = 0x10, -+ DI_PT_RECTLIST = 0x11, -+ DI_PT_LINELOOP = 0x12, -+ DI_PT_QUADLIST = 0x13, -+ DI_PT_QUADSTRIP = 0x14, -+ DI_PT_POLYGON = 0x15, -+ DI_PT_2D_COPY_RECT_LIST_V0 = 0x16, -+ DI_PT_2D_COPY_RECT_LIST_V1 = 0x17, -+ DI_PT_2D_COPY_RECT_LIST_V2 = 0x18, -+ DI_PT_2D_COPY_RECT_LIST_V3 = 0x19, -+ DI_PT_2D_FILL_RECT_LIST = 0x1a, -+ DI_PT_2D_LINE_STRIP = 0x1b, -+ DI_PT_2D_TRI_STRIP = 0x1c, -+ VGT_INDEX_TYPE = 0x0000895c, -+ INDEX_TYPE_mask = 0x03 << 0, -+ INDEX_TYPE_shift = 0, -+ DI_INDEX_SIZE_16_BIT = 0x00, -+ DI_INDEX_SIZE_32_BIT = 0x01, -+ VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960, -+ VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964, -+ VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968, -+ VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c, -+ VGT_NUM_INDICES = 0x00008970, -+ VGT_NUM_INSTANCES = 0x00008974, -+ PA_CL_CNTL_STATUS = 0x00008a10, -+ CL_BUSY_bit = 1 << 31, -+ PA_CL_ENHANCE = 0x00008a14, -+ CLIP_VTX_REORDER_ENA_bit = 1 << 0, -+ NUM_CLIP_SEQ_mask = 0x03 << 1, -+ NUM_CLIP_SEQ_shift = 1, -+ CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3, -+ VE_NAN_PROC_DISABLE_bit = 1 << 4, -+ PA_SU_CNTL_STATUS = 0x00008a50, -+ SU_BUSY_bit = 1 << 31, -+ PA_SC_LINE_STIPPLE_STATE = 0x00008b10, -+ CURRENT_PTR_mask = 0x0f << 0, -+ CURRENT_PTR_shift = 0, -+ CURRENT_COUNT_mask = 0xff << 8, -+ CURRENT_COUNT_shift = 8, -+ PA_SC_MULTI_CHIP_CNTL = 0x00008b20, -+ LOG2_NUM_CHIPS_mask = 0x07 << 0, -+ LOG2_NUM_CHIPS_shift = 0, -+ MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3, -+ MULTI_CHIP_TILE_SIZE_shift = 3, -+ X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00, -+ X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01, -+ X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02, -+ X_128X128_PIXEL_TILE_PER_CHIP = 0x03, -+ CHIP_TILE_X_LOC_mask = 0x07 << 5, -+ CHIP_TILE_X_LOC_shift = 5, -+ CHIP_TILE_Y_LOC_mask = 0x07 << 8, -+ CHIP_TILE_Y_LOC_shift = 8, -+ CHIP_SUPER_TILE_B_bit = 1 << 11, -+ PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40, -+ S0_X_mask = 0x0f << 0, -+ S0_X_shift = 0, -+ S0_Y_mask = 0x0f << 4, -+ S0_Y_shift = 4, -+ S1_X_mask = 0x0f << 8, -+ S1_X_shift = 8, -+ S1_Y_mask = 0x0f << 12, -+ S1_Y_shift = 12, -+ PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44, -+/* S0_X_mask = 0x0f << 0, */ -+/* S0_X_shift = 0, */ -+/* S0_Y_mask = 0x0f << 4, */ -+/* S0_Y_shift = 4, */ -+/* S1_X_mask = 0x0f << 8, */ -+/* S1_X_shift = 8, */ -+/* S1_Y_mask = 0x0f << 12, */ -+/* S1_Y_shift = 12, */ -+ S2_X_mask = 0x0f << 16, -+ S2_X_shift = 16, -+ S2_Y_mask = 0x0f << 20, -+ S2_Y_shift = 20, -+ S3_X_mask = 0x0f << 24, -+ S3_X_shift = 24, -+ S3_Y_mask = 0x0f << 28, -+ S3_Y_shift = 28, -+ PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48, -+/* S0_X_mask = 0x0f << 0, */ -+/* S0_X_shift = 0, */ -+/* S0_Y_mask = 0x0f << 4, */ -+/* S0_Y_shift = 4, */ -+/* S1_X_mask = 0x0f << 8, */ -+/* S1_X_shift = 8, */ -+/* S1_Y_mask = 0x0f << 12, */ -+/* S1_Y_shift = 12, */ -+/* S2_X_mask = 0x0f << 16, */ -+/* S2_X_shift = 16, */ -+/* S2_Y_mask = 0x0f << 20, */ -+/* S2_Y_shift = 20, */ -+/* S3_X_mask = 0x0f << 24, */ -+/* S3_X_shift = 24, */ -+/* S3_Y_mask = 0x0f << 28, */ -+/* S3_Y_shift = 28, */ -+ PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c, -+ S4_X_mask = 0x0f << 0, -+ S4_X_shift = 0, -+ S4_Y_mask = 0x0f << 4, -+ S4_Y_shift = 4, -+ S5_X_mask = 0x0f << 8, -+ S5_X_shift = 8, -+ S5_Y_mask = 0x0f << 12, -+ S5_Y_shift = 12, -+ S6_X_mask = 0x0f << 16, -+ S6_X_shift = 16, -+ S6_Y_mask = 0x0f << 20, -+ S6_Y_shift = 20, -+ S7_X_mask = 0x0f << 24, -+ S7_X_shift = 24, -+ S7_Y_mask = 0x0f << 28, -+ S7_Y_shift = 28, -+ PA_SC_CNTL_STATUS = 0x00008be0, -+ MPASS_OVERFLOW_bit = 1 << 30, -+ PA_SC_ENHANCE = 0x00008bf0, -+ FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0, -+ FORCE_EOV_MAX_CLK_CNT_shift = 0, -+ FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12, -+ FORCE_EOV_MAX_TILE_CNT_shift = 12, -+ SQ_CONFIG = 0x00008c00, -+ VC_ENABLE_bit = 1 << 0, -+ EXPORT_SRC_C_bit = 1 << 1, -+ DX9_CONSTS_bit = 1 << 2, -+ ALU_INST_PREFER_VECTOR_bit = 1 << 3, -+ SQ_CONFIG__DX10_CLAMP_bit = 1 << 4, -+ ALU_PREFER_ONE_WATERFALL_bit = 1 << 5, -+ ALU_MAX_ONE_WATERFALL_bit = 1 << 6, -+ CLAUSE_SEQ_PRIO_mask = 0x03 << 8, -+ CLAUSE_SEQ_PRIO_shift = 8, -+ SQ_CL_PRIO_RND_ROBIN = 0x00, -+ SQ_CL_PRIO_MACRO_SEQ = 0x01, -+ SQ_CL_PRIO_NONE = 0x02, -+ PS_PRIO_mask = 0x03 << 24, -+ PS_PRIO_shift = 24, -+ VS_PRIO_mask = 0x03 << 26, -+ VS_PRIO_shift = 26, -+ GS_PRIO_mask = 0x03 << 28, -+ GS_PRIO_shift = 28, -+ ES_PRIO_mask = 0x03 << 30, -+ ES_PRIO_shift = 30, -+ SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04, -+ NUM_PS_GPRS_mask = 0xff << 0, -+ NUM_PS_GPRS_shift = 0, -+ NUM_VS_GPRS_mask = 0xff << 16, -+ NUM_VS_GPRS_shift = 16, -+ NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28, -+ NUM_CLAUSE_TEMP_GPRS_shift = 28, -+ SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08, -+ NUM_GS_GPRS_mask = 0xff << 0, -+ NUM_GS_GPRS_shift = 0, -+ NUM_ES_GPRS_mask = 0xff << 16, -+ NUM_ES_GPRS_shift = 16, -+ SQ_THREAD_RESOURCE_MGMT = 0x00008c0c, -+ NUM_PS_THREADS_mask = 0xff << 0, -+ NUM_PS_THREADS_shift = 0, -+ NUM_VS_THREADS_mask = 0xff << 8, -+ NUM_VS_THREADS_shift = 8, -+ NUM_GS_THREADS_mask = 0xff << 16, -+ NUM_GS_THREADS_shift = 16, -+ NUM_ES_THREADS_mask = 0xff << 24, -+ NUM_ES_THREADS_shift = 24, -+ SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10, -+ NUM_PS_STACK_ENTRIES_mask = 0xfff << 0, -+ NUM_PS_STACK_ENTRIES_shift = 0, -+ NUM_VS_STACK_ENTRIES_mask = 0xfff << 16, -+ NUM_VS_STACK_ENTRIES_shift = 16, -+ SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14, -+ NUM_GS_STACK_ENTRIES_mask = 0xfff << 0, -+ NUM_GS_STACK_ENTRIES_shift = 0, -+ NUM_ES_STACK_ENTRIES_mask = 0xfff << 16, -+ NUM_ES_STACK_ENTRIES_shift = 16, -+ SQ_ESGS_RING_BASE = 0x00008c40, -+ SQ_ESGS_RING_SIZE = 0x00008c44, -+ SQ_GSVS_RING_BASE = 0x00008c48, -+ SQ_GSVS_RING_SIZE = 0x00008c4c, -+ SQ_ESTMP_RING_BASE = 0x00008c50, -+ SQ_ESTMP_RING_SIZE = 0x00008c54, -+ SQ_GSTMP_RING_BASE = 0x00008c58, -+ SQ_GSTMP_RING_SIZE = 0x00008c5c, -+ SQ_VSTMP_RING_BASE = 0x00008c60, -+ SQ_VSTMP_RING_SIZE = 0x00008c64, -+ SQ_PSTMP_RING_BASE = 0x00008c68, -+ SQ_PSTMP_RING_SIZE = 0x00008c6c, -+ SQ_FBUF_RING_BASE = 0x00008c70, -+ SQ_FBUF_RING_SIZE = 0x00008c74, -+ SQ_REDUC_RING_BASE = 0x00008c78, -+ SQ_REDUC_RING_SIZE = 0x00008c7c, -+ SQ_ALU_WORD1_OP3 = 0x00008dfc, -+ SRC2_SEL_mask = 0x1ff << 0, -+ SRC2_SEL_shift = 0, -+ SQ_ALU_SRC_0 = 0xf8, -+ SQ_ALU_SRC_1 = 0xf9, -+ SQ_ALU_SRC_1_INT = 0xfa, -+ SQ_ALU_SRC_M_1_INT = 0xfb, -+ SQ_ALU_SRC_0_5 = 0xfc, -+ SQ_ALU_SRC_LITERAL = 0xfd, -+ SQ_ALU_SRC_PV = 0xfe, -+ SQ_ALU_SRC_PS = 0xff, -+ SRC2_REL_bit = 1 << 9, -+ SRC2_CHAN_mask = 0x03 << 10, -+ SRC2_CHAN_shift = 10, -+ SQ_CHAN_X = 0x00, -+ SQ_CHAN_Y = 0x01, -+ SQ_CHAN_Z = 0x02, -+ SQ_CHAN_W = 0x03, -+ SRC2_NEG_bit = 1 << 12, -+ SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, -+ SQ_ALU_WORD1_OP3__ALU_INST_shift = 13, -+ SQ_OP3_INST_MUL_LIT = 0x0c, -+ SQ_OP3_INST_MUL_LIT_M2 = 0x0d, -+ SQ_OP3_INST_MUL_LIT_M4 = 0x0e, -+ SQ_OP3_INST_MUL_LIT_D2 = 0x0f, -+ SQ_OP3_INST_MULADD = 0x10, -+ SQ_OP3_INST_MULADD_M2 = 0x11, -+ SQ_OP3_INST_MULADD_M4 = 0x12, -+ SQ_OP3_INST_MULADD_D2 = 0x13, -+ SQ_OP3_INST_MULADD_IEEE = 0x14, -+ SQ_OP3_INST_MULADD_IEEE_M2 = 0x15, -+ SQ_OP3_INST_MULADD_IEEE_M4 = 0x16, -+ SQ_OP3_INST_MULADD_IEEE_D2 = 0x17, -+ SQ_OP3_INST_CNDE = 0x18, -+ SQ_OP3_INST_CNDGT = 0x19, -+ SQ_OP3_INST_CNDGE = 0x1a, -+ SQ_OP3_INST_CNDE_INT = 0x1c, -+ SQ_OP3_INST_CNDGT_INT = 0x1d, -+ SQ_OP3_INST_CNDGE_INT = 0x1e, -+ SQ_TEX_WORD2 = 0x00008dfc, -+ OFFSET_X_mask = 0x1f << 0, -+ OFFSET_X_shift = 0, -+ OFFSET_Y_mask = 0x1f << 5, -+ OFFSET_Y_shift = 5, -+ OFFSET_Z_mask = 0x1f << 10, -+ OFFSET_Z_shift = 10, -+ SAMPLER_ID_mask = 0x1f << 15, -+ SAMPLER_ID_shift = 15, -+ SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20, -+ SQ_TEX_WORD2__SRC_SEL_X_shift = 20, -+ SQ_SEL_X = 0x00, -+ SQ_SEL_Y = 0x01, -+ SQ_SEL_Z = 0x02, -+ SQ_SEL_W = 0x03, -+ SQ_SEL_0 = 0x04, -+ SQ_SEL_1 = 0x05, -+ SRC_SEL_Y_mask = 0x07 << 23, -+ SRC_SEL_Y_shift = 23, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+ SRC_SEL_Z_mask = 0x07 << 26, -+ SRC_SEL_Z_shift = 26, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+ SRC_SEL_W_mask = 0x07 << 29, -+ SRC_SEL_W_shift = 29, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+ SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, -+ BURST_COUNT_mask = 0x0f << 17, -+ BURST_COUNT_shift = 17, -+ END_OF_PROGRAM_bit = 1 << 21, -+ VALID_PIXEL_MODE_bit = 1 << 22, -+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, -+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 23, -+ SQ_CF_INST_MEM_STREAM0 = 0x20, -+ SQ_CF_INST_MEM_STREAM1 = 0x21, -+ SQ_CF_INST_MEM_STREAM2 = 0x22, -+ SQ_CF_INST_MEM_STREAM3 = 0x23, -+ SQ_CF_INST_MEM_SCRATCH = 0x24, -+ SQ_CF_INST_MEM_REDUCTION = 0x25, -+ SQ_CF_INST_MEM_RING = 0x26, -+ SQ_CF_INST_EXPORT = 0x27, -+ SQ_CF_INST_EXPORT_DONE = 0x28, -+ WHOLE_QUAD_MODE_bit = 1 << 30, -+ BARRIER_bit = 1 << 31, -+ SQ_CF_ALU_WORD1 = 0x00008dfc, -+ KCACHE_MODE1_mask = 0x03 << 0, -+ KCACHE_MODE1_shift = 0, -+ SQ_CF_KCACHE_NOP = 0x00, -+ SQ_CF_KCACHE_LOCK_1 = 0x01, -+ SQ_CF_KCACHE_LOCK_2 = 0x02, -+ SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, -+ KCACHE_ADDR0_mask = 0xff << 2, -+ KCACHE_ADDR0_shift = 2, -+ KCACHE_ADDR1_mask = 0xff << 10, -+ KCACHE_ADDR1_shift = 10, -+ SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18, -+ SQ_CF_ALU_WORD1__COUNT_shift = 18, -+ SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, -+ SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26, -+ SQ_CF_ALU_WORD1__CF_INST_shift = 26, -+ SQ_CF_INST_ALU = 0x08, -+ SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, -+ SQ_CF_INST_ALU_POP_AFTER = 0x0a, -+ SQ_CF_INST_ALU_POP2_AFTER = 0x0b, -+ SQ_CF_INST_ALU_CONTINUE = 0x0d, -+ SQ_CF_INST_ALU_BREAK = 0x0e, -+ SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, -+/* WHOLE_QUAD_MODE_bit = 1 << 30, */ -+/* BARRIER_bit = 1 << 31, */ -+ SQ_TEX_WORD1 = 0x00008dfc, -+ SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0, -+ SQ_TEX_WORD1__DST_GPR_shift = 0, -+ SQ_TEX_WORD1__DST_REL_bit = 1 << 7, -+ SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9, -+ SQ_TEX_WORD1__DST_SEL_X_shift = 9, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+ SQ_SEL_MASK = 0x07, -+ SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12, -+ SQ_TEX_WORD1__DST_SEL_Y_shift = 12, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15, -+ SQ_TEX_WORD1__DST_SEL_Z_shift = 15, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18, -+ SQ_TEX_WORD1__DST_SEL_W_shift = 18, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21, -+ SQ_TEX_WORD1__LOD_BIAS_shift = 21, -+ COORD_TYPE_X_bit = 1 << 28, -+ COORD_TYPE_Y_bit = 1 << 29, -+ COORD_TYPE_Z_bit = 1 << 30, -+ COORD_TYPE_W_bit = 1 << 31, -+ SQ_VTX_WORD0 = 0x00008dfc, -+ VTX_INST_mask = 0x1f << 0, -+ VTX_INST_shift = 0, -+ SQ_VTX_INST_FETCH = 0x00, -+ SQ_VTX_INST_SEMANTIC = 0x01, -+ FETCH_TYPE_mask = 0x03 << 5, -+ FETCH_TYPE_shift = 5, -+ SQ_VTX_FETCH_VERTEX_DATA = 0x00, -+ SQ_VTX_FETCH_INSTANCE_DATA = 0x01, -+ SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02, -+ FETCH_WHOLE_QUAD_bit = 1 << 7, -+ BUFFER_ID_mask = 0xff << 8, -+ BUFFER_ID_shift = 8, -+ SRC_GPR_mask = 0x7f << 16, -+ SRC_GPR_shift = 16, -+ SRC_REL_bit = 1 << 23, -+ SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24, -+ SQ_VTX_WORD0__SRC_SEL_X_shift = 24, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+ MEGA_FETCH_COUNT_mask = 0x3f << 26, -+ MEGA_FETCH_COUNT_shift = 26, -+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc, -+ SEL_X_mask = 0x07 << 0, -+ SEL_X_shift = 0, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SEL_Y_mask = 0x07 << 3, -+ SEL_Y_shift = 3, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SEL_Z_mask = 0x07 << 6, -+ SEL_Z_shift = 6, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SEL_W_mask = 0x07 << 9, -+ SEL_W_shift = 9, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SQ_ALU_WORD1 = 0x00008dfc, -+ ENCODING_mask = 0x07 << 15, -+ ENCODING_shift = 15, -+ BANK_SWIZZLE_mask = 0x07 << 18, -+ BANK_SWIZZLE_shift = 18, -+ SQ_ALU_VEC_012 = 0x00, -+ SQ_ALU_VEC_021 = 0x01, -+ SQ_ALU_VEC_120 = 0x02, -+ SQ_ALU_VEC_102 = 0x03, -+ SQ_ALU_VEC_201 = 0x04, -+ SQ_ALU_VEC_210 = 0x05, -+ SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21, -+ SQ_ALU_WORD1__DST_GPR_shift = 21, -+ SQ_ALU_WORD1__DST_REL_bit = 1 << 28, -+ DST_CHAN_mask = 0x03 << 29, -+ DST_CHAN_shift = 29, -+ CHAN_X = 0x00, -+ CHAN_Y = 0x01, -+ CHAN_Z = 0x02, -+ CHAN_W = 0x03, -+ SQ_ALU_WORD1__CLAMP_bit = 1 << 31, -+ SQ_CF_ALU_WORD0 = 0x00008dfc, -+ SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0, -+ SQ_CF_ALU_WORD0__ADDR_shift = 0, -+ KCACHE_BANK0_mask = 0x0f << 22, -+ KCACHE_BANK0_shift = 22, -+ KCACHE_BANK1_mask = 0x0f << 26, -+ KCACHE_BANK1_shift = 26, -+ KCACHE_MODE0_mask = 0x03 << 30, -+ KCACHE_MODE0_shift = 30, -+/* SQ_CF_KCACHE_NOP = 0x00, */ -+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ -+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ -+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ -+ SQ_VTX_WORD2 = 0x00008dfc, -+ SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0, -+ SQ_VTX_WORD2__OFFSET_shift = 0, -+ SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, -+ SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16, -+ SQ_ENDIAN_NONE = 0x00, -+ SQ_ENDIAN_8IN16 = 0x01, -+ SQ_ENDIAN_8IN32 = 0x02, -+ CONST_BUF_NO_STRIDE_bit = 1 << 18, -+ MEGA_FETCH_bit = 1 << 19, -+ SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, -+ SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, -+ SRC0_ABS_bit = 1 << 0, -+ SRC1_ABS_bit = 1 << 1, -+ UPDATE_EXECUTE_MASK_bit = 1 << 2, -+ UPDATE_PRED_bit = 1 << 3, -+ WRITE_MASK_bit = 1 << 4, -+ SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5, -+ SQ_ALU_WORD1_OP2_V2__OMOD_shift = 5, -+ SQ_ALU_OMOD_OFF = 0x00, -+ SQ_ALU_OMOD_M2 = 0x01, -+ SQ_ALU_OMOD_M4 = 0x02, -+ SQ_ALU_OMOD_D2 = 0x03, -+ SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, -+ SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, -+ SQ_OP2_INST_ADD = 0x00, -+ SQ_OP2_INST_MUL = 0x01, -+ SQ_OP2_INST_MUL_IEEE = 0x02, -+ SQ_OP2_INST_MAX = 0x03, -+ SQ_OP2_INST_MIN = 0x04, -+ SQ_OP2_INST_MAX_DX10 = 0x05, -+ SQ_OP2_INST_MIN_DX10 = 0x06, -+ SQ_OP2_INST_SETE = 0x08, -+ SQ_OP2_INST_SETGT = 0x09, -+ SQ_OP2_INST_SETGE = 0x0a, -+ SQ_OP2_INST_SETNE = 0x0b, -+ SQ_OP2_INST_SETE_DX10 = 0x0c, -+ SQ_OP2_INST_SETGT_DX10 = 0x0d, -+ SQ_OP2_INST_SETGE_DX10 = 0x0e, -+ SQ_OP2_INST_SETNE_DX10 = 0x0f, -+ SQ_OP2_INST_FRACT = 0x10, -+ SQ_OP2_INST_TRUNC = 0x11, -+ SQ_OP2_INST_CEIL = 0x12, -+ SQ_OP2_INST_RNDNE = 0x13, -+ SQ_OP2_INST_FLOOR = 0x14, -+ SQ_OP2_INST_MOVA = 0x15, -+ SQ_OP2_INST_MOVA_FLOOR = 0x16, -+ SQ_OP2_INST_MOVA_INT = 0x18, -+ SQ_OP2_INST_MOV = 0x19, -+ SQ_OP2_INST_NOP = 0x1a, -+ SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, -+ SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, -+ SQ_OP2_INST_PRED_SETE = 0x20, -+ SQ_OP2_INST_PRED_SETGT = 0x21, -+ SQ_OP2_INST_PRED_SETGE = 0x22, -+ SQ_OP2_INST_PRED_SETNE = 0x23, -+ SQ_OP2_INST_PRED_SET_INV = 0x24, -+ SQ_OP2_INST_PRED_SET_POP = 0x25, -+ SQ_OP2_INST_PRED_SET_CLR = 0x26, -+ SQ_OP2_INST_PRED_SET_RESTORE = 0x27, -+ SQ_OP2_INST_PRED_SETE_PUSH = 0x28, -+ SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, -+ SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, -+ SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, -+ SQ_OP2_INST_KILLE = 0x2c, -+ SQ_OP2_INST_KILLGT = 0x2d, -+ SQ_OP2_INST_KILLGE = 0x2e, -+ SQ_OP2_INST_KILLNE = 0x2f, -+ SQ_OP2_INST_AND_INT = 0x30, -+ SQ_OP2_INST_OR_INT = 0x31, -+ SQ_OP2_INST_XOR_INT = 0x32, -+ SQ_OP2_INST_NOT_INT = 0x33, -+ SQ_OP2_INST_ADD_INT = 0x34, -+ SQ_OP2_INST_SUB_INT = 0x35, -+ SQ_OP2_INST_MAX_INT = 0x36, -+ SQ_OP2_INST_MIN_INT = 0x37, -+ SQ_OP2_INST_MAX_UINT = 0x38, -+ SQ_OP2_INST_MIN_UINT = 0x39, -+ SQ_OP2_INST_SETE_INT = 0x3a, -+ SQ_OP2_INST_SETGT_INT = 0x3b, -+ SQ_OP2_INST_SETGE_INT = 0x3c, -+ SQ_OP2_INST_SETNE_INT = 0x3d, -+ SQ_OP2_INST_SETGT_UINT = 0x3e, -+ SQ_OP2_INST_SETGE_UINT = 0x3f, -+ SQ_OP2_INST_KILLGT_UINT = 0x40, -+ SQ_OP2_INST_KILLGE_UINT = 0x41, -+ SQ_OP2_INST_PRED_SETE_INT = 0x42, -+ SQ_OP2_INST_PRED_SETGT_INT = 0x43, -+ SQ_OP2_INST_PRED_SETGE_INT = 0x44, -+ SQ_OP2_INST_PRED_SETNE_INT = 0x45, -+ SQ_OP2_INST_KILLE_INT = 0x46, -+ SQ_OP2_INST_KILLGT_INT = 0x47, -+ SQ_OP2_INST_KILLGE_INT = 0x48, -+ SQ_OP2_INST_KILLNE_INT = 0x49, -+ SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, -+ SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, -+ SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, -+ SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, -+ SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, -+ SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, -+ SQ_OP2_INST_DOT4 = 0x50, -+ SQ_OP2_INST_DOT4_IEEE = 0x51, -+ SQ_OP2_INST_CUBE = 0x52, -+ SQ_OP2_INST_MAX4 = 0x53, -+ SQ_OP2_INST_MOVA_GPR_INT = 0x60, -+ SQ_OP2_INST_EXP_IEEE = 0x61, -+ SQ_OP2_INST_LOG_CLAMPED = 0x62, -+ SQ_OP2_INST_LOG_IEEE = 0x63, -+ SQ_OP2_INST_RECIP_CLAMPED = 0x64, -+ SQ_OP2_INST_RECIP_FF = 0x65, -+ SQ_OP2_INST_RECIP_IEEE = 0x66, -+ SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, -+ SQ_OP2_INST_RECIPSQRT_FF = 0x68, -+ SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, -+ SQ_OP2_INST_SQRT_IEEE = 0x6a, -+ SQ_OP2_INST_FLT_TO_INT = 0x6b, -+ SQ_OP2_INST_INT_TO_FLT = 0x6c, -+ SQ_OP2_INST_UINT_TO_FLT = 0x6d, -+ SQ_OP2_INST_SIN = 0x6e, -+ SQ_OP2_INST_COS = 0x6f, -+ SQ_OP2_INST_ASHR_INT = 0x70, -+ SQ_OP2_INST_LSHR_INT = 0x71, -+ SQ_OP2_INST_LSHL_INT = 0x72, -+ SQ_OP2_INST_MULLO_INT = 0x73, -+ SQ_OP2_INST_MULHI_INT = 0x74, -+ SQ_OP2_INST_MULLO_UINT = 0x75, -+ SQ_OP2_INST_MULHI_UINT = 0x76, -+ SQ_OP2_INST_RECIP_INT = 0x77, -+ SQ_OP2_INST_RECIP_UINT = 0x78, -+ SQ_OP2_INST_FLT_TO_UINT = 0x79, -+ SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc, -+ ARRAY_SIZE_mask = 0xfff << 0, -+ ARRAY_SIZE_shift = 0, -+ COMP_MASK_mask = 0x0f << 12, -+ COMP_MASK_shift = 12, -+ SQ_CF_WORD0 = 0x00008dfc, -+ SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, -+ ARRAY_BASE_mask = 0x1fff << 0, -+ ARRAY_BASE_shift = 0, -+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13, -+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13, -+ SQ_EXPORT_PIXEL = 0x00, -+ SQ_EXPORT_POS = 0x01, -+ SQ_EXPORT_PARAM = 0x02, -+ X_UNUSED_FOR_SX_EXPORTS = 0x03, -+ RW_GPR_mask = 0x7f << 15, -+ RW_GPR_shift = 15, -+ RW_REL_bit = 1 << 22, -+ INDEX_GPR_mask = 0x7f << 23, -+ INDEX_GPR_shift = 23, -+ ELEM_SIZE_mask = 0x03 << 30, -+ ELEM_SIZE_shift = 30, -+ SQ_VTX_WORD1 = 0x00008dfc, -+ SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9, -+ SQ_VTX_WORD1__DST_SEL_X_shift = 9, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12, -+ SQ_VTX_WORD1__DST_SEL_Y_shift = 12, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15, -+ SQ_VTX_WORD1__DST_SEL_Z_shift = 15, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18, -+ SQ_VTX_WORD1__DST_SEL_W_shift = 18, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+/* SQ_SEL_MASK = 0x07, */ -+ USE_CONST_FIELDS_bit = 1 << 21, -+ SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22, -+ SQ_VTX_WORD1__DATA_FORMAT_shift = 22, -+ SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, -+ SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28, -+ SQ_NUM_FORMAT_NORM = 0x00, -+ SQ_NUM_FORMAT_INT = 0x01, -+ SQ_NUM_FORMAT_SCALED = 0x02, -+ SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, -+ SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31, -+ SQ_ALU_WORD1_OP2 = 0x00008dfc, -+/* SRC0_ABS_bit = 1 << 0, */ -+/* SRC1_ABS_bit = 1 << 1, */ -+/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */ -+/* UPDATE_PRED_bit = 1 << 3, */ -+/* WRITE_MASK_bit = 1 << 4, */ -+ FOG_MERGE_bit = 1 << 5, -+ SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6, -+ SQ_ALU_WORD1_OP2__OMOD_shift = 6, -+/* SQ_ALU_OMOD_OFF = 0x00, */ -+/* SQ_ALU_OMOD_M2 = 0x01, */ -+/* SQ_ALU_OMOD_M4 = 0x02, */ -+/* SQ_ALU_OMOD_D2 = 0x03, */ -+ SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, -+ SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, -+/* SQ_OP2_INST_ADD = 0x00, */ -+/* SQ_OP2_INST_MUL = 0x01, */ -+/* SQ_OP2_INST_MUL_IEEE = 0x02, */ -+/* SQ_OP2_INST_MAX = 0x03, */ -+/* SQ_OP2_INST_MIN = 0x04, */ -+/* SQ_OP2_INST_MAX_DX10 = 0x05, */ -+/* SQ_OP2_INST_MIN_DX10 = 0x06, */ -+/* SQ_OP2_INST_SETE = 0x08, */ -+/* SQ_OP2_INST_SETGT = 0x09, */ -+/* SQ_OP2_INST_SETGE = 0x0a, */ -+/* SQ_OP2_INST_SETNE = 0x0b, */ -+/* SQ_OP2_INST_SETE_DX10 = 0x0c, */ -+/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */ -+/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */ -+/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */ -+/* SQ_OP2_INST_FRACT = 0x10, */ -+/* SQ_OP2_INST_TRUNC = 0x11, */ -+/* SQ_OP2_INST_CEIL = 0x12, */ -+/* SQ_OP2_INST_RNDNE = 0x13, */ -+/* SQ_OP2_INST_FLOOR = 0x14, */ -+/* SQ_OP2_INST_MOVA = 0x15, */ -+/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */ -+/* SQ_OP2_INST_MOVA_INT = 0x18, */ -+/* SQ_OP2_INST_MOV = 0x19, */ -+/* SQ_OP2_INST_NOP = 0x1a, */ -+/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */ -+/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */ -+/* SQ_OP2_INST_PRED_SETE = 0x20, */ -+/* SQ_OP2_INST_PRED_SETGT = 0x21, */ -+/* SQ_OP2_INST_PRED_SETGE = 0x22, */ -+/* SQ_OP2_INST_PRED_SETNE = 0x23, */ -+/* SQ_OP2_INST_PRED_SET_INV = 0x24, */ -+/* SQ_OP2_INST_PRED_SET_POP = 0x25, */ -+/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */ -+/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */ -+/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */ -+/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */ -+/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */ -+/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */ -+/* SQ_OP2_INST_KILLE = 0x2c, */ -+/* SQ_OP2_INST_KILLGT = 0x2d, */ -+/* SQ_OP2_INST_KILLGE = 0x2e, */ -+/* SQ_OP2_INST_KILLNE = 0x2f, */ -+/* SQ_OP2_INST_AND_INT = 0x30, */ -+/* SQ_OP2_INST_OR_INT = 0x31, */ -+/* SQ_OP2_INST_XOR_INT = 0x32, */ -+/* SQ_OP2_INST_NOT_INT = 0x33, */ -+/* SQ_OP2_INST_ADD_INT = 0x34, */ -+/* SQ_OP2_INST_SUB_INT = 0x35, */ -+/* SQ_OP2_INST_MAX_INT = 0x36, */ -+/* SQ_OP2_INST_MIN_INT = 0x37, */ -+/* SQ_OP2_INST_MAX_UINT = 0x38, */ -+/* SQ_OP2_INST_MIN_UINT = 0x39, */ -+/* SQ_OP2_INST_SETE_INT = 0x3a, */ -+/* SQ_OP2_INST_SETGT_INT = 0x3b, */ -+/* SQ_OP2_INST_SETGE_INT = 0x3c, */ -+/* SQ_OP2_INST_SETNE_INT = 0x3d, */ -+/* SQ_OP2_INST_SETGT_UINT = 0x3e, */ -+/* SQ_OP2_INST_SETGE_UINT = 0x3f, */ -+/* SQ_OP2_INST_KILLGT_UINT = 0x40, */ -+/* SQ_OP2_INST_KILLGE_UINT = 0x41, */ -+/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */ -+/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */ -+/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */ -+/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */ -+/* SQ_OP2_INST_KILLE_INT = 0x46, */ -+/* SQ_OP2_INST_KILLGT_INT = 0x47, */ -+/* SQ_OP2_INST_KILLGE_INT = 0x48, */ -+/* SQ_OP2_INST_KILLNE_INT = 0x49, */ -+/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */ -+/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */ -+/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */ -+/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */ -+/* SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */ -+/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */ -+/* SQ_OP2_INST_DOT4 = 0x50, */ -+/* SQ_OP2_INST_DOT4_IEEE = 0x51, */ -+/* SQ_OP2_INST_CUBE = 0x52, */ -+/* SQ_OP2_INST_MAX4 = 0x53, */ -+/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */ -+/* SQ_OP2_INST_EXP_IEEE = 0x61, */ -+/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */ -+/* SQ_OP2_INST_LOG_IEEE = 0x63, */ -+/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */ -+/* SQ_OP2_INST_RECIP_FF = 0x65, */ -+/* SQ_OP2_INST_RECIP_IEEE = 0x66, */ -+/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */ -+/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */ -+/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */ -+/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */ -+/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */ -+/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */ -+/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */ -+/* SQ_OP2_INST_SIN = 0x6e, */ -+/* SQ_OP2_INST_COS = 0x6f, */ -+/* SQ_OP2_INST_ASHR_INT = 0x70, */ -+/* SQ_OP2_INST_LSHR_INT = 0x71, */ -+/* SQ_OP2_INST_LSHL_INT = 0x72, */ -+/* SQ_OP2_INST_MULLO_INT = 0x73, */ -+/* SQ_OP2_INST_MULHI_INT = 0x74, */ -+/* SQ_OP2_INST_MULLO_UINT = 0x75, */ -+/* SQ_OP2_INST_MULHI_UINT = 0x76, */ -+/* SQ_OP2_INST_RECIP_INT = 0x77, */ -+/* SQ_OP2_INST_RECIP_UINT = 0x78, */ -+/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */ -+ SQ_CF_WORD1 = 0x00008dfc, -+ POP_COUNT_mask = 0x07 << 0, -+ POP_COUNT_shift = 0, -+ CF_CONST_mask = 0x1f << 3, -+ CF_CONST_shift = 3, -+ COND_mask = 0x03 << 8, -+ COND_shift = 8, -+ SQ_CF_COND_ACTIVE = 0x00, -+ SQ_CF_COND_FALSE = 0x01, -+ SQ_CF_COND_BOOL = 0x02, -+ SQ_CF_COND_NOT_BOOL = 0x03, -+ SQ_CF_WORD1__COUNT_mask = 0x07 << 10, -+ SQ_CF_WORD1__COUNT_shift = 10, -+ CALL_COUNT_mask = 0x3f << 13, -+ CALL_COUNT_shift = 13, -+ COUNT_3_bit = 1 << 19, -+/* END_OF_PROGRAM_bit = 1 << 21, */ -+/* VALID_PIXEL_MODE_bit = 1 << 22, */ -+ SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, -+ SQ_CF_WORD1__CF_INST_shift = 23, -+ SQ_CF_INST_NOP = 0x00, -+ SQ_CF_INST_TEX = 0x01, -+ SQ_CF_INST_VTX = 0x02, -+ SQ_CF_INST_VTX_TC = 0x03, -+ SQ_CF_INST_LOOP_START = 0x04, -+ SQ_CF_INST_LOOP_END = 0x05, -+ SQ_CF_INST_LOOP_START_DX10 = 0x06, -+ SQ_CF_INST_LOOP_START_NO_AL = 0x07, -+ SQ_CF_INST_LOOP_CONTINUE = 0x08, -+ SQ_CF_INST_LOOP_BREAK = 0x09, -+ SQ_CF_INST_JUMP = 0x0a, -+ SQ_CF_INST_PUSH = 0x0b, -+ SQ_CF_INST_PUSH_ELSE = 0x0c, -+ SQ_CF_INST_ELSE = 0x0d, -+ SQ_CF_INST_POP = 0x0e, -+ SQ_CF_INST_POP_JUMP = 0x0f, -+ SQ_CF_INST_POP_PUSH = 0x10, -+ SQ_CF_INST_POP_PUSH_ELSE = 0x11, -+ SQ_CF_INST_CALL = 0x12, -+ SQ_CF_INST_CALL_FS = 0x13, -+ SQ_CF_INST_RETURN = 0x14, -+ SQ_CF_INST_EMIT_VERTEX = 0x15, -+ SQ_CF_INST_EMIT_CUT_VERTEX = 0x16, -+ SQ_CF_INST_CUT_VERTEX = 0x17, -+ SQ_CF_INST_KILL = 0x18, -+/* WHOLE_QUAD_MODE_bit = 1 << 30, */ -+/* BARRIER_bit = 1 << 31, */ -+ SQ_VTX_WORD1_SEM = 0x00008dfc, -+ SEMANTIC_ID_mask = 0xff << 0, -+ SEMANTIC_ID_shift = 0, -+ SQ_TEX_WORD0 = 0x00008dfc, -+ TEX_INST_mask = 0x1f << 0, -+ TEX_INST_shift = 0, -+ SQ_TEX_INST_VTX_FETCH = 0x00, -+ SQ_TEX_INST_VTX_SEMANTIC = 0x01, -+ SQ_TEX_INST_LD = 0x03, -+ SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04, -+ SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05, -+ SQ_TEX_INST_GET_LOD = 0x06, -+ SQ_TEX_INST_GET_GRADIENTS_H = 0x07, -+ SQ_TEX_INST_GET_GRADIENTS_V = 0x08, -+ SQ_TEX_INST_GET_LERP = 0x09, -+ SQ_TEX_INST_RESERVED_10 = 0x0a, -+ SQ_TEX_INST_SET_GRADIENTS_H = 0x0b, -+ SQ_TEX_INST_SET_GRADIENTS_V = 0x0c, -+ SQ_TEX_INST_PASS = 0x0d, -+ X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e, -+ SQ_TEX_INST_SAMPLE = 0x10, -+ SQ_TEX_INST_SAMPLE_L = 0x11, -+ SQ_TEX_INST_SAMPLE_LB = 0x12, -+ SQ_TEX_INST_SAMPLE_LZ = 0x13, -+ SQ_TEX_INST_SAMPLE_G = 0x14, -+ SQ_TEX_INST_SAMPLE_G_L = 0x15, -+ SQ_TEX_INST_SAMPLE_G_LB = 0x16, -+ SQ_TEX_INST_SAMPLE_G_LZ = 0x17, -+ SQ_TEX_INST_SAMPLE_C = 0x18, -+ SQ_TEX_INST_SAMPLE_C_L = 0x19, -+ SQ_TEX_INST_SAMPLE_C_LB = 0x1a, -+ SQ_TEX_INST_SAMPLE_C_LZ = 0x1b, -+ SQ_TEX_INST_SAMPLE_C_G = 0x1c, -+ SQ_TEX_INST_SAMPLE_C_G_L = 0x1d, -+ SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e, -+ SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f, -+ BC_FRAC_MODE_bit = 1 << 5, -+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ -+ RESOURCE_ID_mask = 0xff << 8, -+ RESOURCE_ID_shift = 8, -+/* SRC_GPR_mask = 0x7f << 16, */ -+/* SRC_GPR_shift = 16, */ -+/* SRC_REL_bit = 1 << 23, */ -+ SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, -+ SQ_VTX_WORD1_GPR = 0x00008dfc, -+ SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0, -+ SQ_VTX_WORD1_GPR__DST_GPR_shift = 0, -+ SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7, -+ SQ_ALU_WORD0 = 0x00008dfc, -+ SRC0_SEL_mask = 0x1ff << 0, -+ SRC0_SEL_shift = 0, -+/* SQ_ALU_SRC_0 = 0xf8, */ -+/* SQ_ALU_SRC_1 = 0xf9, */ -+/* SQ_ALU_SRC_1_INT = 0xfa, */ -+/* SQ_ALU_SRC_M_1_INT = 0xfb, */ -+/* SQ_ALU_SRC_0_5 = 0xfc, */ -+/* SQ_ALU_SRC_LITERAL = 0xfd, */ -+/* SQ_ALU_SRC_PV = 0xfe, */ -+/* SQ_ALU_SRC_PS = 0xff, */ -+ SRC0_REL_bit = 1 << 9, -+ SRC0_CHAN_mask = 0x03 << 10, -+ SRC0_CHAN_shift = 10, -+/* SQ_CHAN_X = 0x00, */ -+/* SQ_CHAN_Y = 0x01, */ -+/* SQ_CHAN_Z = 0x02, */ -+/* SQ_CHAN_W = 0x03, */ -+ SRC0_NEG_bit = 1 << 12, -+ SRC1_SEL_mask = 0x1ff << 13, -+ SRC1_SEL_shift = 13, -+/* SQ_ALU_SRC_0 = 0xf8, */ -+/* SQ_ALU_SRC_1 = 0xf9, */ -+/* SQ_ALU_SRC_1_INT = 0xfa, */ -+/* SQ_ALU_SRC_M_1_INT = 0xfb, */ -+/* SQ_ALU_SRC_0_5 = 0xfc, */ -+/* SQ_ALU_SRC_LITERAL = 0xfd, */ -+/* SQ_ALU_SRC_PV = 0xfe, */ -+/* SQ_ALU_SRC_PS = 0xff, */ -+ SRC1_REL_bit = 1 << 22, -+ SRC1_CHAN_mask = 0x03 << 23, -+ SRC1_CHAN_shift = 23, -+/* SQ_CHAN_X = 0x00, */ -+/* SQ_CHAN_Y = 0x01, */ -+/* SQ_CHAN_Z = 0x02, */ -+/* SQ_CHAN_W = 0x03, */ -+ SRC1_NEG_bit = 1 << 25, -+ INDEX_MODE_mask = 0x07 << 26, -+ INDEX_MODE_shift = 26, -+ SQ_INDEX_AR_X = 0x00, -+ SQ_INDEX_AR_Y = 0x01, -+ SQ_INDEX_AR_Z = 0x02, -+ SQ_INDEX_AR_W = 0x03, -+ SQ_INDEX_LOOP = 0x04, -+ PRED_SEL_mask = 0x03 << 29, -+ PRED_SEL_shift = 29, -+ SQ_PRED_SEL_OFF = 0x00, -+ SQ_PRED_SEL_ZERO = 0x02, -+ SQ_PRED_SEL_ONE = 0x03, -+ LAST_bit = 1 << 31, -+ SX_EXPORT_BUFFER_SIZES = 0x0000900c, -+ COLOR_BUFFER_SIZE_mask = 0xff << 0, -+ COLOR_BUFFER_SIZE_shift = 0, -+ POSITION_BUFFER_SIZE_mask = 0xff << 8, -+ POSITION_BUFFER_SIZE_shift = 8, -+ SMX_BUFFER_SIZE_mask = 0xff << 16, -+ SMX_BUFFER_SIZE_shift = 16, -+ SX_MEMORY_EXPORT_BASE = 0x00009010, -+ SX_MEMORY_EXPORT_SIZE = 0x00009014, -+ SPI_CONFIG_CNTL = 0x00009100, -+ GPR_WRITE_PRIORITY_mask = 0x1f << 0, -+ GPR_WRITE_PRIORITY_shift = 0, -+ X_PRIORITY_ORDER = 0x00, -+ X_PRIORITY_ORDER_VS = 0x01, -+ DISABLE_INTERP_1_bit = 1 << 5, -+ DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6, -+ DEBUG_THREAD_TYPE_SEL_shift = 6, -+ DEBUG_GROUP_SEL_mask = 0x1f << 8, -+ DEBUG_GROUP_SEL_shift = 8, -+ DEBUG_GRBM_OVERRIDE_bit = 1 << 13, -+ SPI_CONFIG_CNTL_1 = 0x0000913c, -+ VTX_DONE_DELAY_mask = 0x0f << 0, -+ VTX_DONE_DELAY_shift = 0, -+ X_DELAY_10_CLKS = 0x00, -+ X_DELAY_11_CLKS = 0x01, -+ X_DELAY_12_CLKS = 0x02, -+ X_DELAY_13_CLKS = 0x03, -+ X_DELAY_14_CLKS = 0x04, -+ X_DELAY_15_CLKS = 0x05, -+ X_DELAY_16_CLKS = 0x06, -+ X_DELAY_17_CLKS = 0x07, -+ X_DELAY_2_CLKS = 0x08, -+ X_DELAY_3_CLKS = 0x09, -+ X_DELAY_4_CLKS = 0x0a, -+ X_DELAY_5_CLKS = 0x0b, -+ X_DELAY_6_CLKS = 0x0c, -+ X_DELAY_7_CLKS = 0x0d, -+ X_DELAY_8_CLKS = 0x0e, -+ X_DELAY_9_CLKS = 0x0f, -+ INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4, -+ TD_FILTER4 = 0x00009400, -+ WEIGHT_1_mask = 0x7ff << 0, -+ WEIGHT_1_shift = 0, -+ WEIGHT_0_mask = 0x7ff << 11, -+ WEIGHT_0_shift = 11, -+ WEIGHT_PAIR_bit = 1 << 22, -+ PHASE_mask = 0x0f << 23, -+ PHASE_shift = 23, -+ DIRECTION_bit = 1 << 27, -+ TD_FILTER4_1 = 0x00009404, -+ TD_FILTER4_1_num = 35, -+/* WEIGHT_1_mask = 0x7ff << 0, */ -+/* WEIGHT_1_shift = 0, */ -+/* WEIGHT_0_mask = 0x7ff << 11, */ -+/* WEIGHT_0_shift = 11, */ -+ TD_CNTL = 0x00009490, -+ SYNC_PHASE_SH_mask = 0x03 << 0, -+ SYNC_PHASE_SH_shift = 0, -+ SYNC_PHASE_VC_SMX_mask = 0x03 << 4, -+ SYNC_PHASE_VC_SMX_shift = 4, -+ TD0_CNTL = 0x00009494, -+ TD0_CNTL_num = 4, -+ ID_OVERRIDE_mask = 0x03 << 28, -+ ID_OVERRIDE_shift = 28, -+ TD0_STATUS = 0x000094a4, -+ TD0_STATUS_num = 4, -+ BUSY_bit = 1 << 31, -+ TA_CNTL = 0x00009504, -+ GRADIENT_CREDIT_mask = 0x1f << 0, -+ GRADIENT_CREDIT_shift = 0, -+ WALKER_CREDIT_mask = 0x1f << 8, -+ WALKER_CREDIT_shift = 8, -+ ALIGNER_CREDIT_mask = 0x1f << 16, -+ ALIGNER_CREDIT_shift = 16, -+ TD_FIFO_CREDIT_mask = 0x3ff << 22, -+ TD_FIFO_CREDIT_shift = 22, -+ TA_CNTL_AUX = 0x00009508, -+ DISABLE_CUBE_WRAP_bit = 1 << 0, -+ SYNC_GRADIENT_bit = 1 << 24, -+ SYNC_WALKER_bit = 1 << 25, -+ SYNC_ALIGNER_bit = 1 << 26, -+ BILINEAR_PRECISION_bit = 1 << 31, -+ TA0_CNTL = 0x00009510, -+/* ID_OVERRIDE_mask = 0x03 << 28, */ -+/* ID_OVERRIDE_shift = 28, */ -+ TA1_CNTL = 0x00009514, -+/* ID_OVERRIDE_mask = 0x03 << 28, */ -+/* ID_OVERRIDE_shift = 28, */ -+ TA2_CNTL = 0x00009518, -+/* ID_OVERRIDE_mask = 0x03 << 28, */ -+/* ID_OVERRIDE_shift = 28, */ -+ TA3_CNTL = 0x0000951c, -+/* ID_OVERRIDE_mask = 0x03 << 28, */ -+/* ID_OVERRIDE_shift = 28, */ -+ TA0_STATUS = 0x00009520, -+ FG_PFIFO_EMPTYB_bit = 1 << 12, -+ FG_LFIFO_EMPTYB_bit = 1 << 13, -+ FG_SFIFO_EMPTYB_bit = 1 << 14, -+ FL_PFIFO_EMPTYB_bit = 1 << 16, -+ FL_LFIFO_EMPTYB_bit = 1 << 17, -+ FL_SFIFO_EMPTYB_bit = 1 << 18, -+ FA_PFIFO_EMPTYB_bit = 1 << 20, -+ FA_LFIFO_EMPTYB_bit = 1 << 21, -+ FA_SFIFO_EMPTYB_bit = 1 << 22, -+ IN_BUSY_bit = 1 << 24, -+ FG_BUSY_bit = 1 << 25, -+ FL_BUSY_bit = 1 << 27, -+ TA_BUSY_bit = 1 << 28, -+ FA_BUSY_bit = 1 << 29, -+ AL_BUSY_bit = 1 << 30, -+/* BUSY_bit = 1 << 31, */ -+ TA1_STATUS = 0x00009524, -+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ -+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ -+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ -+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ -+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ -+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ -+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ -+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ -+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ -+/* IN_BUSY_bit = 1 << 24, */ -+/* FG_BUSY_bit = 1 << 25, */ -+/* FL_BUSY_bit = 1 << 27, */ -+/* TA_BUSY_bit = 1 << 28, */ -+/* FA_BUSY_bit = 1 << 29, */ -+/* AL_BUSY_bit = 1 << 30, */ -+/* BUSY_bit = 1 << 31, */ -+ TA2_STATUS = 0x00009528, -+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ -+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ -+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ -+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ -+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ -+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ -+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ -+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ -+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ -+/* IN_BUSY_bit = 1 << 24, */ -+/* FG_BUSY_bit = 1 << 25, */ -+/* FL_BUSY_bit = 1 << 27, */ -+/* TA_BUSY_bit = 1 << 28, */ -+/* FA_BUSY_bit = 1 << 29, */ -+/* AL_BUSY_bit = 1 << 30, */ -+/* BUSY_bit = 1 << 31, */ -+ TA3_STATUS = 0x0000952c, -+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ -+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ -+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ -+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ -+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ -+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ -+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ -+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ -+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ -+/* IN_BUSY_bit = 1 << 24, */ -+/* FG_BUSY_bit = 1 << 25, */ -+/* FL_BUSY_bit = 1 << 27, */ -+/* TA_BUSY_bit = 1 << 28, */ -+/* FA_BUSY_bit = 1 << 29, */ -+/* AL_BUSY_bit = 1 << 30, */ -+/* BUSY_bit = 1 << 31, */ -+ TC_STATUS = 0x00009600, -+ TC_BUSY_bit = 1 << 0, -+ TC_INVALIDATE = 0x00009604, -+ START_bit = 1 << 0, -+ TC_CNTL = 0x00009608, -+ FORCE_HIT_bit = 1 << 0, -+ FORCE_MISS_bit = 1 << 1, -+ L2_SIZE_mask = 0x0f << 5, -+ L2_SIZE_shift = 5, -+ _256K = 0x00, -+ _224K = 0x01, -+ _192K = 0x02, -+ _160K = 0x03, -+ _128K = 0x04, -+ _96K = 0x05, -+ _64K = 0x06, -+ _32K = 0x07, -+ L2_DISABLE_LATE_HIT_bit = 1 << 9, -+ DISABLE_VERT_PERF_bit = 1 << 10, -+ DISABLE_INVAL_BUSY_bit = 1 << 11, -+ DISABLE_INVAL_SAME_SURFACE_bit = 1 << 12, -+ PARTITION_MODE_mask = 0x03 << 13, -+ PARTITION_MODE_shift = 13, -+ X_VERTEX = 0x00, -+ MISS_ARB_MODE_bit = 1 << 15, -+ HIT_ARB_MODE_bit = 1 << 16, -+ DISABLE_WRITE_DELAY_bit = 1 << 17, -+ HIT_FIFO_DEPTH_bit = 1 << 18, -+ VC_CNTL = 0x00009700, -+ L2_INVALIDATE_bit = 1 << 0, -+ RESERVED_bit = 1 << 1, -+ CC_FORCE_MISS_bit = 1 << 2, -+ MI_CHAN_SEL_mask = 0x03 << 3, -+ MI_CHAN_SEL_shift = 3, -+ X_MC0_USES_CH_0_1 = 0x00, -+ X_MC0_USES_CH_0_3 = 0x01, -+ X_VC_MC0_IS_ACTIVE = 0x02, -+ X_VC_MC1_IS_DISABLED = 0x03, -+ MI_STEER_DISABLE_bit = 1 << 5, -+ MI_CREDIT_CTR_mask = 0x0f << 6, -+ MI_CREDIT_CTR_shift = 6, -+ MI_CREDIT_WE_bit = 1 << 10, -+ MI_REQ_STALL_THLD_mask = 0x07 << 11, -+ MI_REQ_STALL_THLD_shift = 11, -+ X_LATENCY_EXCEEDS_399_CLOCKS = 0x00, -+ X_LATENCY_EXCEEDS_415_CLOCKS = 0x01, -+ X_LATENCY_EXCEEDS_431_CLOCKS = 0x02, -+ X_LATENCY_EXCEEDS_447_CLOCKS = 0x03, -+ X_LATENCY_EXCEEDS_463_CLOCKS = 0x04, -+ X_LATENCY_EXCEEDS_479_CLOCKS = 0x05, -+ X_LATENCY_EXCEEDS_495_CLOCKS = 0x06, -+ X_LATENCY_EXCEEDS_511_CLOCKS = 0x07, -+ VC_CNTL__MI_TIMESTAMP_RES_mask = 0x1f << 14, -+ VC_CNTL__MI_TIMESTAMP_RES_shift = 14, -+ X_1X_SYSTEM_CLOCK = 0x00, -+ X_2X_SYSTEM_CLOCK = 0x01, -+ X_4X_SYSTEM_CLOCK = 0x02, -+ X_8X_SYSTEM_CLOCK = 0x03, -+ X_16X_SYSTEM_CLOCK = 0x04, -+ X_32X_SYSTEM_CLOCK = 0x05, -+ X_64X_SYSTEM_CLOCK = 0x06, -+ X_128X_SYSTEM_CLOCK = 0x07, -+ X_256X_SYSTEM_CLOCK = 0x08, -+ X_512X_SYSTEM_CLOCK = 0x09, -+ X_1024X_SYSTEM_CLOCK = 0x0a, -+ X_2048X_SYSTEM_CLOCK = 0x0b, -+ X_4092X_SYSTEM_CLOCK = 0x0c, -+ X_8192X_SYSTEM_CLOCK = 0x0d, -+ X_16384X_SYSTEM_CLOCK = 0x0e, -+ X_32768X_SYSTEM_CLOCK = 0x0f, -+ VC_CNTL_STATUS = 0x00009704, -+ RP_BUSY_bit = 1 << 0, -+ RG_BUSY_bit = 1 << 1, -+ VC_BUSY_bit = 1 << 2, -+ CLAMP_DETECT_bit = 1 << 3, -+ VC_CONFIG = 0x00009718, -+ WRITE_DIS_bit = 1 << 0, -+ GPR_DATA_PHASE_ADJ_mask = 0x07 << 1, -+ GPR_DATA_PHASE_ADJ_shift = 1, -+ X_LATENCY_BASE_0_CYCLES = 0x00, -+ X_LATENCY_BASE_1_CYCLES = 0x01, -+ X_LATENCY_BASE_2_CYCLES = 0x02, -+ X_LATENCY_BASE_3_CYCLES = 0x03, -+ TD_SIMD_SYNC_ADJ_mask = 0x07 << 4, -+ TD_SIMD_SYNC_ADJ_shift = 4, -+ X_0_CYCLES_DELAY = 0x00, -+ X_1_CYCLES_DELAY = 0x01, -+ X_2_CYCLES_DELAY = 0x02, -+ X_3_CYCLES_DELAY = 0x03, -+ X_4_CYCLES_DELAY = 0x04, -+ X_5_CYCLES_DELAY = 0x05, -+ X_6_CYCLES_DELAY = 0x06, -+ X_7_CYCLES_DELAY = 0x07, -+ SMX_DC_CTL0 = 0x0000a020, -+ WR_GATHER_STREAM0_bit = 1 << 0, -+ WR_GATHER_STREAM1_bit = 1 << 1, -+ WR_GATHER_STREAM2_bit = 1 << 2, -+ WR_GATHER_STREAM3_bit = 1 << 3, -+ WR_GATHER_SCRATCH_bit = 1 << 4, -+ WR_GATHER_REDUC_BUF_bit = 1 << 5, -+ WR_GATHER_RING_BUF_bit = 1 << 6, -+ WR_GATHER_F_BUF_bit = 1 << 7, -+ DISABLE_CACHES_bit = 1 << 8, -+ AUTO_FLUSH_INVAL_EN_bit = 1 << 10, -+ AUTO_FLUSH_EN_bit = 1 << 11, -+ AUTO_FLUSH_CNT_mask = 0xffff << 12, -+ AUTO_FLUSH_CNT_shift = 12, -+ MC_RD_STALL_FACTOR_mask = 0x03 << 28, -+ MC_RD_STALL_FACTOR_shift = 28, -+ MC_WR_STALL_FACTOR_mask = 0x03 << 30, -+ MC_WR_STALL_FACTOR_shift = 30, -+ SMX_DC_CTL1 = 0x0000a024, -+ OP_FIFO_SKID_mask = 0x7f << 0, -+ OP_FIFO_SKID_shift = 0, -+ CACHE_LINE_SIZE_bit = 1 << 8, -+ MULTI_FLUSH_MODE_bit = 1 << 9, -+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10, -+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift = 10, -+ DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16, -+ DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17, -+ DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18, -+ DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19, -+ SMX_DC_CTL2 = 0x0000a028, -+ INVALIDATE_CACHES_bit = 1 << 0, -+ CACHES_INVALID_bit = 1 << 1, -+ CACHES_DIRTY_bit = 1 << 2, -+ FLUSH_ALL_bit = 1 << 4, -+ FLUSH_GS_THREADS_bit = 1 << 8, -+ FLUSH_ES_THREADS_bit = 1 << 9, -+ SMX_DC_MC_INTF_CTL = 0x0000a02c, -+ MC_RD_REQ_CRED_mask = 0xff << 0, -+ MC_RD_REQ_CRED_shift = 0, -+ MC_WR_REQ_CRED_mask = 0xff << 16, -+ MC_WR_REQ_CRED_shift = 16, -+ TD_PS_SAMPLER0_BORDER_RED = 0x0000a400, -+ TD_PS_SAMPLER0_BORDER_RED_num = 18, -+ TD_PS_SAMPLER0_BORDER_RED_offset = 16, -+ TD_PS_SAMPLER0_BORDER_GREEN = 0x0000a404, -+ TD_PS_SAMPLER0_BORDER_GREEN_num = 18, -+ TD_PS_SAMPLER0_BORDER_GREEN_offset = 16, -+ TD_PS_SAMPLER0_BORDER_BLUE = 0x0000a408, -+ TD_PS_SAMPLER0_BORDER_BLUE_num = 18, -+ TD_PS_SAMPLER0_BORDER_BLUE_offset = 16, -+ TD_PS_SAMPLER0_BORDER_ALPHA = 0x0000a40c, -+ TD_PS_SAMPLER0_BORDER_ALPHA_num = 18, -+ TD_PS_SAMPLER0_BORDER_ALPHA_offset = 16, -+ TD_VS_SAMPLER0_BORDER_RED = 0x0000a600, -+ TD_VS_SAMPLER0_BORDER_RED_num = 18, -+ TD_VS_SAMPLER0_BORDER_RED_offset = 16, -+ TD_VS_SAMPLER0_BORDER_GREEN = 0x0000a604, -+ TD_VS_SAMPLER0_BORDER_GREEN_num = 18, -+ TD_VS_SAMPLER0_BORDER_GREEN_offset = 16, -+ TD_VS_SAMPLER0_BORDER_BLUE = 0x0000a608, -+ TD_VS_SAMPLER0_BORDER_BLUE_num = 18, -+ TD_VS_SAMPLER0_BORDER_BLUE_offset = 16, -+ TD_VS_SAMPLER0_BORDER_ALPHA = 0x0000a60c, -+ TD_VS_SAMPLER0_BORDER_ALPHA_num = 18, -+ TD_VS_SAMPLER0_BORDER_ALPHA_offset = 16, -+ TD_GS_SAMPLER0_BORDER_RED = 0x0000a800, -+ TD_GS_SAMPLER0_BORDER_RED_num = 18, -+ TD_GS_SAMPLER0_BORDER_RED_offset = 16, -+ TD_GS_SAMPLER0_BORDER_GREEN = 0x0000a804, -+ TD_GS_SAMPLER0_BORDER_GREEN_num = 18, -+ TD_GS_SAMPLER0_BORDER_GREEN_offset = 16, -+ TD_GS_SAMPLER0_BORDER_BLUE = 0x0000a808, -+ TD_GS_SAMPLER0_BORDER_BLUE_num = 18, -+ TD_GS_SAMPLER0_BORDER_BLUE_offset = 16, -+ TD_GS_SAMPLER0_BORDER_ALPHA = 0x0000a80c, -+ TD_GS_SAMPLER0_BORDER_ALPHA_num = 18, -+ TD_GS_SAMPLER0_BORDER_ALPHA_offset = 16, -+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL = 0x0000aa00, -+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num = 18, -+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask = 0x07 << 0, -+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift = 0, -+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask = 0x07 << 3, -+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift = 3, -+ DB_DEPTH_SIZE = 0x00028000, -+ PITCH_TILE_MAX_mask = 0x3ff << 0, -+ PITCH_TILE_MAX_shift = 0, -+ SLICE_TILE_MAX_mask = 0xfffff << 10, -+ SLICE_TILE_MAX_shift = 10, -+ DB_DEPTH_VIEW = 0x00028004, -+ SLICE_START_mask = 0x7ff << 0, -+ SLICE_START_shift = 0, -+ SLICE_MAX_mask = 0x7ff << 13, -+ SLICE_MAX_shift = 13, -+ DB_DEPTH_BASE = 0x0002800c, -+ DB_DEPTH_INFO = 0x00028010, -+ DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0, -+ DB_DEPTH_INFO__FORMAT_shift = 0, -+ DEPTH_INVALID = 0x00, -+ DEPTH_16 = 0x01, -+ DEPTH_X8_24 = 0x02, -+ DEPTH_8_24 = 0x03, -+ DEPTH_X8_24_FLOAT = 0x04, -+ DEPTH_8_24_FLOAT = 0x05, -+ DEPTH_32_FLOAT = 0x06, -+ DEPTH_X24_8_32_FLOAT = 0x07, -+ DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3, -+ DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, -+ DB_DEPTH_INFO__ARRAY_MODE_shift = 15, -+ ARRAY_2D_TILED_THIN1 = 0x04, -+ TILE_SURFACE_ENABLE_bit = 1 << 25, -+ TILE_COMPACT_bit = 1 << 26, -+ ZRANGE_PRECISION_bit = 1 << 31, -+ DB_HTILE_DATA_BASE = 0x00028014, -+ DB_STENCIL_CLEAR = 0x00028028, -+ DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0, -+ DB_STENCIL_CLEAR__CLEAR_shift = 0, -+ MIN_mask = 0xff << 16, -+ MIN_shift = 16, -+ DB_DEPTH_CLEAR = 0x0002802c, -+ PA_SC_SCREEN_SCISSOR_TL = 0x00028030, -+ PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0, -+ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0, -+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, -+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16, -+ PA_SC_SCREEN_SCISSOR_BR = 0x00028034, -+ PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0, -+ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0, -+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, -+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16, -+ CB_COLOR0_BASE = 0x00028040, -+ CB_COLOR0_BASE_num = 8, -+ CB_COLOR0_SIZE = 0x00028060, -+ CB_COLOR0_SIZE_num = 8, -+/* PITCH_TILE_MAX_mask = 0x3ff << 0, */ -+/* PITCH_TILE_MAX_shift = 0, */ -+/* SLICE_TILE_MAX_mask = 0xfffff << 10, */ -+/* SLICE_TILE_MAX_shift = 10, */ -+ CB_COLOR0_VIEW = 0x00028080, -+ CB_COLOR0_VIEW_num = 8, -+/* SLICE_START_mask = 0x7ff << 0, */ -+/* SLICE_START_shift = 0, */ -+/* SLICE_MAX_mask = 0x7ff << 13, */ -+/* SLICE_MAX_shift = 13, */ -+ CB_COLOR0_INFO = 0x000280a0, -+ CB_COLOR0_INFO_num = 8, -+ ENDIAN_mask = 0x03 << 0, -+ ENDIAN_shift = 0, -+ ENDIAN_NONE = 0x00, -+ ENDIAN_8IN16 = 0x01, -+ ENDIAN_8IN32 = 0x02, -+ ENDIAN_8IN64 = 0x03, -+ CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2, -+ CB_COLOR0_INFO__FORMAT_shift = 2, -+ COLOR_INVALID = 0x00, -+ COLOR_8 = 0x01, -+ COLOR_4_4 = 0x02, -+ COLOR_3_3_2 = 0x03, -+ COLOR_16 = 0x05, -+ COLOR_16_FLOAT = 0x06, -+ COLOR_8_8 = 0x07, -+ COLOR_5_6_5 = 0x08, -+ COLOR_6_5_5 = 0x09, -+ COLOR_1_5_5_5 = 0x0a, -+ COLOR_4_4_4_4 = 0x0b, -+ COLOR_5_5_5_1 = 0x0c, -+ COLOR_32 = 0x0d, -+ COLOR_32_FLOAT = 0x0e, -+ COLOR_16_16 = 0x0f, -+ COLOR_16_16_FLOAT = 0x10, -+ COLOR_8_24 = 0x11, -+ COLOR_8_24_FLOAT = 0x12, -+ COLOR_24_8 = 0x13, -+ COLOR_24_8_FLOAT = 0x14, -+ COLOR_10_11_11 = 0x15, -+ COLOR_10_11_11_FLOAT = 0x16, -+ COLOR_11_11_10 = 0x17, -+ COLOR_11_11_10_FLOAT = 0x18, -+ COLOR_2_10_10_10 = 0x19, -+ COLOR_8_8_8_8 = 0x1a, -+ COLOR_10_10_10_2 = 0x1b, -+ COLOR_X24_8_32_FLOAT = 0x1c, -+ COLOR_32_32 = 0x1d, -+ COLOR_32_32_FLOAT = 0x1e, -+ COLOR_16_16_16_16 = 0x1f, -+ COLOR_16_16_16_16_FLOAT = 0x20, -+ COLOR_32_32_32_32 = 0x22, -+ COLOR_32_32_32_32_FLOAT = 0x23, -+ CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8, -+ CB_COLOR0_INFO__ARRAY_MODE_shift = 8, -+ ARRAY_LINEAR_GENERAL = 0x00, -+ ARRAY_LINEAR_ALIGNED = 0x01, -+/* ARRAY_2D_TILED_THIN1 = 0x04, */ -+ NUMBER_TYPE_mask = 0x07 << 12, -+ NUMBER_TYPE_shift = 12, -+ NUMBER_UNORM = 0x00, -+ NUMBER_SNORM = 0x01, -+ NUMBER_USCALED = 0x02, -+ NUMBER_SSCALED = 0x03, -+ NUMBER_UINT = 0x04, -+ NUMBER_SINT = 0x05, -+ NUMBER_SRGB = 0x06, -+ NUMBER_FLOAT = 0x07, -+ CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15, -+ COMP_SWAP_mask = 0x03 << 16, -+ COMP_SWAP_shift = 16, -+ SWAP_STD = 0x00, -+ SWAP_ALT = 0x01, -+ SWAP_STD_REV = 0x02, -+ SWAP_ALT_REV = 0x03, -+ CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18, -+ CB_COLOR0_INFO__TILE_MODE_shift = 18, -+ TILE_DISABLE = 0x00, -+ TILE_CLEAR_ENABLE = 0x01, -+ TILE_FRAG_ENABLE = 0x02, -+ BLEND_CLAMP_bit = 1 << 20, -+ CLEAR_COLOR_bit = 1 << 21, -+ BLEND_BYPASS_bit = 1 << 22, -+ BLEND_FLOAT32_bit = 1 << 23, -+ SIMPLE_FLOAT_bit = 1 << 24, -+ CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25, -+/* TILE_COMPACT_bit = 1 << 26, */ -+ SOURCE_FORMAT_bit = 1 << 27, -+ CB_COLOR0_TILE = 0x000280c0, -+ CB_COLOR0_TILE_num = 8, -+ CB_COLOR0_FRAG = 0x000280e0, -+ CB_COLOR0_FRAG_num = 8, -+ CB_COLOR0_MASK = 0x00028100, -+ CB_COLOR0_MASK_num = 8, -+ CMASK_BLOCK_MAX_mask = 0xfff << 0, -+ CMASK_BLOCK_MAX_shift = 0, -+ FMASK_TILE_MAX_mask = 0xfffff << 12, -+ FMASK_TILE_MAX_shift = 12, -+ CB_CLEAR_RED = 0x00028120, -+ CB_CLEAR_GREEN = 0x00028124, -+ CB_CLEAR_BLUE = 0x00028128, -+ CB_CLEAR_ALPHA = 0x0002812c, -+ SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140, -+ SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16, -+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0, -+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0, -+ SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180, -+ SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16, -+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0, -+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0, -+ SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0, -+ SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16, -+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0, -+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0, -+ PA_SC_WINDOW_OFFSET = 0x00028200, -+ WINDOW_X_OFFSET_mask = 0x7fff << 0, -+ WINDOW_X_OFFSET_shift = 0, -+ WINDOW_Y_OFFSET_mask = 0x7fff << 16, -+ WINDOW_Y_OFFSET_shift = 16, -+ PA_SC_WINDOW_SCISSOR_TL = 0x00028204, -+ PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0, -+ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0, -+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, -+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16, -+ WINDOW_OFFSET_DISABLE_bit = 1 << 31, -+ PA_SC_WINDOW_SCISSOR_BR = 0x00028208, -+ PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0, -+ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0, -+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, -+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16, -+ PA_SC_CLIPRECT_RULE = 0x0002820c, -+ CLIP_RULE_mask = 0xffff << 0, -+ CLIP_RULE_shift = 0, -+ PA_SC_CLIPRECT_0_TL = 0x00028210, -+ PA_SC_CLIPRECT_0_TL_num = 4, -+ PA_SC_CLIPRECT_0_TL_offset = 8, -+ PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0, -+ PA_SC_CLIPRECT_0_TL__TL_X_shift = 0, -+ PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16, -+ PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16, -+ PA_SC_CLIPRECT_0_BR = 0x00028214, -+ PA_SC_CLIPRECT_0_BR_num = 4, -+ PA_SC_CLIPRECT_0_BR_offset = 8, -+ PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0, -+ PA_SC_CLIPRECT_0_BR__BR_X_shift = 0, -+ PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16, -+ PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16, -+ CB_TARGET_MASK = 0x00028238, -+ TARGET0_ENABLE_mask = 0x0f << 0, -+ TARGET0_ENABLE_shift = 0, -+ TARGET1_ENABLE_mask = 0x0f << 4, -+ TARGET1_ENABLE_shift = 4, -+ TARGET2_ENABLE_mask = 0x0f << 8, -+ TARGET2_ENABLE_shift = 8, -+ TARGET3_ENABLE_mask = 0x0f << 12, -+ TARGET3_ENABLE_shift = 12, -+ TARGET4_ENABLE_mask = 0x0f << 16, -+ TARGET4_ENABLE_shift = 16, -+ TARGET5_ENABLE_mask = 0x0f << 20, -+ TARGET5_ENABLE_shift = 20, -+ TARGET6_ENABLE_mask = 0x0f << 24, -+ TARGET6_ENABLE_shift = 24, -+ TARGET7_ENABLE_mask = 0x0f << 28, -+ TARGET7_ENABLE_shift = 28, -+ CB_SHADER_MASK = 0x0002823c, -+ OUTPUT0_ENABLE_mask = 0x0f << 0, -+ OUTPUT0_ENABLE_shift = 0, -+ OUTPUT1_ENABLE_mask = 0x0f << 4, -+ OUTPUT1_ENABLE_shift = 4, -+ OUTPUT2_ENABLE_mask = 0x0f << 8, -+ OUTPUT2_ENABLE_shift = 8, -+ OUTPUT3_ENABLE_mask = 0x0f << 12, -+ OUTPUT3_ENABLE_shift = 12, -+ OUTPUT4_ENABLE_mask = 0x0f << 16, -+ OUTPUT4_ENABLE_shift = 16, -+ OUTPUT5_ENABLE_mask = 0x0f << 20, -+ OUTPUT5_ENABLE_shift = 20, -+ OUTPUT6_ENABLE_mask = 0x0f << 24, -+ OUTPUT6_ENABLE_shift = 24, -+ OUTPUT7_ENABLE_mask = 0x0f << 28, -+ OUTPUT7_ENABLE_shift = 28, -+ PA_SC_GENERIC_SCISSOR_TL = 0x00028240, -+ PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0, -+ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0, -+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, -+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16, -+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ -+ PA_SC_GENERIC_SCISSOR_BR = 0x00028244, -+ PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0, -+ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0, -+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, -+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16, -+ PA_SC_VPORT_SCISSOR_0_TL = 0x00028250, -+ PA_SC_VPORT_SCISSOR_0_TL_num = 16, -+ PA_SC_VPORT_SCISSOR_0_TL_offset = 8, -+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0, -+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0, -+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16, -+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16, -+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ -+ PA_SC_VPORT_SCISSOR_0_BR = 0x00028254, -+ PA_SC_VPORT_SCISSOR_0_BR_num = 16, -+ PA_SC_VPORT_SCISSOR_0_BR_offset = 8, -+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0, -+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0, -+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16, -+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16, -+ PA_SC_VPORT_ZMIN_0 = 0x000282d0, -+ PA_SC_VPORT_ZMIN_0_num = 16, -+ PA_SC_VPORT_ZMIN_0_offset = 8, -+ PA_SC_VPORT_ZMAX_0 = 0x000282d4, -+ PA_SC_VPORT_ZMAX_0_num = 16, -+ PA_SC_VPORT_ZMAX_0_offset = 8, -+ SX_MISC = 0x00028350, -+ MULTIPASS_bit = 1 << 0, -+ SQ_VTX_SEMANTIC_0 = 0x00028380, -+ SQ_VTX_SEMANTIC_0_num = 32, -+/* SEMANTIC_ID_mask = 0xff << 0, */ -+/* SEMANTIC_ID_shift = 0, */ -+ VGT_MAX_VTX_INDX = 0x00028400, -+ VGT_MIN_VTX_INDX = 0x00028404, -+ VGT_INDX_OFFSET = 0x00028408, -+ VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c, -+ SX_ALPHA_TEST_CONTROL = 0x00028410, -+ ALPHA_FUNC_mask = 0x07 << 0, -+ ALPHA_FUNC_shift = 0, -+ REF_NEVER = 0x00, -+ REF_LESS = 0x01, -+ REF_EQUAL = 0x02, -+ REF_LEQUAL = 0x03, -+ REF_GREATER = 0x04, -+ REF_NOTEQUAL = 0x05, -+ REF_GEQUAL = 0x06, -+ REF_ALWAYS = 0x07, -+ ALPHA_TEST_ENABLE_bit = 1 << 3, -+ ALPHA_TEST_BYPASS_bit = 1 << 8, -+ CB_BLEND_RED = 0x00028414, -+ CB_BLEND_GREEN = 0x00028418, -+ CB_BLEND_BLUE = 0x0002841c, -+ CB_BLEND_ALPHA = 0x00028420, -+ CB_FOG_RED = 0x00028424, -+ CB_FOG_GREEN = 0x00028428, -+ CB_FOG_BLUE = 0x0002842c, -+ DB_STENCILREFMASK = 0x00028430, -+ STENCILREF_mask = 0xff << 0, -+ STENCILREF_shift = 0, -+ STENCILMASK_mask = 0xff << 8, -+ STENCILMASK_shift = 8, -+ STENCILWRITEMASK_mask = 0xff << 16, -+ STENCILWRITEMASK_shift = 16, -+ DB_STENCILREFMASK_BF = 0x00028434, -+ STENCILREF_BF_mask = 0xff << 0, -+ STENCILREF_BF_shift = 0, -+ STENCILMASK_BF_mask = 0xff << 8, -+ STENCILMASK_BF_shift = 8, -+ STENCILWRITEMASK_BF_mask = 0xff << 16, -+ STENCILWRITEMASK_BF_shift = 16, -+ SX_ALPHA_REF = 0x00028438, -+ PA_CL_VPORT_XSCALE_0 = 0x0002843c, -+ PA_CL_VPORT_XSCALE_0_num = 16, -+ PA_CL_VPORT_XSCALE_0_offset = 24, -+ PA_CL_VPORT_XOFFSET_0 = 0x00028440, -+ PA_CL_VPORT_XOFFSET_0_num = 16, -+ PA_CL_VPORT_XOFFSET_0_offset = 24, -+ PA_CL_VPORT_YSCALE_0 = 0x00028444, -+ PA_CL_VPORT_YSCALE_0_num = 16, -+ PA_CL_VPORT_YSCALE_0_offset = 24, -+ PA_CL_VPORT_YOFFSET_0 = 0x00028448, -+ PA_CL_VPORT_YOFFSET_0_num = 16, -+ PA_CL_VPORT_YOFFSET_0_offset = 24, -+ PA_CL_VPORT_ZSCALE_0 = 0x0002844c, -+ PA_CL_VPORT_ZSCALE_0_num = 16, -+ PA_CL_VPORT_ZSCALE_0_offset = 24, -+ PA_CL_VPORT_ZOFFSET_0 = 0x00028450, -+ PA_CL_VPORT_ZOFFSET_0_num = 16, -+ PA_CL_VPORT_ZOFFSET_0_offset = 24, -+ SPI_VS_OUT_ID_0 = 0x00028614, -+ SPI_VS_OUT_ID_0_num = 10, -+ SEMANTIC_0_mask = 0xff << 0, -+ SEMANTIC_0_shift = 0, -+ SEMANTIC_1_mask = 0xff << 8, -+ SEMANTIC_1_shift = 8, -+ SEMANTIC_2_mask = 0xff << 16, -+ SEMANTIC_2_shift = 16, -+ SEMANTIC_3_mask = 0xff << 24, -+ SEMANTIC_3_shift = 24, -+ SPI_PS_INPUT_CNTL_0 = 0x00028644, -+ SPI_PS_INPUT_CNTL_0_num = 32, -+ SEMANTIC_mask = 0xff << 0, -+ SEMANTIC_shift = 0, -+ DEFAULT_VAL_mask = 0x03 << 8, -+ DEFAULT_VAL_shift = 8, -+ X_0_0F = 0x00, -+ FLAT_SHADE_bit = 1 << 10, -+ SEL_CENTROID_bit = 1 << 11, -+ SEL_LINEAR_bit = 1 << 12, -+ CYL_WRAP_mask = 0x0f << 13, -+ CYL_WRAP_shift = 13, -+ PT_SPRITE_TEX_bit = 1 << 17, -+ SEL_SAMPLE_bit = 1 << 18, -+ SPI_VS_OUT_CONFIG = 0x000286c4, -+ VS_PER_COMPONENT_bit = 1 << 0, -+ VS_EXPORT_COUNT_mask = 0x1f << 1, -+ VS_EXPORT_COUNT_shift = 1, -+ VS_EXPORTS_FOG_bit = 1 << 8, -+ VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9, -+ VS_OUT_FOG_VEC_ADDR_shift = 9, -+ SPI_PS_IN_CONTROL_0 = 0x000286cc, -+ NUM_INTERP_mask = 0x3f << 0, -+ NUM_INTERP_shift = 0, -+ POSITION_ENA_bit = 1 << 8, -+ POSITION_CENTROID_bit = 1 << 9, -+ POSITION_ADDR_mask = 0x1f << 10, -+ POSITION_ADDR_shift = 10, -+ PARAM_GEN_mask = 0x0f << 15, -+ PARAM_GEN_shift = 15, -+ PARAM_GEN_ADDR_mask = 0x7f << 19, -+ PARAM_GEN_ADDR_shift = 19, -+ BARYC_SAMPLE_CNTL_mask = 0x03 << 26, -+ BARYC_SAMPLE_CNTL_shift = 26, -+ CENTROIDS_ONLY = 0x00, -+ CENTERS_ONLY = 0x01, -+ CENTROIDS_AND_CENTERS = 0x02, -+ UNDEF = 0x03, -+ PERSP_GRADIENT_ENA_bit = 1 << 28, -+ LINEAR_GRADIENT_ENA_bit = 1 << 29, -+ POSITION_SAMPLE_bit = 1 << 30, -+ BARYC_AT_SAMPLE_ENA_bit = 1 << 31, -+ SPI_PS_IN_CONTROL_1 = 0x000286d0, -+ GEN_INDEX_PIX_bit = 1 << 0, -+ GEN_INDEX_PIX_ADDR_mask = 0x7f << 1, -+ GEN_INDEX_PIX_ADDR_shift = 1, -+ FRONT_FACE_ENA_bit = 1 << 8, -+ FRONT_FACE_CHAN_mask = 0x03 << 9, -+ FRONT_FACE_CHAN_shift = 9, -+ FRONT_FACE_ALL_BITS_bit = 1 << 11, -+ FRONT_FACE_ADDR_mask = 0x1f << 12, -+ FRONT_FACE_ADDR_shift = 12, -+ FOG_ADDR_mask = 0x7f << 17, -+ FOG_ADDR_shift = 17, -+ FIXED_PT_POSITION_ENA_bit = 1 << 24, -+ FIXED_PT_POSITION_ADDR_mask = 0x1f << 25, -+ FIXED_PT_POSITION_ADDR_shift = 25, -+ SPI_INTERP_CONTROL_0 = 0x000286d4, -+ FLAT_SHADE_ENA_bit = 1 << 0, -+ PNT_SPRITE_ENA_bit = 1 << 1, -+ PNT_SPRITE_OVRD_X_mask = 0x07 << 2, -+ PNT_SPRITE_OVRD_X_shift = 2, -+ SPI_PNT_SPRITE_SEL_0 = 0x00, -+ SPI_PNT_SPRITE_SEL_1 = 0x01, -+ SPI_PNT_SPRITE_SEL_S = 0x02, -+ SPI_PNT_SPRITE_SEL_T = 0x03, -+ SPI_PNT_SPRITE_SEL_NONE = 0x04, -+ PNT_SPRITE_OVRD_Y_mask = 0x07 << 5, -+ PNT_SPRITE_OVRD_Y_shift = 5, -+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ -+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ -+/* SPI_PNT_SPRITE_SEL_S = 0x02, */ -+/* SPI_PNT_SPRITE_SEL_T = 0x03, */ -+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ -+ PNT_SPRITE_OVRD_Z_mask = 0x07 << 8, -+ PNT_SPRITE_OVRD_Z_shift = 8, -+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ -+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ -+/* SPI_PNT_SPRITE_SEL_S = 0x02, */ -+/* SPI_PNT_SPRITE_SEL_T = 0x03, */ -+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ -+ PNT_SPRITE_OVRD_W_mask = 0x07 << 11, -+ PNT_SPRITE_OVRD_W_shift = 11, -+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ -+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ -+/* SPI_PNT_SPRITE_SEL_S = 0x02, */ -+/* SPI_PNT_SPRITE_SEL_T = 0x03, */ -+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ -+ PNT_SPRITE_TOP_1_bit = 1 << 14, -+ SPI_INPUT_Z = 0x000286d8, -+ PROVIDE_Z_TO_SPI_bit = 1 << 0, -+ SPI_FOG_CNTL = 0x000286dc, -+ PASS_FOG_THROUGH_PS_bit = 1 << 0, -+ PIXEL_FOG_FUNC_mask = 0x03 << 1, -+ PIXEL_FOG_FUNC_shift = 1, -+ SPI_FOG_NONE = 0x00, -+ SPI_FOG_EXP = 0x01, -+ SPI_FOG_EXP2 = 0x02, -+ SPI_FOG_LINEAR = 0x03, -+ PIXEL_FOG_SRC_SEL_bit = 1 << 3, -+ VS_FOG_CLAMP_DISABLE_bit = 1 << 4, -+ SPI_FOG_FUNC_SCALE = 0x000286e0, -+ SPI_FOG_FUNC_BIAS = 0x000286e4, -+ CB_BLEND0_CONTROL = 0x00028780, -+ CB_BLEND0_CONTROL_num = 8, -+ COLOR_SRCBLEND_mask = 0x1f << 0, -+ COLOR_SRCBLEND_shift = 0, -+ COLOR_COMB_FCN_mask = 0x07 << 5, -+ COLOR_COMB_FCN_shift = 5, -+ COLOR_DESTBLEND_mask = 0x1f << 8, -+ COLOR_DESTBLEND_shift = 8, -+ OPACITY_WEIGHT_bit = 1 << 13, -+ ALPHA_SRCBLEND_mask = 0x1f << 16, -+ ALPHA_SRCBLEND_shift = 16, -+ ALPHA_COMB_FCN_mask = 0x07 << 21, -+ ALPHA_COMB_FCN_shift = 21, -+ ALPHA_DESTBLEND_mask = 0x1f << 24, -+ ALPHA_DESTBLEND_shift = 24, -+ SEPARATE_ALPHA_BLEND_bit = 1 << 29, -+ VGT_DMA_BASE_HI = 0x000287e4, -+ VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0, -+ VGT_DMA_BASE_HI__BASE_ADDR_shift = 0, -+ VGT_DMA_BASE = 0x000287e8, -+ VGT_DRAW_INITIATOR = 0x000287f0, -+ SOURCE_SELECT_mask = 0x03 << 0, -+ SOURCE_SELECT_shift = 0, -+ DI_SRC_SEL_DMA = 0x00, -+ DI_SRC_SEL_IMMEDIATE = 0x01, -+ DI_SRC_SEL_AUTO_INDEX = 0x02, -+ DI_SRC_SEL_RESERVED = 0x03, -+ MAJOR_MODE_mask = 0x03 << 2, -+ MAJOR_MODE_shift = 2, -+ DI_MAJOR_MODE_0 = 0x00, -+ DI_MAJOR_MODE_1 = 0x01, -+ SPRITE_EN_bit = 1 << 4, -+ NOT_EOP_bit = 1 << 5, -+ USE_OPAQUE_bit = 1 << 6, -+ VGT_IMMED_DATA = 0x000287f4, -+ VGT_EVENT_ADDRESS_REG = 0x000287f8, -+ ADDRESS_LOW_mask = 0xfffffff << 0, -+ ADDRESS_LOW_shift = 0, -+ DB_DEPTH_CONTROL = 0x00028800, -+ STENCIL_ENABLE_bit = 1 << 0, -+ Z_ENABLE_bit = 1 << 1, -+ Z_WRITE_ENABLE_bit = 1 << 2, -+ ZFUNC_mask = 0x07 << 4, -+ ZFUNC_shift = 4, -+ FRAG_NEVER = 0x00, -+ FRAG_LESS = 0x01, -+ FRAG_EQUAL = 0x02, -+ FRAG_LEQUAL = 0x03, -+ FRAG_GREATER = 0x04, -+ FRAG_NOTEQUAL = 0x05, -+ FRAG_GEQUAL = 0x06, -+ FRAG_ALWAYS = 0x07, -+ BACKFACE_ENABLE_bit = 1 << 7, -+ STENCILFUNC_mask = 0x07 << 8, -+ STENCILFUNC_shift = 8, -+/* REF_NEVER = 0x00, */ -+/* REF_LESS = 0x01, */ -+/* REF_EQUAL = 0x02, */ -+/* REF_LEQUAL = 0x03, */ -+/* REF_GREATER = 0x04, */ -+/* REF_NOTEQUAL = 0x05, */ -+/* REF_GEQUAL = 0x06, */ -+/* REF_ALWAYS = 0x07, */ -+ STENCILFAIL_mask = 0x07 << 11, -+ STENCILFAIL_shift = 11, -+ STENCIL_KEEP = 0x00, -+ STENCIL_ZERO = 0x01, -+ STENCIL_REPLACE = 0x02, -+ STENCIL_INCR_CLAMP = 0x03, -+ STENCIL_DECR_CLAMP = 0x04, -+ STENCIL_INVERT = 0x05, -+ STENCIL_INCR_WRAP = 0x06, -+ STENCIL_DECR_WRAP = 0x07, -+ STENCILZPASS_mask = 0x07 << 14, -+ STENCILZPASS_shift = 14, -+/* STENCIL_KEEP = 0x00, */ -+/* STENCIL_ZERO = 0x01, */ -+/* STENCIL_REPLACE = 0x02, */ -+/* STENCIL_INCR_CLAMP = 0x03, */ -+/* STENCIL_DECR_CLAMP = 0x04, */ -+/* STENCIL_INVERT = 0x05, */ -+/* STENCIL_INCR_WRAP = 0x06, */ -+/* STENCIL_DECR_WRAP = 0x07, */ -+ STENCILZFAIL_mask = 0x07 << 17, -+ STENCILZFAIL_shift = 17, -+/* STENCIL_KEEP = 0x00, */ -+/* STENCIL_ZERO = 0x01, */ -+/* STENCIL_REPLACE = 0x02, */ -+/* STENCIL_INCR_CLAMP = 0x03, */ -+/* STENCIL_DECR_CLAMP = 0x04, */ -+/* STENCIL_INVERT = 0x05, */ -+/* STENCIL_INCR_WRAP = 0x06, */ -+/* STENCIL_DECR_WRAP = 0x07, */ -+ STENCILFUNC_BF_mask = 0x07 << 20, -+ STENCILFUNC_BF_shift = 20, -+/* REF_NEVER = 0x00, */ -+/* REF_LESS = 0x01, */ -+/* REF_EQUAL = 0x02, */ -+/* REF_LEQUAL = 0x03, */ -+/* REF_GREATER = 0x04, */ -+/* REF_NOTEQUAL = 0x05, */ -+/* REF_GEQUAL = 0x06, */ -+/* REF_ALWAYS = 0x07, */ -+ STENCILFAIL_BF_mask = 0x07 << 23, -+ STENCILFAIL_BF_shift = 23, -+/* STENCIL_KEEP = 0x00, */ -+/* STENCIL_ZERO = 0x01, */ -+/* STENCIL_REPLACE = 0x02, */ -+/* STENCIL_INCR_CLAMP = 0x03, */ -+/* STENCIL_DECR_CLAMP = 0x04, */ -+/* STENCIL_INVERT = 0x05, */ -+/* STENCIL_INCR_WRAP = 0x06, */ -+/* STENCIL_DECR_WRAP = 0x07, */ -+ STENCILZPASS_BF_mask = 0x07 << 26, -+ STENCILZPASS_BF_shift = 26, -+/* STENCIL_KEEP = 0x00, */ -+/* STENCIL_ZERO = 0x01, */ -+/* STENCIL_REPLACE = 0x02, */ -+/* STENCIL_INCR_CLAMP = 0x03, */ -+/* STENCIL_DECR_CLAMP = 0x04, */ -+/* STENCIL_INVERT = 0x05, */ -+/* STENCIL_INCR_WRAP = 0x06, */ -+/* STENCIL_DECR_WRAP = 0x07, */ -+ STENCILZFAIL_BF_mask = 0x07 << 29, -+ STENCILZFAIL_BF_shift = 29, -+/* STENCIL_KEEP = 0x00, */ -+/* STENCIL_ZERO = 0x01, */ -+/* STENCIL_REPLACE = 0x02, */ -+/* STENCIL_INCR_CLAMP = 0x03, */ -+/* STENCIL_DECR_CLAMP = 0x04, */ -+/* STENCIL_INVERT = 0x05, */ -+/* STENCIL_INCR_WRAP = 0x06, */ -+/* STENCIL_DECR_WRAP = 0x07, */ -+ CB_BLEND_CONTROL = 0x00028804, -+/* COLOR_SRCBLEND_mask = 0x1f << 0, */ -+/* COLOR_SRCBLEND_shift = 0, */ -+ BLEND_ZERO = 0x00, -+ BLEND_ONE = 0x01, -+ BLEND_SRC_COLOR = 0x02, -+ BLEND_ONE_MINUS_SRC_COLOR = 0x03, -+ BLEND_SRC_ALPHA = 0x04, -+ BLEND_ONE_MINUS_SRC_ALPHA = 0x05, -+ BLEND_DST_ALPHA = 0x06, -+ BLEND_ONE_MINUS_DST_ALPHA = 0x07, -+ BLEND_DST_COLOR = 0x08, -+ BLEND_ONE_MINUS_DST_COLOR = 0x09, -+ BLEND_SRC_ALPHA_SATURATE = 0x0a, -+ BLEND_BOTH_SRC_ALPHA = 0x0b, -+ BLEND_BOTH_INV_SRC_ALPHA = 0x0c, -+ BLEND_CONSTANT_COLOR = 0x0d, -+ BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, -+ BLEND_SRC1_COLOR = 0x0f, -+ BLEND_INV_SRC1_COLOR = 0x10, -+ BLEND_SRC1_ALPHA = 0x11, -+ BLEND_INV_SRC1_ALPHA = 0x12, -+ BLEND_CONSTANT_ALPHA = 0x13, -+ BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, -+/* COLOR_COMB_FCN_mask = 0x07 << 5, */ -+/* COLOR_COMB_FCN_shift = 5, */ -+ COMB_DST_PLUS_SRC = 0x00, -+ COMB_SRC_MINUS_DST = 0x01, -+ COMB_MIN_DST_SRC = 0x02, -+ COMB_MAX_DST_SRC = 0x03, -+ COMB_DST_MINUS_SRC = 0x04, -+/* COLOR_DESTBLEND_mask = 0x1f << 8, */ -+/* COLOR_DESTBLEND_shift = 8, */ -+/* BLEND_ZERO = 0x00, */ -+/* BLEND_ONE = 0x01, */ -+/* BLEND_SRC_COLOR = 0x02, */ -+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ -+/* BLEND_SRC_ALPHA = 0x04, */ -+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ -+/* BLEND_DST_ALPHA = 0x06, */ -+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ -+/* BLEND_DST_COLOR = 0x08, */ -+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ -+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ -+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ -+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ -+/* BLEND_CONSTANT_COLOR = 0x0d, */ -+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ -+/* BLEND_SRC1_COLOR = 0x0f, */ -+/* BLEND_INV_SRC1_COLOR = 0x10, */ -+/* BLEND_SRC1_ALPHA = 0x11, */ -+/* BLEND_INV_SRC1_ALPHA = 0x12, */ -+/* BLEND_CONSTANT_ALPHA = 0x13, */ -+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ -+/* OPACITY_WEIGHT_bit = 1 << 13, */ -+/* ALPHA_SRCBLEND_mask = 0x1f << 16, */ -+/* ALPHA_SRCBLEND_shift = 16, */ -+/* BLEND_ZERO = 0x00, */ -+/* BLEND_ONE = 0x01, */ -+/* BLEND_SRC_COLOR = 0x02, */ -+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ -+/* BLEND_SRC_ALPHA = 0x04, */ -+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ -+/* BLEND_DST_ALPHA = 0x06, */ -+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ -+/* BLEND_DST_COLOR = 0x08, */ -+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ -+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ -+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ -+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ -+/* BLEND_CONSTANT_COLOR = 0x0d, */ -+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ -+/* BLEND_SRC1_COLOR = 0x0f, */ -+/* BLEND_INV_SRC1_COLOR = 0x10, */ -+/* BLEND_SRC1_ALPHA = 0x11, */ -+/* BLEND_INV_SRC1_ALPHA = 0x12, */ -+/* BLEND_CONSTANT_ALPHA = 0x13, */ -+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ -+/* ALPHA_COMB_FCN_mask = 0x07 << 21, */ -+/* ALPHA_COMB_FCN_shift = 21, */ -+/* COMB_DST_PLUS_SRC = 0x00, */ -+/* COMB_SRC_MINUS_DST = 0x01, */ -+/* COMB_MIN_DST_SRC = 0x02, */ -+/* COMB_MAX_DST_SRC = 0x03, */ -+/* COMB_DST_MINUS_SRC = 0x04, */ -+/* ALPHA_DESTBLEND_mask = 0x1f << 24, */ -+/* ALPHA_DESTBLEND_shift = 24, */ -+/* BLEND_ZERO = 0x00, */ -+/* BLEND_ONE = 0x01, */ -+/* BLEND_SRC_COLOR = 0x02, */ -+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ -+/* BLEND_SRC_ALPHA = 0x04, */ -+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ -+/* BLEND_DST_ALPHA = 0x06, */ -+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ -+/* BLEND_DST_COLOR = 0x08, */ -+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ -+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ -+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ -+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ -+/* BLEND_CONSTANT_COLOR = 0x0d, */ -+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ -+/* BLEND_SRC1_COLOR = 0x0f, */ -+/* BLEND_INV_SRC1_COLOR = 0x10, */ -+/* BLEND_SRC1_ALPHA = 0x11, */ -+/* BLEND_INV_SRC1_ALPHA = 0x12, */ -+/* BLEND_CONSTANT_ALPHA = 0x13, */ -+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ -+/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */ -+ CB_COLOR_CONTROL = 0x00028808, -+ FOG_ENABLE_bit = 1 << 0, -+ MULTIWRITE_ENABLE_bit = 1 << 1, -+ DITHER_ENABLE_bit = 1 << 2, -+ DEGAMMA_ENABLE_bit = 1 << 3, -+ SPECIAL_OP_mask = 0x07 << 4, -+ SPECIAL_OP_shift = 4, -+ SPECIAL_NORMAL = 0x00, -+ SPECIAL_DISABLE = 0x01, -+ SPECIAL_FAST_CLEAR = 0x02, -+ SPECIAL_FORCE_CLEAR = 0x03, -+ SPECIAL_EXPAND_COLOR = 0x04, -+ SPECIAL_EXPAND_TEXTURE = 0x05, -+ SPECIAL_EXPAND_SAMPLES = 0x06, -+ SPECIAL_RESOLVE_BOX = 0x07, -+ PER_MRT_BLEND_bit = 1 << 7, -+ TARGET_BLEND_ENABLE_mask = 0xff << 8, -+ TARGET_BLEND_ENABLE_shift = 8, -+ ROP3_mask = 0xff << 16, -+ ROP3_shift = 16, -+ DB_SHADER_CONTROL = 0x0002880c, -+ Z_EXPORT_ENABLE_bit = 1 << 0, -+ STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1, -+ Z_ORDER_mask = 0x03 << 4, -+ Z_ORDER_shift = 4, -+ LATE_Z = 0x00, -+ EARLY_Z_THEN_LATE_Z = 0x01, -+ RE_Z = 0x02, -+ EARLY_Z_THEN_RE_Z = 0x03, -+ KILL_ENABLE_bit = 1 << 6, -+ COVERAGE_TO_MASK_ENABLE_bit = 1 << 7, -+ MASK_EXPORT_ENABLE_bit = 1 << 8, -+ DUAL_EXPORT_ENABLE_bit = 1 << 9, -+ EXEC_ON_HIER_FAIL_bit = 1 << 10, -+ EXEC_ON_NOOP_bit = 1 << 11, -+ PA_CL_CLIP_CNTL = 0x00028810, -+ UCP_ENA_0_bit = 1 << 0, -+ UCP_ENA_1_bit = 1 << 1, -+ UCP_ENA_2_bit = 1 << 2, -+ UCP_ENA_3_bit = 1 << 3, -+ UCP_ENA_4_bit = 1 << 4, -+ UCP_ENA_5_bit = 1 << 5, -+ PS_UCP_Y_SCALE_NEG_bit = 1 << 13, -+ PS_UCP_MODE_mask = 0x03 << 14, -+ PS_UCP_MODE_shift = 14, -+ CLIP_DISABLE_bit = 1 << 16, -+ UCP_CULL_ONLY_ENA_bit = 1 << 17, -+ BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18, -+ DX_CLIP_SPACE_DEF_bit = 1 << 19, -+ DIS_CLIP_ERR_DETECT_bit = 1 << 20, -+ VTX_KILL_OR_bit = 1 << 21, -+ DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24, -+ VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25, -+ ZCLIP_NEAR_DISABLE_bit = 1 << 26, -+ ZCLIP_FAR_DISABLE_bit = 1 << 27, -+ PA_SU_SC_MODE_CNTL = 0x00028814, -+ CULL_FRONT_bit = 1 << 0, -+ CULL_BACK_bit = 1 << 1, -+ FACE_bit = 1 << 2, -+ POLY_MODE_mask = 0x03 << 3, -+ POLY_MODE_shift = 3, -+ X_DISABLE_POLY_MODE = 0x00, -+ X_DUAL_MODE = 0x01, -+ POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, -+ POLYMODE_FRONT_PTYPE_shift = 5, -+ X_DRAW_POINTS = 0x00, -+ X_DRAW_LINES = 0x01, -+ X_DRAW_TRIANGLES = 0x02, -+ POLYMODE_BACK_PTYPE_mask = 0x07 << 8, -+ POLYMODE_BACK_PTYPE_shift = 8, -+/* X_DRAW_POINTS = 0x00, */ -+/* X_DRAW_LINES = 0x01, */ -+/* X_DRAW_TRIANGLES = 0x02, */ -+ POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11, -+ POLY_OFFSET_BACK_ENABLE_bit = 1 << 12, -+ POLY_OFFSET_PARA_ENABLE_bit = 1 << 13, -+ VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16, -+ PROVOKING_VTX_LAST_bit = 1 << 19, -+ PERSP_CORR_DIS_bit = 1 << 20, -+ MULTI_PRIM_IB_ENA_bit = 1 << 21, -+ PA_CL_VTE_CNTL = 0x00028818, -+ VPORT_X_SCALE_ENA_bit = 1 << 0, -+ VPORT_X_OFFSET_ENA_bit = 1 << 1, -+ VPORT_Y_SCALE_ENA_bit = 1 << 2, -+ VPORT_Y_OFFSET_ENA_bit = 1 << 3, -+ VPORT_Z_SCALE_ENA_bit = 1 << 4, -+ VPORT_Z_OFFSET_ENA_bit = 1 << 5, -+ VTX_XY_FMT_bit = 1 << 8, -+ VTX_Z_FMT_bit = 1 << 9, -+ VTX_W0_FMT_bit = 1 << 10, -+ PERFCOUNTER_REF_bit = 1 << 11, -+ PA_CL_VS_OUT_CNTL = 0x0002881c, -+ CLIP_DIST_ENA_0_bit = 1 << 0, -+ CLIP_DIST_ENA_1_bit = 1 << 1, -+ CLIP_DIST_ENA_2_bit = 1 << 2, -+ CLIP_DIST_ENA_3_bit = 1 << 3, -+ CLIP_DIST_ENA_4_bit = 1 << 4, -+ CLIP_DIST_ENA_5_bit = 1 << 5, -+ CLIP_DIST_ENA_6_bit = 1 << 6, -+ CLIP_DIST_ENA_7_bit = 1 << 7, -+ CULL_DIST_ENA_0_bit = 1 << 8, -+ CULL_DIST_ENA_1_bit = 1 << 9, -+ CULL_DIST_ENA_2_bit = 1 << 10, -+ CULL_DIST_ENA_3_bit = 1 << 11, -+ CULL_DIST_ENA_4_bit = 1 << 12, -+ CULL_DIST_ENA_5_bit = 1 << 13, -+ CULL_DIST_ENA_6_bit = 1 << 14, -+ CULL_DIST_ENA_7_bit = 1 << 15, -+ USE_VTX_POINT_SIZE_bit = 1 << 16, -+ USE_VTX_EDGE_FLAG_bit = 1 << 17, -+ USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18, -+ USE_VTX_VIEWPORT_INDX_bit = 1 << 19, -+ USE_VTX_KILL_FLAG_bit = 1 << 20, -+ VS_OUT_MISC_VEC_ENA_bit = 1 << 21, -+ VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22, -+ VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23, -+ PA_CL_NANINF_CNTL = 0x00028820, -+ VTE_XY_INF_DISCARD_bit = 1 << 0, -+ VTE_Z_INF_DISCARD_bit = 1 << 1, -+ VTE_W_INF_DISCARD_bit = 1 << 2, -+ VTE_0XNANINF_IS_0_bit = 1 << 3, -+ VTE_XY_NAN_RETAIN_bit = 1 << 4, -+ VTE_Z_NAN_RETAIN_bit = 1 << 5, -+ VTE_W_NAN_RETAIN_bit = 1 << 6, -+ VTE_W_RECIP_NAN_IS_0_bit = 1 << 7, -+ VS_XY_NAN_TO_INF_bit = 1 << 8, -+ VS_XY_INF_RETAIN_bit = 1 << 9, -+ VS_Z_NAN_TO_INF_bit = 1 << 10, -+ VS_Z_INF_RETAIN_bit = 1 << 11, -+ VS_W_NAN_TO_INF_bit = 1 << 12, -+ VS_W_INF_RETAIN_bit = 1 << 13, -+ VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14, -+ VTE_NO_OUTPUT_NEG_0_bit = 1 << 20, -+ SQ_PGM_START_PS = 0x00028840, -+ SQ_PGM_RESOURCES_PS = 0x00028850, -+ NUM_GPRS_mask = 0xff << 0, -+ NUM_GPRS_shift = 0, -+ STACK_SIZE_mask = 0xff << 8, -+ STACK_SIZE_shift = 8, -+ SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21, -+ FETCH_CACHE_LINES_mask = 0x07 << 24, -+ FETCH_CACHE_LINES_shift = 24, -+ UNCACHED_FIRST_INST_bit = 1 << 28, -+ CLAMP_CONSTS_bit = 1 << 31, -+ SQ_PGM_EXPORTS_PS = 0x00028854, -+ EXPORT_MODE_mask = 0x1f << 0, -+ EXPORT_MODE_shift = 0, -+ SQ_PGM_START_VS = 0x00028858, -+ SQ_PGM_RESOURCES_VS = 0x00028868, -+/* NUM_GPRS_mask = 0xff << 0, */ -+/* NUM_GPRS_shift = 0, */ -+/* STACK_SIZE_mask = 0xff << 8, */ -+/* STACK_SIZE_shift = 8, */ -+ SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21, -+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ -+/* FETCH_CACHE_LINES_shift = 24, */ -+/* UNCACHED_FIRST_INST_bit = 1 << 28, */ -+ SQ_PGM_START_GS = 0x0002886c, -+ SQ_PGM_RESOURCES_GS = 0x0002887c, -+/* NUM_GPRS_mask = 0xff << 0, */ -+/* NUM_GPRS_shift = 0, */ -+/* STACK_SIZE_mask = 0xff << 8, */ -+/* STACK_SIZE_shift = 8, */ -+ SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21, -+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ -+/* FETCH_CACHE_LINES_shift = 24, */ -+/* UNCACHED_FIRST_INST_bit = 1 << 28, */ -+ SQ_PGM_START_ES = 0x00028880, -+ SQ_PGM_RESOURCES_ES = 0x00028890, -+/* NUM_GPRS_mask = 0xff << 0, */ -+/* NUM_GPRS_shift = 0, */ -+/* STACK_SIZE_mask = 0xff << 8, */ -+/* STACK_SIZE_shift = 8, */ -+ SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21, -+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ -+/* FETCH_CACHE_LINES_shift = 24, */ -+/* UNCACHED_FIRST_INST_bit = 1 << 28, */ -+ SQ_PGM_START_FS = 0x00028894, -+ SQ_PGM_RESOURCES_FS = 0x000288a4, -+/* NUM_GPRS_mask = 0xff << 0, */ -+/* NUM_GPRS_shift = 0, */ -+/* STACK_SIZE_mask = 0xff << 8, */ -+/* STACK_SIZE_shift = 8, */ -+ SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21, -+ SQ_ESGS_RING_ITEMSIZE = 0x000288a8, -+ ITEMSIZE_mask = 0x7fff << 0, -+ ITEMSIZE_shift = 0, -+ SQ_GSVS_RING_ITEMSIZE = 0x000288ac, -+/* ITEMSIZE_mask = 0x7fff << 0, */ -+/* ITEMSIZE_shift = 0, */ -+ SQ_ESTMP_RING_ITEMSIZE = 0x000288b0, -+/* ITEMSIZE_mask = 0x7fff << 0, */ -+/* ITEMSIZE_shift = 0, */ -+ SQ_GSTMP_RING_ITEMSIZE = 0x000288b4, -+/* ITEMSIZE_mask = 0x7fff << 0, */ -+/* ITEMSIZE_shift = 0, */ -+ SQ_VSTMP_RING_ITEMSIZE = 0x000288b8, -+/* ITEMSIZE_mask = 0x7fff << 0, */ -+/* ITEMSIZE_shift = 0, */ -+ SQ_PSTMP_RING_ITEMSIZE = 0x000288bc, -+/* ITEMSIZE_mask = 0x7fff << 0, */ -+/* ITEMSIZE_shift = 0, */ -+ SQ_FBUF_RING_ITEMSIZE = 0x000288c0, -+/* ITEMSIZE_mask = 0x7fff << 0, */ -+/* ITEMSIZE_shift = 0, */ -+ SQ_REDUC_RING_ITEMSIZE = 0x000288c4, -+/* ITEMSIZE_mask = 0x7fff << 0, */ -+/* ITEMSIZE_shift = 0, */ -+ SQ_GS_VERT_ITEMSIZE = 0x000288c8, -+/* ITEMSIZE_mask = 0x7fff << 0, */ -+/* ITEMSIZE_shift = 0, */ -+ SQ_PGM_CF_OFFSET_PS = 0x000288cc, -+ PGM_CF_OFFSET_mask = 0xfffff << 0, -+ PGM_CF_OFFSET_shift = 0, -+ SQ_PGM_CF_OFFSET_VS = 0x000288d0, -+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ -+/* PGM_CF_OFFSET_shift = 0, */ -+ SQ_PGM_CF_OFFSET_GS = 0x000288d4, -+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ -+/* PGM_CF_OFFSET_shift = 0, */ -+ SQ_PGM_CF_OFFSET_ES = 0x000288d8, -+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ -+/* PGM_CF_OFFSET_shift = 0, */ -+ SQ_PGM_CF_OFFSET_FS = 0x000288dc, -+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ -+/* PGM_CF_OFFSET_shift = 0, */ -+ SQ_VTX_SEMANTIC_CLEAR = 0x000288e0, -+ SQ_ALU_CONST_CACHE_PS_0 = 0x00028940, -+ SQ_ALU_CONST_CACHE_PS_0_num = 16, -+ SQ_ALU_CONST_CACHE_VS_0 = 0x00028980, -+ SQ_ALU_CONST_CACHE_VS_0_num = 16, -+ SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0, -+ SQ_ALU_CONST_CACHE_GS_0_num = 16, -+ PA_SU_POINT_SIZE = 0x00028a00, -+ PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0, -+ PA_SU_POINT_SIZE__HEIGHT_shift = 0, -+ PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16, -+ PA_SU_POINT_SIZE__WIDTH_shift = 16, -+ PA_SU_POINT_MINMAX = 0x00028a04, -+ MIN_SIZE_mask = 0xffff << 0, -+ MIN_SIZE_shift = 0, -+ MAX_SIZE_mask = 0xffff << 16, -+ MAX_SIZE_shift = 16, -+ PA_SU_LINE_CNTL = 0x00028a08, -+ PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0, -+ PA_SU_LINE_CNTL__WIDTH_shift = 0, -+ PA_SC_LINE_STIPPLE = 0x00028a0c, -+ LINE_PATTERN_mask = 0xffff << 0, -+ LINE_PATTERN_shift = 0, -+ REPEAT_COUNT_mask = 0xff << 16, -+ REPEAT_COUNT_shift = 16, -+ PATTERN_BIT_ORDER_bit = 1 << 28, -+ AUTO_RESET_CNTL_mask = 0x03 << 29, -+ AUTO_RESET_CNTL_shift = 29, -+ VGT_OUTPUT_PATH_CNTL = 0x00028a10, -+ PATH_SELECT_mask = 0x03 << 0, -+ PATH_SELECT_shift = 0, -+ VGT_OUTPATH_VTX_REUSE = 0x00, -+ VGT_OUTPATH_TESS_EN = 0x01, -+ VGT_OUTPATH_PASSTHRU = 0x02, -+ VGT_OUTPATH_GS_BLOCK = 0x03, -+ VGT_HOS_CNTL = 0x00028a14, -+ TESS_MODE_mask = 0x03 << 0, -+ TESS_MODE_shift = 0, -+ VGT_HOS_MAX_TESS_LEVEL = 0x00028a18, -+ VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c, -+ VGT_HOS_REUSE_DEPTH = 0x00028a20, -+ REUSE_DEPTH_mask = 0xff << 0, -+ REUSE_DEPTH_shift = 0, -+ VGT_GROUP_PRIM_TYPE = 0x00028a24, -+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0, -+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0, -+ VGT_GRP_3D_POINT = 0x00, -+ VGT_GRP_3D_LINE = 0x01, -+ VGT_GRP_3D_TRI = 0x02, -+ VGT_GRP_3D_RECT = 0x03, -+ VGT_GRP_3D_QUAD = 0x04, -+ VGT_GRP_2D_COPY_RECT_V0 = 0x05, -+ VGT_GRP_2D_COPY_RECT_V1 = 0x06, -+ VGT_GRP_2D_COPY_RECT_V2 = 0x07, -+ VGT_GRP_2D_COPY_RECT_V3 = 0x08, -+ VGT_GRP_2D_FILL_RECT = 0x09, -+ VGT_GRP_2D_LINE = 0x0a, -+ VGT_GRP_2D_TRI = 0x0b, -+ VGT_GRP_PRIM_INDEX_LINE = 0x0c, -+ VGT_GRP_PRIM_INDEX_TRI = 0x0d, -+ VGT_GRP_PRIM_INDEX_QUAD = 0x0e, -+ VGT_GRP_3D_LINE_ADJ = 0x0f, -+ VGT_GRP_3D_TRI_ADJ = 0x10, -+ RETAIN_ORDER_bit = 1 << 14, -+ RETAIN_QUADS_bit = 1 << 15, -+ PRIM_ORDER_mask = 0x07 << 16, -+ PRIM_ORDER_shift = 16, -+ VGT_GRP_LIST = 0x00, -+ VGT_GRP_STRIP = 0x01, -+ VGT_GRP_FAN = 0x02, -+ VGT_GRP_LOOP = 0x03, -+ VGT_GRP_POLYGON = 0x04, -+ VGT_GROUP_FIRST_DECR = 0x00028a28, -+ FIRST_DECR_mask = 0x0f << 0, -+ FIRST_DECR_shift = 0, -+ VGT_GROUP_DECR = 0x00028a2c, -+ DECR_mask = 0x0f << 0, -+ DECR_shift = 0, -+ VGT_GROUP_VECT_0_CNTL = 0x00028a30, -+ COMP_X_EN_bit = 1 << 0, -+ COMP_Y_EN_bit = 1 << 1, -+ COMP_Z_EN_bit = 1 << 2, -+ COMP_W_EN_bit = 1 << 3, -+ VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8, -+ VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8, -+ SHIFT_mask = 0xff << 16, -+ SHIFT_shift = 16, -+ VGT_GROUP_VECT_1_CNTL = 0x00028a34, -+/* COMP_X_EN_bit = 1 << 0, */ -+/* COMP_Y_EN_bit = 1 << 1, */ -+/* COMP_Z_EN_bit = 1 << 2, */ -+/* COMP_W_EN_bit = 1 << 3, */ -+ VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8, -+ VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8, -+/* SHIFT_mask = 0xff << 16, */ -+/* SHIFT_shift = 16, */ -+ VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38, -+ X_CONV_mask = 0x0f << 0, -+ X_CONV_shift = 0, -+ VGT_GRP_INDEX_16 = 0x00, -+ VGT_GRP_INDEX_32 = 0x01, -+ VGT_GRP_UINT_16 = 0x02, -+ VGT_GRP_UINT_32 = 0x03, -+ VGT_GRP_SINT_16 = 0x04, -+ VGT_GRP_SINT_32 = 0x05, -+ VGT_GRP_FLOAT_32 = 0x06, -+ VGT_GRP_AUTO_PRIM = 0x07, -+ VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, -+ X_OFFSET_mask = 0x0f << 4, -+ X_OFFSET_shift = 4, -+ Y_CONV_mask = 0x0f << 8, -+ Y_CONV_shift = 8, -+/* VGT_GRP_INDEX_16 = 0x00, */ -+/* VGT_GRP_INDEX_32 = 0x01, */ -+/* VGT_GRP_UINT_16 = 0x02, */ -+/* VGT_GRP_UINT_32 = 0x03, */ -+/* VGT_GRP_SINT_16 = 0x04, */ -+/* VGT_GRP_SINT_32 = 0x05, */ -+/* VGT_GRP_FLOAT_32 = 0x06, */ -+/* VGT_GRP_AUTO_PRIM = 0x07, */ -+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ -+ Y_OFFSET_mask = 0x0f << 12, -+ Y_OFFSET_shift = 12, -+ Z_CONV_mask = 0x0f << 16, -+ Z_CONV_shift = 16, -+/* VGT_GRP_INDEX_16 = 0x00, */ -+/* VGT_GRP_INDEX_32 = 0x01, */ -+/* VGT_GRP_UINT_16 = 0x02, */ -+/* VGT_GRP_UINT_32 = 0x03, */ -+/* VGT_GRP_SINT_16 = 0x04, */ -+/* VGT_GRP_SINT_32 = 0x05, */ -+/* VGT_GRP_FLOAT_32 = 0x06, */ -+/* VGT_GRP_AUTO_PRIM = 0x07, */ -+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ -+ Z_OFFSET_mask = 0x0f << 20, -+ Z_OFFSET_shift = 20, -+ W_CONV_mask = 0x0f << 24, -+ W_CONV_shift = 24, -+/* VGT_GRP_INDEX_16 = 0x00, */ -+/* VGT_GRP_INDEX_32 = 0x01, */ -+/* VGT_GRP_UINT_16 = 0x02, */ -+/* VGT_GRP_UINT_32 = 0x03, */ -+/* VGT_GRP_SINT_16 = 0x04, */ -+/* VGT_GRP_SINT_32 = 0x05, */ -+/* VGT_GRP_FLOAT_32 = 0x06, */ -+/* VGT_GRP_AUTO_PRIM = 0x07, */ -+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ -+ W_OFFSET_mask = 0x0f << 28, -+ W_OFFSET_shift = 28, -+ VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c, -+/* X_CONV_mask = 0x0f << 0, */ -+/* X_CONV_shift = 0, */ -+/* VGT_GRP_INDEX_16 = 0x00, */ -+/* VGT_GRP_INDEX_32 = 0x01, */ -+/* VGT_GRP_UINT_16 = 0x02, */ -+/* VGT_GRP_UINT_32 = 0x03, */ -+/* VGT_GRP_SINT_16 = 0x04, */ -+/* VGT_GRP_SINT_32 = 0x05, */ -+/* VGT_GRP_FLOAT_32 = 0x06, */ -+/* VGT_GRP_AUTO_PRIM = 0x07, */ -+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ -+/* X_OFFSET_mask = 0x0f << 4, */ -+/* X_OFFSET_shift = 4, */ -+/* Y_CONV_mask = 0x0f << 8, */ -+/* Y_CONV_shift = 8, */ -+/* VGT_GRP_INDEX_16 = 0x00, */ -+/* VGT_GRP_INDEX_32 = 0x01, */ -+/* VGT_GRP_UINT_16 = 0x02, */ -+/* VGT_GRP_UINT_32 = 0x03, */ -+/* VGT_GRP_SINT_16 = 0x04, */ -+/* VGT_GRP_SINT_32 = 0x05, */ -+/* VGT_GRP_FLOAT_32 = 0x06, */ -+/* VGT_GRP_AUTO_PRIM = 0x07, */ -+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ -+/* Y_OFFSET_mask = 0x0f << 12, */ -+/* Y_OFFSET_shift = 12, */ -+/* Z_CONV_mask = 0x0f << 16, */ -+/* Z_CONV_shift = 16, */ -+/* VGT_GRP_INDEX_16 = 0x00, */ -+/* VGT_GRP_INDEX_32 = 0x01, */ -+/* VGT_GRP_UINT_16 = 0x02, */ -+/* VGT_GRP_UINT_32 = 0x03, */ -+/* VGT_GRP_SINT_16 = 0x04, */ -+/* VGT_GRP_SINT_32 = 0x05, */ -+/* VGT_GRP_FLOAT_32 = 0x06, */ -+/* VGT_GRP_AUTO_PRIM = 0x07, */ -+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ -+/* Z_OFFSET_mask = 0x0f << 20, */ -+/* Z_OFFSET_shift = 20, */ -+/* W_CONV_mask = 0x0f << 24, */ -+/* W_CONV_shift = 24, */ -+/* VGT_GRP_INDEX_16 = 0x00, */ -+/* VGT_GRP_INDEX_32 = 0x01, */ -+/* VGT_GRP_UINT_16 = 0x02, */ -+/* VGT_GRP_UINT_32 = 0x03, */ -+/* VGT_GRP_SINT_16 = 0x04, */ -+/* VGT_GRP_SINT_32 = 0x05, */ -+/* VGT_GRP_FLOAT_32 = 0x06, */ -+/* VGT_GRP_AUTO_PRIM = 0x07, */ -+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ -+/* W_OFFSET_mask = 0x0f << 28, */ -+/* W_OFFSET_shift = 28, */ -+ VGT_GS_MODE = 0x00028a40, -+ MODE_mask = 0x03 << 0, -+ MODE_shift = 0, -+ GS_OFF = 0x00, -+ GS_SCENARIO_A = 0x01, -+ GS_SCENARIO_B = 0x02, -+ GS_SCENARIO_G = 0x03, -+ ES_PASSTHRU_bit = 1 << 2, -+ CUT_MODE_mask = 0x03 << 3, -+ CUT_MODE_shift = 3, -+ GS_CUT_1024 = 0x00, -+ GS_CUT_512 = 0x01, -+ GS_CUT_256 = 0x02, -+ GS_CUT_128 = 0x03, -+ PA_SC_MPASS_PS_CNTL = 0x00028a48, -+ MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0, -+ MPASS_PIX_VEC_PER_PASS_shift = 0, -+ MPASS_PS_ENA_bit = 1 << 31, -+ PA_SC_MODE_CNTL = 0x00028a4c, -+ MSAA_ENABLE_bit = 1 << 0, -+ CLIPRECT_ENABLE_bit = 1 << 1, -+ LINE_STIPPLE_ENABLE_bit = 1 << 2, -+ MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3, -+ WALK_ORDER_ENABLE_bit = 1 << 4, -+ HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5, -+ WALK_SIZE_bit = 1 << 6, -+ WALK_ALIGNMENT_bit = 1 << 7, -+ WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8, -+ TILE_COVER_NO_SCISSOR_bit = 1 << 9, -+ KILL_PIX_POST_HI_Z_bit = 1 << 10, -+ KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11, -+ MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12, -+ TILE_COVER_DISABLE_bit = 1 << 13, -+ FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14, -+ FORCE_EOV_TILE_ENABLE_bit = 1 << 15, -+ FORCE_EOV_REZ_ENABLE_bit = 1 << 16, -+ PS_ITER_SAMPLE_bit = 1 << 17, -+ VGT_ENHANCE = 0x00028a50, -+ VGT_ENHANCE__MI_TIMESTAMP_RES_mask = 0x03 << 0, -+ VGT_ENHANCE__MI_TIMESTAMP_RES_shift = 0, -+ X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00, -+ X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01, -+ X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02, -+ X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03, -+ MISC_mask = 0x3fffffff << 2, -+ MISC_shift = 2, -+ VGT_GS_OUT_PRIM_TYPE = 0x00028a6c, -+ OUTPRIM_TYPE_mask = 0x3f << 0, -+ OUTPRIM_TYPE_shift = 0, -+ POINTLIST = 0x00, -+ LINESTRIP = 0x01, -+ TRISTRIP = 0x02, -+ VGT_DMA_SIZE = 0x00028a74, -+ VGT_DMA_INDEX_TYPE = 0x00028a7c, -+/* INDEX_TYPE_mask = 0x03 << 0, */ -+/* INDEX_TYPE_shift = 0, */ -+ VGT_INDEX_16 = 0x00, -+ VGT_INDEX_32 = 0x01, -+ SWAP_MODE_mask = 0x03 << 2, -+ SWAP_MODE_shift = 2, -+ VGT_DMA_SWAP_NONE = 0x00, -+ VGT_DMA_SWAP_16_BIT = 0x01, -+ VGT_DMA_SWAP_32_BIT = 0x02, -+ VGT_DMA_SWAP_WORD = 0x03, -+ VGT_PRIMITIVEID_EN = 0x00028a84, -+ PRIMITIVEID_EN_bit = 1 << 0, -+ VGT_DMA_NUM_INSTANCES = 0x00028a88, -+ VGT_EVENT_INITIATOR = 0x00028a90, -+ EVENT_TYPE_mask = 0x3f << 0, -+ EVENT_TYPE_shift = 0, -+ CACHE_FLUSH_TS = 0x04, -+ CONTEXT_DONE = 0x05, -+ CACHE_FLUSH = 0x06, -+ VIZQUERY_START = 0x07, -+ VIZQUERY_END = 0x08, -+ SC_WAIT_WC = 0x09, -+ MPASS_PS_CP_REFETCH = 0x0a, -+ MPASS_PS_RST_START = 0x0b, -+ MPASS_PS_INCR_START = 0x0c, -+ RST_PIX_CNT = 0x0d, -+ RST_VTX_CNT = 0x0e, -+ VS_PARTIAL_FLUSH = 0x0f, -+ PS_PARTIAL_FLUSH = 0x10, -+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x14, -+ ZPASS_DONE = 0x15, -+ CACHE_FLUSH_AND_INV_EVENT = 0x16, -+ PERFCOUNTER_START = 0x17, -+ PERFCOUNTER_STOP = 0x18, -+ PIPELINESTAT_START = 0x19, -+ PIPELINESTAT_STOP = 0x1a, -+ PERFCOUNTER_SAMPLE = 0x1b, -+ FLUSH_ES_OUTPUT = 0x1c, -+ FLUSH_GS_OUTPUT = 0x1d, -+ SAMPLE_PIPELINESTAT = 0x1e, -+ SO_VGTSTREAMOUT_FLUSH = 0x1f, -+ SAMPLE_STREAMOUTSTATS = 0x20, -+ RESET_VTX_CNT = 0x21, -+ BLOCK_CONTEXT_DONE = 0x22, -+ CR_CONTEXT_DONE = 0x23, -+ VGT_FLUSH = 0x24, -+ CR_DONE_TS = 0x25, -+ SQ_NON_EVENT = 0x26, -+ SC_SEND_DB_VPZ = 0x27, -+ BOTTOM_OF_PIPE_TS = 0x28, -+ DB_CACHE_FLUSH_AND_INV = 0x2a, -+ ADDRESS_HI_mask = 0xff << 19, -+ ADDRESS_HI_shift = 19, -+ EXTENDED_EVENT_bit = 1 << 27, -+ VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94, -+ RESET_EN_bit = 1 << 0, -+ VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0, -+ VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4, -+ VGT_STRMOUT_EN = 0x00028ab0, -+ STREAMOUT_bit = 1 << 0, -+ VGT_REUSE_OFF = 0x00028ab4, -+ REUSE_OFF_bit = 1 << 0, -+ VGT_VTX_CNT_EN = 0x00028ab8, -+ VTX_CNT_EN_bit = 1 << 0, -+ VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0, -+ VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4, -+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0, -+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0, -+ VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8, -+ VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc, -+ VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0, -+ VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4, -+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0, -+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0, -+ VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8, -+ VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec, -+ VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0, -+ VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4, -+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0, -+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0, -+ VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8, -+ VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc, -+ VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00, -+ VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04, -+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0, -+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0, -+ VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08, -+ VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c, -+ VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10, -+ VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14, -+ VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18, -+ VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c, -+ VGT_STRMOUT_BUFFER_EN = 0x00028b20, -+ BUFFER_0_EN_bit = 1 << 0, -+ BUFFER_1_EN_bit = 1 << 1, -+ BUFFER_2_EN_bit = 1 << 2, -+ BUFFER_3_EN_bit = 1 << 3, -+ VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28, -+ VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c, -+ VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30, -+ VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44, -+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0, -+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0, -+ VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48, -+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0, -+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0, -+ VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c, -+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0, -+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0, -+ VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50, -+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0, -+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0, -+ PA_SC_LINE_CNTL = 0x00028c00, -+ BRES_CNTL_mask = 0xff << 0, -+ BRES_CNTL_shift = 0, -+ USE_BRES_CNTL_bit = 1 << 8, -+ EXPAND_LINE_WIDTH_bit = 1 << 9, -+ LAST_PIXEL_bit = 1 << 10, -+ PA_SC_AA_CONFIG = 0x00028c04, -+ MSAA_NUM_SAMPLES_mask = 0x03 << 0, -+ MSAA_NUM_SAMPLES_shift = 0, -+ AA_MASK_CENTROID_DTMN_bit = 1 << 4, -+ MAX_SAMPLE_DIST_mask = 0x0f << 13, -+ MAX_SAMPLE_DIST_shift = 13, -+ PA_SU_VTX_CNTL = 0x00028c08, -+ PIX_CENTER_bit = 1 << 0, -+ PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1, -+ PA_SU_VTX_CNTL__ROUND_MODE_shift = 1, -+ X_TRUNCATE = 0x00, -+ X_ROUND = 0x01, -+ X_ROUND_TO_EVEN = 0x02, -+ X_ROUND_TO_ODD = 0x03, -+ QUANT_MODE_mask = 0x07 << 3, -+ QUANT_MODE_shift = 3, -+ X_1_16TH = 0x00, -+ X_1_8TH = 0x01, -+ X_1_4TH = 0x02, -+ X_1_2 = 0x03, -+ X_1 = 0x04, -+ X_1_256TH = 0x05, -+ PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c, -+ PA_CL_GB_VERT_DISC_ADJ = 0x00028c10, -+ PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14, -+ PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18, -+ PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c, -+/* S0_X_mask = 0x0f << 0, */ -+/* S0_X_shift = 0, */ -+/* S0_Y_mask = 0x0f << 4, */ -+/* S0_Y_shift = 4, */ -+/* S1_X_mask = 0x0f << 8, */ -+/* S1_X_shift = 8, */ -+/* S1_Y_mask = 0x0f << 12, */ -+/* S1_Y_shift = 12, */ -+/* S2_X_mask = 0x0f << 16, */ -+/* S2_X_shift = 16, */ -+/* S2_Y_mask = 0x0f << 20, */ -+/* S2_Y_shift = 20, */ -+/* S3_X_mask = 0x0f << 24, */ -+/* S3_X_shift = 24, */ -+/* S3_Y_mask = 0x0f << 28, */ -+/* S3_Y_shift = 28, */ -+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20, -+/* S4_X_mask = 0x0f << 0, */ -+/* S4_X_shift = 0, */ -+/* S4_Y_mask = 0x0f << 4, */ -+/* S4_Y_shift = 4, */ -+/* S5_X_mask = 0x0f << 8, */ -+/* S5_X_shift = 8, */ -+/* S5_Y_mask = 0x0f << 12, */ -+/* S5_Y_shift = 12, */ -+/* S6_X_mask = 0x0f << 16, */ -+/* S6_X_shift = 16, */ -+/* S6_Y_mask = 0x0f << 20, */ -+/* S6_Y_shift = 20, */ -+/* S7_X_mask = 0x0f << 24, */ -+/* S7_X_shift = 24, */ -+/* S7_Y_mask = 0x0f << 28, */ -+/* S7_Y_shift = 28, */ -+ CB_CLRCMP_CONTROL = 0x00028c30, -+ CLRCMP_FCN_SRC_mask = 0x07 << 0, -+ CLRCMP_FCN_SRC_shift = 0, -+ CLRCMP_DRAW_ALWAYS = 0x00, -+ CLRCMP_DRAW_NEVER = 0x01, -+ CLRCMP_DRAW_ON_NEQ = 0x04, -+ CLRCMP_DRAW_ON_EQ = 0x05, -+ CLRCMP_FCN_DST_mask = 0x07 << 8, -+ CLRCMP_FCN_DST_shift = 8, -+/* CLRCMP_DRAW_ALWAYS = 0x00, */ -+/* CLRCMP_DRAW_NEVER = 0x01, */ -+/* CLRCMP_DRAW_ON_NEQ = 0x04, */ -+/* CLRCMP_DRAW_ON_EQ = 0x05, */ -+ CLRCMP_FCN_SEL_mask = 0x03 << 24, -+ CLRCMP_FCN_SEL_shift = 24, -+ CLRCMP_SEL_DST = 0x00, -+ CLRCMP_SEL_SRC = 0x01, -+ CLRCMP_SEL_AND = 0x02, -+ CB_CLRCMP_SRC = 0x00028c34, -+ CB_CLRCMP_DST = 0x00028c38, -+ CB_CLRCMP_MSK = 0x00028c3c, -+ PA_SC_AA_MASK = 0x00028c48, -+ VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58, -+ VTX_REUSE_DEPTH_mask = 0xff << 0, -+ VTX_REUSE_DEPTH_shift = 0, -+ VGT_OUT_DEALLOC_CNTL = 0x00028c5c, -+ DEALLOC_DIST_mask = 0x7f << 0, -+ DEALLOC_DIST_shift = 0, -+ DB_RENDER_CONTROL = 0x00028d0c, -+ DEPTH_CLEAR_ENABLE_bit = 1 << 0, -+ STENCIL_CLEAR_ENABLE_bit = 1 << 1, -+ DEPTH_COPY_bit = 1 << 2, -+ STENCIL_COPY_bit = 1 << 3, -+ RESUMMARIZE_ENABLE_bit = 1 << 4, -+ STENCIL_COMPRESS_DISABLE_bit = 1 << 5, -+ DEPTH_COMPRESS_DISABLE_bit = 1 << 6, -+ COPY_CENTROID_bit = 1 << 7, -+ COPY_SAMPLE_mask = 0x07 << 8, -+ COPY_SAMPLE_shift = 8, -+ ZPASS_INCREMENT_DISABLE_bit = 1 << 11, -+ DB_RENDER_OVERRIDE = 0x00028d10, -+ FORCE_HIZ_ENABLE_mask = 0x03 << 0, -+ FORCE_HIZ_ENABLE_shift = 0, -+ FORCE_OFF = 0x00, -+ FORCE_ENABLE = 0x01, -+ FORCE_DISABLE = 0x02, -+ FORCE_RESERVED = 0x03, -+ FORCE_HIS_ENABLE0_mask = 0x03 << 2, -+ FORCE_HIS_ENABLE0_shift = 2, -+/* FORCE_OFF = 0x00, */ -+/* FORCE_ENABLE = 0x01, */ -+/* FORCE_DISABLE = 0x02, */ -+/* FORCE_RESERVED = 0x03, */ -+ FORCE_HIS_ENABLE1_mask = 0x03 << 4, -+ FORCE_HIS_ENABLE1_shift = 4, -+/* FORCE_OFF = 0x00, */ -+/* FORCE_ENABLE = 0x01, */ -+/* FORCE_DISABLE = 0x02, */ -+/* FORCE_RESERVED = 0x03, */ -+ FORCE_SHADER_Z_ORDER_bit = 1 << 6, -+ FAST_Z_DISABLE_bit = 1 << 7, -+ FAST_STENCIL_DISABLE_bit = 1 << 8, -+ NOOP_CULL_DISABLE_bit = 1 << 9, -+ FORCE_COLOR_KILL_bit = 1 << 10, -+ FORCE_Z_READ_bit = 1 << 11, -+ FORCE_STENCIL_READ_bit = 1 << 12, -+ FORCE_FULL_Z_RANGE_mask = 0x03 << 13, -+ FORCE_FULL_Z_RANGE_shift = 13, -+/* FORCE_OFF = 0x00, */ -+/* FORCE_ENABLE = 0x01, */ -+/* FORCE_DISABLE = 0x02, */ -+/* FORCE_RESERVED = 0x03, */ -+ FORCE_QC_SMASK_CONFLICT_bit = 1 << 15, -+ DISABLE_VIEWPORT_CLAMP_bit = 1 << 16, -+ IGNORE_SC_ZRANGE_bit = 1 << 17, -+ DB_HTILE_SURFACE = 0x00028d24, -+ HTILE_WIDTH_bit = 1 << 0, -+ HTILE_HEIGHT_bit = 1 << 1, -+ LINEAR_bit = 1 << 2, -+ FULL_CACHE_bit = 1 << 3, -+ HTILE_USES_PRELOAD_WIN_bit = 1 << 4, -+ PRELOAD_bit = 1 << 5, -+ PREFETCH_WIDTH_mask = 0x3f << 6, -+ PREFETCH_WIDTH_shift = 6, -+ PREFETCH_HEIGHT_mask = 0x3f << 12, -+ PREFETCH_HEIGHT_shift = 12, -+ DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c, -+ COMPAREFUNC1_mask = 0x07 << 0, -+ COMPAREFUNC1_shift = 0, -+/* REF_NEVER = 0x00, */ -+/* REF_LESS = 0x01, */ -+/* REF_EQUAL = 0x02, */ -+/* REF_LEQUAL = 0x03, */ -+/* REF_GREATER = 0x04, */ -+/* REF_NOTEQUAL = 0x05, */ -+/* REF_GEQUAL = 0x06, */ -+/* REF_ALWAYS = 0x07, */ -+ COMPAREVALUE1_mask = 0xff << 4, -+ COMPAREVALUE1_shift = 4, -+ COMPAREMASK1_mask = 0xff << 12, -+ COMPAREMASK1_shift = 12, -+ ENABLE1_bit = 1 << 24, -+ DB_PRELOAD_CONTROL = 0x00028d30, -+ START_X_mask = 0xff << 0, -+ START_X_shift = 0, -+ START_Y_mask = 0xff << 8, -+ START_Y_shift = 8, -+ MAX_X_mask = 0xff << 16, -+ MAX_X_shift = 16, -+ MAX_Y_mask = 0xff << 24, -+ MAX_Y_shift = 24, -+ DB_PREFETCH_LIMIT = 0x00028d34, -+ DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0, -+ DEPTH_HEIGHT_TILE_MAX_shift = 0, -+ PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8, -+ POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0, -+ POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0, -+ POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8, -+ PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc, -+ PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00, -+ PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04, -+ PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08, -+ PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c, -+ PA_CL_POINT_X_RAD = 0x00028e10, -+ PA_CL_POINT_Y_RAD = 0x00028e14, -+ PA_CL_POINT_SIZE = 0x00028e18, -+ PA_CL_POINT_CULL_RAD = 0x00028e1c, -+ PA_CL_UCP_0_X = 0x00028e20, -+ PA_CL_UCP_0_X_num = 6, -+ PA_CL_UCP_0_X_offset = 16, -+ PA_CL_UCP_0_Y = 0x00028e24, -+ PA_CL_UCP_0_Y_num = 6, -+ PA_CL_UCP_0_Y_offset = 16, -+ PA_CL_UCP_0_Z = 0x00028e28, -+ PA_CL_UCP_0_Z_num = 6, -+ PA_CL_UCP_0_Z_offset = 16, -+ SQ_ALU_CONSTANT0_0 = 0x00030000, -+ SQ_ALU_CONSTANT1_0 = 0x00030004, -+ SQ_ALU_CONSTANT2_0 = 0x00030008, -+ SQ_ALU_CONSTANT3_0 = 0x0003000c, -+ SQ_VTX_CONSTANT_WORD0_0 = 0x00038000, -+ SQ_TEX_RESOURCE_WORD0_0 = 0x00038000, -+ DIM_mask = 0x07 << 0, -+ DIM_shift = 0, -+ SQ_TEX_DIM_1D = 0x00, -+ SQ_TEX_DIM_2D = 0x01, -+ SQ_TEX_DIM_3D = 0x02, -+ SQ_TEX_DIM_CUBEMAP = 0x03, -+ SQ_TEX_DIM_1D_ARRAY = 0x04, -+ SQ_TEX_DIM_2D_ARRAY = 0x05, -+ SQ_TEX_DIM_2D_MSAA = 0x06, -+ SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07, -+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask = 0x0f << 3, -+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift = 3, -+ TILE_TYPE_bit = 1 << 7, -+ PITCH_mask = 0x7ff << 8, -+ PITCH_shift = 8, -+ TEX_WIDTH_mask = 0x1fff << 19, -+ TEX_WIDTH_shift = 19, -+ SQ_VTX_CONSTANT_WORD1_0 = 0x00038004, -+ SQ_TEX_RESOURCE_WORD1_0 = 0x00038004, -+ TEX_HEIGHT_mask = 0x1fff << 0, -+ TEX_HEIGHT_shift = 0, -+ TEX_DEPTH_mask = 0x1fff << 13, -+ TEX_DEPTH_shift = 13, -+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask = 0x3f << 26, -+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift = 26, -+ SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, -+ BASE_ADDRESS_HI_mask = 0xff << 0, -+ BASE_ADDRESS_HI_shift = 0, -+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8, -+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8, -+ SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19, -+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, -+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20, -+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26, -+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26, -+/* SQ_NUM_FORMAT_NORM = 0x00, */ -+/* SQ_NUM_FORMAT_INT = 0x01, */ -+/* SQ_NUM_FORMAT_SCALED = 0x02, */ -+ SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28, -+ SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29, -+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30, -+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30, -+/* SQ_ENDIAN_NONE = 0x00, */ -+/* SQ_ENDIAN_8IN16 = 0x01, */ -+/* SQ_ENDIAN_8IN32 = 0x02, */ -+ SQ_TEX_RESOURCE_WORD2_0 = 0x00038008, -+ SQ_VTX_CONSTANT_WORD3_0 = 0x0003800c, -+ MEM_REQUEST_SIZE_mask = 0x03 << 0, -+ MEM_REQUEST_SIZE_shift = 0, -+ SQ_TEX_RESOURCE_WORD3_0 = 0x0003800c, -+ SQ_TEX_RESOURCE_WORD4_0 = 0x00038010, -+ FORMAT_COMP_X_mask = 0x03 << 0, -+ FORMAT_COMP_X_shift = 0, -+ SQ_FORMAT_COMP_UNSIGNED = 0x00, -+ SQ_FORMAT_COMP_SIGNED = 0x01, -+ SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, -+ FORMAT_COMP_Y_mask = 0x03 << 2, -+ FORMAT_COMP_Y_shift = 2, -+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ -+/* SQ_FORMAT_COMP_SIGNED = 0x01, */ -+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ -+ FORMAT_COMP_Z_mask = 0x03 << 4, -+ FORMAT_COMP_Z_shift = 4, -+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ -+/* SQ_FORMAT_COMP_SIGNED = 0x01, */ -+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ -+ FORMAT_COMP_W_mask = 0x03 << 6, -+ FORMAT_COMP_W_shift = 6, -+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ -+/* SQ_FORMAT_COMP_SIGNED = 0x01, */ -+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ -+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8, -+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8, -+/* SQ_NUM_FORMAT_NORM = 0x00, */ -+/* SQ_NUM_FORMAT_INT = 0x01, */ -+/* SQ_NUM_FORMAT_SCALED = 0x02, */ -+ SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10, -+ SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11, -+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12, -+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12, -+/* SQ_ENDIAN_NONE = 0x00, */ -+/* SQ_ENDIAN_8IN16 = 0x01, */ -+/* SQ_ENDIAN_8IN32 = 0x02, */ -+ REQUEST_SIZE_mask = 0x03 << 14, -+ REQUEST_SIZE_shift = 14, -+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16, -+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19, -+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22, -+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25, -+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25, -+/* SQ_SEL_X = 0x00, */ -+/* SQ_SEL_Y = 0x01, */ -+/* SQ_SEL_Z = 0x02, */ -+/* SQ_SEL_W = 0x03, */ -+/* SQ_SEL_0 = 0x04, */ -+/* SQ_SEL_1 = 0x05, */ -+ BASE_LEVEL_mask = 0x0f << 28, -+ BASE_LEVEL_shift = 28, -+ SQ_TEX_RESOURCE_WORD5_0 = 0x00038014, -+ LAST_LEVEL_mask = 0x0f << 0, -+ LAST_LEVEL_shift = 0, -+ BASE_ARRAY_mask = 0x1fff << 4, -+ BASE_ARRAY_shift = 4, -+ LAST_ARRAY_mask = 0x1fff << 17, -+ LAST_ARRAY_shift = 17, -+ SQ_TEX_RESOURCE_WORD6_0 = 0x00038018, -+ MPEG_CLAMP_mask = 0x03 << 0, -+ MPEG_CLAMP_shift = 0, -+ SQ_TEX_MPEG_CLAMP_OFF = 0x00, -+ SQ_TEX_MPEG_9 = 0x01, -+ SQ_TEX_MPEG_10 = 0x02, -+ PERF_MODULATION_mask = 0x07 << 5, -+ PERF_MODULATION_shift = 5, -+ INTERLACED_bit = 1 << 8, -+ SQ_TEX_RESOURCE_WORD6_0__TYPE_mask = 0x03 << 30, -+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift = 30, -+ SQ_TEX_VTX_INVALID_TEXTURE = 0x00, -+ SQ_TEX_VTX_INVALID_BUFFER = 0x01, -+ SQ_TEX_VTX_VALID_TEXTURE = 0x02, -+ SQ_TEX_VTX_VALID_BUFFER = 0x03, -+ SQ_VTX_CONSTANT_WORD6_0 = 0x00038018, -+ SQ_VTX_CONSTANT_WORD6_0__TYPE_mask = 0x03 << 30, -+ SQ_VTX_CONSTANT_WORD6_0__TYPE_shift = 30, -+/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */ -+/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */ -+/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */ -+/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */ -+ SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000, -+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0, -+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0, -+ SQ_TEX_WRAP = 0x00, -+ SQ_TEX_MIRROR = 0x01, -+ SQ_TEX_CLAMP_LAST_TEXEL = 0x02, -+ SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, -+ SQ_TEX_CLAMP_HALF_BORDER = 0x04, -+ SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, -+ SQ_TEX_CLAMP_BORDER = 0x06, -+ SQ_TEX_MIRROR_ONCE_BORDER = 0x07, -+ CLAMP_Y_mask = 0x07 << 3, -+ CLAMP_Y_shift = 3, -+/* SQ_TEX_WRAP = 0x00, */ -+/* SQ_TEX_MIRROR = 0x01, */ -+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ -+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ -+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ -+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ -+/* SQ_TEX_CLAMP_BORDER = 0x06, */ -+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ -+ CLAMP_Z_mask = 0x07 << 6, -+ CLAMP_Z_shift = 6, -+/* SQ_TEX_WRAP = 0x00, */ -+/* SQ_TEX_MIRROR = 0x01, */ -+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ -+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ -+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ -+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ -+/* SQ_TEX_CLAMP_BORDER = 0x06, */ -+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ -+ XY_MAG_FILTER_mask = 0x07 << 9, -+ XY_MAG_FILTER_shift = 9, -+ SQ_TEX_XY_FILTER_POINT = 0x00, -+ SQ_TEX_XY_FILTER_BILINEAR = 0x01, -+ SQ_TEX_XY_FILTER_BICUBIC = 0x02, -+ XY_MIN_FILTER_mask = 0x07 << 12, -+ XY_MIN_FILTER_shift = 12, -+/* SQ_TEX_XY_FILTER_POINT = 0x00, */ -+/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */ -+/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */ -+ Z_FILTER_mask = 0x03 << 15, -+ Z_FILTER_shift = 15, -+ SQ_TEX_Z_FILTER_NONE = 0x00, -+ SQ_TEX_Z_FILTER_POINT = 0x01, -+ SQ_TEX_Z_FILTER_LINEAR = 0x02, -+ MIP_FILTER_mask = 0x03 << 17, -+ MIP_FILTER_shift = 17, -+/* SQ_TEX_Z_FILTER_NONE = 0x00, */ -+/* SQ_TEX_Z_FILTER_POINT = 0x01, */ -+/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */ -+ BORDER_COLOR_TYPE_mask = 0x03 << 22, -+ BORDER_COLOR_TYPE_shift = 22, -+ SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00, -+ SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01, -+ SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02, -+ SQ_TEX_BORDER_COLOR_REGISTER = 0x03, -+ POINT_SAMPLING_CLAMP_bit = 1 << 24, -+ TEX_ARRAY_OVERRIDE_bit = 1 << 25, -+ DEPTH_COMPARE_FUNCTION_mask = 0x07 << 26, -+ DEPTH_COMPARE_FUNCTION_shift = 26, -+ SQ_TEX_DEPTH_COMPARE_NEVER = 0x00, -+ SQ_TEX_DEPTH_COMPARE_LESS = 0x01, -+ SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02, -+ SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03, -+ SQ_TEX_DEPTH_COMPARE_GREATER = 0x04, -+ SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05, -+ SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06, -+ SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07, -+ CHROMA_KEY_mask = 0x03 << 29, -+ CHROMA_KEY_shift = 29, -+ SQ_TEX_CHROMA_KEY_DISABLED = 0x00, -+ SQ_TEX_CHROMA_KEY_KILL = 0x01, -+ SQ_TEX_CHROMA_KEY_BLEND = 0x02, -+ LOD_USES_MINOR_AXIS_bit = 1 << 31, -+ SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004, -+ MIN_LOD_mask = 0x3ff << 0, -+ MIN_LOD_shift = 0, -+ MAX_LOD_mask = 0x3ff << 10, -+ MAX_LOD_shift = 10, -+ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask = 0xfff << 20, -+ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift = 20, -+ SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008, -+ LOD_BIAS_SEC_mask = 0xfff << 0, -+ LOD_BIAS_SEC_shift = 0, -+ MC_COORD_TRUNCATE_bit = 1 << 12, -+ SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 13, -+ HIGH_PRECISION_FILTER_bit = 1 << 14, -+ PERF_MIP_mask = 0x07 << 15, -+ PERF_MIP_shift = 15, -+ PERF_Z_mask = 0x03 << 18, -+ PERF_Z_shift = 18, -+ FETCH_4_bit = 1 << 26, -+ SAMPLE_IS_PCF_bit = 1 << 27, -+ SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31, -+ SQ_VTX_BASE_VTX_LOC = 0x0003cff0, -+ SQ_VTX_START_INST_LOC = 0x0003cff4, -+ SQ_LOOP_CONST_DX10_0 = 0x0003e200, -+ SQ_LOOP_CONST_0 = 0x0003e200, -+ SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0, -+ SQ_LOOP_CONST_0__COUNT_shift = 0, -+ INIT_mask = 0xfff << 12, -+ INIT_shift = 12, -+ INC_mask = 0xff << 24, -+ INC_shift = 24, -+ SQ_BOOL_CONST_0 = 0x0003e380, -+ SQ_BOOL_CONST_0_num = 3, -+ -+} ; -+ -+#endif /* _AUTOREGS */ -+ -diff --git a/src/r600_reg_r6xx.h b/src/r600_reg_r6xx.h -new file mode 100644 -index 0000000..2e7dfa9 ---- /dev/null -+++ b/src/r600_reg_r6xx.h -@@ -0,0 +1,494 @@ -+/* -+ * RadeonHD R6xx, R7xx Register documentation -+ * -+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. -+ * Copyright (C) 2008-2009 Matthias Hopf -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included -+ * in all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ */ -+ -+#ifndef _R600_REG_R6xx_H_ -+#define _R600_REG_R6xx_H_ -+ -+/* -+ * Registers for R6xx chips that are not documented yet -+ */ -+ -+enum { -+ -+ MM_INDEX = 0x0000, -+ MM_DATA = 0x0004, -+ -+ SRBM_STATUS = 0x0e50, -+ RLC_RQ_PENDING_bit = 1 << 3, -+ RCU_RQ_PENDING_bit = 1 << 4, -+ GRBM_RQ_PENDING_bit = 1 << 5, -+ HI_RQ_PENDING_bit = 1 << 6, -+ IO_EXTERN_SIGNAL_bit = 1 << 7, -+ VMC_BUSY_bit = 1 << 8, -+ MCB_BUSY_bit = 1 << 9, -+ MCDZ_BUSY_bit = 1 << 10, -+ MCDY_BUSY_bit = 1 << 11, -+ MCDX_BUSY_bit = 1 << 12, -+ MCDW_BUSY_bit = 1 << 13, -+ SEM_BUSY_bit = 1 << 14, -+ SRBM_STATUS__RLC_BUSY_bit = 1 << 15, -+ PDMA_BUSY_bit = 1 << 16, -+ IH_BUSY_bit = 1 << 17, -+ CSC_BUSY_bit = 1 << 20, -+ CMC7_BUSY_bit = 1 << 21, -+ CMC6_BUSY_bit = 1 << 22, -+ CMC5_BUSY_bit = 1 << 23, -+ CMC4_BUSY_bit = 1 << 24, -+ CMC3_BUSY_bit = 1 << 25, -+ CMC2_BUSY_bit = 1 << 26, -+ CMC1_BUSY_bit = 1 << 27, -+ CMC0_BUSY_bit = 1 << 28, -+ BIF_BUSY_bit = 1 << 29, -+ IDCT_BUSY_bit = 1 << 30, -+ -+ SRBM_READ_ERROR = 0x0e98, -+ READ_ADDRESS_mask = 0xffff << 2, -+ READ_ADDRESS_shift = 2, -+ READ_REQUESTER_HI_bit = 1 << 24, -+ READ_REQUESTER_GRBM_bit = 1 << 25, -+ READ_REQUESTER_RCU_bit = 1 << 26, -+ READ_REQUESTER_RLC_bit = 1 << 27, -+ READ_ERROR_bit = 1 << 31, -+ -+ SRBM_INT_STATUS = 0x0ea4, -+ RDERR_INT_STAT_bit = 1 << 0, -+ GFX_CNTX_SWITCH_INT_STAT_bit = 1 << 1, -+ SRBM_INT_ACK = 0x0ea8, -+ RDERR_INT_ACK_bit = 1 << 0, -+ GFX_CNTX_SWITCH_INT_ACK_bit = 1 << 1, -+ -+ R6XX_MC_VM_FB_LOCATION = 0x2180, -+ -+ VENDOR_DEVICE_ID = 0x4000, -+ -+ HDP_MEM_COHERENCY_FLUSH_CNTL = 0x5480, -+ -+ D1GRPH_PRIMARY_SURFACE_ADDRESS = 0x6110, -+ D1GRPH_PITCH = 0x6120, -+ D1GRPH_Y_END = 0x6138, -+ -+ GRBM_STATUS = 0x8010, -+ CMDFIFO_AVAIL_mask = 0x1f << 0, -+ CMDFIFO_AVAIL_shift = 0, -+ SRBM_RQ_PENDING_bit = 1 << 5, -+ CP_RQ_PENDING_bit = 1 << 6, -+ CF_RQ_PENDING_bit = 1 << 7, -+ PF_RQ_PENDING_bit = 1 << 8, -+ GRBM_EE_BUSY_bit = 1 << 10, -+ GRBM_STATUS__VC_BUSY_bit = 1 << 11, -+ DB03_CLEAN_bit = 1 << 12, -+ CB03_CLEAN_bit = 1 << 13, -+ VGT_BUSY_NO_DMA_bit = 1 << 16, -+ GRBM_STATUS__VGT_BUSY_bit = 1 << 17, -+ TA03_BUSY_bit = 1 << 18, -+ GRBM_STATUS__TC_BUSY_bit = 1 << 19, -+ SX_BUSY_bit = 1 << 20, -+ SH_BUSY_bit = 1 << 21, -+ SPI03_BUSY_bit = 1 << 22, -+ SMX_BUSY_bit = 1 << 23, -+ SC_BUSY_bit = 1 << 24, -+ PA_BUSY_bit = 1 << 25, -+ DB03_BUSY_bit = 1 << 26, -+ CR_BUSY_bit = 1 << 27, -+ CP_COHERENCY_BUSY_bit = 1 << 28, -+ GRBM_STATUS__CP_BUSY_bit = 1 << 29, -+ CB03_BUSY_bit = 1 << 30, -+ GUI_ACTIVE_bit = 1 << 31, -+ GRBM_STATUS2 = 0x8014, -+ CR_CLEAN_bit = 1 << 0, -+ SMX_CLEAN_bit = 1 << 1, -+ SPI0_BUSY_bit = 1 << 8, -+ SPI1_BUSY_bit = 1 << 9, -+ SPI2_BUSY_bit = 1 << 10, -+ SPI3_BUSY_bit = 1 << 11, -+ TA0_BUSY_bit = 1 << 12, -+ TA1_BUSY_bit = 1 << 13, -+ TA2_BUSY_bit = 1 << 14, -+ TA3_BUSY_bit = 1 << 15, -+ DB0_BUSY_bit = 1 << 16, -+ DB1_BUSY_bit = 1 << 17, -+ DB2_BUSY_bit = 1 << 18, -+ DB3_BUSY_bit = 1 << 19, -+ CB0_BUSY_bit = 1 << 20, -+ CB1_BUSY_bit = 1 << 21, -+ CB2_BUSY_bit = 1 << 22, -+ CB3_BUSY_bit = 1 << 23, -+ GRBM_SOFT_RESET = 0x8020, -+ SOFT_RESET_CP_bit = 1 << 0, -+ SOFT_RESET_CB_bit = 1 << 1, -+ SOFT_RESET_CR_bit = 1 << 2, -+ SOFT_RESET_DB_bit = 1 << 3, -+ SOFT_RESET_PA_bit = 1 << 5, -+ SOFT_RESET_SC_bit = 1 << 6, -+ SOFT_RESET_SMX_bit = 1 << 7, -+ SOFT_RESET_SPI_bit = 1 << 8, -+ SOFT_RESET_SH_bit = 1 << 9, -+ SOFT_RESET_SX_bit = 1 << 10, -+ SOFT_RESET_TC_bit = 1 << 11, -+ SOFT_RESET_TA_bit = 1 << 12, -+ SOFT_RESET_VC_bit = 1 << 13, -+ SOFT_RESET_VGT_bit = 1 << 14, -+ SOFT_RESET_GRBM_GCA_bit = 1 << 15, -+ -+ WAIT_UNTIL = 0x8040, -+ WAIT_CP_DMA_IDLE_bit = 1 << 8, -+ WAIT_CMDFIFO_bit = 1 << 10, -+ WAIT_2D_IDLE_bit = 1 << 14, -+ WAIT_3D_IDLE_bit = 1 << 15, -+ WAIT_2D_IDLECLEAN_bit = 1 << 16, -+ WAIT_3D_IDLECLEAN_bit = 1 << 17, -+ WAIT_EXTERN_SIG_bit = 1 << 19, -+ CMDFIFO_ENTRIES_mask = 0x1f << 20, -+ CMDFIFO_ENTRIES_shift = 20, -+ -+ GRBM_READ_ERROR = 0x8058, -+/* READ_ADDRESS_mask = 0xffff << 2, */ -+/* READ_ADDRESS_shift = 2, */ -+ READ_REQUESTER_SRBM_bit = 1 << 28, -+ READ_REQUESTER_CP_bit = 1 << 29, -+ READ_REQUESTER_WU_POLL_bit = 1 << 30, -+/* READ_ERROR_bit = 1 << 31, */ -+ -+ SCRATCH_REG0 = 0x8500, -+ SCRATCH_REG1 = 0x8504, -+ SCRATCH_REG2 = 0x8508, -+ SCRATCH_REG3 = 0x850c, -+ SCRATCH_REG4 = 0x8510, -+ SCRATCH_REG5 = 0x8514, -+ SCRATCH_REG6 = 0x8518, -+ SCRATCH_REG7 = 0x851c, -+ SCRATCH_UMSK = 0x8540, -+ SCRATCH_ADDR = 0x8544, -+ -+ CP_COHER_CNTL = 0x85f0, -+ DEST_BASE_0_ENA_bit = 1 << 0, -+ DEST_BASE_1_ENA_bit = 1 << 1, -+ SO0_DEST_BASE_ENA_bit = 1 << 2, -+ SO1_DEST_BASE_ENA_bit = 1 << 3, -+ SO2_DEST_BASE_ENA_bit = 1 << 4, -+ SO3_DEST_BASE_ENA_bit = 1 << 5, -+ CB0_DEST_BASE_ENA_bit = 1 << 6, -+ CB1_DEST_BASE_ENA_bit = 1 << 7, -+ CB2_DEST_BASE_ENA_bit = 1 << 8, -+ CB3_DEST_BASE_ENA_bit = 1 << 9, -+ CB4_DEST_BASE_ENA_bit = 1 << 10, -+ CB5_DEST_BASE_ENA_bit = 1 << 11, -+ CB6_DEST_BASE_ENA_bit = 1 << 12, -+ CB7_DEST_BASE_ENA_bit = 1 << 13, -+ DB_DEST_BASE_ENA_bit = 1 << 14, -+ CR_DEST_BASE_ENA_bit = 1 << 15, -+ TC_ACTION_ENA_bit = 1 << 23, -+ VC_ACTION_ENA_bit = 1 << 24, -+ CB_ACTION_ENA_bit = 1 << 25, -+ DB_ACTION_ENA_bit = 1 << 26, -+ SH_ACTION_ENA_bit = 1 << 27, -+ SMX_ACTION_ENA_bit = 1 << 28, -+ CR0_ACTION_ENA_bit = 1 << 29, -+ CR1_ACTION_ENA_bit = 1 << 30, -+ CR2_ACTION_ENA_bit = 1 << 31, -+ CP_COHER_SIZE = 0x85f4, -+ CP_COHER_BASE = 0x85f8, -+ CP_COHER_STATUS = 0x85fc, -+ MATCHING_GFX_CNTX_mask = 0xff << 0, -+ MATCHING_GFX_CNTX_shift = 0, -+ MATCHING_CR_CNTX_mask = 0xffff << 8, -+ MATCHING_CR_CNTX_shift = 8, -+ STATUS_bit = 1 << 31, -+ -+ CP_STALLED_STAT1 = 0x8674, -+ RBIU_TO_DMA_NOT_RDY_TO_RCV_bit = 1 << 0, -+ RBIU_TO_IBS_NOT_RDY_TO_RCV_bit = 1 << 1, -+ RBIU_TO_SEM_NOT_RDY_TO_RCV_bit = 1 << 2, -+ RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit = 1 << 3, -+ RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit = 1 << 4, -+ RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit = 1 << 5, -+ RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit = 1 << 6, -+ RBIU_TO_RECT_NOT_RDY_TO_RCV_bit = 1 << 7, -+ RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit = 1 << 8, -+ RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit = 1 << 9, -+ MIU_WAITING_ON_RDREQ_FREE_bit = 1 << 16, -+ MIU_WAITING_ON_WRREQ_FREE_bit = 1 << 17, -+ MIU_NEEDS_AVAIL_WRREQ_PHASE_bit = 1 << 18, -+ RCIU_WAITING_ON_GRBM_FREE_bit = 1 << 24, -+ RCIU_WAITING_ON_VGT_FREE_bit = 1 << 25, -+ RCIU_STALLED_ON_ME_READ_bit = 1 << 26, -+ RCIU_STALLED_ON_DMA_READ_bit = 1 << 27, -+ RCIU_HALTED_BY_REG_VIOLATION_bit = 1 << 28, -+ CP_STALLED_STAT2 = 0x8678, -+ PFP_TO_CSF_NOT_RDY_TO_RCV_bit = 1 << 0, -+ PFP_TO_MEQ_NOT_RDY_TO_RCV_bit = 1 << 1, -+ PFP_TO_VGT_NOT_RDY_TO_RCV_bit = 1 << 2, -+ PFP_HALTED_BY_INSTR_VIOLATION_bit = 1 << 3, -+ MULTIPASS_IB_PENDING_IN_PFP_bit = 1 << 4, -+ ME_BRUSH_WC_NOT_RDY_TO_RCV_bit = 1 << 8, -+ ME_STALLED_ON_BRUSH_LOGIC_bit = 1 << 9, -+ CR_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 10, -+ GFX_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 11, -+ ME_RCIU_NOT_RDY_TO_RCV_bit = 1 << 12, -+ ME_TO_CONST_NOT_RDY_TO_RCV_bit = 1 << 13, -+ ME_WAITING_DATA_FROM_PFP_bit = 1 << 14, -+ ME_WAITING_ON_PARTIAL_FLUSH_bit = 1 << 15, -+ RECT_FIFO_NEEDS_CR_RECT_DONE_bit = 1 << 16, -+ RECT_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 17, -+ EOPD_FIFO_NEEDS_SC_EOP_DONE_bit = 1 << 18, -+ EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit = 1 << 19, -+ EOPD_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 20, -+ EOPD_FIFO_NEEDS_SIGNAL_SEM_bit = 1 << 21, -+ SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit = 1 << 22, -+ SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit = 1 << 23, -+ PIPE_STATS_FIFO_NEEDS_SAMPLE_bit = 1 << 24, -+ SURF_SYNC_NEEDS_IDLE_CNTXS_bit = 1 << 30, -+ SURF_SYNC_NEEDS_ALL_CLEAN_bit = 1 << 31, -+ CP_BUSY_STAT = 0x867c, -+ REG_BUS_FIFO_BUSY_bit = 1 << 0, -+ RING_FETCHING_DATA_bit = 1 << 1, -+ INDR1_FETCHING_DATA_bit = 1 << 2, -+ INDR2_FETCHING_DATA_bit = 1 << 3, -+ STATE_FETCHING_DATA_bit = 1 << 4, -+ PRED_FETCHING_DATA_bit = 1 << 5, -+ COHER_CNTR_NEQ_ZERO_bit = 1 << 6, -+ PFP_PARSING_PACKETS_bit = 1 << 7, -+ ME_PARSING_PACKETS_bit = 1 << 8, -+ RCIU_PFP_BUSY_bit = 1 << 9, -+ RCIU_ME_BUSY_bit = 1 << 10, -+ OUTSTANDING_READ_TAGS_bit = 1 << 11, -+ SEM_CMDFIFO_NOT_EMPTY_bit = 1 << 12, -+ SEM_FAILED_AND_HOLDING_bit = 1 << 13, -+ SEM_POLLING_FOR_PASS_bit = 1 << 14, -+ _3D_BUSY_bit = 1 << 15, -+ _2D_BUSY_bit = 1 << 16, -+ CP_STAT = 0x8680, -+ CSF_RING_BUSY_bit = 1 << 0, -+ CSF_WPTR_POLL_BUSY_bit = 1 << 1, -+ CSF_INDIRECT1_BUSY_bit = 1 << 2, -+ CSF_INDIRECT2_BUSY_bit = 1 << 3, -+ CSF_STATE_BUSY_bit = 1 << 4, -+ CSF_PREDICATE_BUSY_bit = 1 << 5, -+ CSF_BUSY_bit = 1 << 6, -+ MIU_RDREQ_BUSY_bit = 1 << 7, -+ MIU_WRREQ_BUSY_bit = 1 << 8, -+ ROQ_RING_BUSY_bit = 1 << 9, -+ ROQ_INDIRECT1_BUSY_bit = 1 << 10, -+ ROQ_INDIRECT2_BUSY_bit = 1 << 11, -+ ROQ_STATE_BUSY_bit = 1 << 12, -+ ROQ_PREDICATE_BUSY_bit = 1 << 13, -+ ROQ_ALIGN_BUSY_bit = 1 << 14, -+ PFP_BUSY_bit = 1 << 15, -+ MEQ_BUSY_bit = 1 << 16, -+ ME_BUSY_bit = 1 << 17, -+ QUERY_BUSY_bit = 1 << 18, -+ SEMAPHORE_BUSY_bit = 1 << 19, -+ INTERRUPT_BUSY_bit = 1 << 20, -+ SURFACE_SYNC_BUSY_bit = 1 << 21, -+ DMA_BUSY_bit = 1 << 22, -+ RCIU_BUSY_bit = 1 << 23, -+ CP_STAT__CP_BUSY_bit = 1 << 31, -+ -+ CP_ME_CNTL = 0x86d8, -+ ME_STATMUX_mask = 0xff << 0, -+ ME_STATMUX_shift = 0, -+ ME_HALT_bit = 1 << 28, -+ CP_ME_STATUS = 0x86dc, -+ -+ CP_RB_RPTR = 0x8700, -+ RB_RPTR_mask = 0xfffff << 0, -+ RB_RPTR_shift = 0, -+ CP_RB_WPTR_DELAY = 0x8704, -+ PRE_WRITE_TIMER_mask = 0xfffffff << 0, -+ PRE_WRITE_TIMER_shift = 0, -+ PRE_WRITE_LIMIT_mask = 0x0f << 28, -+ PRE_WRITE_LIMIT_shift = 28, -+ -+ CP_ROQ_RB_STAT = 0x8780, -+ ROQ_RPTR_PRIMARY_mask = 0x3ff << 0, -+ ROQ_RPTR_PRIMARY_shift = 0, -+ ROQ_WPTR_PRIMARY_mask = 0x3ff << 16, -+ ROQ_WPTR_PRIMARY_shift = 16, -+ CP_ROQ_IB1_STAT = 0x8784, -+ ROQ_RPTR_INDIRECT1_mask = 0x3ff << 0, -+ ROQ_RPTR_INDIRECT1_shift = 0, -+ ROQ_WPTR_INDIRECT1_mask = 0x3ff << 16, -+ ROQ_WPTR_INDIRECT1_shift = 16, -+ CP_ROQ_IB2_STAT = 0x8788, -+ ROQ_RPTR_INDIRECT2_mask = 0x3ff << 0, -+ ROQ_RPTR_INDIRECT2_shift = 0, -+ ROQ_WPTR_INDIRECT2_mask = 0x3ff << 16, -+ ROQ_WPTR_INDIRECT2_shift = 16, -+ -+ CP_MEQ_STAT = 0x8794, -+ MEQ_RPTR_mask = 0x3ff << 0, -+ MEQ_RPTR_shift = 0, -+ MEQ_WPTR_mask = 0x3ff << 16, -+ MEQ_WPTR_shift = 16, -+ -+ CC_GC_SHADER_PIPE_CONFIG = 0x8950, -+ INACTIVE_QD_PIPES_mask = 0xff << 8, -+ INACTIVE_QD_PIPES_shift = 8, -+ R6XX_MAX_QD_PIPES = 8, -+ INACTIVE_SIMDS_mask = 0xff << 16, -+ INACTIVE_SIMDS_shift = 16, -+ R6XX_MAX_SIMDS = 8, -+ GC_USER_SHADER_PIPE_CONFIG = 0x8954, -+ -+ VC_ENHANCE = 0x9714, -+ DB_DEBUG = 0x9830, -+ PREZ_MUST_WAIT_FOR_POSTZ_DONE = 1 << 31, -+ -+ DB_WATERMARKS = 0x00009838, -+ DEPTH_FREE_mask = 0x1f << 0, -+ DEPTH_FREE_shift = 0, -+ DEPTH_FLUSH_mask = 0x3f << 5, -+ DEPTH_FLUSH_shift = 5, -+ FORCE_SUMMARIZE_mask = 0x0f << 11, -+ FORCE_SUMMARIZE_shift = 11, -+ DEPTH_PENDING_FREE_mask = 0x1f << 15, -+ DEPTH_PENDING_FREE_shift = 15, -+ DEPTH_CACHELINE_FREE_mask = 0x1f << 20, -+ DEPTH_CACHELINE_FREE_shift = 20, -+ EARLY_Z_PANIC_DISABLE_bit = 1 << 25, -+ LATE_Z_PANIC_DISABLE_bit = 1 << 26, -+ RE_Z_PANIC_DISABLE_bit = 1 << 27, -+ DB_EXTRA_DEBUG_mask = 0x0f << 28, -+ DB_EXTRA_DEBUG_shift = 28, -+ -+ CP_RB_BASE = 0xc100, -+ CP_RB_CNTL = 0xc104, -+ RB_BUFSZ_mask = 0x3f << 0, -+ CP_RB_WPTR = 0xc114, -+ RB_WPTR_mask = 0xfffff << 0, -+ RB_WPTR_shift = 0, -+ CP_RB_RPTR_WR = 0xc108, -+ RB_RPTR_WR_mask = 0xfffff << 0, -+ RB_RPTR_WR_shift = 0, -+ -+ CP_INT_STATUS = 0xc128, -+ DISABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 0, -+ ENABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 1, -+ SEM_SIGNAL_INT_STAT_bit = 1 << 18, -+ CNTX_BUSY_INT_STAT_bit = 1 << 19, -+ CNTX_EMPTY_INT_STAT_bit = 1 << 20, -+ WAITMEM_SEM_INT_STAT_bit = 1 << 21, -+ PRIV_INSTR_INT_STAT_bit = 1 << 22, -+ PRIV_REG_INT_STAT_bit = 1 << 23, -+ OPCODE_ERROR_INT_STAT_bit = 1 << 24, -+ SCRATCH_INT_STAT_bit = 1 << 25, -+ TIME_STAMP_INT_STAT_bit = 1 << 26, -+ RESERVED_BIT_ERROR_INT_STAT_bit = 1 << 27, -+ DMA_INT_STAT_bit = 1 << 28, -+ IB2_INT_STAT_bit = 1 << 29, -+ IB1_INT_STAT_bit = 1 << 30, -+ RB_INT_STAT_bit = 1 << 31, -+ -+// SX_ALPHA_TEST_CONTROL = 0x00028410, -+ ALPHA_FUNC__REF_NEVER = 0, -+ ALPHA_FUNC__REF_ALWAYS = 7, -+// DB_SHADER_CONTROL = 0x0002880c, -+ Z_ORDER__EARLY_Z_THEN_LATE_Z = 2, -+// PA_SU_SC_MODE_CNTL = 0x00028814, -+// POLY_MODE_mask = 0x03 << 3, -+ POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE, -+// POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, -+ POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES, -+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_M = 0x00028c20, -+ DB_SRESULTS_COMPARE_STATE0 = 0x00028d28, /* See autoregs: DB_SRESULTS_COMPARE_STATE1 */ -+// DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c, -+ DB_ALPHA_TO_MASK = 0x00028d44, -+ ALPHA_TO_MASK_ENABLE = 1 << 0, -+ ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8, -+ ALPHA_TO_MASK_OFFSET0_shift = 8, -+ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 8, -+ ALPHA_TO_MASK_OFFSET1_shift = 10, -+ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 8, -+ ALPHA_TO_MASK_OFFSET2_shift = 12, -+ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 8, -+ ALPHA_TO_MASK_OFFSET3_shift = 14, -+ -+// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, -+// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, -+ FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2, -+ FMT_16=5, FMT_16_FLOAT, FMT_8_8, -+ FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4, -+ FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16, -+ FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8, -+ FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10, -+ FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2, -+ FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16, -+ FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT, -+ FMT_1 = 37, FMT_GB_GR=39, -+ FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP, -+ FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32, -+ FMT_32_32_32_FLOAT=48, -+ -+// High level register file lengths -+ SQ_ALU_CONSTANT = SQ_ALU_CONSTANT0_0, /* 256 PS, 256 VS */ -+ SQ_ALU_CONSTANT_ps_num = 256, -+ SQ_ALU_CONSTANT_vs_num = 256, -+ SQ_ALU_CONSTANT_all_num = 512, -+ SQ_ALU_CONSTANT_offset = 16, -+ SQ_ALU_CONSTANT_ps = 0, -+ SQ_ALU_CONSTANT_vs = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num, -+ SQ_TEX_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */ -+ SQ_TEX_RESOURCE_ps_num = 160, -+ SQ_TEX_RESOURCE_vs_num = 160, -+ SQ_TEX_RESOURCE_fs_num = 16, -+ SQ_TEX_RESOURCE_gs_num = 160, -+ SQ_TEX_RESOURCE_all_num = 496, -+ SQ_TEX_RESOURCE_offset = 28, -+ SQ_TEX_RESOURCE_ps = 0, -+ SQ_TEX_RESOURCE_vs = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num, -+ SQ_TEX_RESOURCE_fs = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num, -+ SQ_TEX_RESOURCE_gs = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num, -+ SQ_VTX_RESOURCE = SQ_VTX_CONSTANT_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */ -+ SQ_VTX_RESOURCE_ps_num = 160, -+ SQ_VTX_RESOURCE_vs_num = 160, -+ SQ_VTX_RESOURCE_fs_num = 16, -+ SQ_VTX_RESOURCE_gs_num = 160, -+ SQ_VTX_RESOURCE_all_num = 496, -+ SQ_VTX_RESOURCE_offset = 28, -+ SQ_VTX_RESOURCE_ps = 0, -+ SQ_VTX_RESOURCE_vs = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num, -+ SQ_VTX_RESOURCE_fs = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num, -+ SQ_VTX_RESOURCE_gs = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num, -+ SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, /* 18 per PS, VS, GS */ -+ SQ_TEX_SAMPLER_WORD_ps_num = 18, -+ SQ_TEX_SAMPLER_WORD_vs_num = 18, -+ SQ_TEX_SAMPLER_WORD_gs_num = 18, -+ SQ_TEX_SAMPLER_WORD_all_num = 54, -+ SQ_TEX_SAMPLER_WORD_offset = 12, -+ SQ_TEX_SAMPLER_WORD_ps = 0, -+ SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, -+ SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, -+ SQ_LOOP_CONST = SQ_LOOP_CONST_0, /* 32 per PS, VS, GS */ -+ SQ_LOOP_CONST_ps_num = 32, -+ SQ_LOOP_CONST_vs_num = 32, -+ SQ_LOOP_CONST_gs_num = 32, -+ SQ_LOOP_CONST_all_num = 96, -+ SQ_LOOP_CONST_offset = 4, -+ SQ_LOOP_CONST_ps = 0, -+ SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, -+ SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, -+} ; -+ -+ -+#endif -diff --git a/src/r600_reg_r7xx.h b/src/r600_reg_r7xx.h -new file mode 100644 -index 0000000..e5c01c8 ---- /dev/null -+++ b/src/r600_reg_r7xx.h -@@ -0,0 +1,149 @@ -+/* -+ * RadeonHD R6xx, R7xx Register documentation -+ * -+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. -+ * Copyright (C) 2008-2009 Matthias Hopf -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included -+ * in all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ */ -+ -+#ifndef _R600_REG_R7xx_H_ -+#define _R600_REG_R7xx_H_ -+ -+/* -+ * Register update for R7xx chips -+ */ -+ -+enum { -+ -+ R7XX_MC_VM_FB_LOCATION = 0x00002024, -+ -+// GRBM_STATUS = 0x00008010, -+ R7XX_TA_BUSY_bit = 1 << 14, -+ -+ R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x00008d8c, -+ RING0_OFFSET_mask = 0xff << 0, -+ RING0_OFFSET_shift = 0, -+ ISOLATE_ES_ENABLE_bit = 1 << 12, -+ ISOLATE_GS_ENABLE_bit = 1 << 13, -+ VS_PC_LIMIT_ENABLE_bit = 1 << 14, -+ -+// SQ_ALU_WORD0 = 0x00008dfc, -+// SRC0_SEL_mask = 0x1ff << 0, -+// SRC1_SEL_mask = 0x1ff << 13, -+ R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, -+ R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, -+ R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, -+ R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, -+// INDEX_MODE_mask = 0x07 << 26, -+ R7xx_SQ_INDEX_GLOBAL = 0x05, -+ R7xx_SQ_INDEX_GLOBAL_AR_X = 0x06, -+ R6xx_SQ_ALU_WORD1_OP2 = 0x00008dfc, -+ R7xx_SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, -+ R6xx_FOG_MERGE_bit = 1 << 5, -+ R6xx_OMOD_mask = 0x03 << 6, -+ R7xx_OMOD_mask = 0x03 << 5, -+ R6xx_OMOD_shift = 6, -+ R7xx_OMOD_shift = 5, -+ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, -+ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, -+ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, -+ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, -+ R7xx_SQ_OP2_INST_FREXP_64 = 0x07, -+ R7xx_SQ_OP2_INST_ADD_64 = 0x17, -+ R7xx_SQ_OP2_INST_MUL_64 = 0x1b, -+ R7xx_SQ_OP2_INST_FLT64_TO_FLT32 = 0x1c, -+ R7xx_SQ_OP2_INST_FLT32_TO_FLT64 = 0x1d, -+ R7xx_SQ_OP2_INST_LDEXP_64 = 0x7a, -+ R7xx_SQ_OP2_INST_FRACT_64 = 0x7b, -+ R7xx_SQ_OP2_INST_PRED_SETGT_64 = 0x7c, -+ R7xx_SQ_OP2_INST_PRED_SETE_64 = 0x7d, -+ R7xx_SQ_OP2_INST_PRED_SETGE_64 = 0x7e, -+// SQ_ALU_WORD1_OP3 = 0x00008dfc, -+// SRC2_SEL_mask = 0x1ff << 0, -+// R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, -+// R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, -+// R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, -+// R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, -+// SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, -+ R7xx_SQ_OP3_INST_MULADD_64 = 0x08, -+ R7xx_SQ_OP3_INST_MULADD_64_M2 = 0x09, -+ R7xx_SQ_OP3_INST_MULADD_64_M4 = 0x0a, -+ R7xx_SQ_OP3_INST_MULADD_64_D2 = 0x0b, -+// SQ_CF_ALU_WORD1 = 0x00008dfc, -+ R6xx_USES_WATERFALL_bit = 1 << 25, -+ R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, -+// SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, -+// ARRAY_BASE_mask = 0x1fff << 0, -+// TYPE_mask = 0x03 << 13, -+// SQ_EXPORT_PARAM = 0x02, -+// X_UNUSED_FOR_SX_EXPORTS = 0x03, -+// ELEM_SIZE_mask = 0x03 << 30, -+// SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, -+// SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, -+ R7xx_SQ_CF_INST_MEM_EXPORT = 0x3a, -+// SQ_CF_WORD1 = 0x00008dfc, -+// SQ_CF_WORD1__COUNT_mask = 0x07 << 10, -+ R7xx_COUNT_3_bit = 1 << 19, -+// SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, -+ R7xx_SQ_CF_INST_END_PROGRAM = 0x19, -+ R7xx_SQ_CF_INST_WAIT_ACK = 0x1a, -+ R7xx_SQ_CF_INST_TEX_ACK = 0x1b, -+ R7xx_SQ_CF_INST_VTX_ACK = 0x1c, -+ R7xx_SQ_CF_INST_VTX_TC_ACK = 0x1d, -+// SQ_VTX_WORD0 = 0x00008dfc, -+// VTX_INST_mask = 0x1f << 0, -+ R7xx_SQ_VTX_INST_MEM = 0x02, -+// SQ_VTX_WORD2 = 0x00008dfc, -+ R7xx_SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, -+ -+// SQ_TEX_WORD0 = 0x00008dfc, -+// TEX_INST_mask = 0x1f << 0, -+ R7xx_X_MEMORY_READ = 0x02, -+ R7xx_SQ_TEX_INST_KEEP_GRADIENTS = 0x0a, -+ R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1 = 0x0f, -+ R7xx_SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, -+ -+ R7xx_PA_SC_EDGERULE = 0x00028230, -+ R7xx_SPI_THREAD_GROUPING = 0x000286c8, -+ PS_GROUPING_mask = 0x1f << 0, -+ PS_GROUPING_shift = 0, -+ VS_GROUPING_mask = 0x1f << 8, -+ VS_GROUPING_shift = 8, -+ GS_GROUPING_mask = 0x1f << 16, -+ GS_GROUPING_shift = 16, -+ ES_GROUPING_mask = 0x1f << 24, -+ ES_GROUPING_shift = 24, -+ R7xx_CB_SHADER_CONTROL = 0x000287a0, -+ RT0_ENABLE_bit = 1 << 0, -+ RT1_ENABLE_bit = 1 << 1, -+ RT2_ENABLE_bit = 1 << 2, -+ RT3_ENABLE_bit = 1 << 3, -+ RT4_ENABLE_bit = 1 << 4, -+ RT5_ENABLE_bit = 1 << 5, -+ RT6_ENABLE_bit = 1 << 6, -+ RT7_ENABLE_bit = 1 << 7, -+// DB_ALPHA_TO_MASK = 0x00028d44, -+ R7xx_OFFSET_ROUND_bit = 1 << 16, -+// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c, -+ R7xx_TRUNCATE_COORD_bit = 1 << 9, -+ R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10, -+ -+} ; -+ -+#endif /* _R600_REG_R7xx_H_ */ -diff --git a/src/r600_shader.c b/src/r600_shader.c -new file mode 100644 -index 0000000..21c4c68 ---- /dev/null -+++ b/src/r600_shader.c -@@ -0,0 +1,1854 @@ -+/* -+ * Copyright 2008 Advanced Micro Devices, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ * Author: Alex Deucher -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include "xf86.h" -+ -+#include "radeon.h" -+#include "r600_shader.h" -+#include "r600_reg.h" -+ -+/* solid vs --------------------------------------- */ -+int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) -+{ -+ int i = 0; -+ -+ /* 0 */ -+ shader[i++] = CF_DWORD0(ADDR(4)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(1), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_VTX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 1 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), -+ TYPE(SQ_EXPORT_POS), -+ RW_GPR(1), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 2 - always export a param whether it's used or not */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), -+ TYPE(SQ_EXPORT_PARAM), -+ RW_GPR(0), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(0), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 3 - padding */ -+ shader[i++] = 0x00000000; -+ shader[i++] = 0x00000000; -+ /* 4/5 */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(8)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), -+ DST_REL(0), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(0), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(1)); -+ shader[i++] = VTX_DWORD_PAD; -+ -+ return i; -+} -+ -+/* solid ps --------------------------------------- */ -+int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) -+{ -+ int i = 0; -+ -+ /* 0 */ -+ shader[i++] = CF_ALU_DWORD0(ADDR(2), -+ KCACHE_BANK0(0), -+ KCACHE_BANK1(0), -+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); -+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), -+ KCACHE_ADDR0(0), -+ KCACHE_ADDR1(0), -+ I_COUNT(4), -+ USES_WATERFALL(0), -+ CF_INST(SQ_CF_INST_ALU), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 1 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), -+ TYPE(SQ_EXPORT_PIXEL), -+ RW_GPR(0), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(1)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ -+ /* 2 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MOV), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(1)); -+ /* 3 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MOV), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(1)); -+ /* 4 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Z), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Z), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MOV), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(1)); -+ /* 5 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_W), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MOV), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(1)); -+ -+ return i; -+} -+ -+/* copy vs --------------------------------------- */ -+int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) -+{ -+ int i = 0; -+ -+ /* 0 */ -+ shader[i++] = CF_DWORD0(ADDR(4)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(2), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_VTX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 1 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), -+ TYPE(SQ_EXPORT_POS), -+ RW_GPR(1), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 2 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), -+ TYPE(SQ_EXPORT_PARAM), -+ RW_GPR(0), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(0), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 3 */ -+ shader[i++] = 0x00000000; -+ shader[i++] = 0x00000000; -+ /* 4/5 */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(16)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), -+ DST_REL(0), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(0), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(1)); -+ shader[i++] = VTX_DWORD_PAD; -+ /* 6/7 */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(8)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), -+ DST_REL(0), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(8), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(0)); -+ shader[i++] = VTX_DWORD_PAD; -+ -+ return i; -+} -+ -+/* copy ps --------------------------------------- */ -+int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) -+{ -+ int i=0; -+ -+ /* CF INST 0 */ -+ shader[i++] = CF_DWORD0(ADDR(2)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(1), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_TEX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* CF INST 1 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), -+ TYPE(SQ_EXPORT_PIXEL), -+ RW_GPR(0), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(1)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* TEX INST 0 */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(SQ_SEL_X), /* R */ -+ DST_SEL_Y(SQ_SEL_Y), /* G */ -+ DST_SEL_Z(SQ_SEL_Z), /* B */ -+ DST_SEL_W(SQ_SEL_W), /* A */ -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_UNNORMALIZED), -+ COORD_TYPE_Y(TEX_UNNORMALIZED), -+ COORD_TYPE_Z(TEX_UNNORMALIZED), -+ COORD_TYPE_W(TEX_UNNORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(0), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ -+ return i; -+} -+ -+/* -+ * ; xv vertex shader -+ * 00 VTX: ADDR(4) CNT(2) -+ * 0 VFETCH R1.xy01, R0.x, fc0 MEGA(16) FORMAT(32_32_FLOAT) -+ * FORMAT_COMP(SIGNED) -+ * 1 VFETCH R0.xy01, R0.x, fc0 MINI(8) OFFSET(8) FORMAT(32_32_FLOAT) -+ * FORMAT_COMP(SIGNED) -+ * 01 EXP_DONE: POS0, R1 -+ * 02 EXP_DONE: PARAM0, R0 NO_BARRIER -+ * END_OF_PROGRAM -+ */ -+int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) -+{ -+ int i = 0; -+ -+ /* 0 */ -+ shader[i++] = CF_DWORD0(ADDR(4)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(2), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_VTX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 1 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), -+ TYPE(SQ_EXPORT_POS), -+ RW_GPR(1), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(3)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 2 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), -+ TYPE(SQ_EXPORT_PARAM), -+ RW_GPR(0), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(3)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ shader[i++] = 0x00000000; -+ shader[i++] = 0x00000000; -+ /* 4/5 */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(16)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(0), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(1)); -+ shader[i++] = VTX_DWORD_PAD; -+ /* 6/7 */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(8)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(8), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(0)); -+ shader[i++] = VTX_DWORD_PAD; -+ -+ return i; -+} -+ -+/* -+ * ; xv ps planar -+ * 00 TEX: ADDR(20) CNT(3) NO_BARRIER -+ * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 -+ * 1 SAMPLE R1.__x_, R0.xy01, t1, s1 -+ * 2 SAMPLE R1._x__, R0.xy01, t2, s2 -+ * 01 TEX: ADDR(28) CNT(2) NO_BARRIER -+ * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 -+ * 1 SAMPLE R1._xy_, R0.xy01, t1, s1 -+ * 02 ALU: ADDR(4) CNT(16) -+ * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP -+ * y: MULADD R1.y, R1.y, C3.z, C3.w -+ * z: MULADD R1.z, R1.z, C3.z, C3.w -+ * w: MOV R1.w, 0.0f -+ * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102 -+ * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102 -+ * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102 -+ * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021 -+ * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102 -+ * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102 -+ * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102 -+ * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021 -+ * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102 -+ * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102 -+ * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102 -+ * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021 -+ * 03 EXP_DONE: PIX0, R2 -+ * END_OF_PROGRAM -+ */ -+int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) -+{ -+ int i = 0; -+ -+ /* 0 */ -+ shader[i++] = CF_DWORD0(ADDR(20)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_BOOL), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_CALL), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 1 */ -+ shader[i++] = CF_DWORD0(ADDR(28)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_NOT_BOOL), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_CALL), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 2 */ -+ shader[i++] = CF_ALU_DWORD0(ADDR(4), -+ KCACHE_BANK0(0), -+ KCACHE_BANK1(0), -+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); -+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), -+ KCACHE_ADDR0(0), -+ KCACHE_ADDR1(0), -+ I_COUNT(16), -+ USES_WATERFALL(0), -+ CF_INST(SQ_CF_INST_ALU), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 3 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), -+ TYPE(SQ_EXPORT_PIXEL), -+ RW_GPR(2), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(3)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 4 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(259), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Y), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(1)); -+ /* 5 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(259), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Z), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_W), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(0)); -+ /* 6 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Z), -+ SRC0_NEG(0), -+ SRC1_SEL(259), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Z), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_W), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(0)); -+ /* 7 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(SQ_ALU_SRC_0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MOV), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(0)); -+ /* 8 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(256), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(2), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(1)); -+ /* 9 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(256), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(1)); -+ /* 10 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Z), -+ SRC0_NEG(0), -+ SRC1_SEL(256), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Z), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(1)); -+ /* 11 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_W), -+ SRC0_NEG(0), -+ SRC1_SEL(256), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_021), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(1)); -+ /* 12 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(257), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(1)); -+ /* 13 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(257), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(2), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(1)); -+ /* 14 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Z), -+ SRC0_NEG(0), -+ SRC1_SEL(257), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Z), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(1)); -+ /* 15 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_W), -+ SRC0_NEG(0), -+ SRC1_SEL(257), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_021), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(1)); -+ /* 16 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(258), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(1)); -+ /* 17 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(258), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(1)); -+ /* 18 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Z), -+ SRC0_NEG(0), -+ SRC1_SEL(258), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Z), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_102), -+ DST_GPR(2), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(1)); -+ /* 19 */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_W), -+ SRC0_NEG(0), -+ SRC1_SEL(258), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(0), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_DOT4), -+ BANK_SWIZZLE(SQ_ALU_VEC_021), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(1)); -+ /* 20 */ -+ shader[i++] = CF_DWORD0(ADDR(22)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(3), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_TEX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 21 */ -+ shader[i++] = CF_DWORD0(ADDR(0)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_RETURN), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 22/23 */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_MASK), -+ DST_SEL_Z(SQ_SEL_MASK), -+ DST_SEL_W(SQ_SEL_1), -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_NORMALIZED), -+ COORD_TYPE_Y(TEX_NORMALIZED), -+ COORD_TYPE_Z(TEX_NORMALIZED), -+ COORD_TYPE_W(TEX_NORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(0), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ /* 24/25 */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(1), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(SQ_SEL_MASK), -+ DST_SEL_Y(SQ_SEL_MASK), -+ DST_SEL_Z(SQ_SEL_X), -+ DST_SEL_W(SQ_SEL_MASK), -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_NORMALIZED), -+ COORD_TYPE_Y(TEX_NORMALIZED), -+ COORD_TYPE_Z(TEX_NORMALIZED), -+ COORD_TYPE_W(TEX_NORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(1), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ /* 26/27 */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(2), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(SQ_SEL_MASK), -+ DST_SEL_Y(SQ_SEL_X), -+ DST_SEL_Z(SQ_SEL_MASK), -+ DST_SEL_W(SQ_SEL_MASK), -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_NORMALIZED), -+ COORD_TYPE_Y(TEX_NORMALIZED), -+ COORD_TYPE_Z(TEX_NORMALIZED), -+ COORD_TYPE_W(TEX_NORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(2), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ /* 28 */ -+ shader[i++] = CF_DWORD0(ADDR(30)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(2), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_TEX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 29 */ -+ shader[i++] = CF_DWORD0(ADDR(0)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_RETURN), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 30/31 */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_MASK), -+ DST_SEL_Z(SQ_SEL_MASK), -+ DST_SEL_W(SQ_SEL_1), -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_NORMALIZED), -+ COORD_TYPE_Y(TEX_NORMALIZED), -+ COORD_TYPE_Z(TEX_NORMALIZED), -+ COORD_TYPE_W(TEX_NORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(0), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ /* 32/33 */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(1), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(SQ_SEL_MASK), -+ DST_SEL_Y(SQ_SEL_X), -+ DST_SEL_Z(SQ_SEL_Y), -+ DST_SEL_W(SQ_SEL_MASK), -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_NORMALIZED), -+ COORD_TYPE_Y(TEX_NORMALIZED), -+ COORD_TYPE_Z(TEX_NORMALIZED), -+ COORD_TYPE_W(TEX_NORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(1), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ -+ return i; -+} -+ -+/* comp mask ps --------------------------------------- */ -+int R600_comp_mask_ps(RADEONChipFamily ChipSet, -+ uint32_t* shader, -+ int src_a, int src_r, int src_g, int src_b, -+ int mask_a, int mask_r, int mask_g, int mask_b) -+{ -+ int i = 0; -+ -+ /* 0 */ -+ shader[i++] = CF_DWORD0(ADDR(8)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(2), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_TEX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ -+ /* 1 */ -+ shader[i++] = CF_ALU_DWORD0(ADDR(3), -+ KCACHE_BANK0(0), -+ KCACHE_BANK1(0), -+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); -+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), -+ KCACHE_ADDR0(0), -+ KCACHE_ADDR1(0), -+ I_COUNT(4), -+ USES_WATERFALL(0), -+ CF_INST(SQ_CF_INST_ALU), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ -+ /* 2 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), -+ TYPE(SQ_EXPORT_PIXEL), -+ RW_GPR(2), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(1)); -+ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ -+ /* 3 - alu 0 */ -+ /* MUL gpr[2].x gpr[1].x gpr[0].x */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(1)); -+ /* 4 - alu 1 */ -+ /* MUL gpr[2].y gpr[1].y gpr[0].y */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(1)); -+ /* 5 - alu 2 */ -+ /* MUL gpr[2].z gpr[1].z gpr[0].z */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Z), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Z), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(1)); -+ /* 6 - alu 3 */ -+ /* MUL gpr[2].w gpr[1].w gpr[0].w */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_W), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(1)); -+ /* 7 */ -+ shader[i++] = 0x00000000; -+ shader[i++] = 0x00000000; -+ -+ /* 8/9 - src */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(src_r), -+ DST_SEL_Y(src_g), -+ DST_SEL_Z(src_b), -+ DST_SEL_W(src_a), -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_NORMALIZED), -+ COORD_TYPE_Y(TEX_NORMALIZED), -+ COORD_TYPE_Z(TEX_NORMALIZED), -+ COORD_TYPE_W(TEX_NORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(0), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ /* 10/11 - mask */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(1), -+ SRC_GPR(1), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(mask_r), -+ DST_SEL_Y(mask_g), -+ DST_SEL_Z(mask_b), -+ DST_SEL_W(mask_a), -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_NORMALIZED), -+ COORD_TYPE_Y(TEX_NORMALIZED), -+ COORD_TYPE_Z(TEX_NORMALIZED), -+ COORD_TYPE_W(TEX_NORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(1), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ -+ return i; -+} -+ -+/* comp vs --------------------------------------- */ -+int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) -+{ -+ int i = 0; -+ -+ /* 0 */ -+ shader[i++] = CF_DWORD0(ADDR(3)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_BOOL), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_CALL), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 1 */ -+ shader[i++] = CF_DWORD0(ADDR(14)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_NOT_BOOL), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_CALL), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 2 */ -+ shader[i++] = CF_DWORD0(0); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_NOP), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 3 - mask sub */ -+ shader[i++] = CF_DWORD0(ADDR(8)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(3), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_VTX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 4 - dst */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), -+ TYPE(SQ_EXPORT_POS), -+ RW_GPR(2), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 5 - src */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), -+ TYPE(SQ_EXPORT_PARAM), -+ RW_GPR(1), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 6 - mask */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), -+ TYPE(SQ_EXPORT_PARAM), -+ RW_GPR(0), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 7 */ -+ shader[i++] = CF_DWORD0(ADDR(0)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_RETURN), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 8/9 - dst */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(24)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), -+ DST_REL(0), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(0), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(1)); -+ shader[i++] = VTX_DWORD_PAD; -+ /* 10/11 - src */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(8)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), -+ DST_REL(0), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(8), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(0)); -+ shader[i++] = VTX_DWORD_PAD; -+ /* 12/13 - mask */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(8)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), -+ DST_REL(0), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(16), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(0)); -+ shader[i++] = VTX_DWORD_PAD; -+ -+ /* 14 - non-mask sub */ -+ shader[i++] = CF_DWORD0(ADDR(18)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(2), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_VTX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 15 - dst */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), -+ TYPE(SQ_EXPORT_POS), -+ RW_GPR(1), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 16 - src */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), -+ TYPE(SQ_EXPORT_PARAM), -+ RW_GPR(0), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(0)); -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(0)); -+ /* 17 */ -+ shader[i++] = CF_DWORD0(ADDR(0)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(0), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_RETURN), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 18/19 - dst */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(16)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), -+ DST_REL(0), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(0), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(1)); -+ shader[i++] = VTX_DWORD_PAD; -+ /* 20/21 - src */ -+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), -+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), -+ FETCH_WHOLE_QUAD(0), -+ BUFFER_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ SRC_SEL_X(SQ_SEL_X), -+ MEGA_FETCH_COUNT(8)); -+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), -+ DST_REL(0), -+ DST_SEL_X(SQ_SEL_X), -+ DST_SEL_Y(SQ_SEL_Y), -+ DST_SEL_Z(SQ_SEL_0), -+ DST_SEL_W(SQ_SEL_1), -+ USE_CONST_FIELDS(0), -+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ -+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ -+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); -+ shader[i++] = VTX_DWORD2(OFFSET(8), -+ ENDIAN_SWAP(ENDIAN_NONE), -+ CONST_BUF_NO_STRIDE(0), -+ MEGA_FETCH(0)); -+ shader[i++] = VTX_DWORD_PAD; -+ -+ return i; -+} -+ -+/* comp ps --------------------------------------- */ -+int R600_comp_ps(RADEONChipFamily ChipSet, -+ uint32_t* shader, -+ int src_a, int src_r, int src_g, int src_b -+) -+{ -+ int i = 0; -+ -+ /* 0 */ -+ shader[i++] = CF_DWORD0(ADDR(2)); -+ shader[i++] = CF_DWORD1(POP_COUNT(0), -+ CF_CONST(0), -+ COND(SQ_CF_COND_ACTIVE), -+ I_COUNT(1), -+ CALL_COUNT(0), -+ END_OF_PROGRAM(0), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_TEX), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ /* 1 */ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), -+ TYPE(SQ_EXPORT_PIXEL), -+ RW_GPR(0), -+ RW_REL(ABSOLUTE), -+ INDEX_GPR(0), -+ ELEM_SIZE(1)); -+ -+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_Z), -+ SRC_SEL_W(SQ_SEL_W), -+ R6xx_ELEM_LOOP(0), -+ BURST_COUNT(1), -+ END_OF_PROGRAM(1), -+ VALID_PIXEL_MODE(0), -+ CF_INST(SQ_CF_INST_EXPORT_DONE), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ -+ -+ /* 2/3 - src */ -+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), -+ BC_FRAC_MODE(0), -+ FETCH_WHOLE_QUAD(0), -+ RESOURCE_ID(0), -+ SRC_GPR(0), -+ SRC_REL(ABSOLUTE), -+ R7xx_ALT_CONST(0)); -+ shader[i++] = TEX_DWORD1(DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_SEL_X(src_r), -+ DST_SEL_Y(src_g), -+ DST_SEL_Z(src_b), -+ DST_SEL_W(src_a), -+ LOD_BIAS(0), -+ COORD_TYPE_X(TEX_NORMALIZED), -+ COORD_TYPE_Y(TEX_NORMALIZED), -+ COORD_TYPE_Z(TEX_NORMALIZED), -+ COORD_TYPE_W(TEX_NORMALIZED)); -+ shader[i++] = TEX_DWORD2(OFFSET_X(0), -+ OFFSET_Y(0), -+ OFFSET_Z(0), -+ SAMPLER_ID(0), -+ SRC_SEL_X(SQ_SEL_X), -+ SRC_SEL_Y(SQ_SEL_Y), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1)); -+ shader[i++] = TEX_DWORD_PAD; -+ -+ return i; -+} -diff --git a/src/r600_shader.h b/src/r600_shader.h -new file mode 100644 -index 0000000..67b64ff ---- /dev/null -+++ b/src/r600_shader.h -@@ -0,0 +1,366 @@ -+/* -+ * RadeonHD R6xx, R7xx DRI driver -+ * -+ * Copyright (C) 2008-2009 Alexander Deucher -+ * Copyright (C) 2008-2009 Matthias Hopf -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included -+ * in all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ */ -+ -+/* -+ * Shader macros -+ */ -+ -+#ifndef __SHADER_H__ -+#define __SHADER_H__ -+ -+#include "radeon.h" -+ -+/* Restrictions of ALU instructions -+ * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1. -+ * max of 3 different src GPRs per instr. -+ * max of 4 different cfile constant components per instr. -+ * max of 2 (different) constants (any type) for t. -+ * bank swizzle (see below). -+ * GPR write stalls read of same register. Auto-replaced by PV/PS, NOP needed if registers are relative to -+ * different indices (gpr,loop,nothing). -+ * may use constant registers or constant cache, but not both. -+ */ -+ -+/* Bank_swizzle: (pp. 297ff) -+ * Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2). -+ * per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.: -+ * SRC0 SRC1 SRC2 SWIZZLE cycle0 cycle1 cycle2 -+ * 1.x 2.x 012 1.x 2.x - -+ * 3.x 1.y 201 1.y - 3.x -+ * 2.x 1.y 102 (1.y) (2.x) - -+ * If data is read in a cycle, multiple scalar instructions can reference it. -+ * Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1. -+ * No restrictions for constants or PV/PS. -+ * t can load multiple components in a single cycle slot, but has to share cycles with xyzw. -+ * t with single constant may not load GPRs or PV/PS in cycle 0 (carefull with ALU_TRANS_210). -+ * t with two constants may only load GPRs or PV/PS in cycle 2. -+ */ -+ -+ -+/* Oder of instructions: All CF, All ALU, All Tex/Vtx fetches */ -+ -+ -+// CF insts -+// addr -+#define ADDR(x) (x) -+// pc -+#define POP_COUNT(x) (x) -+// const -+#define CF_CONST(x) (x) -+// cond -+#define COND(x) (x) // SQ_COND_* -+// count -+#define I_COUNT(x) ((x) ? ((x) - 1) : 0) -+//r7xx -+#define COUNT_3(x) (x) -+// call count -+#define CALL_COUNT(x) (x) -+// eop -+#define END_OF_PROGRAM(x) (x) -+// vpm -+#define VALID_PIXEL_MODE(x) (x) -+// cf inst -+#define CF_INST(x) (x) // SQ_CF_INST_* -+ -+// wqm -+#define WHOLE_QUAD_MODE(x) (x) -+// barrier -+#define BARRIER(x) (x) -+//kb0 -+#define KCACHE_BANK0(x) (x) -+//kb1 -+#define KCACHE_BANK1(x) (x) -+// km0/1 -+#define KCACHE_MODE0(x) (x) -+#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_* -+// -+#define KCACHE_ADDR0(x) (x) -+#define KCACHE_ADDR1(x) (x) -+// uw -+#define USES_WATERFALL(x) (x) -+ -+#define ARRAY_BASE(x) (x) -+// export pixel -+#define CF_PIXEL_MRT0 0 -+#define CF_PIXEL_MRT1 1 -+#define CF_PIXEL_MRT2 2 -+#define CF_PIXEL_MRT3 3 -+#define CF_PIXEL_MRT4 4 -+#define CF_PIXEL_MRT5 5 -+#define CF_PIXEL_MRT6 6 -+#define CF_PIXEL_MRT7 7 -+// *_FOG: r6xx only -+#define CF_PIXEL_MRT0_FOG 16 -+#define CF_PIXEL_MRT1_FOG 17 -+#define CF_PIXEL_MRT2_FOG 18 -+#define CF_PIXEL_MRT3_FOG 19 -+#define CF_PIXEL_MRT4_FOG 20 -+#define CF_PIXEL_MRT5_FOG 21 -+#define CF_PIXEL_MRT6_FOG 22 -+#define CF_PIXEL_MRT7_FOG 23 -+#define CF_PIXEL_Z 61 -+// export pos -+#define CF_POS0 60 -+#define CF_POS1 61 -+#define CF_POS2 62 -+#define CF_POS3 63 -+// export param -+// 0...31 -+#define TYPE(x) (x) // SQ_EXPORT_* -+#if 0 -+// type export -+#define SQ_EXPORT_PIXEL 0 -+#define SQ_EXPORT_POS 1 -+#define SQ_EXPORT_PARAM 2 -+// reserved 3 -+// type mem -+#define SQ_EXPORT_WRITE 0 -+#define SQ_EXPORT_WRITE_IND 1 -+#define SQ_EXPORT_WRITE_ACK 2 -+#define SQ_EXPORT_WRITE_IND_ACK 3 -+#endif -+ -+#define RW_GPR(x) (x) -+#define RW_REL(x) (x) -+#define ABSOLUTE 0 -+#define RELATIVE 1 -+#define INDEX_GPR(x) (x) -+#define ELEM_SIZE(x) (x ? (x - 1) : 0) -+#define COMP_MASK(x) (x) -+#define R6xx_ELEM_LOOP(x) (x) -+#define BURST_COUNT(x) (x ? (x - 1) : 0) -+ -+// swiz -+#define SRC_SEL_X(x) (x) // SQ_SEL_* each -+#define SRC_SEL_Y(x) (x) -+#define SRC_SEL_Z(x) (x) -+#define SRC_SEL_W(x) (x) -+ -+#define CF_DWORD0(addr) (addr) -+// R7xx has another entry (COUNT3), but that is only used for adding a bit to count. -+// We allow one more bit for count in the argument of the macro on R7xx instead. -+// R6xx: [0,7] R7xx: [1,16] -+#define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \ -+ (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \ -+ ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)) -+ -+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30)) -+#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \ -+ (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \ -+ ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31)) -+ -+#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \ -+ (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \ -+ ((es) << 30)) -+// R7xx apparently doesn't have the ELEM_LOOP entry any more -+// We still expose it, but ELEM_LOOP is explicitely R6xx now. -+// TODO: is this just forgotten in the docs, or really not available any more? -+#define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \ -+ (((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \ -+ ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)) -+#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \ -+ (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \ -+ ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \ -+ ((wqm) << 30) | ((b) << 31)) -+ -+// ALU clause insts -+#define SRC0_SEL(x) (x) -+#define SRC1_SEL(x) (x) -+#define SRC2_SEL(x) (x) -+// src[0-2]_sel -+// 0-127 GPR -+// 128-159 kcache constants bank 0 -+// 160-191 kcache constants bank 1 -+// 248-255 special SQ_ALU_SRC_* (0, 1, etc.) -+ -+#define SRC0_REL(x) (x) -+#define SRC1_REL(x) (x) -+#define SRC2_REL(x) (x) -+// elem -+#define SRC0_ELEM(x) (x) -+#define SRC1_ELEM(x) (x) -+#define SRC2_ELEM(x) (x) -+#define ELEM_X 0 -+#define ELEM_Y 1 -+#define ELEM_Z 2 -+#define ELEM_W 3 -+// neg -+#define SRC0_NEG(x) (x) -+#define SRC1_NEG(x) (x) -+#define SRC2_NEG(x) (x) -+// im -+#define INDEX_MODE(x) (x) // SQ_INDEX_* -+// ps -+#define PRED_SEL(x) (x) // SQ_PRED_SEL_* -+// last -+#define LAST(x) (x) -+// abs -+#define SRC0_ABS(x) (x) -+#define SRC1_ABS(x) (x) -+// uem -+#define UPDATE_EXECUTE_MASK(x) (x) -+// up -+#define UPDATE_PRED(x) (x) -+// wm -+#define WRITE_MASK(x) (x) -+// fm -+#define FOG_MERGE(x) (x) -+// omod -+#define OMOD(x) (x) // SQ_ALU_OMOD_* -+// alu inst -+#define ALU_INST(x) (x) // SQ_ALU_INST_* -+//bs -+#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_* -+#define DST_GPR(x) (x) -+#define DST_REL(x) (x) -+#define DST_ELEM(x) (x) -+#define CLAMP(x) (x) -+ -+#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \ -+ (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \ -+ ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \ -+ ((im) << 26) | ((ps) << 29) | ((last) << 31)) -+// R7xx has alu_inst at a different slot, and no fog merge any more (no fix function fog any more) -+#define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ -+ (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ -+ ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \ -+ ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) -+#define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ -+ (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ -+ ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \ -+ ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) -+// This is a general chipset macro, but due to selection by chipid typically not usable in static arrays -+// Fog is NOT USED on R7xx, even if specified. -+#define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ -+ ((chipfamily) < CHIP_FAMILY_RV770 ? \ -+ R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \ -+ R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp)) -+#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \ -+ (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \ -+ ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \ -+ ((de) << 29) | ((clamp) << 31)) -+ -+// VTX clause insts -+// vxt insts -+#define VTX_INST(x) (x) // SQ_VTX_INST_* -+ -+// fetch type -+#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_* -+ -+#define FETCH_WHOLE_QUAD(x) (x) -+#define BUFFER_ID(x) (x) -+#define SRC_GPR(x) (x) -+#define SRC_REL(x) (x) -+#define MEGA_FETCH_COUNT(x) ((x) ? ((x) - 1) : 0) -+ -+#define SEMANTIC_ID(x) (x) -+#define DST_SEL_X(x) (x) -+#define DST_SEL_Y(x) (x) -+#define DST_SEL_Z(x) (x) -+#define DST_SEL_W(x) (x) -+#define USE_CONST_FIELDS(x) (x) -+#define DATA_FORMAT(x) (x) -+// num format -+#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_* -+// format comp -+#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_* -+// sma -+#define SRF_MODE_ALL(x) (x) -+#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0 -+#define SRF_MODE_NO_ZERO 1 -+#define OFFSET(x) (x) -+// endian swap -+#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_* -+#define CONST_BUF_NO_STRIDE(x) (x) -+// mf -+#define MEGA_FETCH(x) (x) -+ -+#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \ -+ (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \ -+ ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26)) -+#define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ -+ (((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ -+ ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) -+#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ -+ (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ -+ ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) -+#define VTX_DWORD2(offset, es, cbns, mf) \ -+ (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19)) -+#define VTX_DWORD_PAD 0x00000000 -+ -+// TEX clause insts -+// tex insts -+#define TEX_INST(x) (x) // SQ_TEX_INST_* -+ -+#define BC_FRAC_MODE(x) (x) -+#define FETCH_WHOLE_QUAD(x) (x) -+#define RESOURCE_ID(x) (x) -+#define R7xx_ALT_CONST(x) (x) -+ -+#define LOD_BIAS(x) (x) -+//ct -+#define COORD_TYPE_X(x) (x) -+#define COORD_TYPE_Y(x) (x) -+#define COORD_TYPE_Z(x) (x) -+#define COORD_TYPE_W(x) (x) -+#define TEX_UNNORMALIZED 0 -+#define TEX_NORMALIZED 1 -+#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */ -+#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f) -+#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f) -+#define SAMPLER_ID(x) (x) -+ -+// R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only -+#define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \ -+ (((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ -+ ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24)) -+#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \ -+ (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ -+ ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31)) -+#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \ -+ (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ -+ ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29)) -+#define TEX_DWORD_PAD 0x00000000 -+ -+extern int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs); -+extern int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps); -+ -+extern int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs); -+extern int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps); -+ -+extern int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader); -+extern int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader); -+ -+extern int R600_comp_mask_vs(RADEONChipFamily ChipSet, uint32_t* vs); -+extern int R600_comp_mask_ps(RADEONChipFamily ChipSet, -+ uint32_t* ps, -+ int src_a, int src_r, int src_g, int src_b, -+ int mask_a, int mask_r, int mask_g, int mask_b); -+ -+extern int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs); -+extern int R600_comp_ps(RADEONChipFamily ChipSet, -+ uint32_t* ps, -+ int src_a, int src_r, int src_g, int src_b); -+ -+#endif -diff --git a/src/r600_state.h b/src/r600_state.h -new file mode 100644 -index 0000000..6621420 ---- /dev/null -+++ b/src/r600_state.h -@@ -0,0 +1,282 @@ -+#ifndef __R600_STATE_H__ -+#define __R600_STATE_H__ -+ -+ -+#include "xf86drm.h" -+ -+typedef int bool_t; -+ -+#define CLEAR(x) memset (&x, 0, sizeof(x)) -+ -+/* Sequencer / thread handling */ -+typedef struct { -+ int ps_prio; -+ int vs_prio; -+ int gs_prio; -+ int es_prio; -+ int num_ps_gprs; -+ int num_vs_gprs; -+ int num_gs_gprs; -+ int num_es_gprs; -+ int num_temp_gprs; -+ int num_ps_threads; -+ int num_vs_threads; -+ int num_gs_threads; -+ int num_es_threads; -+ int num_ps_stack_entries; -+ int num_vs_stack_entries; -+ int num_gs_stack_entries; -+ int num_es_stack_entries; -+} sq_config_t; -+ -+/* Color buffer / render target */ -+typedef struct { -+ int id; -+ int w; -+ int h; -+ uint64_t base; -+ int format; -+ int endian; -+ int array_mode; // tiling -+ int number_type; -+ int read_size; -+ int comp_swap; -+ int tile_mode; -+ int blend_clamp; -+ int clear_color; -+ int blend_bypass; -+ int blend_float32; -+ int simple_float; -+ int round_mode; -+ int tile_compact; -+ int source_format; -+} cb_config_t; -+ -+/* Depth buffer */ -+typedef struct { -+ int w; -+ int h; -+ uint64_t base; -+ int format; -+ int read_size; -+ int array_mode; // tiling -+ int tile_surface_en; -+ int tile_compact; -+ int zrange_precision; -+} db_config_t; -+ -+/* Shader */ -+typedef struct { -+ uint64_t shader_addr; -+ int num_gprs; -+ int stack_size; -+ int dx10_clamp; -+ int prime_cache_pgm_en; -+ int prime_cache_on_draw; -+ int fetch_cache_lines; -+ int prime_cache_en; -+ int prime_cache_on_const; -+ int clamp_consts; -+ int export_mode; -+ int uncached_first_inst; -+} shader_config_t; -+ -+/* Vertex buffer / vtx resource */ -+typedef struct { -+ int id; -+ uint64_t vb_addr; -+ uint32_t vtx_num_entries; -+ uint32_t vtx_size_dw; -+ int clamp_x; -+ int format; -+ int num_format_all; -+ int format_comp_all; -+ int srf_mode_all; -+ int endian; -+ int mem_req_size; -+} vtx_resource_t; -+ -+/* Texture resource */ -+typedef struct { -+ int id; -+ int w; -+ int h; -+ int pitch; -+ int depth; -+ int dim; -+ int tile_mode; -+ int tile_type; -+ int format; -+ uint64_t base; -+ uint64_t mip_base; -+ int format_comp_x; -+ int format_comp_y; -+ int format_comp_z; -+ int format_comp_w; -+ int num_format_all; -+ int srf_mode_all; -+ int force_degamma; -+ int endian; -+ int request_size; -+ int dst_sel_x; -+ int dst_sel_y; -+ int dst_sel_z; -+ int dst_sel_w; -+ int base_level; -+ int last_level; -+ int base_array; -+ int last_array; -+ int mpeg_clamp; -+ int perf_modulation; -+ int interlaced; -+} tex_resource_t; -+ -+/* Texture sampler */ -+typedef struct { -+ int id; -+ /* Clamping */ -+ int clamp_x, clamp_y, clamp_z; -+ int border_color; -+ /* Filtering */ -+ int xy_mag_filter, xy_min_filter; -+ int z_filter; -+ int mip_filter; -+ bool_t high_precision_filter; /* ? */ -+ int perf_mip; /* ? 0-7 */ -+ int perf_z; /* ? 3 */ -+ /* LoD selection */ -+ int min_lod, max_lod; /* 0-0x3ff */ -+ int lod_bias; /* 0-0xfff (signed?) */ -+ int lod_bias2; /* ? 0-0xfff (signed?) */ -+ bool_t lod_uses_minor_axis; /* ? */ -+ /* Other stuff */ -+ bool_t point_sampling_clamp; /* ? */ -+ bool_t tex_array_override; /* ? */ -+ bool_t mc_coord_truncate; /* ? */ -+ bool_t force_degamma; /* ? */ -+ bool_t fetch_4; /* ? */ -+ bool_t sample_is_pcf; /* ? */ -+ bool_t type; /* ? */ -+ int depth_compare; /* only depth textures? */ -+ int chroma_key; -+} tex_sampler_t; -+ -+/* Draw command */ -+typedef struct { -+ uint32_t prim_type; -+ uint32_t vgt_draw_initiator; -+ uint32_t index_type; -+ uint32_t num_instances; -+ uint32_t num_indices; -+} draw_config_t; -+ -+#define E32(ib, dword) \ -+do { \ -+ uint32_t *ib_head = (pointer)(char*)(ib)->address; \ -+ ib_head[(ib)->used >> 2] = (dword); \ -+ (ib)->used += 4; \ -+} while (0) -+ -+#define EFLOAT(ib, val) \ -+do { \ -+ union { float f; uint32_t d; } a; \ -+ a.f = (val); \ -+ E32((ib), a.d); \ -+} while (0) -+ -+#define PACK3(ib, cmd, num) \ -+do { \ -+ E32((ib), RADEON_CP_PACKET3 | ((cmd) << 8) | ((((num) - 1) & 0x3fff) << 16)); \ -+} while (0) -+ -+/* write num registers, start at reg */ -+/* If register falls in a special area, special commands are issued */ -+#define PACK0(ib, reg, num) \ -+do { \ -+ if ((reg) >= SET_CONFIG_REG_offset && (reg) < SET_CONFIG_REG_end) { \ -+ PACK3((ib), IT_SET_CONFIG_REG, (num) + 1); \ -+ E32(ib, ((reg) - SET_CONFIG_REG_offset) >> 2); \ -+ } else if ((reg) >= SET_CONTEXT_REG_offset && (reg) < SET_CONTEXT_REG_end) { \ -+ PACK3((ib), IT_SET_CONTEXT_REG, (num) + 1); \ -+ E32(ib, ((reg) - 0x28000) >> 2); \ -+ } else if ((reg) >= SET_ALU_CONST_offset && (reg) < SET_ALU_CONST_end) { \ -+ PACK3((ib), IT_SET_ALU_CONST, (num) + 1); \ -+ E32(ib, ((reg) - SET_ALU_CONST_offset) >> 2); \ -+ } else if ((reg) >= SET_RESOURCE_offset && (reg) < SET_RESOURCE_end) { \ -+ PACK3((ib), IT_SET_RESOURCE, num + 1); \ -+ E32((ib), ((reg) - SET_RESOURCE_offset) >> 2); \ -+ } else if ((reg) >= SET_SAMPLER_offset && (reg) < SET_SAMPLER_end) { \ -+ PACK3((ib), IT_SET_SAMPLER, (num) + 1); \ -+ E32((ib), (reg - SET_SAMPLER_offset) >> 2); \ -+ } else if ((reg) >= SET_CTL_CONST_offset && (reg) < SET_CTL_CONST_end) { \ -+ PACK3((ib), IT_SET_CTL_CONST, (num) + 1); \ -+ E32((ib), ((reg) - SET_CTL_CONST_offset) >> 2); \ -+ } else if ((reg) >= SET_LOOP_CONST_offset && (reg) < SET_LOOP_CONST_end) { \ -+ PACK3((ib), IT_SET_LOOP_CONST, (num) + 1); \ -+ E32((ib), ((reg) - SET_LOOP_CONST_offset) >> 2); \ -+ } else if ((reg) >= SET_BOOL_CONST_offset && (reg) < SET_BOOL_CONST_end) { \ -+ PACK3((ib), IT_SET_BOOL_CONST, (num) + 1); \ -+ E32((ib), ((reg) - SET_BOOL_CONST_offset) >> 2); \ -+ } else { \ -+ E32((ib), CP_PACKET0 ((reg), (num) - 1)); \ -+ } \ -+} while (0) -+ -+/* write a single register */ -+#define EREG(ib, reg, val) \ -+do { \ -+ PACK0((ib), (reg), 1); \ -+ E32((ib), (val)); \ -+} while (0) -+ -+void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib); -+void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib); -+ -+uint64_t -+upload (ScrnInfoPtr pScrn, void *shader, int size, int offset); -+void -+wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib); -+void -+wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib); -+void -+start_3d(ScrnInfoPtr pScrn, drmBufPtr ib); -+void -+set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf); -+void -+cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr); -+void -+cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, int crtc, int start, int stop, Bool enable); -+void -+fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf); -+void -+vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf); -+void -+ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf); -+void -+set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf); -+void -+set_bool_const(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val); -+void -+set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res); -+void -+set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res); -+void -+set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s); -+void -+set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); -+void -+set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); -+void -+set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); -+void -+set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); -+void -+set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); -+void -+set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib); -+void -+draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices); -+void -+draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf); -+ -+#endif -diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c -new file mode 100644 -index 0000000..56adc6d ---- /dev/null -+++ b/src/r600_textured_videofuncs.c -@@ -0,0 +1,514 @@ -+/* -+ * Copyright 2008 Advanced Micro Devices, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ * Author: Alex Deucher -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include "xf86.h" -+ -+#include "exa.h" -+ -+#include "radeon.h" -+#include "radeon_reg.h" -+#include "r600_shader.h" -+#include "r600_reg.h" -+#include "r600_state.h" -+ -+#include "radeon_video.h" -+ -+#include -+#include "fourcc.h" -+ -+#include "damage.h" -+ -+static void -+R600DoneTexturedVideo(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ draw_config_t draw_conf; -+ vtx_resource_t vtx_res; -+ -+ CLEAR (draw_conf); -+ CLEAR (vtx_res); -+ -+ if (accel_state->vb_index == 0) { -+ R600IBDiscard(pScrn, accel_state->ib); -+ return; -+ } -+ -+ accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + -+ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); -+ accel_state->vb_size = accel_state->vb_index * 16; -+ -+ /* flush vertex cache */ -+ if ((info->ChipFamily == CHIP_FAMILY_RV610) || -+ (info->ChipFamily == CHIP_FAMILY_RV620) || -+ (info->ChipFamily == CHIP_FAMILY_RS780) || -+ (info->ChipFamily == CHIP_FAMILY_RV710)) -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, -+ accel_state->vb_size, accel_state->vb_mc_addr); -+ else -+ cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, -+ accel_state->vb_size, accel_state->vb_mc_addr); -+ -+ /* Vertex buffer setup */ -+ vtx_res.id = SQ_VTX_RESOURCE_vs; -+ vtx_res.vtx_size_dw = 16 / 4; -+ vtx_res.vtx_num_entries = accel_state->vb_size / 4; -+ vtx_res.mem_req_size = 1; -+ vtx_res.vb_addr = accel_state->vb_mc_addr; -+ set_vtx_resource (pScrn, accel_state->ib, &vtx_res); -+ -+ draw_conf.prim_type = DI_PT_RECTLIST; -+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; -+ draw_conf.num_instances = 1; -+ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; -+ draw_conf.index_type = DI_INDEX_SIZE_16_BIT; -+ -+ draw_auto(pScrn, accel_state->ib, &draw_conf); -+ -+ wait_3d_idle_clean(pScrn, accel_state->ib); -+ -+ /* sync destination surface */ -+ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), -+ accel_state->dst_size, accel_state->dst_mc_addr); -+ -+ R600CPFlushIndirect(pScrn, accel_state->ib); -+} -+ -+void -+R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ PixmapPtr pPixmap = pPriv->pPixmap; -+ BoxPtr pBox = REGION_RECTS(&pPriv->clip); -+ int nBox = REGION_NUM_RECTS(&pPriv->clip); -+ int dstxoff, dstyoff; -+ cb_config_t cb_conf; -+ tex_resource_t tex_res; -+ tex_sampler_t tex_samp; -+ shader_config_t vs_conf, ps_conf; -+ int uv_offset; -+ static float ps_alu_consts[] = { -+ 1.0, 0.0, 1.4020, 0, /* r - c[0] */ -+ 1.0, -0.34414, -0.71414, 0, /* g - c[1] */ -+ 1.0, 1.7720, 0.0, 0, /* b - c[2] */ -+ /* Constants for undoing Y'CbCr scaling -+ * - Y' is scaled from 16:235 -+ * - Cb/Cr are scaled from 16:240 -+ * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5]) -+ * Vector is [Y_mul, Y_shfit, C_mul, C_shift] -+ */ -+ 256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0, -+ }; -+ -+ CLEAR (cb_conf); -+ CLEAR (tex_res); -+ CLEAR (tex_samp); -+ CLEAR (vs_conf); -+ CLEAR (ps_conf); -+ -+ accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); -+ accel_state->src_pitch[0] = pPriv->src_pitch; -+ -+ /* bad pitch */ -+ if (accel_state->src_pitch[0] & 7) -+ return; -+ if (accel_state->dst_pitch & 7) -+ return; -+ -+#ifdef COMPOSITE -+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; -+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; -+#else -+ dstxoff = 0; -+ dstyoff = 0; -+#endif -+ -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ -+ /* Init */ -+ start_3d(pScrn, accel_state->ib); -+ -+ set_default_state(pScrn, accel_state->ib); -+ -+ /* Scissor / viewport */ -+ EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); -+ EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); -+ -+ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -+ accel_state->xv_vs_offset; -+ -+ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -+ accel_state->xv_ps_offset; -+ -+ /* PS bool constant */ -+ switch(pPriv->id) { -+ case FOURCC_YV12: -+ case FOURCC_I420: -+ set_bool_const(pScrn, accel_state->ib, 0, 1); -+ break; -+ case FOURCC_UYVY: -+ case FOURCC_YUY2: -+ default: -+ set_bool_const(pScrn, accel_state->ib, 0, 0); -+ break; -+ } -+ -+ accel_state->vs_size = 512; -+ accel_state->ps_size = 512; -+ -+ /* Shader */ -+ -+ /* flush SQ cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, -+ accel_state->vs_size, accel_state->vs_mc_addr); -+ -+ vs_conf.shader_addr = accel_state->vs_mc_addr; -+ vs_conf.num_gprs = 2; -+ vs_conf.stack_size = 0; -+ vs_setup (pScrn, accel_state->ib, &vs_conf); -+ -+ /* flush SQ cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, -+ accel_state->ps_size, accel_state->ps_mc_addr); -+ -+ ps_conf.shader_addr = accel_state->ps_mc_addr; -+ ps_conf.num_gprs = 3; -+ ps_conf.stack_size = 1; -+ ps_conf.uncached_first_inst = 1; -+ ps_conf.clamp_consts = 0; -+ ps_conf.export_mode = 2; -+ ps_setup (pScrn, accel_state->ib, &ps_conf); -+ -+ /* PS alu constants */ -+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, -+ sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); -+ -+ /* Texture */ -+ switch(pPriv->id) { -+ case FOURCC_YV12: -+ case FOURCC_I420: -+ accel_state->src_mc_addr[0] = pPriv->src_offset; -+ accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; -+ -+ /* flush texture cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], -+ accel_state->src_mc_addr[0]); -+ -+ /* Y texture */ -+ tex_res.id = 0; -+ tex_res.w = pPriv->w; -+ tex_res.h = pPriv->h; -+ tex_res.pitch = accel_state->src_pitch[0]; -+ tex_res.depth = 0; -+ tex_res.dim = SQ_TEX_DIM_2D; -+ tex_res.base = accel_state->src_mc_addr[0]; -+ tex_res.mip_base = accel_state->src_mc_addr[0]; -+ -+ tex_res.format = FMT_8; -+ tex_res.dst_sel_x = SQ_SEL_X; /* Y */ -+ tex_res.dst_sel_y = SQ_SEL_1; -+ tex_res.dst_sel_z = SQ_SEL_1; -+ tex_res.dst_sel_w = SQ_SEL_1; -+ -+ tex_res.request_size = 1; -+ tex_res.base_level = 0; -+ tex_res.last_level = 0; -+ tex_res.perf_modulation = 0; -+ tex_res.interlaced = 0; -+ set_tex_resource (pScrn, accel_state->ib, &tex_res); -+ -+ /* Y sampler */ -+ tex_samp.id = 0; -+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; -+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; -+ tex_samp.clamp_z = SQ_TEX_WRAP; -+ -+ /* xxx: switch to bicubic */ -+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; -+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; -+ -+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; -+ tex_samp.mip_filter = 0; /* no mipmap */ -+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); -+ -+ /* U or V texture */ -+ uv_offset = accel_state->src_pitch[0] * pPriv->h; -+ uv_offset = (uv_offset + 255) & ~255; -+ -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, -+ accel_state->src_size[0] / 4, -+ accel_state->src_mc_addr[0] + uv_offset); -+ -+ tex_res.id = 1; -+ tex_res.format = FMT_8; -+ tex_res.w = pPriv->w >> 1; -+ tex_res.h = pPriv->h >> 1; -+ tex_res.pitch = accel_state->src_pitch[0] >> 1; -+ tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ -+ tex_res.dst_sel_y = SQ_SEL_1; -+ tex_res.dst_sel_z = SQ_SEL_1; -+ tex_res.dst_sel_w = SQ_SEL_1; -+ tex_res.interlaced = 0; -+ -+ tex_res.base = accel_state->src_mc_addr[0] + uv_offset; -+ tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; -+ set_tex_resource (pScrn, accel_state->ib, &tex_res); -+ -+ /* U or V sampler */ -+ tex_samp.id = 1; -+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); -+ -+ /* U or V texture */ -+ uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1)); -+ uv_offset = (uv_offset + 255) & ~255; -+ -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, -+ accel_state->src_size[0] / 4, -+ accel_state->src_mc_addr[0] + uv_offset); -+ -+ tex_res.id = 2; -+ tex_res.format = FMT_8; -+ tex_res.w = pPriv->w >> 1; -+ tex_res.h = pPriv->h >> 1; -+ tex_res.pitch = accel_state->src_pitch[0] >> 1; -+ tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ -+ tex_res.dst_sel_y = SQ_SEL_1; -+ tex_res.dst_sel_z = SQ_SEL_1; -+ tex_res.dst_sel_w = SQ_SEL_1; -+ tex_res.interlaced = 0; -+ -+ tex_res.base = accel_state->src_mc_addr[0] + uv_offset; -+ tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; -+ set_tex_resource (pScrn, accel_state->ib, &tex_res); -+ -+ /* UV sampler */ -+ tex_samp.id = 2; -+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); -+ break; -+ case FOURCC_UYVY: -+ case FOURCC_YUY2: -+ default: -+ accel_state->src_mc_addr[0] = pPriv->src_offset; -+ accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; -+ -+ /* flush texture cache */ -+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], -+ accel_state->src_mc_addr[0]); -+ -+ /* Y texture */ -+ tex_res.id = 0; -+ tex_res.w = pPriv->w; -+ tex_res.h = pPriv->h; -+ tex_res.pitch = accel_state->src_pitch[0] >> 1; -+ tex_res.depth = 0; -+ tex_res.dim = SQ_TEX_DIM_2D; -+ tex_res.base = accel_state->src_mc_addr[0]; -+ tex_res.mip_base = accel_state->src_mc_addr[0]; -+ -+ tex_res.format = FMT_8_8; -+ if (pPriv->id == FOURCC_UYVY) -+ tex_res.dst_sel_x = SQ_SEL_Y; /* Y */ -+ else -+ tex_res.dst_sel_x = SQ_SEL_X; /* Y */ -+ tex_res.dst_sel_y = SQ_SEL_1; -+ tex_res.dst_sel_z = SQ_SEL_1; -+ tex_res.dst_sel_w = SQ_SEL_1; -+ -+ tex_res.request_size = 1; -+ tex_res.base_level = 0; -+ tex_res.last_level = 0; -+ tex_res.perf_modulation = 0; -+ tex_res.interlaced = 0; -+ set_tex_resource (pScrn, accel_state->ib, &tex_res); -+ -+ /* Y sampler */ -+ tex_samp.id = 0; -+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; -+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; -+ tex_samp.clamp_z = SQ_TEX_WRAP; -+ -+ /* xxx: switch to bicubic */ -+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; -+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; -+ -+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; -+ tex_samp.mip_filter = 0; /* no mipmap */ -+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); -+ -+ /* UV texture */ -+ tex_res.id = 1; -+ tex_res.format = FMT_8_8_8_8; -+ tex_res.w = pPriv->w >> 1; -+ tex_res.h = pPriv->h; -+ tex_res.pitch = accel_state->src_pitch[0] >> 2; -+ if (pPriv->id == FOURCC_UYVY) { -+ tex_res.dst_sel_x = SQ_SEL_X; /* V */ -+ tex_res.dst_sel_y = SQ_SEL_Z; /* U */ -+ } else { -+ tex_res.dst_sel_x = SQ_SEL_Y; /* V */ -+ tex_res.dst_sel_y = SQ_SEL_W; /* U */ -+ } -+ tex_res.dst_sel_z = SQ_SEL_1; -+ tex_res.dst_sel_w = SQ_SEL_1; -+ tex_res.interlaced = 0; -+ -+ tex_res.base = accel_state->src_mc_addr[0]; -+ tex_res.mip_base = accel_state->src_mc_addr[0]; -+ set_tex_resource (pScrn, accel_state->ib, &tex_res); -+ -+ /* UV sampler */ -+ tex_samp.id = 1; -+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); -+ break; -+ } -+ -+ /* Render setup */ -+ EREG(accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); -+ EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); -+ EREG(accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ -+ -+ cb_conf.id = 0; -+ -+ accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; -+ -+ cb_conf.w = accel_state->dst_pitch; -+ cb_conf.h = pPixmap->drawable.height; -+ cb_conf.base = accel_state->dst_mc_addr; -+ -+ switch (pPixmap->drawable.bitsPerPixel) { -+ case 16: -+ if (pPixmap->drawable.depth == 15) { -+ cb_conf.format = COLOR_1_5_5_5; -+ cb_conf.comp_swap = 1; /* ARGB */ -+ } else { -+ cb_conf.format = COLOR_5_6_5; -+ cb_conf.comp_swap = 2; /* RGB */ -+ } -+ break; -+ case 32: -+ cb_conf.format = COLOR_8_8_8_8; -+ cb_conf.comp_swap = 1; /* ARGB */ -+ break; -+ default: -+ return; -+ } -+ -+ cb_conf.source_format = 1; -+ cb_conf.blend_clamp = 1; -+ set_render_target(pScrn, accel_state->ib, &cb_conf); -+ -+ EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | -+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | -+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); -+ EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ -+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ -+ -+ /* Interpolator setup */ -+ /* export tex coords from VS */ -+ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); -+ EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); -+ -+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x -+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); -+ EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); -+ EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | -+ (0x03 << DEFAULT_VAL_shift) | -+ SEL_CENTROID_bit)); -+ EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); -+ -+ -+ cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap, -+ radeon_covering_crtc_num(pScrn, -+ pPriv->drw_x, -+ pPriv->drw_x + pPriv->dst_w, -+ pPriv->drw_y, -+ pPriv->drw_y + pPriv->dst_h, -+ pPriv->desired_crtc), -+ pPriv->drw_y, -+ pPriv->drw_y + pPriv->dst_h, -+ pPriv->vsync); -+ -+ -+ accel_state->vb_index = 0; -+ -+ while (nBox--) { -+ int srcX, srcY, srcw, srch; -+ int dstX, dstY, dstw, dsth; -+ float *vb; -+ -+ if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { -+ R600DoneTexturedVideo(pScrn); -+ accel_state->vb_index = 0; -+ accel_state->ib = RADEONCPGetBuffer(pScrn); -+ } -+ -+ vb = (pointer)((char*)accel_state->ib->address + -+ (accel_state->ib->total / 2) + -+ accel_state->vb_index * 16); -+ -+ dstX = pBox->x1 + dstxoff; -+ dstY = pBox->y1 + dstyoff; -+ dstw = pBox->x2 - pBox->x1; -+ dsth = pBox->y2 - pBox->y1; -+ -+ srcX = ((pBox->x1 - pPriv->drw_x) * -+ pPriv->src_w) / pPriv->dst_w; -+ srcY = ((pBox->y1 - pPriv->drw_y) * -+ pPriv->src_h) / pPriv->dst_h; -+ -+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w; -+ srch = (pPriv->src_h * dsth) / pPriv->dst_h; -+ -+ vb[0] = (float)dstX; -+ vb[1] = (float)dstY; -+ vb[2] = (float)srcX / pPriv->w; -+ vb[3] = (float)srcY / pPriv->h; -+ -+ vb[4] = (float)dstX; -+ vb[5] = (float)(dstY + dsth); -+ vb[6] = (float)srcX / pPriv->w; -+ vb[7] = (float)(srcY + srch) / pPriv->h; -+ -+ vb[8] = (float)(dstX + dstw); -+ vb[9] = (float)(dstY + dsth); -+ vb[10] = (float)(srcX + srcw) / pPriv->w; -+ vb[11] = (float)(srcY + srch) / pPriv->h; -+ -+ accel_state->vb_index += 3; -+ -+ pBox++; -+ } -+ -+ R600DoneTexturedVideo(pScrn); -+ -+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip); -+} -diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c -new file mode 100644 -index 0000000..114ccf5 ---- /dev/null -+++ b/src/r6xx_accel.c -@@ -0,0 +1,1102 @@ -+/* -+ * Copyright 2008 Advanced Micro Devices, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ * Authors: Alex Deucher -+ * Matthias Hopf -+ */ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include "xf86.h" -+ -+#include -+ -+#include "radeon.h" -+#include "r600_shader.h" -+#include "radeon_reg.h" -+#include "r600_reg.h" -+#include "r600_state.h" -+ -+#include "radeon_drm.h" -+ -+/* Flush the indirect buffer to the kernel for submission to the card */ -+void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ drmBufPtr buffer = ib; -+ int start = 0; -+ drm_radeon_indirect_t indirect; -+ -+ if (!buffer) return; -+ -+ //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", -+ // buffer->idx); -+ -+ while (buffer->used & 0x3c){ -+ E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ -+ } -+ -+ //ErrorF("buffer bytes: %d\n", buffer->used); -+ -+ indirect.idx = buffer->idx; -+ indirect.start = start; -+ indirect.end = buffer->used; -+ indirect.discard = 1; -+ -+ drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, -+ &indirect, sizeof(drm_radeon_indirect_t)); -+ -+} -+ -+void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ if (!ib) return; -+ -+ ib->used = 0; -+ R600CPFlushIndirect(pScrn, ib); -+} -+ -+void -+wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ -+ //flush caches, don't generate timestamp -+ PACK3(ib, IT_EVENT_WRITE, 1); -+ E32(ib, CACHE_FLUSH_AND_INV_EVENT); -+ // wait for 3D idle clean -+ EREG(ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit | -+ WAIT_3D_IDLECLEAN_bit)); -+} -+ -+void -+wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ -+ EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); -+ -+} -+ -+static void -+reset_cb(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ int i; -+ -+ PACK0(ib, CB_COLOR0_INFO, 8); -+ for (i = 0; i < 8; i++) -+ E32(ib, 0); -+} -+ -+static void -+reset_td_samplers(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ int i; -+ -+ wait_3d_idle(pScrn, ib); -+ -+ PACK0(ib, TD_PS_SAMPLER0_BORDER_RED, 4*TD_PS_SAMPLER0_BORDER_RED_num); -+ for (i = 0; i < 4*TD_PS_SAMPLER0_BORDER_RED_num; i++) -+ E32(ib, 0); -+ PACK0(ib, TD_VS_SAMPLER0_BORDER_RED, 4*TD_VS_SAMPLER0_BORDER_RED_num); -+ for (i = 0; i < 4*TD_VS_SAMPLER0_BORDER_RED_num; i++) -+ E32(ib, 0); -+ -+ wait_3d_idle(pScrn, ib); -+} -+ -+static void -+reset_sampler_const (ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ int i; -+ -+ for (i = 0; i < SQ_TEX_SAMPLER_WORD_all_num; i++) { -+ PACK0(ib, SQ_TEX_SAMPLER_WORD + i * SQ_TEX_SAMPLER_WORD_offset, 3); -+ E32(ib, SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift); -+ E32(ib, MAX_LOD_mask); -+ E32(ib, 0); -+ } -+} -+ -+static void -+reset_dx9_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ int i; -+ -+ const int count = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2); -+ -+ PACK0(ib, SQ_ALU_CONSTANT, count); -+ for (i = 0; i < count; i++) -+ EFLOAT(ib, 0.0); -+} -+ -+static void -+reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ int i; -+ -+ for (i = 0; i < SQ_BOOL_CONST_0_num; i++) -+ EREG(ib, SQ_BOOL_CONST_0 + (i << 2), 0); -+ -+ PACK0(ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num); -+ -+ for (i = 0; i < SQ_LOOP_CONST_all_num; i++) -+ E32(ib, 0); -+ -+} -+ -+void -+start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (info->ChipFamily < CHIP_FAMILY_RV770) { -+ PACK3(ib, IT_START_3D_CMDBUF, 1); -+ E32(ib, 0); -+ } -+ -+ PACK3(ib, IT_CONTEXT_CONTROL, 2); -+ E32(ib, 0x80000000); -+ E32(ib, 0x80000000); -+ -+ wait_3d_idle_clean (pScrn, ib); -+} -+ -+/* -+ * Setup of functional groups -+ */ -+ -+// asic stack/thread/gpr limits - need to query the drm -+static void -+sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) -+{ -+ uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; -+ uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if ((info->ChipFamily == CHIP_FAMILY_RV610) || -+ (info->ChipFamily == CHIP_FAMILY_RV620) || -+ (info->ChipFamily == CHIP_FAMILY_RS780) || -+ (info->ChipFamily == CHIP_FAMILY_RV710)) -+ sq_config = 0; // no VC -+ else -+ sq_config = VC_ENABLE_bit; -+ -+ sq_config |= (DX9_CONSTS_bit | -+ ALU_INST_PREFER_VECTOR_bit | -+ (sq_conf->ps_prio << PS_PRIO_shift) | -+ (sq_conf->vs_prio << VS_PRIO_shift) | -+ (sq_conf->gs_prio << GS_PRIO_shift) | -+ (sq_conf->es_prio << ES_PRIO_shift)); -+ -+ sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | -+ (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | -+ (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); -+ sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | -+ (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); -+ -+ sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | -+ (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | -+ (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | -+ (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); -+ -+ sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | -+ (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); -+ -+ sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | -+ (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); -+ -+ PACK0(ib, SQ_CONFIG, 6); -+ E32(ib, sq_config); -+ E32(ib, sq_gpr_resource_mgmt_1); -+ E32(ib, sq_gpr_resource_mgmt_2); -+ E32(ib, sq_thread_resource_mgmt); -+ E32(ib, sq_stack_resource_mgmt_1); -+ E32(ib, sq_stack_resource_mgmt_2); -+ -+} -+ -+void -+set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) -+{ -+ uint32_t cb_color_info; -+ int pitch, slice, h; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ cb_color_info = ((cb_conf->endian << ENDIAN_shift) | -+ (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | -+ (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | -+ (cb_conf->number_type << NUMBER_TYPE_shift) | -+ (cb_conf->comp_swap << COMP_SWAP_shift) | -+ (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); -+ if (cb_conf->read_size) -+ cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; -+ if (cb_conf->blend_clamp) -+ cb_color_info |= BLEND_CLAMP_bit; -+ if (cb_conf->clear_color) -+ cb_color_info |= CLEAR_COLOR_bit; -+ if (cb_conf->blend_bypass) -+ cb_color_info |= BLEND_BYPASS_bit; -+ if (cb_conf->blend_float32) -+ cb_color_info |= BLEND_FLOAT32_bit; -+ if (cb_conf->simple_float) -+ cb_color_info |= SIMPLE_FLOAT_bit; -+ if (cb_conf->round_mode) -+ cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; -+ if (cb_conf->tile_compact) -+ cb_color_info |= TILE_COMPACT_bit; -+ if (cb_conf->source_format) -+ cb_color_info |= SOURCE_FORMAT_bit; -+ -+ pitch = (cb_conf->w / 8) - 1; -+ h = (cb_conf->h + 7) & ~7; -+ slice = ((cb_conf->w * h) / 64) - 1; -+ -+ EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); -+ -+ // rv6xx workaround -+ if ((info->ChipFamily > CHIP_FAMILY_R600) && -+ (info->ChipFamily < CHIP_FAMILY_RV770)) { -+ PACK3(ib, IT_SURFACE_BASE_UPDATE, 1); -+ E32(ib, (2 << cb_conf->id)); -+ } -+ -+ // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib -+ EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | -+ (slice << SLICE_TILE_MAX_shift))); -+ EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | -+ (0 << SLICE_MAX_shift))); -+ EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); -+ EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 -+ EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 -+ EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | -+ (0 << FMASK_TILE_MAX_shift))); -+} -+ -+void -+cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr) -+{ -+ uint32_t cp_coher_size; -+ if (size == 0xffffffff) -+ cp_coher_size = 0xffffffff; -+ else -+ cp_coher_size = ((size + 255) >> 8); -+ -+ PACK3(ib, IT_SURFACE_SYNC, 4); -+ E32(ib, sync_type); -+ E32(ib, cp_coher_size); -+ E32(ib, (mc_addr >> 8)); -+ E32(ib, 10); /* poll interval */ -+} -+ -+/* inserts a wait for vline in the command stream */ -+void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, -+ int crtc, int start, int stop, Bool enable) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -+ uint32_t offset; -+ RADEONCrtcPrivatePtr radeon_crtc; -+ -+ if (!enable) -+ return; -+ -+ if ((crtc < 0) || (crtc > 1)) -+ return; -+ -+ if (stop < start) -+ return; -+ -+ if (!xf86_config->crtc[crtc]->enabled) -+ return; -+ -+#ifdef USE_EXA -+ if (info->useEXA) -+ offset = exaGetPixmapOffset(pPix); -+ else -+#endif -+ offset = pPix->devPrivate.ptr - info->FB; -+ -+ /* if drawing to front buffer */ -+ if (offset != 0) -+ return; -+ -+ start = max(start, 0); -+ stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay); -+ -+ if (start > xf86_config->crtc[crtc]->mode.VDisplay) -+ return; -+ -+ radeon_crtc = xf86_config->crtc[crtc]->driver_private; -+ -+ /* set the VLINE range */ -+ EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset, -+ (start << AVIVO_D1MODE_VLINE_START_SHIFT) | -+ (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); -+ -+ /* tell the CP to poll the VLINE state register */ -+ PACK3(ib, IT_WAIT_REG_MEM, 6); -+ E32(ib, IT_WAIT_REG | IT_WAIT_EQ); -+ E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset)); -+ E32(ib, 0); -+ E32(ib, 0); // Ref value -+ E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask -+ E32(ib, 10); // Wait interval -+} -+ -+void -+fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) -+{ -+ uint32_t sq_pgm_resources; -+ -+ sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | -+ (fs_conf->stack_size << STACK_SIZE_shift)); -+ -+ if (fs_conf->dx10_clamp) -+ sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; -+ -+ EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); -+ EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); -+ EREG(ib, SQ_PGM_CF_OFFSET_FS, 0); -+} -+ -+void -+vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) -+{ -+ uint32_t sq_pgm_resources; -+ -+ sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | -+ (vs_conf->stack_size << STACK_SIZE_shift)); -+ -+ if (vs_conf->dx10_clamp) -+ sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; -+ if (vs_conf->fetch_cache_lines) -+ sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); -+ if (vs_conf->uncached_first_inst) -+ sq_pgm_resources |= UNCACHED_FIRST_INST_bit; -+ -+ EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); -+ EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); -+ EREG(ib, SQ_PGM_CF_OFFSET_VS, 0); -+} -+ -+void -+ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) -+{ -+ uint32_t sq_pgm_resources; -+ -+ sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | -+ (ps_conf->stack_size << STACK_SIZE_shift)); -+ -+ if (ps_conf->dx10_clamp) -+ sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; -+ if (ps_conf->fetch_cache_lines) -+ sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); -+ if (ps_conf->uncached_first_inst) -+ sq_pgm_resources |= UNCACHED_FIRST_INST_bit; -+ if (ps_conf->clamp_consts) -+ sq_pgm_resources |= CLAMP_CONSTS_bit; -+ -+ EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); -+ EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); -+ EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); -+ EREG(ib, SQ_PGM_CF_OFFSET_PS, 0); -+} -+ -+void -+set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) -+{ -+ int i; -+ const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); -+ -+ PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); -+ for (i = 0; i < countreg; i++) -+ EFLOAT(ib, const_buf[i]); -+} -+ -+void -+set_bool_const(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) -+{ -+ /* bool order is: ps, vs, gs, ps, vs, gs, ... */ -+ EREG(ib, SQ_BOOL_CONST_0 + (offset << 2), val); -+} -+ -+void -+set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) -+{ -+ uint32_t sq_vtx_constant_word2; -+ -+ sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | -+ ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | -+ (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | -+ (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | -+ (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); -+ if (res->clamp_x) -+ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; -+ -+ if (res->format_comp_all) -+ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; -+ -+ if (res->srf_mode_all) -+ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; -+ -+ PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); -+ E32(ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS -+ E32(ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE -+ E32(ib, sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN -+ E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? -+ E32(ib, 0); // 4: n/a -+ E32(ib, 0); // 5: n/a -+ E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE -+} -+ -+void -+set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) -+{ -+ uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; -+ uint32_t sq_tex_resource_word5, sq_tex_resource_word6; -+ -+ sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | -+ (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); -+ -+ if (tex_res->w) -+ sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | -+ ((tex_res->w - 1) << TEX_WIDTH_shift)); -+ -+ if (tex_res->tile_type) -+ sq_tex_resource_word0 |= TILE_TYPE_bit; -+ -+ sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); -+ -+ if (tex_res->h) -+ sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); -+ if (tex_res->depth) -+ sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); -+ -+ sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | -+ (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | -+ (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | -+ (tex_res->format_comp_w << FORMAT_COMP_W_shift) | -+ (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | -+ (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | -+ (tex_res->request_size << REQUEST_SIZE_shift) | -+ (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | -+ (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | -+ (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | -+ (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | -+ (tex_res->base_level << BASE_LEVEL_shift)); -+ -+ if (tex_res->srf_mode_all) -+ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; -+ if (tex_res->force_degamma) -+ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; -+ -+ sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | -+ (tex_res->base_array << BASE_ARRAY_shift) | -+ (tex_res->last_array << LAST_ARRAY_shift)); -+ -+ sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | -+ (tex_res->perf_modulation << PERF_MODULATION_shift) | -+ (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); -+ -+ if (tex_res->interlaced) -+ sq_tex_resource_word6 |= INTERLACED_bit; -+ -+ PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); -+ E32(ib, sq_tex_resource_word0); -+ E32(ib, sq_tex_resource_word1); -+ E32(ib, ((tex_res->base) >> 8)); -+ E32(ib, ((tex_res->mip_base) >> 8)); -+ E32(ib, sq_tex_resource_word4); -+ E32(ib, sq_tex_resource_word5); -+ E32(ib, sq_tex_resource_word6); -+} -+ -+void -+set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) -+{ -+ uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; -+ -+ sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | -+ (s->clamp_y << CLAMP_Y_shift) | -+ (s->clamp_z << CLAMP_Z_shift) | -+ (s->xy_mag_filter << XY_MAG_FILTER_shift) | -+ (s->xy_min_filter << XY_MIN_FILTER_shift) | -+ (s->z_filter << Z_FILTER_shift) | -+ (s->mip_filter << MIP_FILTER_shift) | -+ (s->border_color << BORDER_COLOR_TYPE_shift) | -+ (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | -+ (s->chroma_key << CHROMA_KEY_shift)); -+ if (s->point_sampling_clamp) -+ sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; -+ if (s->tex_array_override) -+ sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; -+ if (s->lod_uses_minor_axis) -+ sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; -+ -+ sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | -+ (s->max_lod << MAX_LOD_shift) | -+ (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); -+ -+ sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | -+ (s->perf_mip << PERF_MIP_shift) | -+ (s->perf_z << PERF_Z_shift)); -+ if (s->mc_coord_truncate) -+ sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; -+ if (s->force_degamma) -+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; -+ if (s->high_precision_filter) -+ sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; -+ if (s->fetch_4) -+ sq_tex_sampler_word2 |= FETCH_4_bit; -+ if (s->sample_is_pcf) -+ sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; -+ if (s->type) -+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; -+ -+ PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); -+ E32(ib, sq_tex_sampler_word0); -+ E32(ib, sq_tex_sampler_word1); -+ E32(ib, sq_tex_sampler_word2); -+} -+ -+//XXX deal with clip offsets in clip setup -+void -+set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) -+{ -+ -+ EREG(ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | -+ (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); -+ EREG(ib, PA_SC_SCREEN_SCISSOR_BR, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | -+ (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); -+} -+ -+void -+set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) -+{ -+ -+ EREG(ib, PA_SC_VPORT_SCISSOR_0_TL + -+ id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | -+ (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | -+ WINDOW_OFFSET_DISABLE_bit)); -+ EREG(ib, PA_SC_VPORT_SCISSOR_0_BR + -+ id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | -+ (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); -+} -+ -+void -+set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) -+{ -+ -+ EREG(ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | -+ (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | -+ WINDOW_OFFSET_DISABLE_bit)); -+ EREG(ib, PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | -+ (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); -+} -+ -+void -+set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) -+{ -+ -+ EREG(ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | -+ (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | -+ WINDOW_OFFSET_DISABLE_bit)); -+ EREG(ib, PA_SC_WINDOW_SCISSOR_BR, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | -+ (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); -+} -+ -+void -+set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) -+{ -+ -+ EREG(ib, PA_SC_CLIPRECT_0_TL + -+ id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | -+ (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); -+ EREG(ib, PA_SC_CLIPRECT_0_BR + -+ id * PA_SC_CLIPRECT_0_BR_offset, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | -+ (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); -+} -+ -+/* -+ * Setup of default state -+ */ -+ -+void -+set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) -+{ -+ tex_resource_t tex_res; -+ shader_config_t fs_conf; -+ sq_config_t sq_conf; -+ int i; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_accel_state *accel_state = info->accel_state; -+ -+ memset(&tex_res, 0, sizeof(tex_resource_t)); -+ memset(&fs_conf, 0, sizeof(shader_config_t)); -+ -+#if 1 -+ if (accel_state->XInited3D) -+ return; -+#endif -+ -+ accel_state->XInited3D = TRUE; -+ -+ wait_3d_idle(pScrn, ib); -+ -+ // ASIC specific setup, see drm -+ if (info->ChipFamily < CHIP_FAMILY_RV770) { -+ EREG(ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | -+ (28 << TD_FIFO_CREDIT_shift))); -+ EREG(ib, VC_ENHANCE, 0); -+ EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); -+ EREG(ib, DB_DEBUG, 0x82000000); /* ? */ -+ EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | -+ (16 << DEPTH_FLUSH_shift) | -+ (0 << FORCE_SUMMARIZE_shift) | -+ (4 << DEPTH_PENDING_FREE_shift) | -+ (16 << DEPTH_CACHELINE_FREE_shift) | -+ 0)); -+ } else { -+ EREG(ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | -+ (28 << TD_FIFO_CREDIT_shift))); -+ EREG(ib, VC_ENHANCE, 0); -+ EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); -+ EREG(ib, DB_DEBUG, 0); -+ EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | -+ (16 << DEPTH_FLUSH_shift) | -+ (0 << FORCE_SUMMARIZE_shift) | -+ (4 << DEPTH_PENDING_FREE_shift) | -+ (4 << DEPTH_CACHELINE_FREE_shift) | -+ 0)); -+ } -+ -+ reset_td_samplers(pScrn, ib); -+ reset_dx9_alu_consts(pScrn, ib); -+ reset_bool_loop_const (pScrn, ib); -+ reset_sampler_const (pScrn, ib); -+ -+ // SQ -+ sq_conf.ps_prio = 0; -+ sq_conf.vs_prio = 1; -+ sq_conf.gs_prio = 2; -+ sq_conf.es_prio = 3; -+ // need to set stack/thread/gpr limits based on the asic -+ // for now just set them low enough so any card will work -+ // see r600_cp.c in the drm -+ switch (info->ChipFamily) { -+ case CHIP_FAMILY_R600: -+ sq_conf.num_ps_gprs = 192; -+ sq_conf.num_vs_gprs = 56; -+ sq_conf.num_temp_gprs = 4; -+ sq_conf.num_gs_gprs = 0; -+ sq_conf.num_es_gprs = 0; -+ sq_conf.num_ps_threads = 136; -+ sq_conf.num_vs_threads = 48; -+ sq_conf.num_gs_threads = 4; -+ sq_conf.num_es_threads = 4; -+ sq_conf.num_ps_stack_entries = 128; -+ sq_conf.num_vs_stack_entries = 128; -+ sq_conf.num_gs_stack_entries = 0; -+ sq_conf.num_es_stack_entries = 0; -+ break; -+ case CHIP_FAMILY_RV630: -+ case CHIP_FAMILY_RV635: -+ sq_conf.num_ps_gprs = 84; -+ sq_conf.num_vs_gprs = 36; -+ sq_conf.num_temp_gprs = 4; -+ sq_conf.num_gs_gprs = 0; -+ sq_conf.num_es_gprs = 0; -+ sq_conf.num_ps_threads = 144; -+ sq_conf.num_vs_threads = 40; -+ sq_conf.num_gs_threads = 4; -+ sq_conf.num_es_threads = 4; -+ sq_conf.num_ps_stack_entries = 40; -+ sq_conf.num_vs_stack_entries = 40; -+ sq_conf.num_gs_stack_entries = 32; -+ sq_conf.num_es_stack_entries = 16; -+ break; -+ case CHIP_FAMILY_RV610: -+ case CHIP_FAMILY_RV620: -+ case CHIP_FAMILY_RS780: -+ default: -+ sq_conf.num_ps_gprs = 84; -+ sq_conf.num_vs_gprs = 36; -+ sq_conf.num_temp_gprs = 4; -+ sq_conf.num_gs_gprs = 0; -+ sq_conf.num_es_gprs = 0; -+ sq_conf.num_ps_threads = 136; -+ sq_conf.num_vs_threads = 48; -+ sq_conf.num_gs_threads = 4; -+ sq_conf.num_es_threads = 4; -+ sq_conf.num_ps_stack_entries = 40; -+ sq_conf.num_vs_stack_entries = 40; -+ sq_conf.num_gs_stack_entries = 32; -+ sq_conf.num_es_stack_entries = 16; -+ break; -+ case CHIP_FAMILY_RV670: -+ sq_conf.num_ps_gprs = 144; -+ sq_conf.num_vs_gprs = 40; -+ sq_conf.num_temp_gprs = 4; -+ sq_conf.num_gs_gprs = 0; -+ sq_conf.num_es_gprs = 0; -+ sq_conf.num_ps_threads = 136; -+ sq_conf.num_vs_threads = 48; -+ sq_conf.num_gs_threads = 4; -+ sq_conf.num_es_threads = 4; -+ sq_conf.num_ps_stack_entries = 40; -+ sq_conf.num_vs_stack_entries = 40; -+ sq_conf.num_gs_stack_entries = 32; -+ sq_conf.num_es_stack_entries = 16; -+ break; -+ case CHIP_FAMILY_RV770: -+ sq_conf.num_ps_gprs = 192; -+ sq_conf.num_vs_gprs = 56; -+ sq_conf.num_temp_gprs = 4; -+ sq_conf.num_gs_gprs = 0; -+ sq_conf.num_es_gprs = 0; -+ sq_conf.num_ps_threads = 188; -+ sq_conf.num_vs_threads = 60; -+ sq_conf.num_gs_threads = 0; -+ sq_conf.num_es_threads = 0; -+ sq_conf.num_ps_stack_entries = 256; -+ sq_conf.num_vs_stack_entries = 256; -+ sq_conf.num_gs_stack_entries = 0; -+ sq_conf.num_es_stack_entries = 0; -+ break; -+ case CHIP_FAMILY_RV730: -+ sq_conf.num_ps_gprs = 84; -+ sq_conf.num_vs_gprs = 36; -+ sq_conf.num_temp_gprs = 4; -+ sq_conf.num_gs_gprs = 0; -+ sq_conf.num_es_gprs = 0; -+ sq_conf.num_ps_threads = 188; -+ sq_conf.num_vs_threads = 60; -+ sq_conf.num_gs_threads = 0; -+ sq_conf.num_es_threads = 0; -+ sq_conf.num_ps_stack_entries = 128; -+ sq_conf.num_vs_stack_entries = 128; -+ sq_conf.num_gs_stack_entries = 0; -+ sq_conf.num_es_stack_entries = 0; -+ break; -+ case CHIP_FAMILY_RV710: -+ sq_conf.num_ps_gprs = 192; -+ sq_conf.num_vs_gprs = 56; -+ sq_conf.num_temp_gprs = 4; -+ sq_conf.num_gs_gprs = 0; -+ sq_conf.num_es_gprs = 0; -+ sq_conf.num_ps_threads = 144; -+ sq_conf.num_vs_threads = 48; -+ sq_conf.num_gs_threads = 0; -+ sq_conf.num_es_threads = 0; -+ sq_conf.num_ps_stack_entries = 128; -+ sq_conf.num_vs_stack_entries = 128; -+ sq_conf.num_gs_stack_entries = 0; -+ sq_conf.num_es_stack_entries = 0; -+ break; -+ } -+ -+ sq_setup(pScrn, ib, &sq_conf); -+ -+ EREG(ib, SQ_VTX_BASE_VTX_LOC, 0); -+ EREG(ib, SQ_VTX_START_INST_LOC, 0); -+ -+ PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9); -+ E32(ib, 0); // SQ_ESGS_RING_ITEMSIZE -+ E32(ib, 0); // SQ_GSVS_RING_ITEMSIZE -+ E32(ib, 0); // SQ_ESTMP_RING_ITEMSIZE -+ E32(ib, 0); // SQ_GSTMP_RING_ITEMSIZE -+ E32(ib, 0); // SQ_VSTMP_RING_ITEMSIZE -+ E32(ib, 0); // SQ_PSTMP_RING_ITEMSIZE -+ E32(ib, 0); // SQ_FBUF_RING_ITEMSIZE -+ E32(ib, 0); // SQ_REDUC_RING_ITEMSIZE -+ E32(ib, 0); // SQ_GS_VERT_ITEMSIZE -+ -+ // DB -+ EREG(ib, DB_DEPTH_INFO, 0); -+ EREG(ib, DB_STENCIL_CLEAR, 0); -+ EREG(ib, DB_DEPTH_CLEAR, 0); -+ EREG(ib, DB_STENCILREFMASK, 0); -+ EREG(ib, DB_STENCILREFMASK_BF, 0); -+ EREG(ib, DB_DEPTH_CONTROL, 0); -+ EREG(ib, DB_RENDER_CONTROL, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); -+ if (info->ChipFamily < CHIP_FAMILY_RV770) -+ EREG(ib, DB_RENDER_OVERRIDE, FORCE_SHADER_Z_ORDER_bit); -+ else -+ EREG(ib, DB_RENDER_OVERRIDE, 0); -+ EREG(ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | -+ (2 << ALPHA_TO_MASK_OFFSET1_shift) | -+ (2 << ALPHA_TO_MASK_OFFSET2_shift) | -+ (2 << ALPHA_TO_MASK_OFFSET3_shift))); -+ -+ // SX -+ EREG(ib, SX_ALPHA_TEST_CONTROL, 0); -+ EREG(ib, SX_ALPHA_REF, 0); -+ -+ // CB -+ reset_cb(pScrn, ib); -+ -+ PACK0(ib, CB_BLEND_RED, 4); -+ E32(ib, 0x00000000); -+ E32(ib, 0x00000000); -+ E32(ib, 0x00000000); -+ E32(ib, 0x00000000); -+ -+ /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */ -+ // RV6xx+ have per-MRT blend -+ if (info->ChipFamily > CHIP_FAMILY_R600) { -+ PACK0(ib, CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num); -+ for (i = 0; i < CB_BLEND0_CONTROL_num; i++) -+ E32(ib, 0); -+ } -+ -+ EREG(ib, CB_BLEND_CONTROL, 0); -+ -+ if (info->ChipFamily < CHIP_FAMILY_RV770) { -+ PACK0(ib, CB_FOG_RED, 3); -+ E32(ib, 0x00000000); -+ E32(ib, 0x00000000); -+ E32(ib, 0x00000000); -+ } -+ -+ EREG(ib, CB_COLOR_CONTROL, 0); -+ PACK0(ib, CB_CLRCMP_CONTROL, 4); -+ E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC -+ E32(ib, 0); // CB_CLRCMP_SRC -+ E32(ib, 0); // CB_CLRCMP_DST -+ E32(ib, 0); // CB_CLRCMP_MSK -+ -+ -+ if (info->ChipFamily < CHIP_FAMILY_RV770) { -+ PACK0(ib, CB_CLEAR_RED, 4); -+ EFLOAT(ib, 1.0); /* WTF? */ -+ EFLOAT(ib, 0.0); -+ EFLOAT(ib, 1.0); -+ EFLOAT(ib, 1.0); -+ } -+ EREG(ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); -+ -+ // SC -+ set_generic_scissor(pScrn, ib, 0, 0, 8192, 8192); -+ set_screen_scissor(pScrn, ib, 0, 0, 8192, 8192); -+ EREG(ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | -+ (0 << WINDOW_Y_OFFSET_shift))); -+ set_window_scissor(pScrn, ib, 0, 0, 8192, 8192); -+ -+ EREG(ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); -+ -+ /* clip boolean is set to always visible -> doesn't matter */ -+ for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) -+ set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192); -+ -+ if (info->ChipFamily < CHIP_FAMILY_RV770) -+ EREG(ib, R7xx_PA_SC_EDGERULE, 0x00000000); -+ else -+ EREG(ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); -+ -+ for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) { -+ set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192); -+ PACK0(ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2); -+ EFLOAT(ib, 0.0); -+ EFLOAT(ib, 1.0); -+ } -+ -+ if (info->ChipFamily < CHIP_FAMILY_RV770) -+ EREG(ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); -+ else -+ EREG(ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | -+ 0x00500000)); /* ? */ -+ -+ EREG(ib, PA_SC_LINE_CNTL, 0); -+ EREG(ib, PA_SC_AA_CONFIG, 0); -+ EREG(ib, PA_SC_AA_MASK, 0xFFFFFFFF); -+ -+ //XXX: double check this -+ if (info->ChipFamily > CHIP_FAMILY_R600) { -+ EREG(ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0); -+ EREG(ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0); -+ } -+ -+ EREG(ib, PA_SC_LINE_STIPPLE, 0); -+ EREG(ib, PA_SC_MPASS_PS_CNTL, 0); -+ -+ // CL -+ PACK0(ib, PA_CL_VPORT_XSCALE_0, 6); -+ EFLOAT(ib, 0.0f); // PA_CL_VPORT_XSCALE -+ EFLOAT(ib, 0.0f); // PA_CL_VPORT_XOFFSET -+ EFLOAT(ib, 0.0f); // PA_CL_VPORT_YSCALE -+ EFLOAT(ib, 0.0f); // PA_CL_VPORT_YOFFSET -+ EFLOAT(ib, 0.0f); // PA_CL_VPORT_ZSCALE -+ EFLOAT(ib, 0.0f); // PA_CL_VPORT_ZOFFSET -+ EREG(ib, PA_CL_CLIP_CNTL, (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit)); -+ EREG(ib, PA_CL_VTE_CNTL, 0); -+ EREG(ib, PA_CL_VS_OUT_CNTL, 0); -+ EREG(ib, PA_CL_NANINF_CNTL, 0); -+ PACK0(ib, PA_CL_GB_VERT_CLIP_ADJ, 4); -+ EFLOAT(ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ -+ EFLOAT(ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ -+ EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ -+ EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ -+ -+ /* user clipping planes are disabled by default */ -+ PACK0(ib, PA_CL_UCP_0_X, 24); -+ for (i = 0; i < 24; i++) -+ EFLOAT(ib, 0.0); -+ -+ // SU -+ EREG(ib, PA_SU_SC_MODE_CNTL, FACE_bit); -+ EREG(ib, PA_SU_POINT_SIZE, 0); -+ EREG(ib, PA_SU_POINT_MINMAX, 0); -+ EREG(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0); -+ EREG(ib, PA_SU_POLY_OFFSET_BACK_SCALE, 0); -+ EREG(ib, PA_SU_POLY_OFFSET_FRONT_SCALE, 0); -+ EREG(ib, PA_SU_POLY_OFFSET_BACK_OFFSET, 0); -+ EREG(ib, PA_SU_POLY_OFFSET_FRONT_OFFSET, 0); -+ -+ EREG(ib, PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ -+ EREG(ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | -+ (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ -+ EREG(ib, PA_SU_POLY_OFFSET_CLAMP, 0); -+ -+ // SPI -+ if (info->ChipFamily < CHIP_FAMILY_RV770) -+ EREG(ib, R7xx_SPI_THREAD_GROUPING, 0); -+ else -+ EREG(ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); -+ -+ EREG(ib, SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) | -+ (3 << PNT_SPRITE_OVRD_Y_shift) | -+ (0 << PNT_SPRITE_OVRD_Z_shift) | -+ (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */ -+ EREG(ib, SPI_INPUT_Z, 0); -+ EREG(ib, SPI_FOG_CNTL, 0); -+ EREG(ib, SPI_FOG_FUNC_SCALE, 0); -+ EREG(ib, SPI_FOG_FUNC_BIAS, 0); -+ -+ PACK0(ib, SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num); -+ for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* identity mapping */ -+ E32(ib, 0x03020100 + i*0x04040404); -+ EREG(ib, SPI_VS_OUT_CONFIG, 0); -+ -+ // clear FS -+ fs_setup(pScrn, ib, &fs_conf); -+ -+ // VGT -+ EREG(ib, VGT_MAX_VTX_INDX, 2048); /* XXX set to a reasonably large number of indices */ -+ EREG(ib, VGT_MIN_VTX_INDX, 0); -+ EREG(ib, VGT_INDX_OFFSET, 0); -+ EREG(ib, VGT_INSTANCE_STEP_RATE_0, 0); -+ EREG(ib, VGT_INSTANCE_STEP_RATE_1, 0); -+ -+ EREG(ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0); -+ EREG(ib, VGT_OUTPUT_PATH_CNTL, 0); -+ EREG(ib, VGT_GS_MODE, 0); -+ EREG(ib, VGT_HOS_CNTL, 0); -+ EREG(ib, VGT_HOS_MAX_TESS_LEVEL, 0); -+ EREG(ib, VGT_HOS_MIN_TESS_LEVEL, 0); -+ EREG(ib, VGT_HOS_REUSE_DEPTH, 0); -+ EREG(ib, VGT_GROUP_PRIM_TYPE, 0); -+ EREG(ib, VGT_GROUP_FIRST_DECR, 0); -+ EREG(ib, VGT_GROUP_DECR, 0); -+ EREG(ib, VGT_GROUP_VECT_0_CNTL, 0); -+ EREG(ib, VGT_GROUP_VECT_1_CNTL, 0); -+ EREG(ib, VGT_GROUP_VECT_0_FMT_CNTL, 0); -+ EREG(ib, VGT_GROUP_VECT_1_FMT_CNTL, 0); -+ EREG(ib, VGT_PRIMITIVEID_EN, 0); -+ EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN, 0); -+ EREG(ib, VGT_STRMOUT_EN, 0); -+ EREG(ib, VGT_REUSE_OFF, 0); -+ EREG(ib, VGT_VTX_CNT_EN, 0); -+ EREG(ib, VGT_STRMOUT_BUFFER_EN, 0); -+ -+ // clear tex resources - PS -+ for (i = 0; i < 16; i++) { -+ tex_res.id = i; -+ set_tex_resource(pScrn, ib, &tex_res); -+ } -+ -+ // clear tex resources - VS -+ for (i = 160; i < 164; i++) { -+ tex_res.id = i; -+ set_tex_resource(pScrn, ib, &tex_res); -+ } -+ -+ // clear tex resources - FS -+ for (i = 320; i < 335; i++) { -+ tex_res.id = i; -+ set_tex_resource(pScrn, ib, &tex_res); -+ } -+ -+} -+ -+ -+/* -+ * Commands -+ */ -+ -+void -+draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) -+{ -+ uint32_t i, count; -+ -+ EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); -+ PACK3(ib, IT_INDEX_TYPE, 1); -+ E32(ib, draw_conf->index_type); -+ PACK3(ib, IT_NUM_INSTANCES, 1); -+ E32(ib, draw_conf->num_instances); -+ -+ // calculate num of packets -+ count = 2; -+ if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) -+ count += (draw_conf->num_indices + 1) / 2; -+ else -+ count += draw_conf->num_indices; -+ -+ PACK3(ib, IT_DRAW_INDEX_IMMD, count); -+ E32(ib, draw_conf->num_indices); -+ E32(ib, draw_conf->vgt_draw_initiator); -+ -+ if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { -+ for (i = 0; i < draw_conf->num_indices; i += 2) { -+ if ((i + 1) == draw_conf->num_indices) -+ E32(ib, indices[i]); -+ else -+ E32(ib, (indices[i] | (indices[i + 1] << 16))); -+ } -+ } else { -+ for (i = 0; i < draw_conf->num_indices; i++) -+ E32(ib, indices[i]); -+ } -+} -+ -+void -+draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) -+{ -+ -+ EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); -+ PACK3(ib, IT_INDEX_TYPE, 1); -+ E32(ib, draw_conf->index_type); -+ PACK3(ib, IT_NUM_INSTANCES, 1); -+ E32(ib, draw_conf->num_instances); -+ PACK3(ib, IT_DRAW_INDEX_AUTO, 2); -+ E32(ib, draw_conf->num_indices); -+ E32(ib, draw_conf->vgt_draw_initiator); -+} -diff --git a/src/radeon.h b/src/radeon.h -index 2944fe8..7bb720a 100644 ---- a/src/radeon.h -+++ b/src/radeon.h -@@ -206,7 +206,8 @@ typedef enum { - OPTION_DEFAULT_TVDAC_ADJ, - OPTION_INT10, - OPTION_EXA_VSYNC, -- OPTION_ATOM_TVOUT -+ OPTION_ATOM_TVOUT, -+ OPTION_R4XX_ATOM - } RADEONOpts; - - -@@ -354,6 +355,8 @@ typedef enum { - - #define IS_DCE32_VARIANT ((info->ChipFamily >= CHIP_FAMILY_RV730)) - -+#define IS_R600_3D (info->ChipFamily >= CHIP_FAMILY_R600) -+ - #define IS_R500_3D ((info->ChipFamily == CHIP_FAMILY_RV515) || \ - (info->ChipFamily == CHIP_FAMILY_R520) || \ - (info->ChipFamily == CHIP_FAMILY_RV530) || \ -@@ -609,6 +612,49 @@ struct radeon_accel_state { - Bool src_tile_height; - - Bool vsync; -+ -+ drmBufPtr ib; -+ int vb_index; -+ -+ // shader storage -+ ExaOffscreenArea *shaders; -+ uint32_t solid_vs_offset; -+ uint32_t solid_ps_offset; -+ uint32_t copy_vs_offset; -+ uint32_t copy_ps_offset; -+ uint32_t comp_vs_offset; -+ uint32_t comp_ps_offset; -+ uint32_t comp_mask_ps_offset; -+ uint32_t xv_vs_offset; -+ uint32_t xv_ps_offset; -+ -+ //size/addr stuff -+ uint32_t src_size[2]; -+ uint64_t src_mc_addr[2]; -+ uint32_t src_pitch[2]; -+ uint32_t src_width[2]; -+ uint32_t src_height[2]; -+ uint32_t src_bpp[2]; -+ uint32_t dst_size; -+ uint64_t dst_mc_addr; -+ uint32_t dst_pitch; -+ uint32_t dst_height; -+ uint32_t dst_bpp; -+ uint32_t vs_size; -+ uint64_t vs_mc_addr; -+ uint32_t ps_size; -+ uint64_t ps_mc_addr; -+ uint32_t vb_size; -+ uint64_t vb_mc_addr; -+ -+ // UTS/DFS -+ drmBufPtr scratch; -+ -+ // copy -+ ExaOffscreenArea *copy_area; -+ Bool same_surface; -+ int rop; -+ uint32_t planemask; - #endif - - #ifdef USE_XAA -@@ -839,6 +885,8 @@ typedef struct { - int virtualX; - int virtualY; - -+ Bool r4xx_atom; -+ - } RADEONInfoRec, *RADEONInfoPtr; - - #define RADEONWaitForFifo(pScrn, entries) \ -@@ -947,11 +995,11 @@ extern Bool radeon_card_posted(ScrnInfoPtr pScrn); - #ifdef XF86DRI - extern void RADEONWaitForIdleCP(ScrnInfoPtr pScrn); - extern void RADEONWaitForVLineCP(ScrnInfoPtr pScrn, PixmapPtr pPix, -- int crtc, int start, int stop, int enable); -+ int crtc, int start, int stop); - #endif - extern void RADEONWaitForIdleMMIO(ScrnInfoPtr pScrn); - extern void RADEONWaitForVLineMMIO(ScrnInfoPtr pScrn, PixmapPtr pPix, -- int crtc, int start, int stop, int enable); -+ int crtc, int start, int stop); - - /* radeon_crtc.c */ - extern void radeon_crtc_dpms(xf86CrtcPtr crtc, int mode); -@@ -1035,6 +1083,8 @@ extern void RADEONDoPrepareCopyMMIO(ScrnInfoPtr pScrn, - uint32_t dst_pitch_offset, - uint32_t datatype, int rop, - Pixel planemask); -+extern Bool R600DrawInit(ScreenPtr pScreen); -+extern Bool R600LoadShaders(ScrnInfoPtr pScrn); - #endif - - #if defined(XF86DRI) && defined(USE_EXA) -@@ -1119,15 +1169,16 @@ do { \ - #define RADEONCP_STOP(pScrn, info) \ - do { \ - int _ret; \ -- if (info->cp->CPStarted) { \ -+ if (info->cp->CPStarted) { \ - _ret = RADEONCPStop(pScrn, info); \ - if (_ret) { \ - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, \ - "%s: CP stop %d\n", __FUNCTION__, _ret); \ - } \ - info->cp->CPStarted = FALSE; \ -- } \ -- RADEONEngineRestore(pScrn); \ -+ } \ -+ if (info->ChipFamily < CHIP_FAMILY_R600) \ -+ RADEONEngineRestore(pScrn); \ - info->cp->CPRuns = FALSE; \ - } while (0) - -@@ -1235,28 +1286,31 @@ do { \ - if (RADEON_VERBOSE) \ - xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ - "FLUSH_RING in %s\n", __FUNCTION__); \ -- if (info->cp->indirectBuffer) { \ -+ if (info->cp->indirectBuffer) \ - RADEONCPFlushIndirect(pScrn, 0); \ -- } \ - } while (0) - - - #define RADEON_WAIT_UNTIL_2D_IDLE() \ - do { \ -- BEGIN_RING(2); \ -- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ -- OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ -- RADEON_WAIT_HOST_IDLECLEAN)); \ -- ADVANCE_RING(); \ -+ if (info->ChipFamily < CHIP_FAMILY_R600) { \ -+ BEGIN_RING(2); \ -+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ -+ OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ -+ RADEON_WAIT_HOST_IDLECLEAN)); \ -+ ADVANCE_RING(); \ -+ } \ - } while (0) - - #define RADEON_WAIT_UNTIL_3D_IDLE() \ - do { \ -- BEGIN_RING(2); \ -- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ -- OUT_RING((RADEON_WAIT_3D_IDLECLEAN | \ -- RADEON_WAIT_HOST_IDLECLEAN)); \ -- ADVANCE_RING(); \ -+ if (info->ChipFamily < CHIP_FAMILY_R600) { \ -+ BEGIN_RING(2); \ -+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ -+ OUT_RING((RADEON_WAIT_3D_IDLECLEAN | \ -+ RADEON_WAIT_HOST_IDLECLEAN)); \ -+ ADVANCE_RING(); \ -+ } \ - } while (0) - - #define RADEON_WAIT_UNTIL_IDLE() \ -@@ -1265,38 +1319,44 @@ do { \ - xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ - "WAIT_UNTIL_IDLE() in %s\n", __FUNCTION__); \ - } \ -- BEGIN_RING(2); \ -- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ -- OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ -- RADEON_WAIT_3D_IDLECLEAN | \ -- RADEON_WAIT_HOST_IDLECLEAN)); \ -- ADVANCE_RING(); \ -+ if (info->ChipFamily < CHIP_FAMILY_R600) { \ -+ BEGIN_RING(2); \ -+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ -+ OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ -+ RADEON_WAIT_3D_IDLECLEAN | \ -+ RADEON_WAIT_HOST_IDLECLEAN)); \ -+ ADVANCE_RING(); \ -+ } \ - } while (0) - - #define RADEON_PURGE_CACHE() \ - do { \ -- BEGIN_RING(2); \ -- if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ -- OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ -- OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ -- } else { \ -- OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ -- OUT_RING(R300_RB3D_DC_FLUSH_ALL); \ -- } \ -- ADVANCE_RING(); \ -+ if (info->ChipFamily < CHIP_FAMILY_R600) { \ -+ BEGIN_RING(2); \ -+ if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ -+ OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ -+ OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ -+ } else { \ -+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ -+ OUT_RING(R300_RB3D_DC_FLUSH_ALL); \ -+ } \ -+ ADVANCE_RING(); \ -+ } \ - } while (0) - - #define RADEON_PURGE_ZCACHE() \ - do { \ -- BEGIN_RING(2); \ -- if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ -- OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ -- OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ -- } else { \ -- OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \ -- OUT_RING(R300_ZC_FLUSH_ALL); \ -+ if (info->ChipFamily < CHIP_FAMILY_R600) { \ -+ BEGIN_RING(2); \ -+ if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ -+ OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ -+ OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ -+ } else { \ -+ OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \ -+ OUT_RING(R300_ZC_FLUSH_ALL); \ -+ } \ -+ ADVANCE_RING(); \ - } \ -- ADVANCE_RING(); \ - } while (0) - - #endif /* XF86DRI */ -diff --git a/src/radeon_accel.c b/src/radeon_accel.c -index 96570e8..dffbc57 100644 ---- a/src/radeon_accel.c -+++ b/src/radeon_accel.c -@@ -78,6 +78,7 @@ - /* Driver data structures */ - #include "radeon.h" - #include "radeon_reg.h" -+#include "r600_reg.h" - #include "radeon_macros.h" - #include "radeon_probe.h" - #include "radeon_version.h" -@@ -92,6 +93,7 @@ - /* X and server generic header files */ - #include "xf86.h" - -+static void R600EngineReset(ScrnInfoPtr pScrn); - - #ifdef USE_XAA - static struct { -@@ -149,6 +151,37 @@ void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries) - } - } - -+void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; -+ int i; -+ -+ for (;;) { -+ for (i = 0; i < RADEON_TIMEOUT; i++) { -+ if (info->ChipFamily >= CHIP_FAMILY_RV770) -+ info->accel_state->fifo_slots = -+ INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK; -+ else -+ info->accel_state->fifo_slots = -+ INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK; -+ if (info->accel_state->fifo_slots >= entries) return; -+ } -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "FIFO timed out: stat=0x%08x\n", -+ (unsigned int)INREG(R600_GRBM_STATUS)); -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "FIFO timed out, resetting engine...\n"); -+ R600EngineReset(pScrn); -+#ifdef XF86DRI -+ if (info->directRenderingEnabled) { -+ RADEONCP_RESET(pScrn, info); -+ RADEONCP_START(pScrn, info); -+ } -+#endif -+ } -+} -+ - /* Flush all dirty data in the Pixel Cache to memory */ - void RADEONEngineFlush(ScrnInfoPtr pScrn) - { -@@ -156,9 +189,6 @@ void RADEONEngineFlush(ScrnInfoPtr pScrn) - unsigned char *RADEONMMIO = info->MMIO; - int i; - -- if (info->ChipFamily >= CHIP_FAMILY_R600) -- return; -- - if (info->ChipFamily <= CHIP_FAMILY_RV280) { - OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT, - RADEON_RB3D_DC_FLUSH_ALL, -@@ -198,8 +228,6 @@ void RADEONEngineReset(ScrnInfoPtr pScrn) - uint32_t rbbm_soft_reset; - uint32_t host_path_cntl; - -- if (info->ChipFamily >= CHIP_FAMILY_R600) -- return; - /* The following RBBM_SOFT_RESET sequence can help un-wedge - * an R300 after the command processor got stuck. - */ -@@ -310,6 +338,35 @@ void RADEONEngineReset(ScrnInfoPtr pScrn) - OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl); - } - -+/* Reset graphics card to known state */ -+static void R600EngineReset(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; -+ uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl; -+ -+ cp_ptr = INREG(R600_CP_RB_WPTR); -+ -+ cp_me_cntl = INREG(R600_CP_ME_CNTL); -+ OUTREG(R600_CP_ME_CNTL, 0x10000000); -+ -+ OUTREG(R600_GRBM_SOFT_RESET, 0x7fff); -+ INREG(R600_GRBM_SOFT_RESET); -+ usleep (50); -+ OUTREG(R600_GRBM_SOFT_RESET, 0); -+ INREG(R600_GRBM_SOFT_RESET); -+ -+ OUTREG(R600_CP_RB_WPTR_DELAY, 0); -+ cp_rb_cntl = INREG(R600_CP_RB_CNTL); -+ OUTREG(R600_CP_RB_CNTL, 0x80000000); -+ -+ OUTREG(R600_CP_RB_RPTR_WR, cp_ptr); -+ OUTREG(R600_CP_RB_WPTR, cp_ptr); -+ OUTREG(R600_CP_RB_CNTL, cp_rb_cntl); -+ OUTREG(R600_CP_ME_CNTL, cp_me_cntl); -+ -+} -+ - /* Restore the acceleration hardware to its previous state */ - void RADEONEngineRestore(ScrnInfoPtr pScrn) - { -@@ -611,8 +668,12 @@ drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) - - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, - "GetBuffer timed out, resetting engine...\n"); -- RADEONEngineReset(pScrn); -- RADEONEngineRestore(pScrn); -+ -+ if (info->ChipFamily < CHIP_FAMILY_R600) { -+ RADEONEngineReset(pScrn); -+ RADEONEngineRestore(pScrn); -+ } else -+ R600EngineReset(pScrn); - - /* Always restart the engine when doing CP 2D acceleration */ - RADEONCP_RESET(pScrn, info); -@@ -627,6 +688,8 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) - drmBufPtr buffer = info->cp->indirectBuffer; - int start = info->cp->indirectStart; - drm_radeon_indirect_t indirect; -+ RING_LOCALS; -+ RADEONCP_REFRESH(pScrn, info); - - if (!buffer) return; - if (start == buffer->used && !discard) return; -@@ -636,6 +699,14 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) - buffer->idx); - } - -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ while (buffer->used & 0x3c){ -+ BEGIN_RING(1); -+ OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ -+ ADVANCE_RING(); -+ } -+ } -+ - indirect.idx = buffer->idx; - indirect.start = start; - indirect.end = buffer->used; -@@ -664,6 +735,19 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn) - drmBufPtr buffer = info->cp->indirectBuffer; - int start = info->cp->indirectStart; - drm_radeon_indirect_t indirect; -+ RING_LOCALS; -+ RADEONCP_REFRESH(pScrn, info); -+ -+ -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ if (buffer) { -+ while (buffer->used & 0x3c) { -+ BEGIN_RING(1); -+ OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ -+ ADVANCE_RING(); -+ } -+ } -+ } - - info->cp->indirectBuffer = NULL; - info->cp->indirectStart = 0; -@@ -926,20 +1010,26 @@ Bool RADEONAccelInit(ScreenPtr pScreen) - ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; - RADEONInfoPtr info = RADEONPTR(pScrn); - -- if (info->ChipFamily >= CHIP_FAMILY_R600) -- return FALSE; -- - #ifdef USE_EXA - if (info->useEXA) { - # ifdef XF86DRI - if (info->directRenderingEnabled) { -- if (!RADEONDrawInitCP(pScreen)) -- return FALSE; -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ if (!R600DrawInit(pScreen)) -+ return FALSE; -+ } else { -+ if (!RADEONDrawInitCP(pScreen)) -+ return FALSE; -+ } - } else - # endif /* XF86DRI */ - { -- if (!RADEONDrawInitMMIO(pScreen)) -+ if (info->ChipFamily >= CHIP_FAMILY_R600) - return FALSE; -+ else { -+ if (!RADEONDrawInitMMIO(pScreen)) -+ return FALSE; -+ } - } - } - #endif /* USE_EXA */ -@@ -947,6 +1037,9 @@ Bool RADEONAccelInit(ScreenPtr pScreen) - if (!info->useEXA) { - XAAInfoRecPtr a; - -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ return FALSE; -+ - if (!(a = info->accel_state->accel = XAACreateInfoRec())) { - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n"); - return FALSE; -diff --git a/src/radeon_atombios.c b/src/radeon_atombios.c -index 34bf1dc..406e4f9 100644 ---- a/src/radeon_atombios.c -+++ b/src/radeon_atombios.c -@@ -1876,6 +1876,7 @@ RADEONGetATOMConnectorInfoFromBIOSObject (ScrnInfoPtr pScrn) - for (j = 0; j < ATOM_MAX_SUPPORTED_DEVICE; j++) { - if (info->BiosConnector[j].valid && (i != j) ) { - if (info->BiosConnector[i].i2c_line_mux == info->BiosConnector[j].i2c_line_mux) { -+ ErrorF("Shared DDC line: %d %d\n", i, j); - info->BiosConnector[i].shared_ddc = TRUE; - info->BiosConnector[j].shared_ddc = TRUE; - } -diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c -index 0a9f9db..eabd87d 100644 ---- a/src/radeon_commonfuncs.c -+++ b/src/radeon_commonfuncs.c -@@ -628,16 +628,13 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - - /* inserts a wait for vline in the command stream */ - void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix, -- int crtc, int start, int stop, Bool enable) -+ int crtc, int start, int stop) - { - RADEONInfoPtr info = RADEONPTR(pScrn); - xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); - uint32_t offset; - ACCEL_PREAMBLE(); - -- if (!enable) -- return; -- - if ((crtc < 0) || (crtc > 1)) - return; - -@@ -659,7 +656,7 @@ void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix, - return; - - start = max(start, 0); -- stop = max(stop, xf86_config->crtc[crtc]->mode.VDisplay); -+ stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay); - - if (start > xf86_config->crtc[crtc]->mode.VDisplay) - return; -@@ -733,8 +730,11 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) - - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, - "Idle timed out, resetting engine...\n"); -- RADEONEngineReset(pScrn); -- RADEONEngineRestore(pScrn); -+ if (info->ChipFamily < CHIP_FAMILY_R600) { -+ RADEONEngineReset(pScrn); -+ RADEONEngineRestore(pScrn); -+ } else -+ R600EngineReset(pScrn); - - /* Always restart the engine when doing CP 2D acceleration */ - RADEONCP_RESET(pScrn, info); -@@ -743,39 +743,56 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) - } - #endif - --#if 0 -- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -- "WaitForIdle (entering): %d entries, stat=0x%08x\n", -- INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, -- INREG(RADEON_RBBM_STATUS)); --#endif -- -- if (info->ChipFamily >= CHIP_FAMILY_R600) -- return; -- -- /* Wait for the engine to go idle */ -- RADEONWaitForFifoFunction(pScrn, 64); -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ /* Wait for the engine to go idle */ -+ if (info->ChipFamily >= CHIP_FAMILY_RV770) -+ R600WaitForFifoFunction(pScrn, 8); -+ else -+ R600WaitForFifoFunction(pScrn, 16); - -- for (;;) { -- for (i = 0; i < RADEON_TIMEOUT; i++) { -- if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) { -- RADEONEngineFlush(pScrn); -- return; -+ for (;;) { -+ for (i = 0; i < RADEON_TIMEOUT; i++) { -+ if (!(INREG(R600_GRBM_STATUS) & R600_GUI_ACTIVE)) -+ return; - } -- } -- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -- "Idle timed out: %u entries, stat=0x%08x\n", -- (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, -- (unsigned int)INREG(RADEON_RBBM_STATUS)); -- xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -- "Idle timed out, resetting engine...\n"); -- RADEONEngineReset(pScrn); -- RADEONEngineRestore(pScrn); -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Idle timed out: stat=0x%08x\n", -+ (unsigned int)INREG(R600_GRBM_STATUS)); -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Idle timed out, resetting engine...\n"); -+ R600EngineReset(pScrn); - #ifdef XF86DRI -- if (info->directRenderingEnabled) { -- RADEONCP_RESET(pScrn, info); -- RADEONCP_START(pScrn, info); -+ if (info->directRenderingEnabled) { -+ RADEONCP_RESET(pScrn, info); -+ RADEONCP_START(pScrn, info); -+ } -+#endif - } -+ } else { -+ /* Wait for the engine to go idle */ -+ RADEONWaitForFifoFunction(pScrn, 64); -+ -+ for (;;) { -+ for (i = 0; i < RADEON_TIMEOUT; i++) { -+ if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) { -+ RADEONEngineFlush(pScrn); -+ return; -+ } -+ } -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Idle timed out: %u entries, stat=0x%08x\n", -+ (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, -+ (unsigned int)INREG(RADEON_RBBM_STATUS)); -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Idle timed out, resetting engine...\n"); -+ RADEONEngineReset(pScrn); -+ RADEONEngineRestore(pScrn); -+#ifdef XF86DRI -+ if (info->directRenderingEnabled) { -+ RADEONCP_RESET(pScrn, info); -+ RADEONCP_START(pScrn, info); -+ } - #endif -+ } - } - } -diff --git a/src/radeon_crtc.c b/src/radeon_crtc.c -index 5a7c730..e360d4e 100644 ---- a/src/radeon_crtc.c -+++ b/src/radeon_crtc.c -@@ -77,7 +77,7 @@ radeon_crtc_dpms(xf86CrtcPtr crtc, int mode) - if ((mode == DPMSModeOn) && radeon_crtc->enabled) - return; - -- if (IS_AVIVO_VARIANT) { -+ if (IS_AVIVO_VARIANT || info->r4xx_atom) { - atombios_crtc_dpms(crtc, mode); - } else { - -@@ -271,7 +271,7 @@ radeon_crtc_mode_set(xf86CrtcPtr crtc, DisplayModePtr mode, - ScrnInfoPtr pScrn = crtc->scrn; - RADEONInfoPtr info = RADEONPTR(pScrn); - -- if (IS_AVIVO_VARIANT) { -+ if (IS_AVIVO_VARIANT || info->r4xx_atom) { - atombios_crtc_mode_set(crtc, mode, adjusted_mode, x, y); - } else { - legacy_crtc_mode_set(crtc, mode, adjusted_mode, x, y); -@@ -410,6 +410,14 @@ radeon_crtc_shadow_allocate (xf86CrtcPtr crtc, int width, int height) - int align = 4096, size; - int cpp = pScrn->bitsPerPixel / 8; - -+ /* No rotation without accel */ -+ if (((info->ChipFamily >= CHIP_FAMILY_R600) && !info->directRenderingEnabled) || -+ xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Acceleration required for rotation\n"); -+ return NULL; -+ } -+ - rotate_pitch = pScrn->displayWidth * cpp; - size = rotate_pitch * height; - -@@ -424,7 +432,7 @@ radeon_crtc_shadow_allocate (xf86CrtcPtr crtc, int width, int height) - - return info->FB + rotate_offset; - } -- -+ - /** - * Creates a pixmap for this CRTC's rotated shadow framebuffer. - */ -@@ -499,6 +507,8 @@ radeon_crtc_set_origin(xf86CrtcPtr crtc, int x, int y) - unsigned char *RADEONMMIO = info->MMIO; - - if (IS_AVIVO_VARIANT) { -+ x &= ~3; -+ y &= ~1; - atombios_lock_crtc(info->atomBIOS, radeon_crtc->crtc_id, 1); - OUTREG(AVIVO_D1MODE_VIEWPORT_START + radeon_crtc->crtc_offset, (x << 16) | y); - atombios_lock_crtc(info->atomBIOS, radeon_crtc->crtc_id, 0); -@@ -587,8 +597,7 @@ Bool RADEONAllocateControllers(ScrnInfoPtr pScrn, int mask) - RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); - RADEONInfoPtr info = RADEONPTR(pScrn); - -- if ((info->ChipFamily < CHIP_FAMILY_R600) && -- (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE))) { -+ if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { - radeon_crtc_funcs.shadow_create = radeon_crtc_shadow_create; - radeon_crtc_funcs.shadow_allocate = radeon_crtc_shadow_allocate; - radeon_crtc_funcs.shadow_destroy = radeon_crtc_shadow_destroy; -diff --git a/src/radeon_dri.c b/src/radeon_dri.c -index 59d9a83..9c9fc7f 100644 ---- a/src/radeon_dri.c -+++ b/src/radeon_dri.c -@@ -45,11 +45,14 @@ - #include "radeon.h" - #include "radeon_video.h" - #include "radeon_reg.h" -+#include "r600_reg.h" - #include "radeon_macros.h" - #include "radeon_drm.h" - #include "radeon_dri.h" - #include "radeon_version.h" - -+#include "atipciids.h" -+ - /* X and server generic header files */ - #include "xf86.h" - #include "xf86PciInfo.h" -@@ -722,7 +725,7 @@ static void RADEONDRIInitGARTValues(RADEONInfoPtr info) - /* AGP Mode Quirk List - Certain hostbridge/gfx-card combos don't work with - * the standard AGPMode settings, so we detect and handle these - * on a case-by-base basis with quirks. To see if an AGPMode is valid, test -- * it by setting Option "AGPMode" "1" (or "2", or "4" or "8"). */ -+ * it by setting Option "AGPMode" "1" (or "2", or "4", or "8"). */ - typedef struct { - unsigned int hostbridgeVendor; - unsigned int hostbridgeDevice; -@@ -737,48 +740,57 @@ typedef struct { - static radeon_agpmode_quirk radeon_agpmode_quirk_list[] = { - - /* Intel E7505 Memory Controller Hub / RV350 AR [Radeon 9600XT] Needs AGPMode 4 (deb #515326) */ -- { PCI_VENDOR_INTEL,0x2550, PCI_VENDOR_ATI,0x4152, 0x1458,0x4038, 4 }, -+ { PCI_VENDOR_INTEL,0x2550, PCI_VENDOR_ATI,0x4152, 0x1458,0x4038, 4 }, - /* Intel 82865G/PE/P DRAM Controller/Host-Hub / Mobility 9800 Needs AGPMode 4 (deb #462590) */ - { PCI_VENDOR_INTEL,0x2570, PCI_VENDOR_ATI,0x4a4e, PCI_VENDOR_DELL,0x5106, 4 }, -+ /* Intel 82865G/PE/P DRAM Controller/Host-Hub / RV280 [Radeon 9200 SE] Needs AGPMode 4 (lp #300304) */ -+ { PCI_VENDOR_INTEL,0x2570, PCI_VENDOR_ATI,0x5964, 0x148c,0x2073, 4 }, - /* Intel 82855PM Processor to I/O Controller / Mobility M6 LY Needs AGPMode 1 (deb #467235) */ -- { PCI_VENDOR_INTEL,0x3340, PCI_VENDOR_ATI,0x4c59, 0x1014,0x052f, 1 }, -+ { PCI_VENDOR_INTEL,0x3340, PCI_VENDOR_ATI,0x4c59, PCI_VENDOR_IBM,0x052f, 1 }, -+ /* Intel 82855PM host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #195051) */ -+ { PCI_VENDOR_INTEL,0x3340, PCI_VENDOR_ATI,0x4e50, PCI_VENDOR_IBM,0x0550, 1 }, -+ /* Intel 82855PM host bridge / Mobility M9+ / VaioPCG-V505DX Needs AGPMode 2 (fdo #17928) */ -+ { PCI_VENDOR_INTEL,0x3340, PCI_VENDOR_ATI,0x5c61, PCI_VENDOR_SONY,0x816b, 2 }, - /* Intel 82855PM Processor to I/O Controller / Mobility M9+ Needs AGPMode 8 (phoronix forum) */ -- { PCI_VENDOR_INTEL,0x3340, PCI_VENDOR_ATI,0x5c61, 0x104d,0x8195, 8 }, -+ { PCI_VENDOR_INTEL,0x3340, PCI_VENDOR_ATI,0x5c61, PCI_VENDOR_SONY,0x8195, 8 }, - /* Intel 82830 830 Chipset Host Bridge / Mobility M6 LY Needs AGPMode 2 (fdo #17360)*/ - { PCI_VENDOR_INTEL,0x3575, PCI_VENDOR_ATI,0x4c59, PCI_VENDOR_DELL,0x00e3, 2 }, -+ /* Intel 82852/82855 host bridge / Mobility FireGL 9000 R250 Needs AGPMode 1 (lp #296617) */ -+ { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4c66, PCI_VENDOR_DELL,0x0149, 1 }, - /* Intel 82852/82855 host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (deb #467460) */ -- { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, 0x1025,0x0061, 1 }, -+ { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, 0x1025,0x0061, 1 }, - /* Intel 82852/82855 host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #203007) */ -- { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, 0x1025,0x0064, 1 }, -+ { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, 0x1025,0x0064, 1 }, - /* Intel 82852/82855 host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #141551) */ -- { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, 0x1043,0x1942, 1 }, -- /* Intel 82852/82855 host bridge / Mobility FireGL 9000 R250 Needs AGPMode 1 (lp #296617) */ -- { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4c66, 0x1028,0x0149, 1 }, -+ { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, PCI_VENDOR_ASUS,0x1942, 1 }, - /* Intel 82852/82855 host bridge / Mobility 9600/9700 Needs AGPMode 1 (deb #510208) */ -- { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, 0x10cf,0x127f, 1 }, -+ { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, 0x10cf,0x127f, 1 }, - -- /* ASRock K7VT4A+ AGP 8x / ATI Radeon 9250 AGP Needs AGPMode 4 (LP: #133192) */ -- { 0x1849,0x3189, PCI_VENDOR_ATI,0x5960, 0x1787, 0x5960, 4 }, -+ /* ASRock K7VT4A+ AGP 8x / ATI Radeon 9250 AGP Needs AGPMode 4 (lp #133192) */ -+ { 0x1849,0x3189, PCI_VENDOR_ATI,0x5960, 0x1787,0x5960, 4 }, - -+ /* VIA K8M800 Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 4 (fdo #12544) */ -+ { PCI_VENDOR_VIA,0x0204, PCI_VENDOR_ATI,0x5960, 0x17af,0x2020, 4 }, -+ /* VIA KT880 Host Bridge / RV350 [Radeon 9550] Needs AGPMode 4 (fdo #19981) */ -+ { PCI_VENDOR_VIA,0x0269, PCI_VENDOR_ATI,0x4153, PCI_VENDOR_ASUS,0x003c, 4 }, - /* VIA VT8363 Host Bridge / R200 QL [Radeon 8500] Needs AGPMode 2 (lp #141551) */ -- { 0x1106,0x0305, PCI_VENDOR_ATI,0x514c, 0x1002,0x013a, 2 }, -+ { PCI_VENDOR_VIA,0x0305, PCI_VENDOR_ATI,0x514c, PCI_VENDOR_ATI,0x013a, 2 }, -+ /* VIA VT82C693A Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 2 (deb #515512) */ -+ { PCI_VENDOR_VIA,0x0691, PCI_VENDOR_ATI,0x5960, PCI_VENDOR_ASUS,0x004c, 2 }, -+ /* VIA VT82C693A Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 2 */ -+ { PCI_VENDOR_VIA,0x0691, PCI_VENDOR_ATI,0x5960, PCI_VENDOR_ASUS,0x0054, 2 }, - /* VIA VT8377 Host Bridge / R200 QM [Radeon 9100] Needs AGPMode 4 (deb #461144) */ -- { 0x1106,0x3189, PCI_VENDOR_ATI,0x514d, 0x174b,0x7149, 4 }, -+ { PCI_VENDOR_VIA,0x3189, PCI_VENDOR_ATI,0x514d, 0x174b,0x7149, 4 }, - /* VIA VT8377 Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 4 (lp #312693) */ -- { 0x1106,0x3189, PCI_VENDOR_ATI,0x5960, 0x1462,0x0380, 4 }, -+ { PCI_VENDOR_VIA,0x3189, PCI_VENDOR_ATI,0x5960, 0x1462,0x0380, 4 }, - /* VIA VT8377 Host Bridge / RV280 Needs AGPMode 4 (ati ML) */ -- { 0x1106,0x3189, PCI_VENDOR_ATI,0x5964, 0x148c,0x2073, 4 }, -- /* VIA VT82C693A Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 2 */ -- { 0x1106,0x0691, PCI_VENDOR_ATI,0x5960, 0x1043,0x0054, 2 }, -- /* VIA VT82C693A Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 2 (deb #515512) */ -- { 0x1106,0x0691, PCI_VENDOR_ATI,0x5960, 0x1043,0x004c, 2 }, -- /* VIA K8M800 Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 4 (fdo #12544) */ -- { 0x1106,0x0204, PCI_VENDOR_ATI,0x5960, 0x17af,0x2020, 4 }, -- /* VIA KT880 Host Bridge / RV350 [Radeon 9550] Needs AGPMode 4 (fdo #19981) */ -- { 0x1106,0x0269, PCI_VENDOR_ATI,0x4153, 0x1043,0x003c, 4 }, -+ { PCI_VENDOR_VIA,0x3189, PCI_VENDOR_ATI,0x5964, 0x148c,0x2073, 4 }, - - /* ATI Host Bridge / RV280 [M9+] Needs AGPMode 1 (phoronix forum) */ -- { 0x1002,0xcbb2, PCI_VENDOR_ATI,0x5c61, 0x104d,0x8175, 1 }, -+ { PCI_VENDOR_ATI,0xcbb2, PCI_VENDOR_ATI,0x5c61, PCI_VENDOR_SONY,0x8175, 1 }, -+ -+ /* HP Host Bridge / R300 [FireGL X1] Needs AGPMode 2 (fdo #7770) */ -+ { PCI_VENDOR_HP,0x122e, PCI_VENDOR_ATI,0x4e47, PCI_VENDOR_ATI,0x0152, 2 }, - - { 0, 0, 0, 0, 0, 0, 0 }, - }; -@@ -790,92 +802,96 @@ static Bool RADEONSetAgpMode(RADEONInfoPtr info, ScreenPtr pScreen) - unsigned long mode = drmAgpGetMode(info->dri->drmFD); /* Default mode */ - unsigned int vendor = drmAgpVendorId(info->dri->drmFD); - unsigned int device = drmAgpDeviceId(info->dri->drmFD); -- /* ignore agp 3.0 mode bit from the chip as it's buggy on some cards with -- pcie-agp rialto bridge chip - use the one from bridge which must match */ -- uint32_t agp_status = (INREG(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode; -- Bool is_v3 = (agp_status & RADEON_AGPv3_MODE); -- unsigned int defaultMode; -- MessageType from; - -- if (is_v3) { -- defaultMode = (agp_status & RADEON_AGPv3_8X_MODE) ? 8 : 4; -- } else { -- if (agp_status & RADEON_AGP_4X_MODE) defaultMode = 4; -- else if (agp_status & RADEON_AGP_2X_MODE) defaultMode = 2; -- else defaultMode = 1; -- } -- -- /* Apply AGPMode Quirks */ -- radeon_agpmode_quirk_ptr p = radeon_agpmode_quirk_list; -- while (p && p->chipDevice != 0) { -- if (vendor == p->hostbridgeVendor && -- device == p->hostbridgeDevice && -- PCI_DEV_VENDOR_ID(info->PciInfo) == p->chipVendor && -- PCI_DEV_DEVICE_ID(info->PciInfo) == p->chipDevice && -- PCI_SUB_VENDOR_ID(info->PciInfo) == p->subsysVendor && -- PCI_SUB_DEVICE_ID(info->PciInfo) == p->subsysDevice) -- { -- defaultMode = p->defaultMode; -- } -- ++p; -- } -+ if (info->ChipFamily < CHIP_FAMILY_R600) { -+ /* ignore agp 3.0 mode bit from the chip as it's buggy on some cards with -+ pcie-agp rialto bridge chip - use the one from bridge which must match */ -+ uint32_t agp_status = (INREG(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode; -+ Bool is_v3 = (agp_status & RADEON_AGPv3_MODE); -+ unsigned int defaultMode; -+ MessageType from; - -- from = X_DEFAULT; -+ if (is_v3) { -+ defaultMode = (agp_status & RADEON_AGPv3_8X_MODE) ? 8 : 4; -+ } else { -+ if (agp_status & RADEON_AGP_4X_MODE) defaultMode = 4; -+ else if (agp_status & RADEON_AGP_2X_MODE) defaultMode = 2; -+ else defaultMode = 1; -+ } - -- if (xf86GetOptValInteger(info->Options, OPTION_AGP_MODE, &info->dri->agpMode)) { -- if ((info->dri->agpMode < (is_v3 ? 4 : 1)) || -- (info->dri->agpMode > (is_v3 ? 8 : 4)) || -- (info->dri->agpMode & (info->dri->agpMode - 1))) { -- xf86DrvMsg(pScreen->myNum, X_ERROR, -- "Illegal AGP Mode: %d (valid values: %s), leaving at " -- "%dx\n", info->dri->agpMode, is_v3 ? "4, 8" : "1, 2, 4", -- defaultMode); -- info->dri->agpMode = defaultMode; -+ /* Apply AGPMode Quirks */ -+ radeon_agpmode_quirk_ptr p = radeon_agpmode_quirk_list; -+ while (p && p->chipDevice != 0) { -+ if (vendor == p->hostbridgeVendor && -+ device == p->hostbridgeDevice && -+ PCI_DEV_VENDOR_ID(info->PciInfo) == p->chipVendor && -+ PCI_DEV_DEVICE_ID(info->PciInfo) == p->chipDevice && -+ PCI_SUB_VENDOR_ID(info->PciInfo) == p->subsysVendor && -+ PCI_SUB_DEVICE_ID(info->PciInfo) == p->subsysDevice) -+ { -+ defaultMode = p->defaultMode; -+ } -+ ++p; -+ } -+ -+ from = X_DEFAULT; -+ -+ if (xf86GetOptValInteger(info->Options, OPTION_AGP_MODE, &info->dri->agpMode)) { -+ if ((info->dri->agpMode < (is_v3 ? 4 : 1)) || -+ (info->dri->agpMode > (is_v3 ? 8 : 4)) || -+ (info->dri->agpMode & (info->dri->agpMode - 1))) { -+ xf86DrvMsg(pScreen->myNum, X_ERROR, -+ "Illegal AGP Mode: %d (valid values: %s), leaving at " -+ "%dx\n", info->dri->agpMode, is_v3 ? "4, 8" : "1, 2, 4", -+ defaultMode); -+ info->dri->agpMode = defaultMode; -+ } else -+ from = X_CONFIG; - } else -- from = X_CONFIG; -- } else -- info->dri->agpMode = defaultMode; -+ info->dri->agpMode = defaultMode; - -- xf86DrvMsg(pScreen->myNum, from, "Using AGP %dx\n", info->dri->agpMode); -+ xf86DrvMsg(pScreen->myNum, from, "Using AGP %dx\n", info->dri->agpMode); - -- mode &= ~RADEON_AGP_MODE_MASK; -- if (is_v3) { -- /* only set one mode bit for AGPv3 */ -- switch (info->dri->agpMode) { -- case 8: mode |= RADEON_AGPv3_8X_MODE; break; -- case 4: default: mode |= RADEON_AGPv3_4X_MODE; -- } -- /*TODO: need to take care of other bits valid for v3 mode -- * currently these bits are not used in all tested cards. -- */ -- } else { -- switch (info->dri->agpMode) { -- case 4: mode |= RADEON_AGP_4X_MODE; -- case 2: mode |= RADEON_AGP_2X_MODE; -- case 1: default: mode |= RADEON_AGP_1X_MODE; -+ mode &= ~RADEON_AGP_MODE_MASK; -+ if (is_v3) { -+ /* only set one mode bit for AGPv3 */ -+ switch (info->dri->agpMode) { -+ case 8: mode |= RADEON_AGPv3_8X_MODE; break; -+ case 4: default: mode |= RADEON_AGPv3_4X_MODE; -+ } -+ /*TODO: need to take care of other bits valid for v3 mode -+ * currently these bits are not used in all tested cards. -+ */ -+ } else { -+ switch (info->dri->agpMode) { -+ case 4: mode |= RADEON_AGP_4X_MODE; -+ case 2: mode |= RADEON_AGP_2X_MODE; -+ case 1: default: mode |= RADEON_AGP_1X_MODE; -+ } - } -- } - -- /* AGP Fast Writes. -- * TODO: take into account that certain agp modes don't support fast -- * writes at all */ -- mode &= ~RADEON_AGP_FW_MODE; /* Disable per default */ -- if (xf86ReturnOptValBool(info->Options, OPTION_AGP_FW, FALSE)) { -- xf86DrvMsg(pScreen->myNum, X_WARNING, -- "WARNING: Using the AGPFastWrite option is not recommended.\n"); -- xf86Msg(X_NONE, "\tThis option does not provide much of a noticable speed" -- " boost, while it\n\twill probably hard lock your machine." -- " All bets are off!\n"); -- -- /* Black list some host/AGP bridges. */ -- if ((vendor == PCI_VENDOR_AMD) && (device == PCI_CHIP_AMD761)) -- xf86DrvMsg(pScreen->myNum, X_PROBED, "Ignoring AGPFastWrite option " -- "for the AMD 761 northbridge.\n"); -- else { -- xf86DrvMsg(pScreen->myNum, X_CONFIG, "Enabling AGP Fast Writes.\n"); -- mode |= RADEON_AGP_FW_MODE; -- } -- } /* Don't mention this otherwise, so that people don't get funny ideas */ -+ /* AGP Fast Writes. -+ * TODO: take into account that certain agp modes don't support fast -+ * writes at all */ -+ mode &= ~RADEON_AGP_FW_MODE; /* Disable per default */ -+ if (xf86ReturnOptValBool(info->Options, OPTION_AGP_FW, FALSE)) { -+ xf86DrvMsg(pScreen->myNum, X_WARNING, -+ "WARNING: Using the AGPFastWrite option is not recommended.\n"); -+ xf86Msg(X_NONE, "\tThis option does not provide much of a noticable speed" -+ " boost, while it\n\twill probably hard lock your machine." -+ " All bets are off!\n"); -+ -+ /* Black list some host/AGP bridges. */ -+ if ((vendor == PCI_VENDOR_AMD) && (device == PCI_CHIP_AMD761)) -+ xf86DrvMsg(pScreen->myNum, X_PROBED, "Ignoring AGPFastWrite option " -+ "for the AMD 761 northbridge.\n"); -+ else { -+ xf86DrvMsg(pScreen->myNum, X_CONFIG, "Enabling AGP Fast Writes.\n"); -+ mode |= RADEON_AGP_FW_MODE; -+ } -+ } /* Don't mention this otherwise, so that people don't get funny ideas */ -+ } else -+ info->dri->agpMode = 8; /* doesn't matter at this point */ - - xf86DrvMsg(pScreen->myNum, X_INFO, - "[agp] Mode 0x%08lx [AGP 0x%04x/0x%04x; Card 0x%04x/0x%04x 0x%04x/0x%04x]\n", -@@ -910,6 +926,9 @@ static void RADEONSetAgpBase(RADEONInfoPtr info, ScreenPtr pScreen) - ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; - unsigned char *RADEONMMIO = info->MMIO; - -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ return; -+ - /* drm already does this, so we can probably remove this. - * agp_base_2 ? - */ -@@ -1183,13 +1202,14 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) - drm_radeon_init_t drmInfo; - - memset(&drmInfo, 0, sizeof(drm_radeon_init_t)); -- if ( info->ChipFamily >= CHIP_FAMILY_R300 ) -- drmInfo.func = RADEON_INIT_R300_CP; -- else -- if ( info->ChipFamily >= CHIP_FAMILY_R200 ) -- drmInfo.func = RADEON_INIT_R200_CP; -+ if ( info->ChipFamily >= CHIP_FAMILY_R600 ) -+ drmInfo.func = RADEON_INIT_R600_CP; -+ else if ( info->ChipFamily >= CHIP_FAMILY_R300 ) -+ drmInfo.func = RADEON_INIT_R300_CP; -+ else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) -+ drmInfo.func = RADEON_INIT_R200_CP; - else -- drmInfo.func = RADEON_INIT_CP; -+ drmInfo.func = RADEON_INIT_CP; - - drmInfo.sarea_priv_offset = sizeof(XF86DRISAREARec); - drmInfo.is_pci = (info->cardType!=CARD_AGP); -@@ -1223,7 +1243,8 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) - * registers back to their default values, so we need to restore - * those engine register here. - */ -- RADEONEngineRestore(pScrn); -+ if (info->ChipFamily < CHIP_FAMILY_R600) -+ RADEONEngineRestore(pScrn); - - return TRUE; - } -@@ -1299,14 +1320,16 @@ static void RADEONDRIIrqInit(RADEONInfoPtr info, ScreenPtr pScreen) - "[drm] falling back to irq-free operation\n"); - info->dri->irq = 0; - } else { -- unsigned char *RADEONMMIO = info->MMIO; -- info->ModeReg->gen_int_cntl = INREG( RADEON_GEN_INT_CNTL ); -- -- /* Let the DRM know it can safely disable the vblank interrupts */ -- radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], -- FALSE); -- radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], -- TRUE); -+ if (info->ChipFamily < CHIP_FAMILY_R600) { -+ unsigned char *RADEONMMIO = info->MMIO; -+ info->ModeReg->gen_int_cntl = INREG( RADEON_GEN_INT_CNTL ); -+ -+ /* Let the DRM know it can safely disable the vblank interrupts */ -+ radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], -+ FALSE); -+ radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], -+ TRUE); -+ } - } - } - -@@ -1840,7 +1863,8 @@ void RADEONDRIResume(ScreenPtr pScreen) - /* FIXME: return? */ - } - -- RADEONEngineRestore(pScrn); -+ if (info->ChipFamily < CHIP_FAMILY_R600) -+ RADEONEngineRestore(pScrn); - - RADEONDRICPInit(pScrn); - } -@@ -2303,8 +2327,12 @@ int RADEONDRIGetPciAperTableSize(ScrnInfoPtr pScrn) - int num_pages; - - num_pages = (info->dri->pciAperSize * 1024 * 1024) / 4096; -- -- ret_size = num_pages * sizeof(unsigned int); -+ -+ if ((info->ChipFamily >= CHIP_FAMILY_R600) || -+ (info->ChipFamily == CHIP_FAMILY_RS600)) -+ ret_size = num_pages * sizeof(uint64_t); -+ else -+ ret_size = num_pages * sizeof(unsigned int); - - return ret_size; - } -diff --git a/src/radeon_driver.c b/src/radeon_driver.c -index 32cb307..c0f5e7b 100644 ---- a/src/radeon_driver.c -+++ b/src/radeon_driver.c -@@ -193,6 +193,7 @@ static const OptionInfoRec RADEONOptions[] = { - { OPTION_INT10, "Int10", OPTV_BOOLEAN, {0}, FALSE }, - { OPTION_EXA_VSYNC, "EXAVSync", OPTV_BOOLEAN, {0}, FALSE }, - { OPTION_ATOM_TVOUT, "ATOMTVOut", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_R4XX_ATOM, "R4xxATOM", OPTV_BOOLEAN, {0}, FALSE }, - { -1, NULL, OPTV_NONE, {0}, FALSE } - }; - -@@ -347,7 +348,39 @@ static Bool RADEONGetRec(ScrnInfoPtr pScrn) - /* Free our private RADEONInfoRec */ - static void RADEONFreeRec(ScrnInfoPtr pScrn) - { -+ RADEONInfoPtr info; -+ int i; -+ - if (!pScrn || !pScrn->driverPrivate) return; -+ -+ info = RADEONPTR(pScrn); -+ -+ if (info->cp) { -+ xfree(info->cp); -+ info->cp = NULL; -+ } -+ -+ if (info->dri) { -+ xfree(info->dri); -+ info->dri = NULL; -+ } -+ -+ if (info->accel_state) { -+ xfree(info->accel_state); -+ info->accel_state = NULL; -+ } -+ -+ for (i = 0; i < RADEON_MAX_BIOS_CONNECTOR; i++) { -+ if (info->encoders[i]) { -+ if (info->encoders[i]->dev_priv) { -+ xfree(info->encoders[i]->dev_priv); -+ info->encoders[i]->dev_priv = NULL; -+ } -+ xfree(info->encoders[i]); -+ info->encoders[i]= NULL; -+ } -+ } -+ - xfree(pScrn->driverPrivate); - pScrn->driverPrivate = NULL; - } -@@ -578,7 +611,7 @@ unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr) - OUTREG(RS690_MC_INDEX, (addr & RS690_MC_INDEX_MASK)); - data = INREG(RS690_MC_DATA); - } else if (info->ChipFamily == CHIP_FAMILY_RS600) { -- OUTREG(RS600_MC_INDEX, (addr & RS600_MC_INDEX_MASK)); -+ OUTREG(RS600_MC_INDEX, ((addr & RS600_MC_ADDR_MASK) | RS600_MC_IND_CITF_ARB0)); - data = INREG(RS600_MC_DATA); - } else if (IS_AVIVO_VARIANT) { - OUTREG(AVIVO_MC_INDEX, (addr & 0xff) | 0x7f0000); -@@ -591,7 +624,7 @@ unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr) - OUTREG(R300_MC_IND_INDEX, addr & 0x3f); - (void)INREG(R300_MC_IND_INDEX); - data = INREG(R300_MC_IND_DATA); -- -+ - OUTREG(R300_MC_IND_INDEX, 0); - (void)INREG(R300_MC_IND_INDEX); - } -@@ -612,10 +645,10 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, uint32_t data) - OUTREG(RS690_MC_DATA, data); - OUTREG(RS690_MC_INDEX, RS690_MC_INDEX_WR_ACK); - } else if (info->ChipFamily == CHIP_FAMILY_RS600) { -- OUTREG(RS600_MC_INDEX, ((addr & RS600_MC_INDEX_MASK) | -- RS600_MC_INDEX_WR_EN)); -+ OUTREG(RS600_MC_INDEX, ((addr & RS600_MC_ADDR_MASK) | -+ RS600_MC_IND_CITF_ARB0 | -+ RS600_MC_IND_WR_EN)); - OUTREG(RS600_MC_DATA, data); -- OUTREG(RS600_MC_INDEX, RS600_MC_INDEX_WR_ACK); - } else if (IS_AVIVO_VARIANT) { - OUTREG(AVIVO_MC_INDEX, (addr & 0xff) | 0xff0000); - (void)INREG(AVIVO_MC_INDEX); -@@ -635,17 +668,20 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, uint32_t data) - static Bool avivo_get_mc_idle(ScrnInfoPtr pScrn) - { - RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; - - if (info->ChipFamily >= CHIP_FAMILY_R600) { -- /* no idea where this is on r600 yet */ -- return TRUE; -+ if (INREG(R600_SRBM_STATUS) & 0x3f00) -+ return FALSE; -+ else -+ return TRUE; - } else if (info->ChipFamily == CHIP_FAMILY_RV515) { - if (INMC(pScrn, RV515_MC_STATUS) & RV515_MC_STATUS_IDLE) - return TRUE; - else - return FALSE; - } else if (info->ChipFamily == CHIP_FAMILY_RS600) { -- if (INMC(pScrn, RS600_MC_STATUS) & RS600_MC_STATUS_IDLE) -+ if (INMC(pScrn, RS600_MC_STATUS) & RS600_MC_IDLE) - return TRUE; - else - return FALSE; -@@ -674,8 +710,8 @@ static void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_ - if (mask & LOC_FB) - OUTREG(R700_MC_VM_FB_LOCATION, fb_loc); - if (mask & LOC_AGP) { -- OUTREG(R600_MC_VM_AGP_BOT, agp_loc); -- OUTREG(R600_MC_VM_AGP_TOP, agp_loc_hi); -+ OUTREG(R700_MC_VM_AGP_BOT, agp_loc); -+ OUTREG(R700_MC_VM_AGP_TOP, agp_loc_hi); - } - } else if (info->ChipFamily >= CHIP_FAMILY_R600) { - if (mask & LOC_FB) -@@ -693,8 +729,8 @@ static void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_ - } else if (info->ChipFamily == CHIP_FAMILY_RS600) { - if (mask & LOC_FB) - OUTMC(pScrn, RS600_MC_FB_LOCATION, fb_loc); -- /* if (mask & LOC_AGP) -- OUTMC(pScrn, RS600_MC_AGP_LOCATION, agp_loc);*/ -+ if (mask & LOC_AGP) -+ OUTMC(pScrn, RS600_MC_AGP_LOCATION, agp_loc); - } else if ((info->ChipFamily == CHIP_FAMILY_RS690) || - (info->ChipFamily == CHIP_FAMILY_RS740)) { - if (mask & LOC_FB) -@@ -724,8 +760,8 @@ static void radeon_read_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_t - if (mask & LOC_FB) - *fb_loc = INREG(R700_MC_VM_FB_LOCATION); - if (mask & LOC_AGP) { -- *agp_loc = INREG(R600_MC_VM_AGP_BOT); -- *agp_loc_hi = INREG(R600_MC_VM_AGP_TOP); -+ *agp_loc = INREG(R700_MC_VM_AGP_BOT); -+ *agp_loc_hi = INREG(R700_MC_VM_AGP_TOP); - } - } else if (info->ChipFamily >= CHIP_FAMILY_R600) { - if (mask & LOC_FB) -@@ -745,7 +781,7 @@ static void radeon_read_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_t - if (mask & LOC_FB) - *fb_loc = INMC(pScrn, RS600_MC_FB_LOCATION); - if (mask & LOC_AGP) { -- *agp_loc = 0;//INMC(pScrn, RS600_MC_AGP_LOCATION); -+ *agp_loc = INMC(pScrn, RS600_MC_AGP_LOCATION); - *agp_loc_hi = 0; - } - } else if ((info->ChipFamily == CHIP_FAMILY_RS690) || -@@ -1258,8 +1294,8 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) - { - RADEONInfoPtr info = RADEONPTR(pScrn); - unsigned char *RADEONMMIO = info->MMIO; -- uint32_t mem_size; -- uint32_t aper_size; -+ uint64_t mem_size; -+ uint64_t aper_size; - - radeon_read_mc_fb_agp_location(pScrn, LOC_FB | LOC_AGP, &info->mc_fb_location, - &info->mc_agp_location, &info->mc_agp_location_hi); -@@ -1306,7 +1342,7 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) - else - #endif - { -- uint32_t aper0_base; -+ uint64_t aper0_base; - - if (info->ChipFamily >= CHIP_FAMILY_R600) { - aper0_base = INREG(R600_CONFIG_F0_BASE); -@@ -1330,33 +1366,29 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) - aper0_base &= ~(mem_size - 1); - - if (info->ChipFamily >= CHIP_FAMILY_R600) { -- info->mc_fb_location = (aper0_base >> 24) | -- (((aper0_base + mem_size - 1) & 0xff000000U) >> 8); -+ uint64_t mc_fb = ((aper0_base >> 24) & 0xffff) | -+ (((aper0_base + mem_size - 1) >> 8) & 0xffff0000); -+ info->mc_fb_location = mc_fb & 0xffffffff; - ErrorF("mc fb loc is %08x\n", (unsigned int)info->mc_fb_location); - } else { -- info->mc_fb_location = (aper0_base >> 16) | -+ uint64_t mc_fb = ((aper0_base >> 16) & 0xffff) | - ((aper0_base + mem_size - 1) & 0xffff0000U); -+ info->mc_fb_location = mc_fb & 0xffffffff; - } - } - } - if (info->ChipFamily >= CHIP_FAMILY_R600) { - info->fbLocation = (info->mc_fb_location & 0xffff) << 24; - } else { -- info->fbLocation = (info->mc_fb_location & 0xffff) << 16; -+ info->fbLocation = (info->mc_fb_location & 0xffff) << 16; - } - /* Just disable the damn AGP apertures for now, it may be - * re-enabled later by the DRM - */ -- -- if (IS_AVIVO_VARIANT) { -- if (info->ChipFamily >= CHIP_FAMILY_R600) { -- OUTREG(R600_HDP_NONSURFACE_BASE, (info->mc_fb_location << 16) & 0xff0000); -- } else { -- OUTREG(AVIVO_HDP_FB_LOCATION, info->mc_fb_location); -- } -- info->mc_agp_location = 0x003f0000; -- } else -- info->mc_agp_location = 0xffffffc0; -+ if (IS_AVIVO_VARIANT) -+ info->mc_agp_location = 0x003f0000; -+ else -+ info->mc_agp_location = 0xffffffc0; - xf86DrvMsg(pScrn->scrnIndex, X_INFO, - "RADEONInitMemoryMap() : \n"); - xf86DrvMsg(pScrn->scrnIndex, X_INFO, -@@ -1503,6 +1535,9 @@ static uint32_t RADEONGetAccessibleVRAM(ScrnInfoPtr pScrn) - info->dri->newMemoryMap = TRUE; - #endif /* XF86DRI */ - -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ return aper_size; -+ - /* Set HDP_APER_CNTL only on cards that are known not to be broken, - * that is has the 2nd generation multifunction PCI interface - */ -@@ -1511,7 +1546,7 @@ static uint32_t RADEONGetAccessibleVRAM(ScrnInfoPtr pScrn) - info->ChipFamily == CHIP_FAMILY_RV380 || - info->ChipFamily == CHIP_FAMILY_R420 || - info->ChipFamily == CHIP_FAMILY_RV410 || -- IS_AVIVO_VARIANT) { -+ IS_AVIVO_VARIANT) { - OUTREGP (RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, - ~RADEON_HDP_APER_CNTL); - xf86DrvMsg(pScrn->scrnIndex, X_INFO, -@@ -1588,9 +1623,10 @@ static Bool RADEONPreInitVRAM(ScrnInfoPtr pScrn) - if (pScrn->videoRam > accessible) - pScrn->videoRam = accessible; - -- if (!IS_AVIVO_VARIANT) -+ if (!IS_AVIVO_VARIANT) { - info->MemCntl = INREG(RADEON_SDRAM_MODE_REG); -- info->BusCntl = INREG(RADEON_BUS_CNTL); -+ info->BusCntl = INREG(RADEON_BUS_CNTL); -+ } - - RADEONGetVRamType(pScrn); - -@@ -1876,7 +1912,14 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) - - /* treat PCIE IGP cards as PCI */ - if (info->cardType == CARD_PCIE && info->IsIGP) -- info->cardType = CARD_PCI; -+ info->cardType = CARD_PCI; -+ -+ if ((info->ChipFamily >= CHIP_FAMILY_R600) && info->IsIGP) -+ info->cardType = CARD_PCIE; -+ -+ /* not sure about gart table requirements */ -+ if ((info->ChipFamily == CHIP_FAMILY_RS600) && info->IsIGP) -+ info->cardType = CARD_PCIE; - - if ((s = xf86GetOptValString(info->Options, OPTION_BUS_TYPE))) { - if (strcmp(s, "AGP") == 0) { -@@ -1905,12 +1948,11 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) - info->Chipset != PCI_CHIP_RN50_5969); - #endif - -- if (info->ChipFamily >= CHIP_FAMILY_R600) { -- info->r600_shadow_fb = TRUE; -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, -- "using shadow framebuffer\n"); -- if (!xf86LoadSubModule(pScrn, "shadow")) -- return FALSE; -+ info->r4xx_atom = FALSE; -+ if (((info->ChipFamily == CHIP_FAMILY_R420) || (info->ChipFamily == CHIP_FAMILY_RV410)) && -+ xf86ReturnOptValBool(info->Options, OPTION_R4XX_ATOM, FALSE)) { -+ info->r4xx_atom = TRUE; -+ xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Using ATOMBIOS for R4xx chip\n"); - } - - return TRUE; -@@ -1989,8 +2031,8 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) - - if (info->ChipFamily >= CHIP_FAMILY_R600) { - xf86DrvMsg(pScrn->scrnIndex, X_DEFAULT, -- "No acceleration support available on R600 yet.\n"); -- return TRUE; -+ "Will attempt to use R6xx/R7xx EXA support if DRI is enabled.\n"); -+ info->useEXA = TRUE; - } - - if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { -@@ -2145,17 +2187,15 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) - return FALSE; - - if (info->Chipset == PCI_CHIP_RN50_515E || -- info->Chipset == PCI_CHIP_RN50_5969 || -- info->ChipFamily == CHIP_FAMILY_RS600 || -- info->ChipFamily >= CHIP_FAMILY_R600) { -+ info->Chipset == PCI_CHIP_RN50_5969) { - if (xf86ReturnOptValBool(info->Options, OPTION_DRI, FALSE)) { - xf86DrvMsg(pScrn->scrnIndex, X_WARNING, -- "Direct rendering for RN50/RS600/R600 forced on -- " -+ "Direct rendering for RN50 forced on -- " - "This is NOT officially supported at the hardware level " - "and may cause instability or lockups\n"); - } else { - xf86DrvMsg(pScrn->scrnIndex, X_INFO, -- "Direct rendering not officially supported on RN50/RS600/R600\n"); -+ "Direct rendering not officially supported on RN50\n"); - return FALSE; - } - } -@@ -2173,11 +2213,6 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) - return FALSE; - } - -- if (!(info->dri = xcalloc(1, sizeof(struct radeon_dri)))) { -- ErrorF("Unable to allocate dri rec!\n"); -- return FALSE; -- } -- - info->dri->pLibDRMVersion = NULL; - info->dri->pKernelDRMVersion = NULL; - -@@ -2887,6 +2922,16 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) - */ - info->directRenderingEnabled = RADEONPreInitDRI(pScrn); - #endif -+ if (!info->directRenderingEnabled) { -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ info->r600_shadow_fb = TRUE; -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, -+ "using shadow framebuffer\n"); -+ if (!xf86LoadSubModule(pScrn, "shadow")) -+ info->r600_shadow_fb = FALSE; -+ } -+ } -+ - if (!RADEONPreInitVRAM(pScrn)) - goto fail; - -@@ -2952,6 +2997,24 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) - goto fail; - } - -+ /* fix up cloning on rn50 cards -+ * since they only have one crtc sometimes the xserver doesn't assign -+ * a crtc to one of the outputs even though both outputs have common modes -+ * which results in only one monitor being enabled. Assign a crtc here so -+ * that both outputs light up. -+ */ -+ if (info->ChipFamily == CHIP_FAMILY_RV100 && !pRADEONEnt->HasCRTC2) { -+ int i; -+ -+ for (i = 0; i < xf86_config->num_output; i++) { -+ xf86OutputPtr output = xf86_config->output[i]; -+ -+ /* XXX: double check crtc mode */ -+ if ((output->probed_modes != NULL) && (output->crtc == NULL)) -+ output->crtc = xf86_config->crtc[0]; -+ } -+ } -+ - ErrorF("after xf86InitialConfiguration\n"); - - RADEONSetPitch(pScrn); -@@ -3644,11 +3707,9 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, - RADEONDGAInit(pScreen); - - /* Init Xv */ -- if (info->ChipFamily < CHIP_FAMILY_R600) { -- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -- "Initializing Xv\n"); -- RADEONInitVideo(pScreen); -- } -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Initializing Xv\n"); -+ RADEONInitVideo(pScreen); - - if (info->r600_shadow_fb == TRUE) { - if (!shadowSetup(pScreen)) { -@@ -3771,9 +3832,10 @@ void RADEONRestoreMemMapRegisters(ScrnInfoPtr pScrn, - } else { - OUTREG(R600_HDP_NONSURFACE_BASE, (restore->mc_fb_location << 16) & 0xff0000); - } -- -+ - /* Reset the engine and HDP */ -- RADEONEngineReset(pScrn); -+ if (info->ChipFamily < CHIP_FAMILY_R600) -+ RADEONEngineReset(pScrn); - } - } else { - -@@ -3958,7 +4020,7 @@ static void RADEONAdjustMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save) - } - - #ifdef USE_EXA -- if (info->accelDFS) -+ if (info->accelDFS || (info->ChipFamily >= CHIP_FAMILY_R600)) - { - drm_radeon_getparam_t gp; - int gart_base; -@@ -4419,15 +4481,40 @@ avivo_save(ScrnInfoPtr pScrn, RADEONSavePtr save) - state->aux_cntl2[j] = INREG(i + 0x040); - state->aux_cntl3[j] = INREG(i + 0x400); - state->aux_cntl4[j] = INREG(i + 0x440); -+ if (IS_DCE32_VARIANT) { -+ state->aux_cntl5[j] = INREG(i + 0x500); -+ state->aux_cntl6[j] = INREG(i + 0x540); -+ } - j++; - } - - j = 0; - /* save UNIPHY regs */ -- for (i = 0x7ec0; i <= 0x7edc; i += 4) { -- state->uniphy1[j] = INREG(i); -- state->uniphy2[j] = INREG(i + 0x100); -- j++; -+ if (IS_DCE32_VARIANT) { -+ for (i = 0x7680; i <= 0x7690; i += 4) { -+ state->uniphy1[j] = INREG(i); -+ state->uniphy2[j] = INREG(i + 0x20); -+ state->uniphy3[j] = INREG(i + 0x400); -+ state->uniphy4[j] = INREG(i + 0x420); -+ state->uniphy5[j] = INREG(i + 0x840); -+ state->uniphy6[j] = INREG(i + 0x940); -+ j++; -+ } -+ for (i = 0x7698; i <= 0x769c; i += 4) { -+ state->uniphy1[j] = INREG(i); -+ state->uniphy2[j] = INREG(i + 0x20); -+ state->uniphy3[j] = INREG(i + 0x400); -+ state->uniphy4[j] = INREG(i + 0x420); -+ state->uniphy5[j] = INREG(i + 0x840); -+ state->uniphy6[j] = INREG(i + 0x940); -+ j++; -+ } -+ } else { -+ for (i = 0x7ec0; i <= 0x7edc; i += 4) { -+ state->uniphy1[j] = INREG(i); -+ state->uniphy2[j] = INREG(i + 0x100); -+ j++; -+ } - } - j = 0; - /* save PHY,LINK regs */ -@@ -4770,15 +4857,40 @@ avivo_restore(ScrnInfoPtr pScrn, RADEONSavePtr restore) - OUTREG((i + 0x040), state->aux_cntl2[j]); - OUTREG((i + 0x400), state->aux_cntl3[j]); - OUTREG((i + 0x440), state->aux_cntl4[j]); -+ if (IS_DCE32_VARIANT) { -+ OUTREG((i + 0x500), state->aux_cntl5[j]); -+ OUTREG((i + 0x540), state->aux_cntl6[j]); -+ } - j++; - } - - j = 0; - /* save UNIPHY regs */ -- for (i = 0x7ec0; i <= 0x7edc; i += 4) { -- OUTREG(i, state->uniphy1[j]); -- OUTREG((i + 0x100), state->uniphy2[j]); -- j++; -+ if (IS_DCE32_VARIANT) { -+ for (i = 0x7680; i <= 0x7690; i += 4) { -+ OUTREG(i, state->uniphy1[j]); -+ OUTREG((i + 0x20), state->uniphy2[j]); -+ OUTREG((i + 0x400), state->uniphy3[j]); -+ OUTREG((i + 0x420), state->uniphy4[j]); -+ OUTREG((i + 0x840), state->uniphy5[j]); -+ OUTREG((i + 0x940), state->uniphy6[j]); -+ j++; -+ } -+ for (i = 0x7698; i <= 0x769c; i += 4) { -+ OUTREG(i, state->uniphy1[j]); -+ OUTREG((i + 0x20), state->uniphy2[j]); -+ OUTREG((i + 0x400), state->uniphy3[j]); -+ OUTREG((i + 0x420), state->uniphy4[j]); -+ OUTREG((i + 0x840), state->uniphy5[j]); -+ OUTREG((i + 0x940), state->uniphy6[j]); -+ j++; -+ } -+ } else { -+ for (i = 0x7ec0; i <= 0x7edc; i += 4) { -+ OUTREG(i, state->uniphy1[j]); -+ OUTREG((i + 0x100), state->uniphy2[j]); -+ j++; -+ } - } - j = 0; - /* save PHY,LINK regs */ -@@ -5046,8 +5158,10 @@ static void RADEONRestore(ScrnInfoPtr pScrn) - "RADEONRestore\n"); - - #if X_BYTE_ORDER == X_BIG_ENDIAN -- RADEONWaitForFifo(pScrn, 1); -- OUTREG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_NONE); -+ if (info->ChipFamily < CHIP_FAMILY_R600) { -+ RADEONWaitForFifo(pScrn, 1); -+ OUTREG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_NONE); -+ } - #endif - - RADEONBlank(pScrn); -@@ -5220,7 +5334,8 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) - - if (info->accelOn) { - RADEON_SYNC(info, pScrn); -- RADEONEngineRestore(pScrn); -+ if (info->ChipFamily < CHIP_FAMILY_R600) -+ RADEONEngineRestore(pScrn); - } - - #ifdef XF86DRI -@@ -5424,6 +5539,10 @@ void RADEONAdjustFrame(int scrnIndex, int x, int y, int flags) - xf86OutputPtr output = config->output[config->compat_output]; - xf86CrtcPtr crtc = output->crtc; - -+ /* not handled */ -+ if (IS_AVIVO_VARIANT) -+ return; -+ - #ifdef XF86DRI - if (info->cp->CPStarted && pScrn->pScreen) DRILock(pScrn->pScreen, 0); - #endif -@@ -5536,11 +5655,16 @@ Bool RADEONEnterVT(int scrnIndex, int flags) - if (info->adaptor) - RADEONResetVideo(pScrn); - -- if (info->accelOn) -+ if (info->accelOn && (info->ChipFamily < CHIP_FAMILY_R600)) - RADEONEngineRestore(pScrn); - -+ if (info->accelOn && info->accel_state) -+ info->accel_state->XInited3D = FALSE; -+ - #ifdef XF86DRI - if (info->directRenderingEnabled) { -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ R600LoadShaders(pScrn); - RADEONCP_START(pScrn, info); - DRIUnlock(pScrn->pScreen); - } -diff --git a/src/radeon_exa.c b/src/radeon_exa.c -index 2f36d71..ae68146 100644 ---- a/src/radeon_exa.c -+++ b/src/radeon_exa.c -@@ -35,6 +35,7 @@ - - #include "radeon.h" - #include "radeon_reg.h" -+#include "r600_reg.h" - #ifdef XF86DRI - #include "radeon_drm.h" - #endif -@@ -221,7 +222,7 @@ int RADEONBiggerCrtcArea(PixmapPtr pPix) - - #if X_BYTE_ORDER == X_BIG_ENDIAN - --static unsigned long swapper_surfaces[3]; -+static unsigned long swapper_surfaces[6]; - - static Bool RADEONPrepareAccess(PixmapPtr pPix, int index) - { -diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c -index cd97cc6..59cb46f 100644 ---- a/src/radeon_exa_funcs.c -+++ b/src/radeon_exa_funcs.c -@@ -129,7 +129,8 @@ FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2) - - TRACE; - -- FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix, RADEONBiggerCrtcArea(pPix), y1, y2, info->accel_state->vsync); -+ if (info->accel_state->vsync) -+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix, RADEONBiggerCrtcArea(pPix), y1, y2); - - BEGIN_ACCEL(2); - OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1); -@@ -230,7 +231,8 @@ FUNC_NAME(RADEONCopy)(PixmapPtr pDst, - dstY += h - 1; - } - -- FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h, info->accel_state->vsync); -+ if (info->accel_state->vsync) -+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h); - - BEGIN_ACCEL(3); - -@@ -281,7 +283,8 @@ RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h, - - RADEON_SWITCH_TO_2D(); - -- FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), y, y + h, info->accel_state->vsync); -+ if (info->accel_state->vsync) -+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), y, y + h); - - while ((buf = RADEONHostDataBlit(pScrn, - cpp, w, dst_pitch_off, &buf_pitch, -@@ -476,6 +479,9 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) - #endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ - - info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; -+#ifdef EXA_SUPPORTS_PREPARE_AUX -+ info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; -+#endif - info->accel_state->exa->pixmapOffsetAlign = RADEON_BUFFER_ALIGN + 1; - info->accel_state->exa->pixmapPitchAlign = 64; - -diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c -index c44502c..571204a 100644 ---- a/src/radeon_exa_render.c -+++ b/src/radeon_exa_render.c -@@ -458,7 +458,7 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - - #ifdef ONLY_ONCE - --static PixmapPtr -+PixmapPtr - RADEONGetDrawablePixmap(DrawablePtr pDrawable) - { - if (pDrawable->type == DRAWABLE_WINDOW) -@@ -2015,13 +2015,14 @@ static inline void transformPoint(PictTransform *transform, xPointFixed *point) - } - #endif - --static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, -+static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn, -+ RADEONInfoPtr info, -+ PixmapPtr pDst, - int srcX, int srcY, - int maskX, int maskY, - int dstX, int dstY, - int w, int h) - { -- RINFO_FROM_SCREEN(pDst->drawable.pScreen); - int vtx_count; - xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight; - static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; -@@ -2069,7 +2070,8 @@ static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, - } else - vtx_count = 4; - -- FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h, info->accel_state->vsync); -+ if (info->accel_state->vsync) -+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h); - - #ifdef ACCEL_CP - if (info->ChipFamily < CHIP_FAMILY_R200) { -@@ -2180,7 +2182,9 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, - RINFO_FROM_SCREEN(pDst->drawable.pScreen); - - if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) { -- FUNC_NAME(RadeonCompositeTile)(pDst, -+ FUNC_NAME(RadeonCompositeTile)(pScrn, -+ info, -+ pDst, - srcX, srcY, - maskX, maskY, - dstX, dstY, -@@ -2214,7 +2218,9 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, - w = remainingWidth; - remainingWidth -= w; - -- FUNC_NAME(RadeonCompositeTile)(pDst, -+ FUNC_NAME(RadeonCompositeTile)(pScrn, -+ info, -+ pDst, - tileSrcX, tileSrcY, - tileMaskX, tileMaskY, - tileDstX, tileDstY, -diff --git a/src/radeon_legacy_memory.c b/src/radeon_legacy_memory.c -index 2a9ee94..861fd97 100644 ---- a/src/radeon_legacy_memory.c -+++ b/src/radeon_legacy_memory.c -@@ -93,10 +93,10 @@ void - radeon_legacy_free_memory(ScrnInfoPtr pScrn, - void *mem_struct) - { -- ScreenPtr pScreen = screenInfo.screens[pScrn->scrnIndex]; - RADEONInfoPtr info = RADEONPTR(pScrn); -- - #ifdef USE_EXA -+ ScreenPtr pScreen = screenInfo.screens[pScrn->scrnIndex]; -+ - if (info->useEXA) { - ExaOffscreenArea *area = mem_struct; - -diff --git a/src/radeon_modes.c b/src/radeon_modes.c -index e06f8dd..ec60cc9 100644 ---- a/src/radeon_modes.c -+++ b/src/radeon_modes.c -@@ -65,15 +65,19 @@ void RADEONSetPitch (ScrnInfoPtr pScrn) - align_large = info->allowColorTiling || IS_AVIVO_VARIANT; - - /* FIXME: May need to validate line pitch here */ -- switch (pScrn->depth / 8) { -- case 1: pitch_mask = align_large ? 255 : 127; -- break; -- case 2: pitch_mask = align_large ? 127 : 31; -- break; -- case 3: -- case 4: pitch_mask = align_large ? 63 : 15; -- break; -- } -+ if (info->ChipFamily < CHIP_FAMILY_R600) { -+ switch (pScrn->depth / 8) { -+ case 1: pitch_mask = align_large ? 255 : 127; -+ break; -+ case 2: pitch_mask = align_large ? 127 : 31; -+ break; -+ case 3: -+ case 4: pitch_mask = align_large ? 63 : 15; -+ break; -+ } -+ } else -+ pitch_mask = 255; /* r6xx/r7xx need 256B alignment for accel */ -+ - dummy = (pScrn->virtualX + pitch_mask) & ~pitch_mask; - pScrn->displayWidth = dummy; - info->CurrentLayout.displayWidth = pScrn->displayWidth; -@@ -411,6 +415,56 @@ RADEONUpdatePanelSize(xf86OutputPtr output) - } - } - -+static void -+radeon_add_common_modes(xf86OutputPtr output, DisplayModePtr modes) -+{ -+ RADEONOutputPrivatePtr radeon_output = output->driver_private; -+ radeon_native_mode_ptr native_mode = &radeon_output->native_mode; -+ DisplayModePtr last = NULL; -+ DisplayModePtr new = NULL; -+ DisplayModePtr first = NULL; -+ int i; -+ /* Add some common sizes */ -+ int widths[15] = {640, 800, 1024, 1152, 1280, 1280, 1280, 1280, 1280, 1440, 1400, 1680, 1600, 1920, 1920}; -+ int heights[15] = {480, 600, 768, 768, 720, 800, 854, 960, 1024, 900, 1050, 1050, 1200, 1080, 1200}; -+ -+ for (i = 0; i < 15; i++) { -+ if (radeon_output->active_device & (ATOM_DEVICE_LCD_SUPPORT)) { -+ /* already added the native mode */ -+ if (widths[i] == native_mode->PanelXRes && heights[i] == native_mode->PanelYRes) -+ continue; -+ -+ /* Note: We allow all non-standard modes as long as they do not -+ * exceed the native resolution of the panel. Since these modes -+ * need the internal RMX unit in the video chips (and there is -+ * only one per card), this will only apply to the primary head. -+ */ -+ if (widths[i] < 320 || widths[i] > native_mode->PanelXRes || -+ heights[i] < 200 || heights[i] > native_mode->PanelYRes) -+ continue; -+ } -+ -+ new = xf86CVTMode(widths[i], heights[i], 60.0, FALSE, FALSE); -+ -+ new->type = M_T_DRIVER; -+ -+ new->next = NULL; -+ new->prev = last; -+ -+ if (last) last->next = new; -+ last = new; -+ if (!first) first = new; -+ } -+ -+ if (last) { -+ last->next = NULL; //first; -+ first->prev = NULL; //last; -+ } -+ -+ xf86ModesAdd(modes, first); -+ -+} -+ - DisplayModePtr - RADEONProbeOutputModes(xf86OutputPtr output) - { -@@ -459,12 +513,16 @@ RADEONProbeOutputModes(xf86OutputPtr output) - if (radeon_output->active_device & (ATOM_DEVICE_LCD_SUPPORT)) - modes = RADEONFPNativeMode(output); - /* add the screen modes */ -- RADEONAddScreenModes(output, &modes); -+ if (modes == NULL) -+ RADEONAddScreenModes(output, &modes); - } - } - } - } - -+ if (radeon_output->active_device & (ATOM_DEVICE_LCD_SUPPORT)) -+ radeon_add_common_modes(output, modes); -+ - return modes; - } - -diff --git a/src/radeon_output.c b/src/radeon_output.c -index 352519f..baa9592 100644 ---- a/src/radeon_output.c -+++ b/src/radeon_output.c -@@ -255,7 +255,11 @@ radeon_ddc_connected(xf86OutputPtr output) - MonType = MT_DFP; - break; - case CONNECTOR_DISPLAY_PORT: -- MonType = MT_DP; -+ /* -+ * XXX wrong. need to infer based on whether we got DDC from I2C -+ * or AUXCH. -+ */ -+ MonType = MT_DFP; - case CONNECTOR_DVI_I: - if (MonInfo->rawData[0x14] & 0x80) /* if it's digital and DVI */ - MonType = MT_DFP; -@@ -342,7 +346,7 @@ radeon_dpms(xf86OutputPtr output, int mode) - if ((mode == DPMSModeOn) && radeon_output->enabled) - return; - -- if (IS_AVIVO_VARIANT) { -+ if (IS_AVIVO_VARIANT || info->r4xx_atom) { - atombios_output_dpms(output, mode); - } else { - legacy_output_dpms(output, mode); -@@ -399,6 +403,13 @@ radeon_mode_valid(xf86OutputPtr output, DisplayModePtr pMode) - } - } - -+ if (radeon_output->ConnectorType == CONNECTOR_DISPLAY_PORT && -+ radeon_output->MonType == MT_DFP) { -+ /* DP to DVI converter, single-link only */ -+ if (pMode->Clock > 165000) -+ return MODE_CLOCK_HIGH; -+ } -+ - if (radeon_output->active_device & (ATOM_DEVICE_LCD_SUPPORT)) { - if (radeon_output->rmx_type == RMX_OFF) { - if (pMode->HDisplay != native_mode->PanelXRes || -@@ -512,7 +523,7 @@ radeon_mode_prepare(xf86OutputPtr output) - if (other_crtc->enabled) { - if (other_radeon_crtc->initialized) { - radeon_crtc_dpms(other_crtc, DPMSModeOff); -- if (IS_AVIVO_VARIANT) -+ if (IS_AVIVO_VARIANT || info->r4xx_atom) - atombios_lock_crtc(info->atomBIOS, other_radeon_crtc->crtc_id, 1); - radeon_dpms(loop_output, DPMSModeOff); - } -@@ -532,7 +543,7 @@ radeon_mode_set(xf86OutputPtr output, DisplayModePtr mode, - { - RADEONInfoPtr info = RADEONPTR(output->scrn); - -- if (IS_AVIVO_VARIANT) -+ if (IS_AVIVO_VARIANT || info->r4xx_atom) - atombios_output_mode_set(output, mode, adjusted_mode); - else - legacy_output_mode_set(output, mode, adjusted_mode); -@@ -557,7 +568,7 @@ radeon_mode_commit(xf86OutputPtr output) - if (other_crtc->enabled) { - if (other_radeon_crtc->initialized) { - radeon_crtc_dpms(other_crtc, DPMSModeOn); -- if (IS_AVIVO_VARIANT) -+ if (IS_AVIVO_VARIANT || info->r4xx_atom) - atombios_lock_crtc(info->atomBIOS, other_radeon_crtc->crtc_id, 0); - radeon_dpms(loop_output, DPMSModeOn); - } -@@ -1147,7 +1158,7 @@ radeon_create_resources(xf86OutputPtr output) - } - #endif - -- if (radeon_output->devices & (ATOM_DEVICE_CRT_SUPPORT)) { -+ if (radeon_output->devices & (ATOM_DEVICE_CRT_SUPPORT | ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) { - load_detection_atom = MAKE_ATOM("load_detection"); - - range[0] = 0; /* off */ -@@ -1465,6 +1476,11 @@ radeon_set_property(xf86OutputPtr output, Atom property, - radeon_output->rmx_type = RMX_FULL; - } else if (value->size == strlen("center") && !strncmp("center", s, strlen("center"))) { - radeon_output->rmx_type = RMX_CENTER; -+ } else if (value->size == strlen("aspect") && !strncmp("aspect", s, strlen("aspect"))) { -+ if (IS_AVIVO_VARIANT) -+ radeon_output->rmx_type = RMX_ASPECT; -+ else -+ return FALSE; - } else if (value->size == strlen("off") && !strncmp("off", s, strlen("off"))) { - radeon_output->rmx_type = RMX_OFF; - } else -@@ -2500,6 +2516,14 @@ radeon_output_clones (ScrnInfoPtr pScrn, xf86OutputPtr output) - return index_mask; - } - -+static xf86OutputPtr -+RADEONOutputCreate(ScrnInfoPtr pScrn, const char *name, int i) -+{ -+ char buf[32]; -+ sprintf(buf, name, i); -+ return xf86OutputCreate(pScrn, &radeon_output_funcs, buf); -+} -+ - /* - * initialise the static data sos we don't have to re-do at randr change */ - Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) -@@ -2512,6 +2536,7 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) - int num_vga = 0; - int num_dvi = 0; - int num_hdmi = 0; -+ int num_dp = 0; - - /* We first get the information about all connectors from BIOS. - * This is how the card is phyiscally wired up. -@@ -2648,15 +2673,18 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) - - for (i = 0; i < RADEON_MAX_BIOS_CONNECTOR; i++) { - if (info->BiosConnector[i].valid) { -- if ((info->BiosConnector[i].ConnectorType == CONNECTOR_DVI_D) || -- (info->BiosConnector[i].ConnectorType == CONNECTOR_DVI_I) || -- (info->BiosConnector[i].ConnectorType == CONNECTOR_DVI_A)) { -+ RADEONConnectorType conntype = info->BiosConnector[i].ConnectorType; -+ if ((conntype == CONNECTOR_DVI_D) || -+ (conntype == CONNECTOR_DVI_I) || -+ (conntype == CONNECTOR_DVI_A)) { - num_dvi++; -- } else if (info->BiosConnector[i].ConnectorType == CONNECTOR_VGA) { -+ } else if (conntype == CONNECTOR_VGA) { - num_vga++; -- } else if ((info->BiosConnector[i].ConnectorType == CONNECTOR_HDMI_TYPE_A) || -- (info->BiosConnector[i].ConnectorType == CONNECTOR_HDMI_TYPE_B)) { -+ } else if ((conntype == CONNECTOR_HDMI_TYPE_A) || -+ (conntype == CONNECTOR_HDMI_TYPE_B)) { - num_hdmi++; -+ } else if (conntype == CONNECTOR_DISPLAY_PORT) { -+ num_dp++; - } - } - } -@@ -2664,8 +2692,9 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) - for (i = 0 ; i < RADEON_MAX_BIOS_CONNECTOR; i++) { - if (info->BiosConnector[i].valid) { - RADEONOutputPrivatePtr radeon_output; -+ RADEONConnectorType conntype = info->BiosConnector[i].ConnectorType; - -- if (info->BiosConnector[i].ConnectorType == CONNECTOR_NONE) -+ if (conntype == CONNECTOR_NONE) - continue; - - radeon_output = xnfcalloc(sizeof(RADEONOutputPrivateRec), 1); -@@ -2673,7 +2702,7 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) - return FALSE; - } - radeon_output->MonType = MT_UNKNOWN; -- radeon_output->ConnectorType = info->BiosConnector[i].ConnectorType; -+ radeon_output->ConnectorType = conntype; - radeon_output->devices = info->BiosConnector[i].devices; - radeon_output->ddc_i2c = info->BiosConnector[i].ddc_i2c; - radeon_output->igp_lane_info = info->BiosConnector[i].igp_lane_info; -@@ -2682,33 +2711,21 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) - radeon_output->linkb = info->BiosConnector[i].linkb; - radeon_output->connector_id = info->BiosConnector[i].connector_object; - -- if ((info->BiosConnector[i].ConnectorType == CONNECTOR_DVI_D) || -- (info->BiosConnector[i].ConnectorType == CONNECTOR_DVI_I) || -- (info->BiosConnector[i].ConnectorType == CONNECTOR_DVI_A)) { -- if (num_dvi > 1) { -- output = xf86OutputCreate(pScrn, &radeon_output_funcs, "DVI-1"); -- num_dvi--; -- } else { -- output = xf86OutputCreate(pScrn, &radeon_output_funcs, "DVI-0"); -- } -- } else if (info->BiosConnector[i].ConnectorType == CONNECTOR_VGA) { -- if (num_vga > 1) { -- output = xf86OutputCreate(pScrn, &radeon_output_funcs, "VGA-1"); -- num_vga--; -- } else { -- output = xf86OutputCreate(pScrn, &radeon_output_funcs, "VGA-0"); -- } -- } else if ((info->BiosConnector[i].ConnectorType == CONNECTOR_HDMI_TYPE_A) || -- (info->BiosConnector[i].ConnectorType == CONNECTOR_HDMI_TYPE_B)) { -- if (num_hdmi > 1) { -- output = xf86OutputCreate(pScrn, &radeon_output_funcs, "HDMI-1"); -- num_hdmi--; -- } else { -- output = xf86OutputCreate(pScrn, &radeon_output_funcs, "HDMI-0"); -- } -- } else -- output = xf86OutputCreate(pScrn, &radeon_output_funcs, -- ConnectorTypeName[radeon_output->ConnectorType]); -+ if ((conntype == CONNECTOR_DVI_D) || -+ (conntype == CONNECTOR_DVI_I) || -+ (conntype == CONNECTOR_DVI_A)) { -+ output = RADEONOutputCreate(pScrn, "DVI-%d", --num_dvi); -+ } else if (conntype == CONNECTOR_VGA) { -+ output = RADEONOutputCreate(pScrn, "VGA-%d", --num_vga); -+ } else if ((conntype == CONNECTOR_HDMI_TYPE_A) || -+ (conntype == CONNECTOR_HDMI_TYPE_B)) { -+ output = RADEONOutputCreate(pScrn, "HDMI-%d", --num_hdmi); -+ } else if (conntype == CONNECTOR_DISPLAY_PORT) { -+ output = RADEONOutputCreate(pScrn, "DisplayPort-%d", --num_dp); -+ } else { -+ output = RADEONOutputCreate(pScrn, -+ ConnectorTypeName[conntype], 0); -+ } - - if (!output) { - return FALSE; -diff --git a/src/radeon_probe.h b/src/radeon_probe.h -index 447ef57..afc8e21 100644 ---- a/src/radeon_probe.h -+++ b/src/radeon_probe.h -@@ -101,7 +101,8 @@ typedef enum - { - RMX_OFF, - RMX_FULL, -- RMX_CENTER -+ RMX_CENTER, -+ RMX_ASPECT - } RADEONRMXType; - - typedef struct { -@@ -373,7 +374,7 @@ struct avivo_state - /* dvoa */ - uint32_t dvoa[16]; - -- /* DCE3 chips */ -+ /* DCE3+ chips */ - uint32_t fmt1[18]; - uint32_t fmt2[18]; - uint32_t dig1[19]; -@@ -384,9 +385,15 @@ struct avivo_state - uint32_t aux_cntl2[14]; - uint32_t aux_cntl3[14]; - uint32_t aux_cntl4[14]; -+ uint32_t aux_cntl5[14]; -+ uint32_t aux_cntl6[14]; - uint32_t phy[10]; - uint32_t uniphy1[8]; - uint32_t uniphy2[8]; -+ uint32_t uniphy3[8]; -+ uint32_t uniphy4[8]; -+ uint32_t uniphy5[8]; -+ uint32_t uniphy6[8]; - - }; - -diff --git a/src/radeon_reg.h b/src/radeon_reg.h -index 7b8840b..0af8859 100644 ---- a/src/radeon_reg.h -+++ b/src/radeon_reg.h -@@ -3445,15 +3445,24 @@ - #define RS690_MC_STATUS 0x90 - #define RS690_MC_STATUS_IDLE (1 << 0) - --#define RS600_MC_INDEX 0x78 --# define RS600_MC_INDEX_MASK 0xff --# define RS600_MC_INDEX_WR_EN (1 << 8) --# define RS600_MC_INDEX_WR_ACK 0xff --#define RS600_MC_DATA 0x7c -- --#define RS600_MC_FB_LOCATION 0xA --#define RS600_MC_STATUS 0x0 --#define RS600_MC_STATUS_IDLE (1 << 0) -+#define RS600_MC_INDEX 0x70 -+# define RS600_MC_ADDR_MASK 0xffff -+# define RS600_MC_IND_SEQ_RBS_0 (1 << 16) -+# define RS600_MC_IND_SEQ_RBS_1 (1 << 17) -+# define RS600_MC_IND_SEQ_RBS_2 (1 << 18) -+# define RS600_MC_IND_SEQ_RBS_3 (1 << 19) -+# define RS600_MC_IND_AIC_RBS (1 << 20) -+# define RS600_MC_IND_CITF_ARB0 (1 << 21) -+# define RS600_MC_IND_CITF_ARB1 (1 << 22) -+# define RS600_MC_IND_WR_EN (1 << 23) -+#define RS600_MC_DATA 0x74 -+ -+#define RS600_MC_STATUS 0x0 -+# define RS600_MC_IDLE (1 << 1) -+#define RS600_MC_FB_LOCATION 0x4 -+#define RS600_MC_AGP_LOCATION 0x5 -+#define RS600_AGP_BASE 0x6 -+#define RS600_AGP_BASE2 0x7 - - #define AVIVO_MC_INDEX 0x0070 - #define R520_MC_STATUS 0x00 -@@ -3482,6 +3491,8 @@ - # define R600_CHANSIZE (1 << 7) - # define R600_CHANSIZE_OVERRIDE (1 << 10) - -+#define R600_SRBM_STATUS 0x0e50 -+ - #define AVIVO_HDP_FB_LOCATION 0x134 - - #define AVIVO_VGA_RENDER_CONTROL 0x0300 -@@ -3662,6 +3673,8 @@ - # define AVIVO_D1MODE_VLINE_START_SHIFT 0 - # define AVIVO_D1MODE_VLINE_END_SHIFT 16 - # define AVIVO_D1MODE_VLINE_INV (1 << 31) -+#define AVIVO_D1MODE_VLINE_STATUS 0x653c -+# define AVIVO_D1MODE_VLINE_STAT (1 << 12) - #define AVIVO_D1MODE_VIEWPORT_START 0x6580 - #define AVIVO_D1MODE_VIEWPORT_SIZE 0x6584 - #define AVIVO_D1MODE_EXT_OVERSCAN_LEFT_RIGHT 0x6588 -@@ -3984,6 +3997,9 @@ - #define R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 - - #define R700_MC_VM_FB_LOCATION 0x2024 -+#define R700_MC_VM_AGP_TOP 0x2028 -+#define R700_MC_VM_AGP_BOT 0x202c -+#define R700_MC_VM_AGP_BASE 0x2030 - - #define R600_HDP_NONSURFACE_BASE 0x2c04 - -@@ -5362,4 +5378,32 @@ - - #define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */ - -+/* r6xx/r7xx stuff */ -+#define R600_GRBM_STATUS 0x8010 -+# define R600_CMDFIFO_AVAIL_MASK 0x1f -+# define R700_CMDFIFO_AVAIL_MASK 0xf -+# define R600_GUI_ACTIVE (1 << 31) -+ -+#define R600_GRBM_SOFT_RESET 0x8020 -+# define R600_SOFT_RESET_CP (1 << 0) -+ -+#define R600_WAIT_UNTIL 0x8040 -+ -+#define R600_CP_ME_CNTL 0x86d8 -+# define R600_CP_ME_HALT (1 << 28) -+ -+#define R600_CP_RB_BASE 0xc100 -+#define R600_CP_RB_CNTL 0xc104 -+# define R600_RB_NO_UPDATE (1 << 27) -+# define R600_RB_RPTR_WR_ENA (1 << 31) -+#define R600_CP_RB_RPTR_WR 0xc108 -+#define R600_CP_RB_RPTR_ADDR 0xc10c -+#define R600_CP_RB_RPTR_ADDR_HI 0xc110 -+#define R600_CP_RB_WPTR 0xc114 -+#define R600_CP_RB_WPTR_ADDR 0xc118 -+#define R600_CP_RB_WPTR_ADDR_HI 0xc11c -+ -+#define R600_CP_RB_RPTR 0x8700 -+#define R600_CP_RB_WPTR_DELAY 0x8704 -+ - #endif -diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c -index 7712344..2df299f 100644 ---- a/src/radeon_textured_video.c -+++ b/src/radeon_textured_video.c -@@ -36,6 +36,7 @@ - - #include "radeon.h" - #include "radeon_reg.h" -+#include "r600_reg.h" - #include "radeon_macros.h" - #include "radeon_probe.h" - #include "radeon_video.h" -@@ -43,12 +44,24 @@ - #include - #include "fourcc.h" - -+extern void -+R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv); -+ -+extern Bool -+R600CopyToVRAM(ScrnInfoPtr pScrn, -+ char *src, int src_pitch, -+ uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp, -+ int x, int y, int w, int h); -+ - #define IMAGE_MAX_WIDTH 2048 - #define IMAGE_MAX_HEIGHT 2048 - - #define IMAGE_MAX_WIDTH_R500 4096 - #define IMAGE_MAX_HEIGHT_R500 4096 - -+#define IMAGE_MAX_WIDTH_R600 8192 -+#define IMAGE_MAX_HEIGHT_R600 8192 -+ - static Bool - RADEONTilingEnabled(ScrnInfoPtr pScrn, PixmapPtr pPix) - { -@@ -146,6 +159,132 @@ static __inline__ uint32_t F_TO_24(float val) - - #endif /* XF86DRI */ - -+static void -+R600CopyPlanarHW(ScrnInfoPtr pScrn, -+ unsigned char *y_src, unsigned char *u_src, unsigned char *v_src, -+ uint32_t dst_mc_addr, -+ int srcPitch, int srcPitch2, int dstPitch, -+ int w, int h) -+{ -+ int dstPitch2 = dstPitch >> 1; -+ int h2 = h >> 1; -+ int w2 = w >> 1; -+ int v_offset, u_offset; -+ v_offset = dstPitch * h; -+ v_offset = (v_offset + 255) & ~255; -+ u_offset = v_offset + (dstPitch2 * h2); -+ u_offset = (u_offset + 255) & ~255; -+ -+ /* Y */ -+ R600CopyToVRAM(pScrn, -+ (char *)y_src, srcPitch, -+ dstPitch, dst_mc_addr, h, 8, -+ 0, 0, w, h); -+ -+ /* V */ -+ R600CopyToVRAM(pScrn, -+ (char *)v_src, srcPitch2, -+ dstPitch2, dst_mc_addr + v_offset, h2, 8, -+ 0, 0, w2, h2); -+ -+ /* U */ -+ R600CopyToVRAM(pScrn, -+ (char *)u_src, srcPitch2, -+ dstPitch2, dst_mc_addr + u_offset, h2, 8, -+ 0, 0, w2, h2); -+} -+ -+static void -+R600CopyPackedHW(ScrnInfoPtr pScrn, -+ unsigned char *src, uint32_t dst_mc_addr, -+ int srcPitch, int dstPitch, -+ int w, int h) -+{ -+ -+ /* YUV */ -+ R600CopyToVRAM(pScrn, -+ (char *)src, srcPitch, -+ dstPitch >> 2, dst_mc_addr, h, 32, -+ 0, 0, w >> 1, h); -+ -+} -+ -+static void -+R600CopyPlanarSW(ScrnInfoPtr pScrn, -+ unsigned char *y_src, unsigned char *u_src, unsigned char *v_src, -+ unsigned char *dst, -+ int srcPitch, int srcPitch2, int dstPitch, -+ int w, int h) -+{ -+ int i; -+ int dstPitch2 = dstPitch >> 1; -+ int h2 = h >> 1; -+ -+ /* Y */ -+ if (srcPitch == dstPitch) { -+ memcpy(dst, y_src, srcPitch * h); -+ dst += (dstPitch * h); -+ } else { -+ for (i = 0; i < h; i++) { -+ memcpy(dst, y_src, srcPitch); -+ y_src += srcPitch; -+ dst += dstPitch; -+ } -+ } -+ -+ /* tex base need 256B alignment */ -+ if (h & 1) -+ dst += dstPitch; -+ -+ /* V */ -+ if (srcPitch2 == dstPitch2) { -+ memcpy(dst, v_src, srcPitch2 * h2); -+ dst += (dstPitch2 * h2); -+ } else { -+ for (i = 0; i < h2; i++) { -+ memcpy(dst, v_src, srcPitch2); -+ v_src += srcPitch2; -+ dst += dstPitch2; -+ } -+ } -+ -+ /* tex base need 256B alignment */ -+ if (h2 & 1) -+ dst += dstPitch2; -+ -+ /* U */ -+ if (srcPitch2 == dstPitch2) { -+ memcpy(dst, u_src, srcPitch2 * h2); -+ dst += (dstPitch2 * h2); -+ } else { -+ for (i = 0; i < h2; i++) { -+ memcpy(dst, u_src, srcPitch2); -+ u_src += srcPitch2; -+ dst += dstPitch2; -+ } -+ } -+} -+ -+static void -+R600CopyPackedSW(ScrnInfoPtr pScrn, -+ unsigned char *src, unsigned char *dst, -+ int srcPitch, int dstPitch, -+ int w, int h) -+{ -+ int i; -+ -+ if (srcPitch == dstPitch) { -+ memcpy(dst, src, srcPitch * h); -+ dst += (dstPitch * h); -+ } else { -+ for (i = 0; i < h; i++) { -+ memcpy(dst, src, srcPitch); -+ src += srcPitch; -+ dst += dstPitch; -+ } -+ } -+} -+ - static int - RADEONPutImageTextured(ScrnInfoPtr pScrn, - short src_x, short src_y, -@@ -214,7 +353,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - break; - } - -- dstPitch = (dstPitch + 63) & ~63; -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ dstPitch = (dstPitch + 255) & ~255; -+ else -+ dstPitch = (dstPitch + 63) & ~63; - - if (pPriv->video_memory != NULL && size != pPriv->size) { - radeon_legacy_free_memory(pScrn, pPriv->video_memory); -@@ -222,16 +364,21 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - } - - if (pPriv->video_memory == NULL) { -- pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, -- &pPriv->video_memory, -- size * 2, 64); -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, -+ &pPriv->video_memory, -+ size * 2, 256); -+ else -+ pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, -+ &pPriv->video_memory, -+ size * 2, 64); - if (pPriv->video_offset == 0) - return BadAlloc; - } - - /* Bicubic filter setup */ - pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF); -- if (!(IS_R300_3D || IS_R500_3D)) -+ if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D)) - pPriv->bicubic_enabled = FALSE; - if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) { - /* -@@ -280,7 +427,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left; - - pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset; -- pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset); -+ else -+ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); - pPriv->src_pitch = dstPitch; - pPriv->size = size; - pPriv->pDraw = pDraw; -@@ -294,35 +444,77 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - switch(id) { - case FOURCC_YV12: - case FOURCC_I420: -- top &= ~1; -- nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; -- s2offset = srcPitch * height; -- s3offset = (srcPitch2 * (height >> 1)) + s2offset; -- top &= ~1; -- pPriv->src_addr += left << 1; -- tmp = ((top >> 1) * srcPitch2) + (left >> 1); -- s2offset += tmp; -- s3offset += tmp; -- if (id == FOURCC_I420) { -- tmp = s2offset; -- s2offset = s3offset; -- s3offset = tmp; -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ s2offset = srcPitch * height; -+ s3offset = (srcPitch2 * (height >> 1)) + s2offset; -+ if (info->DMAForXv) { -+ if (id == FOURCC_YV12) -+ R600CopyPlanarHW(pScrn, buf, buf + s3offset, buf + s2offset, -+ pPriv->src_offset, -+ srcPitch, srcPitch2, pPriv->src_pitch, -+ width, height); -+ else -+ R600CopyPlanarHW(pScrn, buf, buf + s2offset, buf + s3offset, -+ pPriv->src_offset, -+ srcPitch, srcPitch2, pPriv->src_pitch, -+ width, height); -+ } else { -+ if (id == FOURCC_YV12) -+ R600CopyPlanarSW(pScrn, buf, buf + s3offset, buf + s2offset, -+ pPriv->src_addr, -+ srcPitch, srcPitch2, pPriv->src_pitch, -+ width, height); -+ else -+ R600CopyPlanarSW(pScrn, buf, buf + s2offset, buf + s3offset, -+ pPriv->src_addr, -+ srcPitch, srcPitch2, pPriv->src_pitch, -+ width, height); -+ } -+ } else { -+ top &= ~1; -+ nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; -+ s2offset = srcPitch * height; -+ s3offset = (srcPitch2 * (height >> 1)) + s2offset; -+ top &= ~1; -+ pPriv->src_addr += left << 1; -+ tmp = ((top >> 1) * srcPitch2) + (left >> 1); -+ s2offset += tmp; -+ s3offset += tmp; -+ if (id == FOURCC_I420) { -+ tmp = s2offset; -+ s2offset = s3offset; -+ s3offset = tmp; -+ } -+ RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left, -+ buf + s2offset, buf + s3offset, pPriv->src_addr, -+ srcPitch, srcPitch2, dstPitch, nlines, npixels); - } -- RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left, -- buf + s2offset, buf + s3offset, pPriv->src_addr, -- srcPitch, srcPitch2, dstPitch, nlines, npixels); - break; - case FOURCC_UYVY: - case FOURCC_YUY2: - default: -- nlines = ((y2 + 0xffff) >> 16) - top; -- RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2); -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ if (info->DMAForXv) -+ R600CopyPackedHW(pScrn, buf, pPriv->src_offset, -+ 2 * width, pPriv->src_pitch, -+ width, height); -+ else -+ R600CopyPackedSW(pScrn, buf, pPriv->src_addr, -+ 2 * width, pPriv->src_pitch, -+ width, height); -+ } else { -+ nlines = ((y2 + 0xffff) >> 16) - top; -+ RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2); -+ } - break; - } - - /* Upload bicubic filter tex */ -- if (pPriv->bicubic_enabled) -- RADEONCopyData(pScrn, (uint8_t *)bicubic_tex_512, (uint8_t *)(info->FB + pPriv->bicubic_offset), 1024, 1024, 1, 512, 2); -+ if (pPriv->bicubic_enabled) { -+ if (info->ChipFamily < CHIP_FAMILY_R600) -+ RADEONCopyData(pScrn, (uint8_t *)bicubic_tex_512, -+ (uint8_t *)(info->FB + pPriv->bicubic_offset), 1024, 1024, 1, 512, 2); -+ } - - /* update cliplist */ - if (!REGION_EQUAL(pScrn->pScreen, &pPriv->clip, clipBoxes)) { -@@ -340,7 +532,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - pPriv->h = height; - - #ifdef XF86DRI -- if (info->directRenderingEnabled) -+ if (IS_R600_3D) -+ R600DisplayTexturedVideo(pScrn, pPriv); -+ else if (info->directRenderingEnabled) - RADEONDisplayTexturedVideoCP(pScrn, pPriv); - else - #endif -@@ -370,6 +564,16 @@ static XF86VideoEncodingRec DummyEncodingR500[1] = - } - }; - -+static XF86VideoEncodingRec DummyEncodingR600[1] = -+{ -+ { -+ 0, -+ "XV_IMAGE", -+ IMAGE_MAX_WIDTH_R600, IMAGE_MAX_HEIGHT_R600, -+ {1, 1} -+ } -+}; -+ - #define NUM_FORMATS 3 - - static XF86VideoFormatRec Formats[NUM_FORMATS] = -@@ -471,7 +675,9 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) - adapt->flags = 0; - adapt->name = "Radeon Textured Video"; - adapt->nEncodings = 1; -- if (IS_R500_3D) -+ if (IS_R600_3D) -+ adapt->pEncodings = DummyEncodingR600; -+ else if (IS_R500_3D) - adapt->pEncodings = DummyEncodingR500; - else - adapt->pEncodings = DummyEncoding; -diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c -index c6ed472..f55ae12 100644 ---- a/src/radeon_textured_videofuncs.c -+++ b/src/radeon_textured_videofuncs.c -@@ -1475,16 +1475,16 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - } - } - -- FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, -- radeon_covering_crtc_num(pScrn, -- pPriv->drw_x, -- pPriv->drw_x + pPriv->dst_w, -- pPriv->drw_y, -- pPriv->drw_y + pPriv->dst_h, -- pPriv->desired_crtc), -- pPriv->drw_y, -- pPriv->drw_y + pPriv->dst_h, -- pPriv->vsync); -+ if (pPriv->vsync) -+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, -+ radeon_covering_crtc_num(pScrn, -+ pPriv->drw_x, -+ pPriv->drw_x + pPriv->dst_w, -+ pPriv->drw_y, -+ pPriv->drw_y + pPriv->dst_h, -+ pPriv->desired_crtc), -+ pPriv->drw_y, -+ pPriv->drw_y + pPriv->dst_h); - - /* - * Rendering of the actual polygon is done in two different -diff --git a/src/radeon_video.c b/src/radeon_video.c -index 2fb5fcc..92d1a71 100644 ---- a/src/radeon_video.c -+++ b/src/radeon_video.c -@@ -310,7 +310,7 @@ void RADEONInitVideo(ScreenPtr pScreen) - } else - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Failed to set up textured video\n"); - } else -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Textured video requires CP on R5xx/IGP\n"); -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Textured video requires CP on R5xx/R6xx/R7xx/IGP\n"); - } else - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Textured video disabled on RV250 due to HW bug\n"); - diff --git a/radeon-modeset.patch b/radeon-modeset.patch index ed439cc..bb9824a 100644 --- a/radeon-modeset.patch +++ b/radeon-modeset.patch @@ -1,5 +1,5 @@ diff --git a/configure.ac b/configure.ac -index b094a50..29a19e3 100644 +index 3475f6a..727e2a0 100644 --- a/configure.ac +++ b/configure.ac @@ -114,8 +114,19 @@ if test "$DRI" = yes; then @@ -2403,7 +2403,7 @@ index 0000000..6d3b6fe +int radeon_bo_gem_name_buffer(dri_bo *bo, uint32_t *name); +#endif diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c -index eabd87d..8712a74 100644 +index a9bc7d2..6c22339 100644 --- a/src/radeon_commonfuncs.c +++ b/src/radeon_commonfuncs.c @@ -62,12 +62,15 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) @@ -4079,7 +4079,7 @@ index 0000000..a19d7ec + +#endif diff --git a/src/radeon_driver.c b/src/radeon_driver.c -index c0f5e7b..3c824a8 100644 +index d1b2f54..052ed1f 100644 --- a/src/radeon_driver.c +++ b/src/radeon_driver.c @@ -67,7 +67,7 @@ @@ -4091,7 +4091,7 @@ index c0f5e7b..3c824a8 100644 /* Driver data structures */ #include "radeon.h" #include "radeon_reg.h" -@@ -226,7 +226,10 @@ radeonShadowWindow(ScreenPtr screen, CARD32 row, CARD32 offset, int mode, +@@ -229,7 +229,10 @@ radeonShadowWindow(ScreenPtr screen, CARD32 row, CARD32 offset, int mode, stride = (pScrn->displayWidth * pScrn->bitsPerPixel) / 8; *size = stride; @@ -4103,7 +4103,7 @@ index c0f5e7b..3c824a8 100644 } static Bool RADEONCreateScreenResources (ScreenPtr pScreen) -@@ -247,6 +250,13 @@ RADEONCreateScreenResources (ScreenPtr pScreen) +@@ -250,6 +253,13 @@ RADEONCreateScreenResources (ScreenPtr pScreen) radeonShadowWindow, 0, NULL)) return FALSE; } @@ -4117,7 +4117,7 @@ index c0f5e7b..3c824a8 100644 return TRUE; } -@@ -1658,6 +1668,7 @@ static Bool RADEONPreInitVRAM(ScrnInfoPtr pScrn) +@@ -1661,6 +1671,7 @@ static Bool RADEONPreInitVRAM(ScrnInfoPtr pScrn) } pScrn->videoRam &= ~1023; @@ -4125,7 +4125,7 @@ index c0f5e7b..3c824a8 100644 info->FbMapSize = pScrn->videoRam * 1024; /* if the card is PCI Express reserve the last 32k for the gart table */ -@@ -1788,58 +1799,64 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) +@@ -1791,58 +1802,64 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) } from = X_PROBED; @@ -4233,7 +4233,7 @@ index c0f5e7b..3c824a8 100644 #ifdef XF86DRI /* AGP/PCI */ -@@ -2014,6 +2031,15 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) +@@ -2017,6 +2034,15 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) } info->accel_state->fifo_slots = 0; @@ -4249,7 +4249,7 @@ index c0f5e7b..3c824a8 100644 if ((info->ChipFamily == CHIP_FAMILY_RS100) || (info->ChipFamily == CHIP_FAMILY_RS200) || (info->ChipFamily == CHIP_FAMILY_RS300) || -@@ -2038,6 +2064,9 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) +@@ -2041,6 +2067,9 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { int errmaj = 0, errmin = 0; @@ -4259,7 +4259,7 @@ index c0f5e7b..3c824a8 100644 from = X_DEFAULT; #if defined(USE_EXA) #if defined(USE_XAA) -@@ -2048,6 +2077,7 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) +@@ -2051,6 +2080,7 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) info->useEXA = TRUE; } else if (xf86NameCmp(optstr, "XAA") == 0) { from = X_CONFIG; @@ -4267,7 +4267,7 @@ index c0f5e7b..3c824a8 100644 } } #else /* USE_XAA */ -@@ -2151,15 +2181,9 @@ static Bool RADEONPreInitInt10(ScrnInfoPtr pScrn, xf86Int10InfoPtr *ppInt10) +@@ -2155,15 +2185,9 @@ static Bool RADEONPreInitInt10(ScrnInfoPtr pScrn, xf86Int10InfoPtr *ppInt10) return TRUE; } @@ -4284,7 +4284,7 @@ index c0f5e7b..3c824a8 100644 if (!(info->dri = xcalloc(1, sizeof(struct radeon_dri)))) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR,"Unable to allocate dri rec!\n"); -@@ -2170,6 +2194,22 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) +@@ -2174,6 +2198,22 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) xf86DrvMsg(pScrn->scrnIndex, X_ERROR,"Unable to allocate cp rec!\n"); return FALSE; } @@ -4307,7 +4307,7 @@ index c0f5e7b..3c824a8 100644 info->cp->CPInUse = FALSE; info->cp->CPStarted = FALSE; info->cp->CPusecTimeout = RADEON_DEFAULT_CP_TIMEOUT; -@@ -2727,6 +2767,37 @@ static const xf86CrtcConfigFuncsRec RADEONCRTCResizeFuncs = { +@@ -2731,6 +2771,37 @@ static const xf86CrtcConfigFuncsRec RADEONCRTCResizeFuncs = { RADEONCRTCResize }; @@ -4345,7 +4345,7 @@ index c0f5e7b..3c824a8 100644 Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) { xf86CrtcConfigPtr xf86_config; -@@ -2747,6 +2818,8 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2751,6 +2822,8 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) info = RADEONPTR(pScrn); info->MMIO = NULL; @@ -4354,7 +4354,7 @@ index c0f5e7b..3c824a8 100644 info->IsSecondary = FALSE; info->IsPrimary = FALSE; -@@ -2781,62 +2854,63 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2785,62 +2858,63 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) } info->PciInfo = xf86GetPciInfoForEntity(info->pEnt->index); @@ -4465,7 +4465,7 @@ index c0f5e7b..3c824a8 100644 if (xf86RegisterResources(info->pEnt->index, 0, ResExclusive)) goto fail; -@@ -2846,10 +2920,12 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2850,10 +2924,12 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) pScrn->racMemFlags = RAC_FB | RAC_COLORMAP | RAC_VIEWPORT | RAC_CURSOR; pScrn->monitor = pScrn->confScreen->monitor; @@ -4482,7 +4482,7 @@ index c0f5e7b..3c824a8 100644 if (!RADEONPreInitVisual(pScrn)) goto fail; -@@ -2863,164 +2939,224 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2867,167 +2943,227 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) memcpy(info->Options, RADEONOptions, sizeof(RADEONOptions)); xf86ProcessOptions(pScrn->scrnIndex, pScrn->options, info->Options); @@ -4583,6 +4583,11 @@ index c0f5e7b..3c824a8 100644 + + RADEONPostInt10Check(pScrn, int10_save); +- /* Save BIOS scratch registers */ +- RADEONSaveBIOSRegisters(pScrn, info->SavedReg); ++ /* Save BIOS scratch registers */ ++ RADEONSaveBIOSRegisters(pScrn, info->SavedReg); + + if (!RADEONPreInitBIOS(pScrn, pInt10)) + goto fail; #ifdef XF86DRI @@ -4772,7 +4777,9 @@ index c0f5e7b..3c824a8 100644 + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Kernel modesetting setup failed\n"); + goto fail; + } -+ + +- for (i = 0; i < xf86_config->num_output; i++) { +- xf86OutputPtr output = xf86_config->output[i]; + info->dri->drmFD = info->drmmode.fd; + info->dri2.drm_fd = info->drmmode.fd; + info->dri2.enabled = FALSE; @@ -4793,9 +4800,7 @@ index c0f5e7b..3c824a8 100644 + { + struct drm_radeon_getparam gp; + int value; - -- for (i = 0; i < xf86_config->num_output; i++) { -- xf86OutputPtr output = xf86_config->output[i]; ++ + memset(&gp, 0, sizeof(gp)); + gp.param = RADEON_PARAM_FB_LOCATION; + gp.value = &value; @@ -4834,7 +4839,7 @@ index c0f5e7b..3c824a8 100644 /* Get ScreenInit function */ if (!xf86LoadSubModule(pScrn, "fb")) return FALSE; -@@ -3035,10 +3171,12 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -3042,10 +3178,12 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) if (!RADEONPreInitXv(pScrn)) goto fail; } @@ -4851,7 +4856,7 @@ index c0f5e7b..3c824a8 100644 } if (pScrn->modes == NULL) { -@@ -3191,6 +3329,9 @@ static void RADEONBlockHandler(int i, pointer blockData, +@@ -3198,6 +3336,9 @@ static void RADEONBlockHandler(int i, pointer blockData, #ifdef USE_EXA info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; @@ -4861,7 +4866,7 @@ index c0f5e7b..3c824a8 100644 #endif } -@@ -3279,7 +3420,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3286,7 +3427,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, int subPixelOrder = SubPixelUnknown; char* s; #endif @@ -4870,7 +4875,7 @@ index c0f5e7b..3c824a8 100644 info->accelOn = FALSE; #ifdef USE_XAA -@@ -3299,58 +3440,61 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3306,58 +3447,61 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, "RADEONScreenInit %lx %ld\n", pScrn->memPhysBase, pScrn->fbOffset); #endif @@ -4971,7 +4976,7 @@ index c0f5e7b..3c824a8 100644 /* Visual setup */ miClearVisualTypes(); if (!miSetVisualTypes(pScrn->depth, -@@ -3384,19 +3528,21 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3391,19 +3535,21 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, hasDRI = info->directRenderingEnabled; #endif /* XF86DRI */ @@ -5005,7 +5010,7 @@ index c0f5e7b..3c824a8 100644 } } -@@ -3433,7 +3579,10 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3440,7 +3586,10 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, #ifdef XF86DRI if (hasDRI) { info->accelDFS = xf86ReturnOptValBool(info->Options, OPTION_ACCEL_DFS, @@ -5017,7 +5022,7 @@ index c0f5e7b..3c824a8 100644 /* Reserve approx. half of offscreen memory for local textures by * default, can be overridden with Option "FBTexPercent". -@@ -3459,7 +3608,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3466,7 +3615,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, #endif #if defined(XF86DRI) && defined(USE_XAA) @@ -5026,7 +5031,7 @@ index c0f5e7b..3c824a8 100644 info->dri->textureSize = -1; if (xf86GetOptValInteger(info->Options, OPTION_FBTEX_PERCENT, &(info->dri->textureSize))) { -@@ -3477,7 +3626,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3484,7 +3633,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, #endif #ifdef USE_XAA @@ -5035,7 +5040,7 @@ index c0f5e7b..3c824a8 100644 return FALSE; #endif -@@ -3498,7 +3647,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3505,7 +3654,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, info->CurrentLayout.pixel_bytes); int maxy = info->FbMapSize / width_bytes; @@ -5044,7 +5049,7 @@ index c0f5e7b..3c824a8 100644 xf86DrvMsg(scrnIndex, X_ERROR, "Static buffer allocation failed. Disabling DRI.\n"); xf86DrvMsg(scrnIndex, X_ERROR, -@@ -3508,19 +3657,54 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3515,19 +3664,54 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, info->CurrentLayout.pixel_bytes * 3 + 1023) / 1024); info->directRenderingEnabled = FALSE; } else { @@ -5103,7 +5108,7 @@ index c0f5e7b..3c824a8 100644 #endif xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "Initializing fb layer\n"); -@@ -3544,7 +3728,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3551,7 +3735,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, if (info->r600_shadow_fb == FALSE) { /* Init fb layer */ @@ -5112,7 +5117,7 @@ index c0f5e7b..3c824a8 100644 pScrn->virtualX, pScrn->virtualY, pScrn->xDpi, pScrn->yDpi, pScrn->displayWidth, pScrn->bitsPerPixel)) -@@ -3586,8 +3770,10 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3593,8 +3777,10 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, /* restore the memory map here otherwise we may get a hang when * initializing the drm below */ @@ -5125,7 +5130,7 @@ index c0f5e7b..3c824a8 100644 /* Backing store setup */ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -@@ -3597,7 +3783,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3604,7 +3790,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, /* DRI finalisation */ #ifdef XF86DRI @@ -5134,7 +5139,7 @@ index c0f5e7b..3c824a8 100644 info->dri->pKernelDRMVersion->version_minor >= 19) { if (RADEONDRISetParam(pScrn, RADEON_SETPARAM_PCIGART_LOCATION, info->dri->pciGartOffset) < 0) -@@ -3613,14 +3799,24 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3620,14 +3806,24 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, if (info->directRenderingEnabled) { xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "DRI Finishing init !\n"); @@ -5160,7 +5165,7 @@ index c0f5e7b..3c824a8 100644 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Direct rendering enabled\n"); -@@ -3716,10 +3912,16 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3723,10 +3919,16 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, return FALSE; } } @@ -5170,15 +5175,15 @@ index c0f5e7b..3c824a8 100644 - if (!xf86SetDesiredModes (pScrn)) + if (info->drm_mode_setting) { + if (!drmmode_set_desired_modes(pScrn, &info->drmmode)) - return FALSE; ++ return FALSE; + } else { + if (!xf86SetDesiredModes (pScrn)) -+ return FALSE; + return FALSE; + } /* Provide SaveScreen & wrap BlockHandler and CloseScreen */ /* Wrap CloseScreen */ -@@ -5296,7 +5498,7 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) +@@ -5303,7 +5505,7 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) #ifdef XF86DRI Bool CPStarted = info->cp->CPStarted; @@ -5187,7 +5192,7 @@ index c0f5e7b..3c824a8 100644 DRILock(pScrn->pScreen, 0); RADEONCP_STOP(pScrn, info); } -@@ -5319,8 +5521,10 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) +@@ -5326,8 +5528,10 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) #endif } @@ -5200,7 +5205,7 @@ index c0f5e7b..3c824a8 100644 ret = xf86SetSingleMode (pScrn, mode, RR_Rotate_0); -@@ -5332,16 +5536,19 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) +@@ -5339,16 +5543,19 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) /* xf86SetRootClip would do, but can't access that here */ } @@ -5228,7 +5233,7 @@ index c0f5e7b..3c824a8 100644 } #endif -@@ -5539,6 +5746,11 @@ void RADEONAdjustFrame(int scrnIndex, int x, int y, int flags) +@@ -5546,6 +5753,11 @@ void RADEONAdjustFrame(int scrnIndex, int x, int y, int flags) xf86OutputPtr output = config->output[config->compat_output]; xf86CrtcPtr crtc = output->crtc; @@ -5240,7 +5245,7 @@ index c0f5e7b..3c824a8 100644 /* not handled */ if (IS_AVIVO_VARIANT) return; -@@ -5578,76 +5790,103 @@ Bool RADEONEnterVT(int scrnIndex, int flags) +@@ -5585,76 +5797,103 @@ Bool RADEONEnterVT(int scrnIndex, int flags) xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "RADEONEnterVT\n"); @@ -5393,7 +5398,7 @@ index c0f5e7b..3c824a8 100644 } #endif /* this will get XVideo going again, but only if XVideo was initialised -@@ -5662,7 +5901,7 @@ Bool RADEONEnterVT(int scrnIndex, int flags) +@@ -5669,7 +5908,7 @@ Bool RADEONEnterVT(int scrnIndex, int flags) info->accel_state->XInited3D = FALSE; #ifdef XF86DRI @@ -5402,7 +5407,7 @@ index c0f5e7b..3c824a8 100644 if (info->ChipFamily >= CHIP_FAMILY_R600) R600LoadShaders(pScrn); RADEONCP_START(pScrn, info); -@@ -5687,26 +5926,28 @@ void RADEONLeaveVT(int scrnIndex, int flags) +@@ -5694,26 +5933,28 @@ void RADEONLeaveVT(int scrnIndex, int flags) "RADEONLeaveVT\n"); #ifdef XF86DRI if (RADEONPTR(pScrn)->directRenderingInited) { @@ -5446,7 +5451,7 @@ index c0f5e7b..3c824a8 100644 /* Make sure 3D clients will re-upload textures to video RAM */ if (info->dri->textureSize) { -@@ -5722,6 +5963,11 @@ void RADEONLeaveVT(int scrnIndex, int flags) +@@ -5729,6 +5970,11 @@ void RADEONLeaveVT(int scrnIndex, int flags) i = list[i].next; } while (i != 0); } @@ -5458,7 +5463,7 @@ index c0f5e7b..3c824a8 100644 } #endif -@@ -5748,10 +5994,18 @@ void RADEONLeaveVT(int scrnIndex, int flags) +@@ -5755,10 +6001,18 @@ void RADEONLeaveVT(int scrnIndex, int flags) xf86_hide_cursors (pScrn); @@ -5480,7 +5485,7 @@ index c0f5e7b..3c824a8 100644 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "Ok, leaving now...\n"); -@@ -5805,7 +6059,8 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) +@@ -5812,7 +6066,8 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) #endif /* USE_XAA */ if (pScrn->vtSema) { @@ -5490,7 +5495,7 @@ index c0f5e7b..3c824a8 100644 } xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -@@ -5840,6 +6095,12 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) +@@ -5847,6 +6102,12 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) info->DGAModes = NULL; xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "Unmapping memory\n"); @@ -8023,7 +8028,7 @@ index 0000000..67868cc + + diff --git a/src/radeon_probe.h b/src/radeon_probe.h -index afc8e21..6138f36 100644 +index a0c6b2c..30fee18 100644 --- a/src/radeon_probe.h +++ b/src/radeon_probe.h @@ -146,6 +146,27 @@ typedef struct diff --git a/sources b/sources index f4a5a58..105f04d 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -b4120aa6a3bb22a7a3bc83cba060675a xf86-video-ati-6.11.0.tar.bz2 +540b25842f8e09164cf4d2376995dc68 xf86-video-ati-6.12.0.tar.bz2 diff --git a/xorg-x11-drv-ati.spec b/xorg-x11-drv-ati.spec index f9dfef3..1a6580f 100644 --- a/xorg-x11-drv-ati.spec +++ b/xorg-x11-drv-ati.spec @@ -4,8 +4,8 @@ Summary: Xorg X11 ati video driver Name: xorg-x11-drv-ati -Version: 6.11.0 -Release: 10%{?dist} +Version: 6.12.0 +Release: 1%{?dist} URL: http://www.x.org License: MIT Group: User Interface/X Hardware Support @@ -14,7 +14,6 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) Source0: http://www.x.org/pub/individual/driver/%{tarball}-%{version}.tar.bz2 Source1: radeon.xinf -Patch0: radeon-6.11.0-git.patch Patch1: radeon-modeset.patch #Patch4: radeon-6.9.0-remove-limit-heuristics.patch Patch5: radeon-6.9.0-panel-size-sanity.patch @@ -40,7 +39,6 @@ X.Org X11 ati video driver. %prep %setup -q -n %{tarball}-%{version} -%patch0 -p1 -b .git %patch1 -p1 -b .modeset #patch4 -p1 -b .remove-limit-heuristics #%patch5 -p1 -b .panel-size @@ -76,6 +74,9 @@ rm -rf $RPM_BUILD_ROOT %{_mandir}/man4/radeon.4* %changelog +* Sat Mar 14 2009 Dave Airlie 6.12.0-1 +- rebase to latest -ati upstream release + * Fri Mar 13 2009 Dave Airlie 6.11.0-10 - radeon-modeset.patch: merge patches into kms patch - radeon-6.11.0-git.patch: fix suspend/resume on r600