summaryrefslogtreecommitdiff
path: root/src/radeon_render.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/radeon_render.c')
-rw-r--r--src/radeon_render.c1036
1 files changed, 1036 insertions, 0 deletions
diff --git a/src/radeon_render.c b/src/radeon_render.c
new file mode 100644
index 0000000..0fab39f
--- /dev/null
+++ b/src/radeon_render.c
@@ -0,0 +1,1036 @@
+/*
+ * Copyright 2004 Eric Anholt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <anholt@FreeBSD.org>
+ * Hui Yu <hyu@ati.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef USE_XAA
+
+#include "dixstruct.h"
+
+#include "xaa.h"
+#include "xaalocal.h"
+
+#ifndef RENDER_GENERIC_HELPER
+#define RENDER_GENERIC_HELPER
+
+/* One row of the RadeonBlendOp[] table below.
+ * dst_alpha is consulted by RadeonGetBlendCntl() to decide whether the source
+ * blend factor must be rewritten for destinations without an alpha channel.
+ * src_alpha is not read by any code visible in this file section.
+ */
+struct blendinfo {
+ Bool dst_alpha; /* the blend factors reference destination alpha */
+ Bool src_alpha; /* presumably: the blend factors reference source alpha */
+ CARD32 blend_cntl; /* SRC/DST blend factor bits; 0 marks an unsupported op */
+};
+
+/* The first part of blend_cntl corresponds to Fa from the render "protocol"
+ * document, and the second part to Fb.
+ *
+ * The table is indexed directly by the operation code passed to
+ * RadeonGetBlendCntl(); rows whose blend_cntl is 0 are reported there as
+ * unsupported, as are operation codes past the end of the table.
+ */
+static const struct blendinfo RadeonBlendOp[] = {
+ /* Clear */
+ {0, 0, RADEON_SRC_BLEND_GL_ZERO |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* Src */
+ {0, 0, RADEON_SRC_BLEND_GL_ONE |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* Dst */
+ {0, 0, RADEON_SRC_BLEND_GL_ZERO |
+ RADEON_DST_BLEND_GL_ONE},
+ /* Over */
+ {0, 1, RADEON_SRC_BLEND_GL_ONE |
+ RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
+ /* OverReverse */
+ {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
+ RADEON_DST_BLEND_GL_ONE},
+ /* In */
+ {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* InReverse */
+ {0, 1, RADEON_SRC_BLEND_GL_ZERO |
+ RADEON_DST_BLEND_GL_SRC_ALPHA},
+ /* Out */
+ {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* OutReverse */
+ {0, 1, RADEON_SRC_BLEND_GL_ZERO |
+ RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
+ /* Atop */
+ {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA |
+ RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
+ /* AtopReverse */
+ {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
+ RADEON_DST_BLEND_GL_SRC_ALPHA},
+ /* Xor */
+ {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
+ RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
+ /* Add */
+ {0, 0, RADEON_SRC_BLEND_GL_ONE |
+ RADEON_DST_BLEND_GL_ONE},
+ /* Saturate */
+ {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
+ RADEON_DST_BLEND_GL_ONE},
+ /* Two unnamed filler rows (presumably unassigned operation codes that keep
+ * the Disjoint group at its index -- TODO confirm against render.h).
+ */
+ {0, 0, 0},
+ {0, 0, 0},
+ /* DisjointClear */
+ {0, 0, RADEON_SRC_BLEND_GL_ZERO |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* DisjointSrc */
+ {0, 0, RADEON_SRC_BLEND_GL_ONE |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* DisjointDst */
+ {0, 0, RADEON_SRC_BLEND_GL_ZERO |
+ RADEON_DST_BLEND_GL_ONE},
+ /* DisjointOver unsupported */
+ {0, 0, 0},
+ /* DisjointOverReverse */
+ {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
+ RADEON_DST_BLEND_GL_ONE},
+ /* DisjointIn unsupported */
+ {0, 0, 0},
+ /* DisjointInReverse unsupported */
+ {0, 0, 0},
+ /* DisjointOut (this row is populated, so the op is accepted) */
+ {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* DisjointOutReverse unsupported */
+ {0, 0, 0},
+ /* DisjointAtop unsupported */
+ {0, 0, 0},
+ /* DisjointAtopReverse unsupported */
+ {0, 0, 0},
+ /* DisjointXor unsupported */
+ {0, 0, 0},
+ /* Four unnamed filler rows (presumably unassigned operation codes that
+ * keep the Conjoint group at its index -- TODO confirm against render.h).
+ */
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ /* ConjointClear */
+ {0, 0, RADEON_SRC_BLEND_GL_ZERO |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* ConjointSrc */
+ {0, 0, RADEON_SRC_BLEND_GL_ONE |
+ RADEON_DST_BLEND_GL_ZERO},
+ /* ConjointDst */
+ {0, 0, RADEON_SRC_BLEND_GL_ZERO |
+ RADEON_DST_BLEND_GL_ONE},
+};
+/* Number of rows in RadeonBlendOp[]; operation codes >= this are rejected. */
+#define RadeonOpMax (sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
+
+/* Note on texture formats:
+ * TXFORMAT_Y8 expands to (Y,Y,Y,1). TXFORMAT_I8 expands to (I,I,I,I)
+ * The RADEON and R200 TXFORMATS we use are the same on r100/r200.
+ */
+
+/* Zero-terminated list of picture formats usable as a texture source.
+ * It contains exactly the formats RadeonGetTextureFormat() has a case for.
+ * NOTE(review): presumably handed to XAA as the supported source formats;
+ * the consumer is not visible in this section.
+ */
+static CARD32 RADEONTextureFormats[] = {
+ PICT_a8r8g8b8,
+ PICT_a8,
+ PICT_x8r8g8b8,
+ PICT_r5g6b5,
+ PICT_a1r5g5b5,
+ PICT_x1r5g5b5,
+ 0
+};
+
+/* Zero-terminated list of picture formats usable as a render destination.
+ * It contains exactly the formats RadeonGetColorFormat() has a case for.
+ * NOTE(review): presumably handed to XAA as the supported destination
+ * formats; the consumer is not visible in this section.
+ */
+static CARD32 RADEONDstFormats[] = {
+ PICT_a8r8g8b8,
+ PICT_x8r8g8b8,
+ PICT_r5g6b5,
+ PICT_a1r5g5b5,
+ PICT_x1r5g5b5,
+ 0
+};
+
+/* Map a Render picture format to the texture format bits that the
+ * SetupTexture routines write to the TXFORMAT register.  Formats that carry
+ * an alpha channel additionally get RADEON_TXFORMAT_ALPHA_IN_MAP.
+ * Returns 0 for a format that has no mapping.
+ */
+static CARD32
+RadeonGetTextureFormat(CARD32 format)
+{
+    CARD32 txformat = 0;
+
+    switch (format) {
+    case PICT_a8r8g8b8:
+        txformat = RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+        break;
+    case PICT_x8r8g8b8:
+        txformat = RADEON_TXFORMAT_ARGB8888;
+        break;
+    case PICT_a1r5g5b5:
+        txformat = RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+        break;
+    case PICT_x1r5g5b5:
+        txformat = RADEON_TXFORMAT_ARGB1555;
+        break;
+    case PICT_r5g6b5:
+        txformat = RADEON_TXFORMAT_RGB565;
+        break;
+    case PICT_a8:
+        txformat = RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+        break;
+    default:
+        break;
+    }
+
+    return txformat;
+}
+
+/* Map a Render picture format of the destination to the colour-buffer
+ * format value that the Setup routines OR into RADEON_RB3D_CNTL.  The
+ * alpha and "x" (padding) variants of a layout share one hardware format.
+ * Returns 0 for a format that has no mapping.
+ */
+static CARD32
+RadeonGetColorFormat(CARD32 format)
+{
+    if (format == PICT_a8r8g8b8 || format == PICT_x8r8g8b8)
+        return RADEON_COLOR_FORMAT_ARGB8888;
+
+    if (format == PICT_r5g6b5)
+        return RADEON_COLOR_FORMAT_RGB565;
+
+    if (format == PICT_a1r5g5b5 || format == PICT_x1r5g5b5)
+        return RADEON_COLOR_FORMAT_ARGB1555;
+
+    return 0;
+}
+
+/* Look up the RADEON_RB3D_BLENDCNTL value for a Render operation.
+ *
+ * op         index into RadeonBlendOp[]
+ * dstFormat  picture format of the destination
+ *
+ * Returns 0 when op lies outside the table or its row is marked unsupported.
+ * When the operation depends on destination alpha but dstFormat has no alpha
+ * bits, the source factor is rewritten as if destination alpha were always 1.
+ */
+static CARD32
+RadeonGetBlendCntl(CARD8 op, CARD32 dstFormat)
+{
+    const struct blendinfo *row;
+    CARD32 cntl, src_factor;
+
+    if (op >= RadeonOpMax)
+        return 0;
+
+    row = &RadeonBlendOp[op];
+    cntl = row->blend_cntl;
+    if (cntl == 0)
+        return 0;
+
+    /* Nothing to patch if dst alpha is unused or really present. */
+    if (!row->dst_alpha || PICT_FORMAT_A(dstFormat))
+        return cntl;
+
+    src_factor = cntl & RADEON_SRC_BLEND_MASK;
+
+    if (src_factor == RADEON_SRC_BLEND_GL_DST_ALPHA) {
+        /* dst alpha == 1  ->  factor is ONE */
+        cntl &= ~RADEON_SRC_BLEND_MASK;
+        cntl |= RADEON_SRC_BLEND_GL_ONE;
+    } else if (src_factor == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA ||
+               src_factor == RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE) {
+        /* (1 - dst alpha) == 0  ->  factor is ZERO */
+        cntl &= ~RADEON_SRC_BLEND_MASK;
+        cntl |= RADEON_SRC_BLEND_GL_ZERO;
+    }
+
+    return cntl;
+}
+
+/* Return the object representation of a float as a CARD32, by storing the
+ * float into a union and reading the integer member back.
+ */
+static __inline__ CARD32 F_TO_DW(float val)
+{
+    union {
+        CARD32 dword;
+        float  real;
+    } pun;
+
+    pun.real = val;
+
+    return pun.dword;
+}
+
+/* Compute the integer log base 2 of val, i.e. the index of its highest set
+ * bit (1 -> 0, 2..3 -> 1, 4..7 -> 2, ...).
+ *
+ * Returns -1 when val is zero (as before) or negative.  Negative values used
+ * to be right-shifted as signed integers, which never reaches zero on
+ * implementations with an arithmetic shift; they are now rejected up front
+ * and the loop runs on an unsigned copy.
+ */
+static __inline__ int
+ATILog2(int val)
+{
+    unsigned int rest;
+    int bits = -1;
+
+    if (val <= 0)
+        return -1;
+
+    for (rest = (unsigned int)val; rest != 0; rest >>= 1)
+        ++bits;
+
+    return bits;
+}
+
+/* Removal callback registered with xf86AllocateOffscreenLinear() by
+ * AllocateLinear().  The private pointer of the linear area is the driver
+ * info record; its cached RenderTex pointer is dropped here.
+ */
+static void
+RemoveLinear (FBLinearPtr linear)
+{
+    RADEONInfoPtr owner = (RADEONInfoPtr)(linear->devPrivate.ptr);
+
+    owner->RenderTex = NULL;
+}
+
+/* Callback installed by AllocateLinear().  Frees the cached render texture
+ * once the current time has passed RenderTimeout, and uninstalls itself as
+ * soon as no render texture is held any more.
+ */
+static void
+RenderCallback (ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    if (info->RenderTex) {
+        /* Texture still within its lifetime: stay installed, do nothing. */
+        if (!(currentTime.milliseconds > info->RenderTimeout))
+            return;
+
+        xf86FreeOffscreenLinear(info->RenderTex);
+        info->RenderTex = NULL;
+    }
+
+    info->RenderCallback = NULL;
+}
+
+/* Make sure info->RenderTex is an offscreen linear area of at least
+ * sizeNeeded bytes, reusing or resizing the existing one when possible.
+ * Every call pushes the texture's expiry 30000 ms into the future and
+ * (re)installs RenderCallback.
+ *
+ * Returns TRUE when a large enough area is available, FALSE otherwise.
+ */
+static Bool
+AllocateLinear (
+    ScrnInfoPtr pScrn,
+    int sizeNeeded
+){
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    int bytes_per_pixel = info->CurrentLayout.bitsPerPixel / 8;
+    int pixels;
+
+    info->RenderTimeout = currentTime.milliseconds + 30000;
+    info->RenderCallback = RenderCallback;
+
+    /* The offscreen allocator counts in screen-depth pixels, not bytes:
+     * round the byte size up to whole pixels.
+     */
+    pixels = (sizeNeeded + bytes_per_pixel - 1) / bytes_per_pixel;
+
+    if (info->RenderTex != NULL) {
+        /* Big enough already, or growable in place? */
+        if (info->RenderTex->size >= pixels ||
+            xf86ResizeOffscreenLinear(info->RenderTex, pixels))
+            return TRUE;
+
+        /* Neither: release it and fall through to a fresh allocation. */
+        xf86FreeOffscreenLinear(info->RenderTex);
+        info->RenderTex = NULL;
+    }
+
+    info->RenderTex = xf86AllocateOffscreenLinear(pScrn->pScreen, pixels, 32,
+                                                  NULL, RemoveLinear, info);
+
+    return (info->RenderTex != NULL);
+}
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+/* Program RADEON_SURFACE_CNTL so that both non-surface apertures swap bytes
+ * in units matching the texel size (no swap for 1 byte, 16BPP swap for 2,
+ * 32BPP swap for 4).  The value is derived from the saved
+ * info->ModeReg.surface_cntl with the four swap bits cleared first.
+ *
+ * Returns FALSE, after logging an error and without touching the register,
+ * for any other tex_bytepp.
+ */
+static Bool RADEONSetupRenderByteswap(ScrnInfoPtr pScrn, int tex_bytepp)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    unsigned char *RADEONMMIO = info->MMIO;
+    CARD32 swap_bits;
+    CARD32 surface_cntl;
+
+    /* Pick the swap mode for the framebuffer aperture. */
+    switch (tex_bytepp) {
+    case 1:
+        swap_bits = 0;
+        break;
+    case 2:
+        swap_bits = RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP;
+        break;
+    case 4:
+        swap_bits = RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP;
+        break;
+    default:
+        xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: Don't know what to do for "
+                   "tex_bytepp == %d!\n", __func__, tex_bytepp);
+        return FALSE;
+    }
+
+    surface_cntl = info->ModeReg.surface_cntl;
+    surface_cntl &= ~(RADEON_NONSURF_AP0_SWP_16BPP |
+                      RADEON_NONSURF_AP1_SWP_16BPP |
+                      RADEON_NONSURF_AP0_SWP_32BPP |
+                      RADEON_NONSURF_AP1_SWP_32BPP);
+    surface_cntl |= swap_bits;
+
+    OUTREG(RADEON_SURFACE_CNTL, surface_cntl);
+    return TRUE;
+}
+
+/* Undo RADEONSetupRenderByteswap(): write the saved mode value
+ * info->ModeReg.surface_cntl back to RADEON_SURFACE_CNTL.
+ */
+static void RADEONRestoreByteswap(RADEONInfoPtr info)
+{
+    unsigned char *RADEONMMIO = info->MMIO;
+    CARD32 saved_cntl = info->ModeReg.surface_cntl;
+
+    OUTREG(RADEON_SURFACE_CNTL, saved_cntl);
+}
+#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
+
+#endif /* RENDER_GENERIC_HELPER */
+
+/* Exactly one of ACCEL_MMIO / ACCEL_CP must be defined when the functions
+ * below are compiled; it selects the suffix FUNC_NAME() appends.
+ */
+#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
+#error Cannot define both MMIO and CP acceleration!
+#endif
+
+/* Token pasting: ANSI ## where available, otherwise the empty-comment trick
+ * used by pre-ANSI preprocessors.
+ */
+#if !defined(UNIXCPP) || defined(ANSICPP)
+#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
+#else
+#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
+#endif
+
+/* FUNC_NAME(Foo) expands to FooMMIO or FooCP.
+ * NOTE(review): together with the RENDER_GENERIC_HELPER guard above and the
+ * #undef at the end, this suggests the file is compiled once per
+ * acceleration type -- confirm against the including file.
+ */
+#ifdef ACCEL_MMIO
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
+#else
+#ifdef ACCEL_CP
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
+#else
+#error No accel type defined!
+#endif
+#endif
+
+/* Upload a CPU-side image into the offscreen render texture and program
+ * texture unit 0 (R100 register layout) to sample from it.
+ *
+ * pScrn      screen being drawn to
+ * format     Render picture format of the source pixels
+ * src        first source row
+ * src_pitch  distance between source rows, in bytes
+ * width      texture width in pixels, 1..2048
+ * height     texture height in pixels, 1..2048
+ * flags      XAA_RENDER_REPEAT selects the log2 size encoding; otherwise the
+ *            texture is set up as non-power-of-two
+ *
+ * Returns TRUE on success.  Returns FALSE when the size is out of range, the
+ * offscreen allocation fails, or (big-endian MMIO builds) the aperture byte
+ * swap cannot be set up for this texel size.
+ */
+static Bool FUNC_NAME(R100SetupTexture)(
+    ScrnInfoPtr pScrn,
+    CARD32 format,
+    CARD8 *src,
+    int src_pitch,
+    unsigned int width,
+    unsigned int height,
+    int flags)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    CARD8 *dst;
+    CARD32 tex_size = 0, txformat;
+    int dst_pitch, offset, size, tex_bytepp;
+#ifdef ACCEL_CP
+    CARD32 buf_pitch;
+    unsigned int hpass;
+    CARD8 *tmp_dst;
+#else
+    unsigned int row;
+#endif
+    ACCEL_PREAMBLE();
+
+    /* A zero dimension would make ATILog2() return -1 (negative shift below)
+     * and wrap the "size - 1" encoding, so reject it along with oversize.
+     */
+    if ((width == 0) || (height == 0) || (width > 2048) || (height > 2048))
+        return FALSE;
+
+    txformat = RadeonGetTextureFormat(format);
+    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
+
+    /* Destination rows are padded to a multiple of 64 bytes. */
+    dst_pitch = (width * tex_bytepp + 63) & ~63;
+    size = dst_pitch * height;
+
+    if (!AllocateLinear(pScrn, size))
+        return FALSE;
+
+#ifndef ACCEL_CP
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* Only change the aperture swap mode once nothing else can fail before
+     * the copy: an early return after this point would otherwise leave
+     * RADEON_SURFACE_CNTL modified without the matching restore.
+     */
+    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
+        xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
+                   "failed!\n", __func__);
+        return FALSE;
+    }
+#endif
+
+#endif
+
+    if (flags & XAA_RENDER_REPEAT) {
+        txformat |= ATILog2(width) << RADEON_TXFORMAT_WIDTH_SHIFT;
+        txformat |= ATILog2(height) << RADEON_TXFORMAT_HEIGHT_SHIFT;
+    } else {
+        tex_size = ((height - 1) << 16) | (width - 1);
+        txformat |= RADEON_TXFORMAT_NON_POWER2;
+    }
+
+    /* RenderTex->offset is in screen pixels; convert to a byte offset. */
+    offset = info->RenderTex->offset * pScrn->bitsPerPixel / 8;
+    dst = (CARD8*)(info->FB + offset);
+
+    /* Upload texture to card. */
+
+#ifdef ACCEL_CP
+
+    /* RADEONHostDataBlit() advances dst and decrements height itself and
+     * reports in hpass how many rows the current pass covers.
+     */
+    while ( height )
+    {
+        tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
+                                      dst_pitch, &buf_pitch,
+                                      &dst, &height, &hpass);
+        RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
+                                    hpass, buf_pitch, src_pitch );
+        src += hpass * src_pitch;
+    }
+
+    RADEON_PURGE_CACHE();
+    RADEON_WAIT_UNTIL_IDLE();
+
+#else
+
+    if (info->accel->NeedToSync)
+        info->accel->Sync(pScrn);
+
+    /* Direct copy through the framebuffer aperture, one row at a time. */
+    for (row = 0; row < height; row++) {
+        memcpy(dst, src, width * tex_bytepp);
+        src += src_pitch;
+        dst += dst_pitch;
+    }
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    RADEONRestoreByteswap(info);
+#endif
+
+#endif /* ACCEL_CP */
+
+    BEGIN_ACCEL(5);
+    OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
+    OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, tex_size);
+    /* The pitch register is written as pitch - 32 (presumably the hardware
+     * encoding -- TODO confirm against the register reference).
+     */
+    OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, dst_pitch - 32);
+    OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, offset + info->fbLocation +
+                                        pScrn->fbOffset);
+    OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, RADEON_MAG_FILTER_LINEAR |
+                                        RADEON_MIN_FILTER_LINEAR |
+                                        RADEON_CLAMP_S_WRAP |
+                                        RADEON_CLAMP_T_WRAP);
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+/* Prepare the R100 3D engine for compositing a solid colour through an
+ * alpha mask supplied from CPU memory.
+ *
+ * pScrn                  screen being drawn to
+ * op                     Render operation, looked up via RadeonGetBlendCntl()
+ * red/green/blue/alpha   16-bit colour channels; only the top byte of each
+ *                        is used
+ * maskFormat             picture format of the mask pixels
+ * dstFormat              picture format of the destination
+ * alphaPtr, alphaPitch   mask pixels and their row pitch in bytes
+ * width, height, flags   passed on to R100SetupTexture
+ *
+ * Returns FALSE when the operation is unsupported or the mask cannot be
+ * uploaded; TRUE once all state has been emitted.
+ */
+static Bool
+FUNC_NAME(R100SetupForCPUToScreenAlphaTexture) (
+    ScrnInfoPtr pScrn,
+    int op,
+    CARD16 red,
+    CARD16 green,
+    CARD16 blue,
+    CARD16 alpha,
+    CARD32 maskFormat,
+    CARD32 dstFormat,
+    CARD8 *alphaPtr,
+    int alphaPitch,
+    int width,
+    int height,
+    int flags
+)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    CARD32 colorformat, srccolor, blend_cntl;
+    ACCEL_PREAMBLE();
+
+    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
+    if (blend_cntl == 0)
+        return FALSE;
+
+    if (!info->XInited3D)
+        RADEONInit3DEngine(pScrn);
+
+    if (!FUNC_NAME(R100SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
+                                     width, height, flags))
+        return FALSE;
+
+    colorformat = RadeonGetColorFormat(dstFormat);
+
+    /* Pack the top byte of each channel as A:R:G:B, 8 bits each.  The
+     * channels are widened to CARD32 before shifting: as promoted ints,
+     * (alpha & 0xff00) << 16 would shift a 1 into the sign bit, which is
+     * undefined behaviour in C.
+     */
+    srccolor = (((CARD32)alpha & 0xff00) << 16) |
+               (((CARD32)red & 0xff00) << 8) |
+               ((CARD32)green & 0xff00) |
+               ((CARD32)blue >> 8);
+
+    BEGIN_ACCEL(7);
+    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
+    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
+                                  RADEON_TEX_BLEND_0_ENABLE);
+    /* The solid colour is supplied as the texture-factor constant and
+     * combined with the alpha of texture 0 (the mask).
+     */
+    OUT_ACCEL_REG(RADEON_PP_TFACTOR_0, srccolor);
+    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_A_TFACTOR_COLOR |
+                                        RADEON_COLOR_ARG_B_T0_ALPHA);
+    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_A_TFACTOR_ALPHA |
+                                        RADEON_ALPHA_ARG_B_T0_ALPHA);
+    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
+                                     RADEON_SE_VTX_FMT_ST0);
+    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+
+/* Prepare the R100 3D engine for compositing a CPU-memory picture onto the
+ * screen.
+ *
+ * pScrn                 screen being drawn to
+ * op                    Render operation, looked up via RadeonGetBlendCntl()
+ * srcFormat, dstFormat  picture formats of source texture and destination
+ * texPtr, texPitch      source pixels and their row pitch in bytes
+ * width, height, flags  passed on to R100SetupTexture
+ *
+ * Returns FALSE when the operation is unsupported or the texture cannot be
+ * uploaded; TRUE once all state has been emitted.
+ */
+static Bool
+FUNC_NAME(R100SetupForCPUToScreenTexture) (
+    ScrnInfoPtr pScrn,
+    int op,
+    CARD32 srcFormat,
+    CARD32 dstFormat,
+    CARD8 *texPtr,
+    int texPitch,
+    int width,
+    int height,
+    int flags
+)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    CARD32 rb3d_format, color_blend, blend_cntl;
+    ACCEL_PREAMBLE();
+
+    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
+    if (blend_cntl == 0)
+        return FALSE;
+
+    if (!info->XInited3D)
+        RADEONInit3DEngine(pScrn);
+
+    if (!FUNC_NAME(R100SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
+                                     height, flags))
+        return FALSE;
+
+    rb3d_format = RadeonGetColorFormat(dstFormat);
+
+    /* An a8 source contributes no colour, only alpha. */
+    color_blend = (srcFormat == PICT_a8) ? RADEON_COLOR_ARG_C_ZERO
+                                         : RADEON_COLOR_ARG_C_T0_COLOR;
+
+    BEGIN_ACCEL(6);
+    OUT_ACCEL_REG(RADEON_RB3D_CNTL, rb3d_format | RADEON_ALPHA_BLEND_ENABLE);
+    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
+                                  RADEON_TEX_BLEND_0_ENABLE);
+    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, color_blend);
+    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_C_T0_ALPHA);
+    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
+                                     RADEON_SE_VTX_FMT_ST0);
+    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+
+/* Draw one textured rectangle (R100): a width x height region of the
+ * current texture starting at (srcx, srcy) is rendered to the destination
+ * at (dstx, dsty) as a four-vertex triangle fan.
+ *
+ * The colour buffer is re-based close to the destination so that vertex
+ * coordinates stay small: the 3D surface cannot simply be placed at the
+ * front buffer, because the 2048x2048 coordinate limit may be smaller than
+ * the (MergedFB) screen.
+ */
+static void
+FUNC_NAME(R100SubsequentCPUToScreenTexture) (
+    ScrnInfoPtr pScrn,
+    int dstx,
+    int dsty,
+    int srcx,
+    int srcy,
+    int width,
+    int height
+)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    CARD32 fboffset, colorpitch;
+    CARD32 vtx[16];
+    float l, t, r, b, fl, fr, ft, fb;
+    int byteshift, n;
+
+    ACCEL_PREAMBLE();
+
+    if (info->tilingEnabled) {
+        /* With colour tiling the offset cannot be arbitrary: re-base only in
+         * steps of 16 scanlines and keep the x coordinate untouched.
+         */
+        fboffset = info->fbLocation + pScrn->fbOffset +
+            (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
+        l = dstx;
+        t = (dsty % 16);
+    } else {
+        /* Re-base to the 16-byte boundary at or below the destination
+         * pixel; the leftover pixels become the left coordinate.
+         */
+        byteshift = (pScrn->bitsPerPixel >> 4);
+        fboffset = (info->fbLocation + pScrn->fbOffset +
+                    ((pScrn->displayWidth * dsty + dstx) << byteshift)) & ~15;
+        l = ((dstx << byteshift) % 16) >> byteshift;
+        t = 0.0;
+    }
+
+    r = width + l;
+    b = height + t;
+    fl = srcx;
+    fr = srcx + width;
+    ft = srcy;
+    fb = srcy + height;
+
+    colorpitch = pScrn->displayWidth |
+        ((info->tilingEnabled && (dsty <= pScrn->virtualY)) ?
+         RADEON_COLOR_TILE_ENABLE : 0);
+
+    /* Four vertices of (x, y, s, t): top-left, top-right, bottom-right,
+     * bottom-left.
+     */
+    vtx[0]  = F_TO_DW(l);  vtx[1]  = F_TO_DW(t);
+    vtx[2]  = F_TO_DW(fl); vtx[3]  = F_TO_DW(ft);
+
+    vtx[4]  = F_TO_DW(r);  vtx[5]  = F_TO_DW(t);
+    vtx[6]  = F_TO_DW(fr); vtx[7]  = F_TO_DW(ft);
+
+    vtx[8]  = F_TO_DW(r);  vtx[9]  = F_TO_DW(b);
+    vtx[10] = F_TO_DW(fr); vtx[11] = F_TO_DW(fb);
+
+    vtx[12] = F_TO_DW(l);  vtx[13] = F_TO_DW(b);
+    vtx[14] = F_TO_DW(fl); vtx[15] = F_TO_DW(fb);
+
+#ifdef ACCEL_CP
+    BEGIN_RING(25);
+
+    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
+    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 17));
+    /* RADEON_SE_VTX_FMT */
+    OUT_RING(RADEON_CP_VC_FRMT_XY |
+             RADEON_CP_VC_FRMT_ST0);
+    /* SE_VF_CNTL */
+    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
+             RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+             RADEON_CP_VC_CNTL_MAOS_ENABLE |
+             RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+             (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+    for (n = 0; n < 16; n++) {
+        OUT_RING(vtx[n]);
+    }
+
+    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+
+    ADVANCE_RING();
+#else
+    BEGIN_ACCEL(20);
+
+    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
+
+    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, RADEON_VF_PRIM_TYPE_TRIANGLE_FAN |
+                                     RADEON_VF_PRIM_WALK_DATA |
+                                     RADEON_VF_RADEON_MODE |
+                                     (4 << RADEON_VF_NUM_VERTICES_SHIFT));
+
+    for (n = 0; n < 16; n++) {
+        OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, vtx[n]);
+    }
+
+    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+    FINISH_ACCEL();
+#endif
+
+}
+
+/* Upload a CPU-side image into the offscreen render texture and program
+ * texture unit 0 (R200 register layout) to sample from it.
+ *
+ * pScrn      screen being drawn to
+ * format     Render picture format of the source pixels
+ * src        first source row
+ * src_pitch  distance between source rows, in bytes
+ * width      texture width in pixels, 1..2048
+ * height     texture height in pixels, 1..2048
+ * flags      XAA_RENDER_REPEAT selects the log2 size encoding; otherwise the
+ *            texture is set up as non-power-of-two
+ *
+ * Returns TRUE on success.  Returns FALSE when the size is out of range, the
+ * offscreen allocation fails, or (big-endian MMIO builds) the aperture byte
+ * swap cannot be set up for this texel size.
+ */
+static Bool FUNC_NAME(R200SetupTexture)(
+    ScrnInfoPtr pScrn,
+    CARD32 format,
+    CARD8 *src,
+    int src_pitch,
+    unsigned int width,
+    unsigned int height,
+    int flags)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    CARD8 *dst;
+    CARD32 tex_size = 0, txformat;
+    int dst_pitch, offset, size, tex_bytepp;
+#ifdef ACCEL_CP
+    CARD32 buf_pitch;
+    unsigned int hpass;
+    CARD8 *tmp_dst;
+#else
+    unsigned int row;
+#endif
+    ACCEL_PREAMBLE();
+
+    /* A zero dimension would make ATILog2() return -1 (negative shift below)
+     * and wrap the "size - 1" encoding, so reject it along with oversize.
+     */
+    if ((width == 0) || (height == 0) || (width > 2048) || (height > 2048))
+        return FALSE;
+
+    txformat = RadeonGetTextureFormat(format);
+    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
+
+    /* Destination rows are padded to a multiple of 64 bytes. */
+    dst_pitch = (width * tex_bytepp + 63) & ~63;
+    size = dst_pitch * height;
+
+    if (!AllocateLinear(pScrn, size))
+        return FALSE;
+
+#ifndef ACCEL_CP
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* Only change the aperture swap mode once nothing else can fail before
+     * the copy: an early return after this point would otherwise leave
+     * RADEON_SURFACE_CNTL modified without the matching restore.
+     */
+    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
+        xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
+                   "failed!\n", __func__);
+        return FALSE;
+    }
+#endif
+
+#endif
+
+    if (flags & XAA_RENDER_REPEAT) {
+        txformat |= ATILog2(width) << R200_TXFORMAT_WIDTH_SHIFT;
+        txformat |= ATILog2(height) << R200_TXFORMAT_HEIGHT_SHIFT;
+    } else {
+        tex_size = ((height - 1) << 16) | (width - 1);
+        txformat |= RADEON_TXFORMAT_NON_POWER2;
+    }
+
+    /* RenderTex->offset is in screen pixels; convert to a byte offset. */
+    offset = info->RenderTex->offset * pScrn->bitsPerPixel / 8;
+    dst = (CARD8*)(info->FB + offset);
+
+    /* Upload texture to card. */
+
+#ifdef ACCEL_CP
+
+    /* RADEONHostDataBlit() advances dst and decrements height itself and
+     * reports in hpass how many rows the current pass covers.
+     */
+    while ( height )
+    {
+        tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
+                                      dst_pitch, &buf_pitch,
+                                      &dst, &height, &hpass );
+        RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
+                                    hpass, buf_pitch, src_pitch );
+        src += hpass * src_pitch;
+    }
+
+    RADEON_PURGE_CACHE();
+    RADEON_WAIT_UNTIL_IDLE();
+
+#else
+
+    if (info->accel->NeedToSync)
+        info->accel->Sync(pScrn);
+
+    /* Direct copy through the framebuffer aperture, one row at a time. */
+    for (row = 0; row < height; row++) {
+        memcpy(dst, src, width * tex_bytepp);
+        src += src_pitch;
+        dst += dst_pitch;
+    }
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    RADEONRestoreByteswap(info);
+#endif
+
+#endif /* ACCEL_CP */
+
+    BEGIN_ACCEL(6);
+    OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+    OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+    OUT_ACCEL_REG(R200_PP_TXSIZE_0, tex_size);
+    /* The pitch register is written as pitch - 32 (presumably the hardware
+     * encoding -- TODO confirm against the register reference).
+     */
+    OUT_ACCEL_REG(R200_PP_TXPITCH_0, dst_pitch - 32);
+    OUT_ACCEL_REG(R200_PP_TXOFFSET_0, offset + info->fbLocation +
+                                      pScrn->fbOffset);
+    /* NOTE(review): the R100 variant programs LINEAR filtering where this
+     * one uses NEAREST; kept as found.
+     */
+    OUT_ACCEL_REG(R200_PP_TXFILTER_0, R200_MAG_FILTER_NEAREST |
+                                      R200_MIN_FILTER_NEAREST |
+                                      R200_CLAMP_S_WRAP |
+                                      R200_CLAMP_T_WRAP);
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+/* Prepare the R200 3D engine for compositing a solid colour through an
+ * alpha mask supplied from CPU memory.
+ *
+ * pScrn                  screen being drawn to
+ * op                     Render operation, looked up via RadeonGetBlendCntl()
+ * red/green/blue/alpha   16-bit colour channels; only the top byte of each
+ *                        is used
+ * maskFormat             picture format of the mask pixels
+ * dstFormat              picture format of the destination
+ * alphaPtr, alphaPitch   mask pixels and their row pitch in bytes
+ * width, height, flags   passed on to R200SetupTexture
+ *
+ * Returns FALSE when the operation is unsupported or the mask cannot be
+ * uploaded; TRUE once all state has been emitted.
+ */
+static Bool
+FUNC_NAME(R200SetupForCPUToScreenAlphaTexture) (
+    ScrnInfoPtr pScrn,
+    int op,
+    CARD16 red,
+    CARD16 green,
+    CARD16 blue,
+    CARD16 alpha,
+    CARD32 maskFormat,
+    CARD32 dstFormat,
+    CARD8 *alphaPtr,
+    int alphaPitch,
+    int width,
+    int height,
+    int flags
+)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    CARD32 colorformat, srccolor, blend_cntl;
+    ACCEL_PREAMBLE();
+
+    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
+    if (blend_cntl == 0)
+        return FALSE;
+
+    if (!info->XInited3D)
+        RADEONInit3DEngine(pScrn);
+
+    if (!FUNC_NAME(R200SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
+                                     width, height, flags))
+        return FALSE;
+
+    colorformat = RadeonGetColorFormat(dstFormat);
+
+    /* Pack the top byte of each channel as A:R:G:B, 8 bits each.  The
+     * channels are widened to CARD32 before shifting: as promoted ints,
+     * (alpha & 0xff00) << 16 would shift a 1 into the sign bit, which is
+     * undefined behaviour in C.
+     */
+    srccolor = (((CARD32)alpha & 0xff00) << 16) |
+               (((CARD32)red & 0xff00) << 8) |
+               ((CARD32)green & 0xff00) |
+               ((CARD32)blue >> 8);
+
+    BEGIN_ACCEL(10);
+    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
+    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
+                                  RADEON_TEX_BLEND_0_ENABLE);
+    /* The solid colour is supplied as the texture-factor constant and
+     * combined with the alpha of register R0 (the mask texture).
+     */
+    OUT_ACCEL_REG(R200_PP_TFACTOR_0, srccolor);
+    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_A_TFACTOR_COLOR |
+                                      R200_TXC_ARG_B_R0_ALPHA);
+    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
+    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_A_TFACTOR_ALPHA |
+                                      R200_TXA_ARG_B_R0_ALPHA);
+    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
+    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
+    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+/* Prepare the R200 3D engine for compositing a CPU-memory picture onto the
+ * screen.
+ *
+ * pScrn                 screen being drawn to
+ * op                    Render operation, looked up via RadeonGetBlendCntl()
+ * srcFormat, dstFormat  picture formats of source texture and destination
+ * texPtr, texPitch      source pixels and their row pitch in bytes
+ * width, height, flags  passed on to R200SetupTexture
+ *
+ * Returns FALSE when the operation is unsupported or the texture cannot be
+ * uploaded; TRUE once all state has been emitted.
+ */
+static Bool
+FUNC_NAME(R200SetupForCPUToScreenTexture) (
+    ScrnInfoPtr pScrn,
+    int op,
+    CARD32 srcFormat,
+    CARD32 dstFormat,
+    CARD8 *texPtr,
+    int texPitch,
+    int width,
+    int height,
+    int flags
+)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    CARD32 rb3d_format, color_blend, blend_cntl;
+    ACCEL_PREAMBLE();
+
+    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
+    if (blend_cntl == 0)
+        return FALSE;
+
+    if (!info->XInited3D)
+        RADEONInit3DEngine(pScrn);
+
+    if (!FUNC_NAME(R200SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
+                                     height, flags))
+        return FALSE;
+
+    rb3d_format = RadeonGetColorFormat(dstFormat);
+
+    /* An a8 source contributes no colour, only alpha. */
+    color_blend = (srcFormat == PICT_a8) ? R200_TXC_ARG_C_ZERO
+                                         : R200_TXC_ARG_C_R0_COLOR;
+
+    BEGIN_ACCEL(9);
+    OUT_ACCEL_REG(RADEON_RB3D_CNTL, rb3d_format | RADEON_ALPHA_BLEND_ENABLE);
+    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
+                                  RADEON_TEX_BLEND_0_ENABLE);
+    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, color_blend);
+    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
+    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_C_R0_ALPHA);
+    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
+    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
+    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+/* Draw one textured rectangle (R200): a width x height region of the
+ * current texture starting at (srcx, srcy) is rendered to the destination
+ * at (dstx, dsty) using four vertices (a triangle fan through the CP ring,
+ * a quad list through MMIO).
+ *
+ * The colour buffer is re-based close to the destination so that vertex
+ * coordinates stay small: the 3D surface cannot simply be placed at the
+ * front buffer, because the 2048x2048 coordinate limit may be smaller than
+ * the (MergedFB) screen and offscreen pictures may lie beyond it.
+ */
+static void
+FUNC_NAME(R200SubsequentCPUToScreenTexture) (
+    ScrnInfoPtr pScrn,
+    int dstx,
+    int dsty,
+    int srcx,
+    int srcy,
+    int width,
+    int height
+)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    CARD32 fboffset, colorpitch;
+    CARD32 vtx[16];
+    float l, t, r, b, fl, fr, ft, fb;
+    int byteshift, n;
+    ACCEL_PREAMBLE();
+
+    if (info->tilingEnabled) {
+        /* With colour tiling the offset cannot be arbitrary: re-base only in
+         * steps of 16 scanlines and keep the x coordinate untouched.
+         */
+        fboffset = info->fbLocation + pScrn->fbOffset +
+            (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
+        l = dstx;
+        t = (dsty % 16);
+    } else {
+        /* Re-base to the 16-byte boundary at or below the destination
+         * pixel; the leftover pixels become the left coordinate.
+         */
+        byteshift = (pScrn->bitsPerPixel >> 4);
+        fboffset = (info->fbLocation + pScrn->fbOffset +
+                    ((pScrn->displayWidth * dsty + dstx) << byteshift)) & ~15;
+        l = ((dstx << byteshift) % 16) >> byteshift;
+        t = 0.0;
+    }
+
+    r = width + l;
+    b = height + t;
+    fl = srcx;
+    fr = srcx + width;
+    ft = srcy;
+    fb = srcy + height;
+
+    colorpitch = pScrn->displayWidth |
+        ((info->tilingEnabled && (dsty <= pScrn->virtualY)) ?
+         RADEON_COLOR_TILE_ENABLE : 0);
+
+    /* Four vertices of (x, y, s, t): top-left, top-right, bottom-right,
+     * bottom-left.
+     */
+    vtx[0]  = F_TO_DW(l);  vtx[1]  = F_TO_DW(t);
+    vtx[2]  = F_TO_DW(fl); vtx[3]  = F_TO_DW(ft);
+
+    vtx[4]  = F_TO_DW(r);  vtx[5]  = F_TO_DW(t);
+    vtx[6]  = F_TO_DW(fr); vtx[7]  = F_TO_DW(ft);
+
+    vtx[8]  = F_TO_DW(r);  vtx[9]  = F_TO_DW(b);
+    vtx[10] = F_TO_DW(fr); vtx[11] = F_TO_DW(fb);
+
+    vtx[12] = F_TO_DW(l);  vtx[13] = F_TO_DW(b);
+    vtx[14] = F_TO_DW(fl); vtx[15] = F_TO_DW(fb);
+
+#ifdef ACCEL_CP
+    BEGIN_RING(24);
+
+    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
+
+    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 16));
+    /* RADEON_SE_VF_CNTL */
+    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
+             RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+             (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+    for (n = 0; n < 16; n++) {
+        OUT_RING(vtx[n]);
+    }
+
+    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+
+    ADVANCE_RING();
+#else
+    BEGIN_ACCEL(20);
+
+    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
+
+    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
+                                      RADEON_VF_PRIM_WALK_DATA |
+                                      (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
+
+    for (n = 0; n < 16; n++) {
+        OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, vtx[n]);
+    }
+
+    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+
+    FINISH_ACCEL();
+#endif
+}
+
+#undef FUNC_NAME
+#endif /* USE_XAA */