/*
   (c) Copyright 2000-2002  convergence integrated media GmbH.
   (c) Copyright 2002-2005  convergence GmbH.

   All rights reserved.

   Written by Denis Oliver Kropp <dok@directfb.org>,
              Andreas Hundt <andi@fischlustig.de>,
              Sven Neumann <neo@directfb.org>,
              Ville Syrjälä <syrjala@sci.fi> and
              Claudio Ciccani <klan@users.sf.net>.

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
*/

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <directfb.h>

#include <direct/messages.h>
#include <direct/memcpy.h>

#include <core/coredefs.h>
#include <core/coretypes.h>

#include <core/gfxcard.h>
#include <core/surfaces.h>

#include "nvidia.h"
#include "nvidia_mmio.h"
#include "nvidia_2d.h"


/*
 * Copy 'n' 32-bit words from 'src' to 'dst'.
 *
 * When ARCH_X86 is defined the transfer is a single "rep; movsl";
 * otherwise it is a word-by-word loop.  The fallback loop always moves
 * at least one word before testing the counter, so 'n' has to be
 * positive (the callers in this file only pass a non-zero remainder).
 */
static void
nv_copy32( volatile __u32 *dst, __u8 *src, int n )
{
     __u32 *out = (__u32*) dst;
     __u32 *in  = (__u32*) src;

#ifdef ARCH_X86
     __asm__ __volatile__(
          "rep; movsl"
          : "=&D" (out), "=&S" (in)
          : "c" (n), "0" (out), "1" (in)
          : "memory" );
#else
     for (;;) {
          *out++ = *in++;

          if (--n == 0)
               break;
     }
#endif
}

/*
 * Copy 'n' 16-bit pixels from 'src' to the 32-bit wide destination 'dst'.
 *
 * Pixels are moved two at a time as one 32-bit word ("rep; movsl" when
 * ARCH_X86 is defined, a plain loop otherwise).  If 'n' is odd, the last
 * pixel is written as one additional word whose upper half is zero.
 */
static void
nv_copy16( volatile __u32 *dst, __u8 *src, int n )
{
     __u32 *out = (__u32*) dst;
     __u16 *in  = (__u16*) src;

#ifdef ARCH_X86
     __asm__ __volatile__(
          "rep; movsl"
          : "=&D" (out), "=&S" (in)
          : "c" (n/2), "0" (out), "1" (in)
          : "memory" );
#else
     int pairs = n / 2;

     while (pairs--) {
          /* two source pixels form one destination word */
          *out++ = *((__u32*)in);
          in += 2;
     }
#endif

     /* trailing single pixel, zero-extended to 32 bits */
     if (n & 1)
          *out = *in;
}

/*
 * Clip 'rect' in place against a source of 'width' x 'height' pixels.
 *
 * A negative origin is moved to 0 and the size is reduced by the same
 * amount; the size is then limited to what remains of the source.
 * Only the rectangle is touched: any matching shift of a destination
 * position is left to the caller.
 *
 * All comparisons are done in signed arithmetic.  Comparing the signed
 * rectangle members directly against the unsigned dimensions would
 * convert negative values to huge unsigned ones, which made the
 * negative-origin handling unreachable and turned a negative size into
 * the full remaining extent.
 *
 * Returns true if a non-empty rectangle remains, false otherwise.
 */
static inline bool
nv_clip_source( DFBRectangle *rect, __u32 width, __u32 height )
{
     const int src_w = (int) width;
     const int src_h = (int) height;

     if (rect->x >= src_w || rect->y >= src_h)
          return false;

     if (rect->x < 0) {
          rect->w += rect->x;
          rect->x  = 0;
     }
     if (rect->y < 0) {
          rect->h += rect->y;
          rect->y  = 0;
     }

     if (rect->w > src_w - rect->x)
          rect->w = src_w - rect->x;
     if (rect->h > src_h - rect->y)
          rect->h = src_h - rect->y;

     return (rect->w > 0 && rect->h > 0);
}



/*
 * Fill 'rect' with the current 2D color (nvdev->color2d).
 *
 * If nvdev->dst_422 is set, the horizontal position and width are halved
 * (width rounded up) and 'rect' is modified accordingly.
 *
 * Always returns true.
 */
bool nvFillRectangle2D( void *drv, void *dev, DFBRectangle *rect )
{
     NVidiaDriverData *nvdrv = drv;
     NVidiaDeviceData *nvdev = dev;
     NVRectangle      *fill  = nvdrv->Rectangle;
     __u32             origin;
     __u32             extent;

     if (nvdev->dst_422) {
          rect->x = rect->x / 2;
          rect->w = (rect->w + 1) >> 1;
     }

     /* y/height go into the upper, x/width into the lower 16 bits */
     origin = (rect->y << 16) | (rect->x & 0xFFFF);
     extent = (rect->h << 16) | (rect->w & 0xFFFF);

     /* three register writes follow */
     nv_waitfifo( nvdrv, nvdev, 3 );
     fill->Color       = nvdev->color2d;
     fill->TopLeft     = origin;
     fill->WidthHeight = extent;

     return true;
}

/*
 * Fill the triangle 'tri' with the current 2D color (nvdev->color2d).
 *
 * Each vertex is packed with y in the upper and x in the lower 16 bits.
 *
 * Always returns true.
 */
bool nvFillTriangle2D( void *drv, void *dev, DFBTriangle *tri )
{
     NVidiaDriverData *nvdrv = drv;
     NVidiaDeviceData *nvdev = dev;
     NVTriangle       *obj   = nvdrv->Triangle;
     __u32             p0    = (tri->y1 << 16) | (tri->x1 & 0xFFFF);
     __u32             p1    = (tri->y2 << 16) | (tri->x2 & 0xFFFF);
     __u32             p2    = (tri->y3 << 16) | (tri->x3 & 0xFFFF);

     /* color plus three vertices */
     nv_waitfifo( nvdrv, nvdev, 4 );
     obj->Color          = nvdev->color2d;
     obj->TrianglePoint0 = p0;
     obj->TrianglePoint1 = p1;
     obj->TrianglePoint2 = p2;

     return true;
}

/*
 * Draw the outline of 'rect' with the current 2D color (nvdev->color2d),
 * using up to four one-pixel-thick filled rectangles.
 *
 * If nvdev->dst_422 is set, the horizontal position and width are halved
 * (width rounded up) and 'rect' is modified accordingly.
 *
 * Only the edges that actually exist are emitted:
 *   - the bottom edge needs a height of at least 2,
 *   - the left/right edges cover the rows between top and bottom and
 *     therefore need a height of at least 3,
 *   - the right edge additionally needs a width of at least 2.
 * This avoids packing a zero or negative height (h - 2) into the size
 * register and avoids emitting the same pixels twice for one-pixel-high
 * or one-pixel-wide rectangles.  An empty rectangle draws nothing.
 *
 * Always returns true.
 */
bool nvDrawRectangle2D( void *drv, void *dev, DFBRectangle *rect )
{
     NVidiaDriverData *nvdrv     = (NVidiaDriverData*) drv;
     NVidiaDeviceData *nvdev     = (NVidiaDeviceData*) dev;
     NVRectangle      *Rectangle = nvdrv->Rectangle;
     bool              bottom;
     bool              left;
     bool              right;
     int               writes;

     if (nvdev->dst_422) {
          rect->x /= 2;
          rect->w = (rect->w+1) >> 1;
     }

     if (rect->w <= 0 || rect->h <= 0)
          return true;

     bottom = (rect->h > 1);
     left   = (rect->h > 2);
     right  = (left && rect->w > 1);

     /* color + top edge, plus two writes for every further edge */
     writes = 3 + (bottom ? 2 : 0) + (left ? 2 : 0) + (right ? 2 : 0);

     nv_waitfifo( nvdrv, nvdev, writes );
     Rectangle->Color       = nvdev->color2d;
     /* top */
     Rectangle->TopLeft     = (rect->y << 16) | (rect->x & 0xFFFF);
     Rectangle->WidthHeight = (1       << 16) | (rect->w & 0xFFFF);
     /* bottom */
     if (bottom) {
          Rectangle->TopLeft     = ((rect->y + rect->h - 1) << 16) | (rect->x & 0xFFFF);
          Rectangle->WidthHeight = (1                       << 16) | (rect->w & 0xFFFF);
     }
     /* left */
     if (left) {
          Rectangle->TopLeft     = ((rect->y + 1) << 16) | (rect->x & 0xFFFF);
          Rectangle->WidthHeight = ((rect->h - 2) << 16) | 1;
     }
     /* right */
     if (right) {
          Rectangle->TopLeft     = ((rect->y + 1) << 16) | ((rect->x + rect->w - 1) & 0xFFFF);
          Rectangle->WidthHeight = ((rect->h - 2) << 16) | 1;
     }

     return true;
}

/*
 * Draw a line from (x1,y1) to (x2,y2) of 'line' with the current 2D
 * color (nvdev->color2d).
 *
 * Both end points are packed with y in the upper and x in the lower
 * 16 bits.
 *
 * Always returns true.
 */
bool nvDrawLine2D( void *drv, void *dev, DFBRegion *line )
{
     NVidiaDriverData *nvdrv = drv;
     NVidiaDeviceData *nvdev = dev;
     NVLine           *obj   = nvdrv->Line;
     __u32             from  = (line->y1 << 16) | (line->x1 & 0xFFFF);
     __u32             to    = (line->y2 << 16) | (line->x2 & 0xFFFF);

     /* color plus two end points */
     nv_waitfifo( nvdrv, nvdev, 3 );
     obj->Color         = nvdev->color2d;
     obj->Lin[0].point0 = from;
     obj->Lin[0].point1 = to;

     return true;
}

/*
 * Blit 'rect' of the current source to (dx,dy) of the destination.
 *
 * - With DSBLIT_DEINTERLACE set, the work is handed to nvStretchBlit()
 *   with a destination rectangle of the same size at (dx,dy).
 * - If nvdev->dst_422 is set, dx, rect->x and rect->w are halved
 *   (width rounded up); 'rect' is modified accordingly.
 * - If no blitting flags are set and source and destination formats
 *   match, the ScreenBlt object is used.  Otherwise the ScaledImage
 *   object is programmed with a 1:1 scale (DuDx = DvDy = 0x100000).
 *
 * Always returns true, except in the deinterlace case where the result
 * of nvStretchBlit() is returned.
 */
bool nvBlit( void *drv, void *dev, DFBRectangle *rect, int dx, int dy )
{
     NVidiaDriverData *nvdrv = (NVidiaDriverData*) drv;
     NVidiaDeviceData *nvdev = (NVidiaDeviceData*) dev;

     if (nvdev->blittingflags & DSBLIT_DEINTERLACE) {
          DFBRectangle dest;

          dest.x = dx;
          dest.y = dy;
          dest.w = rect->w;
          dest.h = rect->h;

          return nvStretchBlit( drv, dev, rect, &dest );
     }

     if (nvdev->dst_422) {
          dx      = dx / 2;
          rect->x = rect->x / 2;
          rect->w = (rect->w + 1) >> 1;
     }

     if (!nvdev->blittingflags && nvdev->src_format == nvdev->dst_format) {
          /* plain copy: source origin, destination origin, size */
          NVScreenBlt *blt = nvdrv->ScreenBlt;

          nv_waitfifo( nvdrv, nvdev, 3 );
          blt->TopLeftSrc  = (rect->y << 16) | (rect->x & 0xFFFF);
          blt->TopLeftDst  = (dy      << 16) | (dx      & 0xFFFF);
          blt->WidthHeight = (rect->h << 16) | (rect->w & 0xFFFF);
     }
     else {
          NVScaledImage *scaled    = nvdrv->ScaledImage;
          DFBRectangle  *clip      = &nvdev->clip;
          /* source dimensions rounded up to an even value */
          __u32          in_w      = (nvdev->src_width  + 1) & ~1;
          __u32          in_h      = (nvdev->src_height + 1) & ~1;
          __u32          in_format = nvdev->src_pitch & 0xFFFF;

          if (nvdev->dst_422)
               in_w >>= 1;

          /* origin/filter bits are only added above NV_ARCH_04 */
          if (nvdev->arch > NV_ARCH_04)
               in_format |= SCALEDIMAGE_IN_FORMAT_ORIGIN_CORNER |
                            SCALEDIMAGE_IN_FORMAT_FILTER_NEAREST;

          nv_waitfifo( nvdrv, nvdev, 1 );
          scaled->SetColorFormat = nvdev->scaler_format;

          nv_waitfifo( nvdrv, nvdev, 6 );
          scaled->ClipPoint     = (clip->y << 16) | (clip->x & 0xFFFF);
          scaled->ClipSize      = (clip->h << 16) | (clip->w & 0xFFFF);
          scaled->ImageOutPoint = (dy      << 16) | (dx      & 0xFFFF);
          scaled->ImageOutSize  = (rect->h << 16) | (rect->w & 0xFFFF);
          scaled->DuDx          = 0x100000;
          scaled->DvDy          = 0x100000;

          nv_waitfifo( nvdrv, nvdev, 4 );
          scaled->ImageInSize   = (in_h << 16) | (in_w & 0xFFFF);
          scaled->ImageInFormat = in_format;
          scaled->ImageInOffset = nvdev->src_offset;
          /* source position shifted left by four extra bits */
          scaled->ImageInPoint  = (rect->y << 20) | ((rect->x << 4) & 0xFFFF);
     }

     return true;
}

/*
 * Blit 'rect' of a source surface in system memory (nvdev->src_address)
 * to (dx,dy) of the destination by pushing the pixel data through the
 * ImageBlt object.
 *
 * - With DSBLIT_DEINTERLACE set, the work is handed to
 *   nvStretchBlitFromCPU() with a destination rectangle at (dx,dy) that
 *   has the same width and height as the source rectangle.
 * - 'rect' is clipped in place against the source dimensions; if
 *   nothing remains, the function returns without drawing.
 * - For 2 bytes per pixel sources the input width is rounded up to an
 *   even number, because two pixels are sent per 32-bit word.
 *
 * Each line is uploaded in chunks of 128 words; the remainder of a line
 * is sent with nv_copy16()/nv_copy32().
 *
 * Returns true, or the result of nvStretchBlitFromCPU() when
 * deinterlacing.
 */
bool nvBlitFromCPU( void *drv, void *dev, DFBRectangle *rect, int dx, int dy )
{
     NVidiaDriverData *nvdrv    = (NVidiaDriverData*) drv;
     NVidiaDeviceData *nvdev    = (NVidiaDeviceData*) dev;
     NVImageBlt       *ImageBlt = nvdrv->ImageBlt;
     __u8             *src      = nvdev->src_address;
     __u32             src_w;
     __u32             src_h;
     int               w, h;
     
     if (nvdev->blittingflags & DSBLIT_DEINTERLACE) {
          /* destination size equals the source size (w/h, not x/y) */
          DFBRectangle dr = { dx, dy, rect->w, rect->h };
          return nvStretchBlitFromCPU( drv, dev, rect, &dr );
     }
     
     if (!nv_clip_source( rect, nvdev->src_width, nvdev->src_height ))
          return true;

     src_w = (DFB_BYTES_PER_PIXEL(nvdev->src_format) == 2)
             ? ((rect->w + 1) & ~1) : rect->w;
     src_h = rect->h;

     nv_waitfifo( nvdrv, nvdev, 1 );
     ImageBlt->SetColorFormat = nvdev->system_format;
     
     nv_waitfifo( nvdrv, nvdev, 3 );
     ImageBlt->Point   = (dy      << 16) | (dx      & 0xFFFF);
     ImageBlt->SizeOut = (rect->h << 16) | (rect->w & 0xFFFF);
     ImageBlt->SizeIn  = (src_h   << 16) | (src_w   & 0xFFFF);
     
     switch (nvdev->src_format) {
          case DSPF_ARGB1555:
          case DSPF_RGB16:
               /* 16 bit: 256 pixels make up one 128 word chunk */
               src += rect->y * nvdev->src_pitch + rect->x * 2;
               for (h = rect->h; h--;) {
                    __u8 *S = src;
                    
                    for (w = rect->w; w > 255; w -= 256) {
                         nv_waitfifo( nvdrv, nvdev, 128 );
                         direct_memcpy( (void*)&ImageBlt->Pixel[0], S, 128*4 );
                         S += 128*4;
                    }
                    if (w > 0) {
                         /* remaining pixels, two per word, rounded up */
                         nv_waitfifo( nvdrv, nvdev, (w+1)>>1 );
                         nv_copy16( &ImageBlt->Pixel[0], S, w );
                    }
                    
                    src += nvdev->src_pitch;
               }
               break;
               
          default:
               /* 32 bit: one pixel per word, 128 pixels per chunk */
               src += rect->y * nvdev->src_pitch + rect->x * 4;
               for (h = rect->h; h--;) {
                    __u8 *S = src;
                    
                    for (w = rect->w; w > 127; w -= 128) {
                         nv_waitfifo( nvdrv, nvdev, 128 );
                         direct_memcpy( (void*)&ImageBlt->Pixel[0], S, 128*4 );
                         S += 128*4;
                    }
                    if (w > 0) {
                         nv_waitfifo( nvdrv, nvdev, w );
                         nv_copy32( &ImageBlt->Pixel[0], S, w );
                    }
                    
                    src += nvdev->src_pitch;
               }
               break;
     }
     
     return true;
}

/*
 * Stretch-blit source rectangle 'sr' to destination rectangle 'dr'
 * using the ScaledImage object.
 *
 * - If nvdev->dst_422 is set, x and width of both rectangles and the
 *   source width are halved (rectangle widths rounded up).
 * - With DSBLIT_DEINTERLACE set, the source y is halved and the source
 *   height is halved rounding up.
 * Both rectangles are modified in place by these adjustments.
 *
 * The scale factors are written as source size divided by destination
 * size with 20 fractional bits, so 'dr' must have a non-zero width and
 * height.
 *
 * Always returns true.
 */
bool nvStretchBlit( void *drv, void *dev, DFBRectangle *sr, DFBRectangle *dr )
{
     NVidiaDriverData *nvdrv  = (NVidiaDriverData*) drv;
     NVidiaDeviceData *nvdev  = (NVidiaDeviceData*) dev;
     NVScaledImage    *scaled = nvdrv->ScaledImage;
     DFBRectangle     *clip   = &nvdev->clip;
     /* source dimensions rounded up to an even value */
     __u32             in_w   = (nvdev->src_width  + 1) & ~1;
     __u32             in_h   = (nvdev->src_height + 1) & ~1;

     if (nvdev->dst_422) {
          sr->x = sr->x / 2;
          sr->w = (sr->w + 1) >> 1;
          dr->x = dr->x / 2;
          dr->w = (dr->w + 1) >> 1;
          in_w >>= 1;
     }

     if (nvdev->blittingflags & DSBLIT_DEINTERLACE) {
          sr->y = sr->y / 2;
          sr->h = (sr->h + 1) / 2;
     }

     nv_waitfifo( nvdrv, nvdev, 1 );
     scaled->SetColorFormat = nvdev->scaler_format;

     nv_waitfifo( nvdrv, nvdev, 6 );
     scaled->ClipPoint     = (clip->y << 16) | (clip->x & 0xFFFF);
     scaled->ClipSize      = (clip->h << 16) | (clip->w & 0xFFFF);
     scaled->ImageOutPoint = (dr->y << 16) | (dr->x & 0xFFFF);
     scaled->ImageOutSize  = (dr->h << 16) | (dr->w & 0xFFFF);
     scaled->DuDx          = (sr->w << 20) / dr->w;
     scaled->DvDy          = (sr->h << 20) / dr->h;

     nv_waitfifo( nvdrv, nvdev, 4 );
     scaled->ImageInSize   = (in_h << 16) | (in_w & 0xFFFF);
     scaled->ImageInFormat = (nvdev->src_pitch & 0xFFFF) |
                              nvdev->scaler_filter;
     scaled->ImageInOffset = nvdev->src_offset;
     /* source position shifted left by four extra bits */
     scaled->ImageInPoint  = (sr->y << 20) | ((sr->x << 4) & 0xFFFF);

     return true;
}

/*
 * Stretch-blit source rectangle 'sr' of a surface in system memory
 * (nvdev->src_address) to destination rectangle 'dr' by pushing the
 * pixel data through the StretchedImage object.
 *
 * - 'sr' is clipped in place against the source dimensions; if nothing
 *   remains, the function returns without drawing.
 * - With DSBLIT_DEINTERLACE set, the source y and height are halved.
 *   If that leaves no source line, the function returns without
 *   drawing, since the height is used as a divisor for the vertical
 *   scale factor.
 * - For 2 bytes per pixel sources the input width is rounded up to an
 *   even number, because two pixels are sent per 32-bit word.
 *
 * Each line is uploaded in chunks of 128 words; the remainder of a line
 * is sent with nv_copy16()/nv_copy32().
 *
 * Always returns true.
 */
bool nvStretchBlitFromCPU( void *drv, void *dev, 
                           DFBRectangle *sr, DFBRectangle *dr )
{
     NVidiaDriverData *nvdrv          = (NVidiaDriverData*) drv;
     NVidiaDeviceData *nvdev          = (NVidiaDeviceData*) dev;
     NVStretchedImage *StretchedImage = nvdrv->StretchedImage;
     DFBRectangle     *cr             = &nvdev->clip; 
     __u8             *src            = nvdev->src_address;
     __u32             src_w;
     __u32             src_h;
     int               h, w;

     if (!nv_clip_source( sr, nvdev->src_width, nvdev->src_height ))
          return true;

     if (nvdev->blittingflags & DSBLIT_DEINTERLACE) {
          sr->y /= 2;
          sr->h /= 2;
     }

     /* halving a single line leaves nothing to upload or divide by */
     if (sr->w <= 0 || sr->h <= 0)
          return true;
     
     src_w = (DFB_BYTES_PER_PIXEL(nvdev->src_format) == 2)
             ? ((sr->w + 1) & ~1) : sr->w;
     src_h = sr->h;

     nv_waitfifo( nvdrv, nvdev, 1 );
     StretchedImage->SetColorFormat = nvdev->system_format;
     
     /* six register writes follow */
     nv_waitfifo( nvdrv, nvdev, 6 );
     StretchedImage->ImageInSize   = (src_h << 16) | (src_w & 0xFFFF);
     /* destination size divided by source size, 20 fractional bits */
     StretchedImage->DxDu          = (dr->w << 20) /  src_w;
     StretchedImage->DyDv          = (dr->h << 20) /  src_h;
     StretchedImage->ClipPoint     = (cr->y << 16) | (cr->x & 0xFFFF);
     StretchedImage->ClipSize      = (cr->h << 16) | (cr->w & 0xFFFF);
     StretchedImage->ImageOutPoint = (dr->y << 20) | ((dr->x<<4) & 0xFFFF);

     switch (nvdev->src_format) {
          case DSPF_ARGB1555:
          case DSPF_RGB16:
               /* 16 bit: 256 pixels make up one 128 word chunk */
               src += sr->y * nvdev->src_pitch + sr->x * 2;
               for (h = sr->h; h--;) {
                    __u8 *S = src;
                    
                    for (w = sr->w; w > 255; w -= 256) {
                         nv_waitfifo( nvdrv, nvdev, 128 );
                         direct_memcpy( (void*)&StretchedImage->Pixel[0], S, 128*4 );
                         S += 128*4;
                    }
                    if (w > 0) {
                         /* remaining pixels, two per word, rounded up */
                         nv_waitfifo( nvdrv, nvdev, (w+1)>>1 );
                         nv_copy16( &StretchedImage->Pixel[0], S, w );
                    }

                    src += nvdev->src_pitch;
               }
               break;
               
          default:
               /* 32 bit: one pixel per word, 128 pixels per chunk */
               src += sr->y * nvdev->src_pitch + sr->x * 4;
               for (h = sr->h; h--;) {
                    __u8 *S = src;
                    
                    for (w = sr->w; w > 127; w -= 128) {
                         nv_waitfifo( nvdrv, nvdev, 128 );
                         direct_memcpy( (void*)&StretchedImage->Pixel[0], S, 128*4 );
                         S += 128*4;
                    }
                    if (w > 0) {
                         nv_waitfifo( nvdrv, nvdev, w );
                         nv_copy32( &StretchedImage->Pixel[0], S, w );
                    }

                    src += nvdev->src_pitch;
               }
               break;
     }

     return true;
}

