/**************************************************************************
* DSemu: Scanline-based tiled and bitmap graphics (gpu.c)                 *
* Released under the terms of the BSD Public Licence                      *
* gladius, 2004-5; ported to DSemu by Imran Nazar                         *
**************************************************************************/

#include <windows.h>
#include <stdio.h>
#include "err.h"
#include "defs.h"
#include "dsioreg.h"
#include "gpu.h"
#include "dma.h"
#include "vtbl.h"
#include "int.h"
#include "res.h"

//#define GPUDEBUG

// Emulated video memory: a 1MB buffer allocated by GPUinit().
u16 *VRAM;
// Byte-wise view of the same buffer as VRAM (set in GPUinit()).
u8  *VRAM8;

// Sprite attribute memory; GPUsprites() uses OAM+lcd*512 as the table of one LCD.
u16 OAM[1024];
// Palette entries; the renderers index this as lcd*512 + colour for backgrounds
// and lcd*512 + 256 + colour for sprites.
u16 GPUPAL[1024];
// Byte-wise view of GPUPAL.
u8 *GPUPAL8=(u8*)GPUPAL;

// Running reference points of the rotated/bitmap backgrounds, indexed
// [lcd][0] for BG2 and [lcd][1] for BG3; advanced once per line in GPUhblLCD0/1.
u32 BGX[2][2],BGY[2][2];
// Vertical mosaic line counters, indexed [lcd][background].
u8 mospy[2][4];

// Current scanline number of each LCD (wraps to 0 after 262).
u16 curline[2];
// Incremented each time either LCD reaches line 192; reported by GPUfini().
__int64 framecount;

// VRAM mapping table, defined in another translation unit.
extern u32 VRAMmap[553];

// Sprite dimensions as {width, height} pairs. The four rows hold the same
// values GPUsprites() decodes for square, horizontal and vertical sprites,
// followed by a row of {-1,-1} entries.
// NOTE(review): not referenced anywhere in this file; presumably indexed by
// (shape*4 + size) -- confirm against external users.
int sprsize[16][2]={
    { 8, 8},{16,16},{32,32},{64,64},
    {16, 8},{32, 8},{32,16},{64,32},
    { 8,16},{ 8,32},{16,32},{32,64},
    {-1,-1},{-1,-1},{-1,-1},{-1,-1},
};

// Allocate the 1MB VRAM buffer, publish its byte-wise alias and reset all GPU
// state. The function returns through the RETFAIL/RETPASS macros (err.h); the
// values they return are not visible from this file.
int GPUinit()
{
    // NOTE(review): these locals look unused here; they are kept because the
    // RETFAIL/RETPASS macros are opaque from this file -- confirm and remove.
    u8 r,g,b; char str[50]; int a;

    VRAM = (u16*)malloc(1024*1024);
    if(!VRAM) RETFAIL("GPU: FAIL: VRAM allocation.");
    logvt->append("GPU: VRAM allocated.");
    VRAM8=(u8*)VRAM;

    // GPUreset() clears VRAM, so it must run after the allocation succeeded.
    GPUreset();

    RETPASS("GPU: Initialised.");
}

void GPUfini()
{
    char str[2048];
    free(VRAM);
    sprintf(str,"GPU: Shutdown after %lld frames.",framecount);
    logvt->append(str);
}

// Return the GPU to its power-on state: clear VRAM, palettes and sprite
// memory, zero the per-LCD scanline/affine/mosaic state and load the identity
// scale (256 = 1.0 in 8.8 fixed point) into the PA/PD registers of both LCDs.
// VRAM must already be allocated (GPUinit() calls this after malloc).
void GPUreset()
{
    memset(VRAM, 0, 1024*1024);
    memset(GPUPAL,0, sizeof(GPUPAL));
    // Clear the whole table: the second LCD's sprites live at OAM+512, which
    // a 512*2-byte clear would leave untouched.
    memset(OAM, 0, sizeof(OAM));
    curline[0]=0; curline[1]=0;
    memset(BGX, 0, sizeof(BGX));
    memset(BGY, 0, sizeof(BGY));
    REG(BG2PA)=256; REG(BG2PD)=256;
    REG(BG3PA)=256; REG(BG3PD)=256;
    // Same registers for the second LCD, 0x800 further on.
    REG(BG2PA+0x800)=256; REG(BG2PD+0x800)=256;
    REG(BG3PA+0x800)=256; REG(BG3PD+0x800)=256;
    framecount=0;
    memset(mospy, 0, sizeof(mospy));
}

void GPUpal(u16 *pbuf)
{
    int a,b,c,d; u32 xoff=0,yoff=8,idx=0;
    dbgOut(pbuf,264,8,"BGPAL_Main",0,0,0x7FFF);
    for(a=0;a<16;a++)
    {
        for(b=0;b<16;b++)
        {
	    for(c=0;c<8;c++)
	    {
	        for(d=0;d<8;d++)
	        {
	            pbuf[yoff*264+xoff]=GPUPAL[idx]; xoff++;
	        }
                yoff++; xoff-=8;
	    }
	    idx++; xoff+=8; yoff-=8;
	}
	xoff=0; yoff+=8;
    }

    xoff=136; yoff=8; idx=0;
    dbgOut(pbuf,264,8,"OBJPAL_Main",136,0,0x7FFF);
    for(a=0;a<16;a++)
    {
        for(b=0;b<16;b++)
        {
	    for(c=0;c<8;c++)
	    {
	        for(d=0;d<8;d++)
	        {
	            pbuf[yoff*264+xoff]=GPUPAL[256+idx]; xoff++;
	        }
                yoff++; xoff-=8;
	    }
	    idx++; xoff+=8; yoff-=8;
	}
	xoff=136; yoff+=8;
    }

    xoff=0; yoff=168; idx=0;
    dbgOut(pbuf,264,8,"BGPAL_Sub",0,160,0x7FFF);
    for(a=0;a<16;a++)
    {
        for(b=0;b<16;b++)
        {
	    for(c=0;c<8;c++)
	    {
	        for(d=0;d<8;d++)
	        {
	            pbuf[yoff*264+xoff]=GPUPAL[512+idx]; xoff++;
	        }
                yoff++; xoff-=8;
	    }
	    idx++; xoff+=8; yoff-=8;
	}
	xoff=0; yoff+=8;
    }

    xoff=136; yoff=168; idx=0;
    dbgOut(pbuf,264,8,"OBJPAL_Sub",136,160,0x7FFF);
    for(a=0;a<16;a++)
    {
        for(b=0;b<16;b++)
        {
	    for(c=0;c<8;c++)
	    {
	        for(d=0;d<8;d++)
	        {
	            pbuf[yoff*264+xoff]=GPUPAL[768+idx]; xoff++;
	        }
                yoff++; xoff-=8;
	    }
	    idx++; xoff+=8; yoff-=8;
	}
	xoff=136; yoff+=8;
    }
    
    for(a=0;a<264;a++) pbuf[156*264+a]=0x7FFF;
    for(a=0;a<312;a++) pbuf[a*264+132]=0x7FFF;
}

// Show the 5-bit red/green/blue components of palette entry idx (taken
// modulo 1024) as a hex readout in the palette viewer. Both text strips under
// the upper grids and the strip at row 304 are cleared first; the text then
// goes to the column (idx&256) and row (idx&512) matching the entry's grid.
void GPUpalcol(u16 *pbuf, int idx)
{
    char text[80];
    u16 entry=GPUPAL[idx&1023];
    u8 red=entry&0x1F;
    u8 green=(entry>>5)&0x1F;
    u8 blue=(entry>>10)&0x1F;
    int textx, texty, row;

    for(row=144;row<152;row++)
    {
        u16 *line=pbuf+264*row;
        dbgOutClear(line,132,1);
        dbgOutClear(line+136,132,1);
    }
    dbgOutClear(pbuf+264*304,264,8);

    sprintf(text,"R:%02Xh  G:%02Xh  B:%02Xh",red,green,blue);

    if(idx&256) textx=136; else textx=0;
    if(idx&512) texty=304; else texty=144;
    dbgOut(pbuf,264,8,text,textx,texty,0x7FFF);
}

//---START: Gladius' code--------------------------------------------------

// Render the current line (curline[lcd]) of tiled background `bg` on LCD `lcd`
// into scanline[0..255]. Pixels whose colour index is 0 leave scanline[]
// untouched, so earlier layers show through.
//
// Fields read from the background control word (VREG(BG0CNT+bg)):
//   bits 14-15  map size (256/512 pixels in each direction)
//   bits 8-12   map base, in steps of 0x400 16-bit entries
//   bits 2-3    tile data base, in steps of 0x4000 bytes
//   bit 7       set: 8-bit pixels (64 bytes/tile); clear: 4-bit (32 bytes/tile)
//   bit 6       mosaic enable
// Fields read from each map entry:
//   bits 0-9 tile number, 0x0400 horizontal flip, 0x0800 vertical flip,
//   bits 12-15 palette bank (4-bit tiles only).
void GPUscanTile(u16 *scanline, int lcd, int bg)
{
    u16 bgcnt, scrnBase, charBase, hofs, vofs, x, y, tileIdx, tileY;
    u16 tmpTileIdx, width, height, a, bgx, bgy, tileChar; u8 b, palnum;
    u8 mosx, mospx, mosy; u16 *vram; u8 *vram8; char str[100];

//    sprintf(str,"GPU: LCD%d: Scanline #%d: BG%d.",lcd,curline[lcd],bg);
//    logvt->append(str);

    bgcnt=VREG(BG0CNT+bg);
    switch((bgcnt>>14)&3)
    {
        case 0: width=256; height=256; break;
        case 1: width=512; height=256; break;
        case 2: width=256; height=512; break;
        case 3: width=512; height=512; break;
    }
    scrnBase=((bgcnt>>8)&0x1F)*0x400;
    charBase=((bgcnt>>2)&0x3)*0x4000;
    hofs=VREG(BG0HOFS+bg*2);
    vofs=VREG(BG0VOFS+bg*2);
    // Same VRAM location seen as 16-bit entries (map) and as bytes (tile data).
    vram=VRAM+(VRAMmap[lcd*128]<<13);
    vram8=VRAM8+(VRAMmap[lcd*128]<<14);
    if(bgcnt&0x0040)
    {
        // Mosaic: mospy[lcd][bg] counts lines since the last sampled line and
        // pulls vofs back by that amount; mospx does the same per pixel below.
        mosx=(VREG(MOSAIC)&0x000F);
        mosy=((VREG(MOSAIC)&0x00F0)>>4);
        mospx=mosx; if(!curline[lcd]) mospy[lcd][bg]=mosy;
        mospy[lcd][bg]++; if(mospy[lcd][bg]<=mosy) vofs-=mospy[lcd][bg];
        else mospy[lcd][bg]=0;
    } else { mospx=0; mosx=0; }
    if(bgcnt&0x0080)
    {
        // 8-bit tiles: 8 bytes per tile row.
        bgy=((curline[lcd]+vofs)&(height-1))/8;
        tileIdx=scrnBase+(((bgy&31)*32));
        // Maps taller than 256 pixels continue in a later 32x32 block.
        switch((bgcnt>>14)&3)
        {
            case 2: if(bgy>=32) tileIdx += 32*32; break;
            case 3: if(bgy>=32) tileIdx += 32*32*2; break;
        }
        tileY=((curline[lcd]+vofs)&7)*8;
//        sprintf(str,"lcd:%d line:%03d bg:%1d(8b) tileIdx:%08X tileY:%08X",(lcd),curline[(lcd)],(bg),tileIdx,tileY);
//        logvt->append(str);
        for(a=0;a<256;a++)
        {
            bgx=((a+hofs)&(width-1))/8;
            tmpTileIdx=tileIdx+((bgx&31));
            // Right-hand half of a 512-wide map is the next 32x32 block.
            if(bgx>=32) tmpTileIdx += 32*32;
            tileChar=vram[tmpTileIdx];
            x=(a+hofs)&7; y=tileY;
            if(tileChar&0x0400) x=7-x;
            if(tileChar&0x0800) y=56-y;
            // b is only refreshed on sampling pixels; other pixels reuse it.
            if(mospx>=mosx) b=vram8[charBase+((tileChar&0x03FF)*64)+x+y];
            mospx++; if(mospx>mosx) mospx=0;
            if(b) scanline[a]=GPUPAL[lcd*512+b];
        }
    } else {
        // 4-bit tiles: 4 bytes per tile row, two pixels per byte.
        bgy=((curline[lcd]+vofs)&(height-1))/8;
        tileIdx=scrnBase+(((bgy&31)*32));
        switch((bgcnt>>14)&3)
        {
            case 2: if(bgy>=32) tileIdx += 32*32; break;
            case 3: if(bgy>=32) tileIdx += 32*32*2; break;
        }
        tileY=((curline[lcd]+vofs)&7)*4;
//        sprintf(str,"lcd:%d line:%03d bg:%1d(8b) tileIdx:%08X tileY:%08X",(lcd),curline[(lcd)],(bg),tileIdx,tileY);
//        logvt->append(str);
        for(a=0;a<256;a++)
        {
            bgx=((a+hofs)&(width-1))/8;
            tmpTileIdx=tileIdx+((bgx&31));
            if(bgx>=32) tmpTileIdx += 32*32;
            tileChar=vram[tmpTileIdx];
            x=(a+hofs)&7; y=tileY;
            if(tileChar&0x0400) x=7-x;
            if(tileChar&0x0800) y=28-y;
            if(mospx>=mosx)
            {
                // Odd pixels come from the high nibble, even from the low one.
                b=vram8[charBase+((tileChar&0x03FF)*32)+(x/2)+y];
                if(x&1) b>>=4; b&=15;
            }
            palnum=((tileChar>>12)&15)*16;
            mospx++; if(mospx>mosx) mospx=0;
            if(b) scanline[a]=GPUPAL[lcd*512+b+palnum];
        }
    }
}

// Render the current line of rotated/scaled background `bg` (2 or 3) on LCD
// `lcd` into scanline[0..255]. The line starts at the running reference point
// BGX/BGY[lcd][bg-2] and steps by the PA (x) and PC (y) registers per pixel;
// coordinates carry 8 fractional bits. The map holds one byte per tile and
// tiles are 8-bit, 64 bytes each. Colour index 0 leaves scanline[] untouched.
//
// Control word bits used: 14-15 map size (128..1024 square), 13 wraparound
// (when clear, pixels outside the map are skipped), 8-12 map base in 0x800-byte
// steps, 2-3 tile base in 0x4000-byte steps, 6 mosaic enable.
//
// NOTE(review): bg is reduced by 2 before mospy[lcd][bg] is used, so the
// mosaic counters here share slots 0/1 with the tiled BG0/BG1 counters.
// NOTE(review): ax/ay are unsigned, so the ">= 0" tests below are always true.
// NOTE(review): several locals (hofs, vofs, tileIdx, tileY, bgx, bgy, palnum,
// vram) are declared but never used.
void GPUscanRot(u16 *scanline, int lcd, int bg)
{
    u16 bgcnt, scrnBase, charBase, hofs, vofs, tileIdx, tileY;
    u32 x,y; u16 ax,ay; s16 dx,dy; int trans; u8 mosx, mospx, mosy;
    u16 tmpTileIdx, width, height, a, bgx, bgy, tileChar; u8 b, palnum;
    u16 *vram; u8 *vram8;
    vram=VRAM+(VRAMmap[lcd*128]<<13);
    vram8=VRAM8+(VRAMmap[lcd*128]<<14);
    bgcnt=VREG(BG0CNT+bg); bg-=2;
    trans=(bgcnt&0x2000)==0;
    switch((bgcnt>>14)&3)
    {
        case 0: width=128; height=128; break;
        case 1: width=256; height=256; break;
        case 2: width=512; height=512; break;
        case 3: width=1024; height=1024; break;
    }
    scrnBase=((bgcnt>>8)&0x1F)*0x800;
    charBase=((bgcnt>>2)&0x3)*0x4000;
    x=BGX[lcd][bg]; y=BGY[lcd][bg];
    // The BG3 parameter registers sit 8 register slots after the BG2 ones.
    dx=VREG(BG2PA+(bg*8));
    dy=VREG(BG2PC+(bg*8));
    if(bgcnt&0x0040)
    {
        // Vertical mosaic: step the start point back by whole lines (256 = 1.0).
        mosx=(VREG(MOSAIC)&0x000F);
        mosy=((VREG(MOSAIC)&0x00F0)>>4);
        mospx=mosx; if(!curline[lcd]) mospy[lcd][bg]=mosy;
        mospy[lcd][bg]++; if(mospy[lcd][bg]<=mosy) y-=mospy[lcd][bg]*256;
        else mospy[lcd][bg]=0;
    } else { mospx=0; mosx=0; }
    for(a=0;a<256;a++)
    {
        // Drop the fractional bits to get the texel position.
        ax=x>>8; ay=y>>8;
        if ((ax >= 0 && ax < width && ay >= 0 && ay < height) || !trans)
        {
            // With wraparound the coordinates are masked into the map.
            tmpTileIdx=scrnBase+(((ay&(height-1))/8)*(width/8)+((ax&(width-1))/8));
            tileChar=vram8[tmpTileIdx];
            // b is refreshed only on sampling pixels; others reuse the last value.
            if(mospx==mosx) b=vram8[charBase+tileChar*64+((ay&7)*8)+(ax&7)];
            mospx+=(dx>>8); if(mospx>mosx) mospx=0;
            if(b) scanline[a]=GPUPAL[lcd*512+b];
        }
        x+=dx; y+=dy;
    }
}

// Draw every sprite of priority `pri` (0-3) that crosses the current line of
// LCD `lcd` into scanline[0..255]. Sprites are walked from entry 127 down to
// 0, so lower-numbered entries end up on top. Colour index 0 is transparent.
// Does nothing unless bit 12 of DISPCNT is set.
//
// Attribute bits used:
//   attr0: 0-7 Y, 8 rot/scale, 9 double-size (rot/scale) or disable (normal),
//          13 8-bit colour, 14-15 shape
//   attr1: 0-8 X, 9-13 rot/scale parameter set, 12 h-flip, 13 v-flip (normal
//          sprites only), 14-15 size
//   attr2: 0-9 first tile, 10-11 priority, 12-15 palette bank (4-bit colour)
//
// Fixes relative to the previous revision:
//  * the two unbraced if/else pairs bound their `else` to the inner `if`, so
//    disabled sprites were never skipped, non-wrapping sprites were never
//    Y-clipped and wrapping sprites were always rejected;
//  * the first line of a vertically wrapping sprite was rejected;
//  * scale/width/height were carried over from the previously drawn sprite;
//  * the affine accumulators were 16-bit and overflowed for large sprites;
//  * pixels were clipped at column 240 although the line is 256 wide.
void GPUsprites(u16 *scanline, int lcd, int pri)
{
    const int lcdWidth=256;
    u16 dispcnt=VREG(DISPCNT), oamNum, attr0,attr1,attr2;
    int x, y, yend, i, size, width, height, rwidth, rheight, scale;
    int spritey, baseSprite, baseInc, curIdx, palIdx; u8 b;
    s16 dx,dmx,dy,dmy; int rx,ry,tx,ty,cx,cy,pitch,rotScaleParam;
    u8 *vram8; u16 *oam;
    vram8=VRAM8+(VRAMmap[lcd*128]<<14);
    oam=OAM+lcd*512;

    if(!(dispcnt&0x1000)) return;
    pri<<=10;
    oamNum=128; do
    {
        oamNum--;
        attr2=oam[oamNum*4+2];
        if((attr2&0x0C00)!=pri) continue;
        attr0=oam[oamNum*4+0];
        attr1=oam[oamNum*4+1];
        x=attr1&0x01FF; y=attr0&0x00FF;

        // Shape (row) and size (column) select the dimensions; the fourth
        // shape row of the table holds -1, marking the sprite invalid.
        size=((attr0>>14)&3)*4+((attr1>>14)&3);
        width=sprsize[size][0]; height=sprsize[size][1];
        if(width==-1) continue;

        // rwidth/rheight describe the on-screen box, width/height the texture.
        rwidth=width; rheight=height;
        if(attr0&0x0100)
        {
            // Rot/scale sprite: bit 9 doubles the drawing area.
            if(attr0&0x0200) { rwidth*=2; rheight*=2; }
        }
        else
        {
            // Normal sprite: bit 9 disables it.
            if(attr0&0x0200) continue;
        }

        // Y clipping. The box covers lines y .. yend-1 modulo 256.
        yend=(y+rheight)&0xff;
        if(y>yend)
        {
            // Box wraps past line 255: hidden only between yend and y.
            if(curline[lcd]>=yend && curline[lcd]<y) continue;
        }
        else
        {
            if(curline[lcd]<y || curline[lcd]>=yend) continue;
        }

        // 8-bit tiles occupy two 32-byte tile slots each.
        scale=(attr0&0x2000)?2:1;

        spritey=curline[lcd]-y;
        if(spritey<0) spritey+=256;

        if(!(attr0&0x0100))
        {
            if(attr1&0x2000) spritey=(height-1)-spritey;

            if(dispcnt&0x0040)
                // 1 dimensional tile mapping
                baseSprite=(attr2&0x3FF)+((spritey/8)*(width/8))*scale;
            else
                // 2 dimensional tile mapping: 32 tile slots per row
                baseSprite=(attr2&0x3FF)+((spritey/8)*0x20);

            baseInc=scale;
            if(attr1&0x1000)
            {
                // Horizontal flip: start at the last tile and walk backwards.
                baseSprite+=((width/8)*scale)-scale;
                baseInc=-baseInc;
            }

            if(attr0&0x2000)
            {
                // 256 colours
                for(i=x;i<x+width;i++)
                {
                    if((i&0x1ff)<lcdWidth && spritey>=0 && spritey<height)
                    {
                        tx=(i-x)&7;
                        if(attr1&0x1000) tx=7-tx;
                        curIdx=baseSprite*32+((spritey&7)*8)+tx;
                        b=vram8[0x10000+curIdx];
                        if(b) scanline[i&0x1ff]=GPUPAL[lcd*512+256+b];
                    }
                    // Move to the next tile after its eighth pixel.
                    if(((i-x)&7)==7) baseSprite+=baseInc;
                }
            }
            else
            {
                // 16 colours
                palIdx=((attr2>>8)&0xF0);
                for(i=x;i<x+width;i++)
                {
                    if((i&0x1ff)<lcdWidth && spritey>=0 && spritey<height)
                    {
                        tx=(i-x)&7;
                        if(attr1&0x1000) tx=7-tx;
                        curIdx=baseSprite*32+((spritey&7)*4)+(tx/2);
                        b=vram8[0x10000+curIdx];
                        // Odd pixels live in the high nibble.
                        if(tx&1) b>>=4;
                        b&=15;
                        if(b) scanline[i&0x1ff]=GPUPAL[lcd*512+256+palIdx+b];
                    }
                    if(((i-x)&7)==7) baseSprite+=baseInc;
                }
            }
        }
        else
        {
            // Rot/scale sprite: the four matrix entries are stored in the
            // fourth word of four consecutive OAM entries.
            rotScaleParam=(attr1>>9)&0x1F;

            dx= (s16)oam[(rotScaleParam*4*4)+3];
            dmx=(s16)oam[(rotScaleParam*4*4)+7];
            dy= (s16)oam[(rotScaleParam*4*4)+11];
            dmy=(s16)oam[(rotScaleParam*4*4)+15];

            cx=rwidth/2;
            cy=rheight/2;

            baseSprite=attr2&0x3FF;

            if(dispcnt&0x0040)
                // 1 dimensional tile mapping
                pitch=(width/8)*scale;
            else
                // 2 dimensional tile mapping
                pitch=0x20;

            // Texture position (8.8 fixed point) of the leftmost pixel of this
            // line, measured around the sprite centre. Kept in int so that
            // positions far outside the texture cannot wrap back into it.
            rx=(dmx*(spritey-cy))-(cx*dx)+(width<<7);
            ry=(dmy*(spritey-cy))-(cx*dy)+(height<<7);

            if(attr0&0x2000)
            {
                // 256 colours
                for(i=x;i<x+rwidth;i++)
                {
                    tx=rx>>8;
                    ty=ry>>8;

                    if((i&0x1ff)<lcdWidth && tx>=0 && tx<width && ty>=0 && ty<height)
                    {
                        curIdx=(baseSprite+((ty/8)*pitch)+((tx/8)*scale))*32+((ty&7)*8)+(tx&7);
                        b=vram8[0x10000+curIdx];
                        if(b) scanline[i&0x1ff]=GPUPAL[lcd*512+256+b];
                    }

                    rx+=dx;
                    ry+=dy;
                }
            }
            else
            {
                // 16 colours
                palIdx=((attr2>>8)&0xF0);
                for(i=x;i<x+rwidth;i++)
                {
                    tx=rx>>8;
                    ty=ry>>8;

                    if((i&0x1ff)<lcdWidth && tx>=0 && tx<width && ty>=0 && ty<height)
                    {
                        curIdx=(baseSprite+((ty/8)*pitch)+((tx/8)*scale))*32+((ty&7)*4)+((tx&7)/2);
                        b=vram8[0x10000+curIdx];
                        if(tx&1) b>>=4;
                        b&=15;
                        if(b) scanline[i&0x1ff]=GPUPAL[lcd*512+256+palIdx+b];
                    }

                    rx+=dx;
                    ry+=dy;
                }
            }
        }
    } while(oamNum);
}

// Mode 0: four tiled backgrounds plus sprites. Layers are painted from
// priority 3 up to priority 0; within one priority BG3 goes first and the
// sprites last, so later layers overwrite earlier ones.
void GPUscanMode0(u16 *scanline, int lcd)
{
    u16 dispcnt=VREG(DISPCNT);
    int pri=3;
    while(pri>=0)
    {
        if((dispcnt&0x0800) && (VREG(BG3CNT)&3)==pri) GPUscanTile(scanline,lcd,3);
        if((dispcnt&0x0400) && (VREG(BG2CNT)&3)==pri) GPUscanTile(scanline,lcd,2);
        if((dispcnt&0x0200) && (VREG(BG1CNT)&3)==pri) GPUscanTile(scanline,lcd,1);
        if((dispcnt&0x0100) && (VREG(BG0CNT)&3)==pri) GPUscanTile(scanline,lcd,0);
        GPUsprites(scanline,lcd,pri);
        pri--;
    }
}

// Mode 1: BG0 and BG1 tiled, BG2 rotated/scaled, plus sprites. Layers are
// painted from priority 3 up to priority 0; within one priority BG2 goes
// first and the sprites last.
void GPUscanMode1(u16 *scanline, int lcd)
{
    u16 dispcnt=VREG(DISPCNT);
    int pri=3;
    while(pri>=0)
    {
        if((dispcnt&0x0400) && (VREG(BG2CNT)&3)==pri) GPUscanRot(scanline,lcd,2);
        if((dispcnt&0x0200) && (VREG(BG1CNT)&3)==pri) GPUscanTile(scanline,lcd,1);
        if((dispcnt&0x0100) && (VREG(BG0CNT)&3)==pri) GPUscanTile(scanline,lcd,0);
        GPUsprites(scanline,lcd,pri);
        pri--;
    }
}

// Mode 2: BG2 and BG3 rotated/scaled, plus sprites. Layers are painted from
// priority 3 up to priority 0; within one priority BG3 goes first and the
// sprites last.
void GPUscanMode2(u16 *scanline, int lcd)
{
    u16 dispcnt=VREG(DISPCNT);
    int pri=3;
    while(pri>=0)
    {
        if((dispcnt&0x0800) && (VREG(BG3CNT)&3)==pri) GPUscanRot(scanline,lcd,3);
        if((dispcnt&0x0400) && (VREG(BG2CNT)&3)==pri) GPUscanRot(scanline,lcd,2);
        GPUsprites(scanline,lcd,pri);
        pri--;
    }
}

// Mode 3: a 256x192 direct-colour (16 bits per pixel) bitmap on BG2, plus
// sprites. Sprites with a priority number above BG2's are drawn first, then
// the bitmap, then the remaining sprites. The bitmap is sampled along the
// line starting at BGX/BGY[lcd][0] and stepping by PA/PC (8.8 fixed point);
// samples outside the bitmap leave scanline[] untouched.
void GPUscanMode3(u16 *scanline, int lcd)
{
    u16 dispcnt=VREG(DISPCNT), bg2cnt=VREG(BG2CNT), i; int pri;
    u32 x,y,ax,ay; s16 dx,dy; u16 mosx,mospx,mosy;
    // Last sampled pixel. Starts at 0 because with mosaic on, the first
    // in-bounds pixel may arrive at a non-sampling phase and would otherwise
    // read an uninitialised value.
    u16 w=0;
    u16 *vram=VRAM+(VRAMmap[lcd*128]<<13);

    for(pri=3;pri>(bg2cnt&3);pri--) GPUsprites(scanline,lcd,pri);
    if(dispcnt&0x0400)
    {
        x=BGX[lcd][0]; y=BGY[lcd][0];
        if(bg2cnt&0x0040)
        {
            // Vertical mosaic: step the start point back by whole lines.
            mosx=(VREG(MOSAIC)&0x000F);
            mosy=((VREG(MOSAIC)&0x00F0)>>4);
            mospx=mosx; if(!curline[lcd]) mospy[lcd][2]=mosy;
            mospy[lcd][2]++; if(mospy[lcd][2]<=mosy) y-=mospy[lcd][2]*256;
            else mospy[lcd][2]=0;
        } else { mospx=0; mosx=0; }
        dx=(signed)VREG(BG2PA); dy=(signed)VREG(BG2PC);
        for(i=0;i<256;i++)
        {
            // ax/ay are unsigned, so negative positions show up as huge values
            // and fail the upper-bound tests.
            ax=x>>8; ay=y>>8;
            if(ax<256 && ay<192)
            {
                if(mospx>=mosx) w=vram[ay*256+ax];
                scanline[i]=w;
            }
            mospx+=(dx>>8); if(mospx>mosx) mospx=0;
            x+=dx; y+=dy;
        }
    }
    for(pri=(bg2cnt&3);pri>=0;pri--) GPUsprites(scanline,lcd,pri);
}

// Mode 4: a 256x192 palettised (8 bits per pixel) bitmap on BG2, plus sprites.
// Bit 4 of BG2CNT selects the second frame at byte offset 0xA000. Sprites with
// a priority number above BG2's are drawn first, then the bitmap, then the
// remaining sprites. Colour index 0 and samples outside the bitmap leave
// scanline[] untouched.
void GPUscanMode4(u16 *scanline, int lcd)
{
    u16 dispcnt=VREG(DISPCNT), bg2cnt=VREG(BG2CNT), i; int pri;
    u32 x,y,ax,ay; s16 dx,dy; u16 mosx,mospx,mosy;
    // Last sampled colour index. Starts at 0 (transparent) because with mosaic
    // on, the first in-bounds pixel may arrive at a non-sampling phase and
    // would otherwise read an uninitialised value.
    u8 b=0;
    u16 baseIdx=(bg2cnt&0x0010)?0xA000:0;
    u8 *vram8=VRAM8+(VRAMmap[lcd*128]<<14);

    for(pri=3;pri>(bg2cnt&3);pri--) GPUsprites(scanline,lcd,pri);
    if(dispcnt&0x0400)
    {
        x=BGX[lcd][0]; y=BGY[lcd][0];
        if(bg2cnt&0x0040)
        {
            // Vertical mosaic: step the start point back by whole lines.
            mosx=(VREG(MOSAIC)&0x000F);
            mosy=((VREG(MOSAIC)&0x00F0)>>4);
            mospx=mosx; if(!curline[lcd]) mospy[lcd][2]=mosy;
            mospy[lcd][2]++; if(mospy[lcd][2]<=mosy) y-=mospy[lcd][2]*256;
            else mospy[lcd][2]=0;
        } else { mospx=0; mosx=0; }
        dx=(signed)VREG(BG2PA); dy=(signed)VREG(BG2PC);
        for(i=0;i<256;i++)
        {
            // ax/ay are unsigned, so negative positions show up as huge values
            // and fail the upper-bound tests.
            ax=x>>8; ay=y>>8;
            if(ax<256 && ay<192)
            {
                if(mospx>=mosx) b=vram8[baseIdx+ay*256+ax];
                if(b) scanline[i]=GPUPAL[lcd*512+b];
            }
            mospx+=(dx>>8); if(mospx>mosx) mospx=0;
            x+=dx; y+=dy;
        }
    }
    for(pri=(bg2cnt&3);pri>=0;pri--) GPUsprites(scanline,lcd,pri);
}

// Mode 5: a 160x128 direct-colour (16 bits per pixel) bitmap on BG2, plus
// sprites. Bit 4 of BG2CNT selects the second frame, 160*128 pixels further
// on. Sprites with a priority number above BG2's are drawn first, then the
// bitmap, then the remaining sprites. Samples outside the bitmap leave
// scanline[] untouched.
void GPUscanMode5(u16 *scanline, int lcd)
{
    u16 dispcnt=VREG(DISPCNT), bg2cnt=VREG(BG2CNT), i; int pri;
    u32 x,y,ax,ay; s16 dx,dy; u16 mosx,mospx,mosy;
    // Last sampled pixel. Starts at 0 because with mosaic on, the first
    // in-bounds pixel may arrive at a non-sampling phase and would otherwise
    // read an uninitialised value.
    u16 w=0;
    u16 baseIdx=(bg2cnt&0x0010)?(160*128):0;
    u16 *vram=VRAM+(VRAMmap[lcd*128]<<13);

    for(pri=3;pri>(bg2cnt&3);pri--) GPUsprites(scanline,lcd,pri);
    if(dispcnt&0x0400)
    {
        x=BGX[lcd][0]; y=BGY[lcd][0];
        if(bg2cnt&0x0040)
        {
            // Vertical mosaic: step the start point back by whole lines.
            mosx=(VREG(MOSAIC)&0x000F);
            mosy=((VREG(MOSAIC)&0x00F0)>>4);
            mospx=mosx; if(!curline[lcd]) mospy[lcd][2]=mosy;
            mospy[lcd][2]++; if(mospy[lcd][2]<=mosy) y-=mospy[lcd][2]*256;
            else mospy[lcd][2]=0;
        } else { mospx=0; mosx=0; }
        dx=(signed)VREG(BG2PA); dy=(signed)VREG(BG2PC);
        for(i=0;i<256;i++)
        {
            // ax/ay are unsigned, so negative positions show up as huge values
            // and fail the upper-bound tests.
            ax=x>>8; ay=y>>8;
            if(ax<160 && ay<128)
            {
                if(mospx>=mosx) w=vram[baseIdx+ay*160+ax];
                scanline[i]=w;
            }
            mospx+=(dx>>8); if(mospx>mosx) mospx=0;
            x+=dx; y+=dy;
        }
    }
    for(pri=(bg2cnt&3);pri>=0;pri--) GPUsprites(scanline,lcd,pri);
}

// Placeholder renderer for the unimplemented entries of GPUscanModes[]:
// draws nothing, so the line keeps whatever the caller put in scanline[].
void GPUscanNULL(u16 *scanline, int lcd)
{
}

// Line renderers indexed by the low three bits of DISPCNT (see
// GPUscanLCD0/1); entries 6 and 7 render nothing.
fptr GPUscanModes[]={GPUscanMode0,GPUscanMode1,GPUscanMode2,
                     GPUscanMode3,GPUscanMode4,GPUscanMode5,
                     GPUscanNULL,GPUscanNULL};

// Produce the current line of one LCD and enter horizontal blank.
//
// For visible lines (0-191) the line is written to `screen`, a buffer holding
// two 256x192 images: LCD 0 occupies the second image, LCD 1 the first.
//   * DISPCNT bit 7 set: the line is forced to white (0x7FFF). The previous
//     revision filled a local buffer but never copied it to `screen`.
//   * otherwise the two low bits of VREG(GREENSWAP) choose the source:
//     2 copies the line straight from VRAM, 1 and 3 run the renderer selected
//     by DISPCNT bits 0-2 over a line pre-filled with palette entry 0, and
//     0 leaves `screen` untouched.
// After a visible line the H-blank interrupt is raised if enabled in DISPSTAT
// and H-blank DMA is checked. The H-blank flag is set on every line.
//
// NOTE(review): the VRAM-copy case scales VRAMmap entries by 0x4000 16-bit
// words, whereas the renderers use <<13 words -- confirm which unit is meant.
static void GPUscanLCD(u16 *screen, int lcd)
{
    u16 dispcnt=VREG(DISPCNT), voff, scanline[256], i;
    u16 *dest;
    if(curline[lcd]<192)
    {
        voff=curline[lcd]*256;
        dest=screen+(1-lcd)*256*192+voff;
        if(dispcnt&0x0080)
        {
            for(i=0;i<256;i++) scanline[i]=0x7FFF;
            memcpy(dest, scanline, 512);
        }
        else
        {
            switch(VREG(GREENSWAP)&3)
            {
                case 2:
                    memcpy(dest, VRAM+VRAMmap[(lcd*128+(voff>>14))&1023]*0x4000+(voff&0x3FFF), 512);
                    break;
                case 1: case 3:
                    for(i=0;i<256;i++) scanline[i]=GPUPAL[lcd*512+0];
                    GPUscanModes[dispcnt&7](scanline,lcd);
                    memcpy(dest, scanline, 512);
                    break;
                case 0: break;
            }
        }
        if(VREG(DISPSTAT)&STAT_INTHBL) IntFire(INT_HBLANK);
        DMAcheck(DMA_TIMEHBL);
    }
    VREG(DISPSTAT)|=STAT_HBLANK;
}

// Render the current line of LCD 0 into `screen` and enter H-blank.
void GPUscanLCD0(u16 *screen)
{
    GPUscanLCD(screen,0);
}

// Render the current line of LCD 1 into `screen` and enter H-blank.
void GPUscanLCD1(u16 *screen)
{
    GPUscanLCD(screen,1);
}

// Render the current line of both LCDs into `screen`, LCD 0 first.
void GPUscanline(u16 *screen)
{
    GPUscanLCD0(screen);
    GPUscanLCD1(screen);
}

//---END: Gladius' code----------------------------------------------------

void GPUhblLCD0()
{
    int lcd=0;
    VREG(DISPSTAT)&=(0xFFFF-STAT_HBLANK);
    curline[lcd]++; if(curline[lcd]>262) curline[lcd]=0;
    BGX[lcd][0]+=(s16)VREG(BG2PB); BGY[lcd][0]+=(s16)VREG(BG2PD);
    BGX[lcd][1]+=(s16)VREG(BG3PB); BGY[lcd][1]+=(s16)VREG(BG3PD);
    VREG(VCOUNT)=curline[lcd];
    switch(curline[lcd])
    {
        case 0:
           VREG(DISPSTAT)&=(0xFFFF-STAT_VBLANK);
           BGX[lcd][0]=VREG(BG2XL)+(VREG(BG2XH)<<16);
           BGX[lcd][1]=VREG(BG3XL)+(VREG(BG3XH)<<16);
           BGY[lcd][0]=VREG(BG2YL)+(VREG(BG2YH)<<16);
           BGY[lcd][1]=VREG(BG3YL)+(VREG(BG3YH)<<16);
           break;
        case 192:
           framecount++;
	   VREG(DISPSTAT)|=STAT_VBLANK;
           if(VREG(DISPSTAT)&STAT_INTVBL) IntFire(INT_VBLANK);
           DMAcheck(DMA_TIMEVBL);
	   break;
    }
    if(curline[lcd]==(VREG(DISPSTAT)>>6))
    {
        VREG(DISPSTAT)|=STAT_VCOUNT;
        if(VREG(DISPSTAT)&STAT_INTVCT) IntFire(INT_VCOUNT);
    }
    else
        VREG(DISPSTAT)&=(0xFFFF-STAT_VCOUNT);
}

void GPUhblLCD1()
{
    int lcd=1;
    VREG(DISPSTAT)&=(0xFFFF-STAT_HBLANK);
    curline[lcd]++; if(curline[lcd]>262) curline[lcd]=0;
    BGX[lcd][0]+=(s16)VREG(BG2PB); BGY[lcd][0]+=(s16)VREG(BG2PD);
    BGX[lcd][1]+=(s16)VREG(BG3PB); BGY[lcd][1]+=(s16)VREG(BG3PD);
    VREG(VCOUNT)=curline[lcd];
    switch(curline[lcd])
    {
        case 0:
           VREG(DISPSTAT)&=(0xFFFF-STAT_VBLANK);
           BGX[lcd][0]=VREG(BG2XL)+(VREG(BG2XH)<<16);
           BGX[lcd][1]=VREG(BG3XL)+(VREG(BG3XH)<<16);
           BGY[lcd][0]=VREG(BG2YL)+(VREG(BG2YH)<<16);
           BGY[lcd][1]=VREG(BG3YL)+(VREG(BG3YH)<<16);
           break;
        case 192:
           framecount++;
	   VREG(DISPSTAT)|=STAT_VBLANK;
           if(VREG(DISPSTAT)&STAT_INTVBL) IntFire(INT_VBLANK);
           DMAcheck(DMA_TIMEVBL);
	   break;
    }
    if(curline[lcd]==(VREG(DISPSTAT)>>6))
    {
        VREG(DISPSTAT)|=STAT_VCOUNT;
        if(VREG(DISPSTAT)&STAT_INTVCT) IntFire(INT_VCOUNT);
    }
    else
        VREG(DISPSTAT)&=(0xFFFF-STAT_VCOUNT);
}

// Leave H-blank on both LCDs and advance each to its next line, LCD 0 first.
void GPUclearHBL()
{
    GPUhblLCD0();
    GPUhblLCD1();
}

/*** EOF:gpu.c ***********************************************************/
