/**************************************************************************
* DSemu: Scanline-based tiled/bitmap graphics (gpu.c)                     *
* Released under the terms of the BSD Public Licence                      *
* Imran Nazar (tf@oopsilon.com), 2004                                     *
**************************************************************************/

#include <windows.h>
#include <stdio.h>
#include "err.h"
#include "defs.h"
#include "dsioreg.h"
#include "gpu.h"
#include "dma.h"
#include "vtbl.h"
#include "int.h"
#include "res.h"

//#define GPUDEBUG

/* Video RAM backing store, viewed as halfwords. GPUinit() allocates
   1024*1024 bytes for it and GPUfini() frees it. */
u16 *VRAM;
/* Byte view of the same allocation as VRAM (set in GPUinit()). */
u8  *VRAM8;

/* Sprite attribute table (presumably "object attribute memory"), 1024
   halfwords. */
u16 OAM[1024];
/* Background palette: 256 colour entries, indexed by the tile renderers. */
u16 BGPAL[256];
/* Sprite palette: 256 colour entries. */
u16 OBJPAL[256];
/* Byte views of the two palettes. */
u8 *BGPAL8=(u8*)BGPAL;
u8 *OBJPAL8=(u8*)OBJPAL;

/* Current scanline number, one counter per LCD (index 0 and 1). */
u16 curline[2];
/* Incremented once each time LCD 1 reaches line 193 (GPUclearHBL_LCD1). */
__int64 framecount;

/* Page table defined elsewhere; the renderers use its entries to locate an
   LCD's data inside VRAM. */
extern u32 VRAMmap[553];

/* {width, height} per sprite shape/size code.  The only reference in this
   file is the disabled sprite code, which indexes it with
   ((OAM0&0xC000)>>12)|((OAM1&0xC000)>>14).  Rows of -1 are unused codes. */
int sprsize[16][2]={
    { 8, 8},{16,16},{32,32},{64,64},
    {16, 8},{32, 8},{32,16},{64,32},
    { 8,16},{ 8,32},{16,32},{32,64},
    {-1,-1},{-1,-1},{-1,-1},{-1,-1},
};

/* One random 15-bit colour per sprite slot, refilled by GPUreset(). */
u16 sprrndcol[128];

/*
 * GPUinit: allocate the 1 MiB VRAM backing store, publish the byte view of
 * it in VRAM8, and put the GPU state into its reset condition.
 *
 * Leaves through RETFAIL when the allocation fails and through RETPASS on
 * success; both macros are provided by err.h.
 */
int GPUinit()
{
    /* Declaration list retained from the original; nothing below refers to
       these names directly. */
    u8 r,g,b; char str[50]; int a;

    VRAM = (u16*)malloc(1024*1024);
    if(VRAM == NULL)
    {
        RETFAIL("GPU: FAIL: VRAM allocation.");
    }

    logvt->append("GPU: VRAM allocated.");
    VRAM8 = (u8*)VRAM;

    /* Clears VRAM, palettes and counters. */
    GPUreset();

    RETPASS("GPU: Initialised.");
}

void GPUfini()
{
    char str[2048];
    free(VRAM);
    sprintf(str,"GPU: Shutdown after %lld frames.",framecount);
    logvt->append(str);
}

/*
 * GPUreset: return the GPU state to its power-on values.
 *
 * Zeroes VRAM (when allocated), both palettes and the whole sprite attribute
 * table, rewinds both scanline counters and the frame counter, and assigns a
 * fresh random 15-bit colour to each of the 128 sprite slots.
 */
void GPUreset()
{
    int a;

    /* VRAM is only valid between GPUinit() and GPUfini(). */
    if(VRAM) memset(VRAM, 0, 1024*1024);

    /* sizeof keeps the byte counts tied to the array declarations; the old
       hard-coded 512*2 only covered half of the 1024-entry OAM array. */
    memset(BGPAL, 0, sizeof(BGPAL));
    memset(OBJPAL, 0, sizeof(OBJPAL));
    memset(OAM, 0, sizeof(OAM));

    curline[0]=0; curline[1]=0;
    framecount=0;
    for(a=0;a<128;a++) sprrndcol[a]=rand()&0x7FFF;
}

/*
 * GPUscanTile(bg,lcd): render the current scanline of tiled background `bg`
 * for display `lcd`.
 *
 * Reads BGxCNT and the BG scroll registers from DSio, walks the tile map for
 * the 256 visible columns and, per column `a`:
 *   - writes the pixel colour to scrnbuf[bg][a] (the per-layer line), and
 *   - writes it to scrnbuf[4][a] (the composite line) when the colour index
 *     is non-zero, i.e. index 0 leaves the composite line untouched.
 * BGxCNT bit 7 selects 8-bit (64 bytes/tile) or 4-bit (32 bytes/tile, with a
 * 16-colour palette bank taken from map entry bits 12-15) tile data.  Map
 * entry bits 10 and 11 flip the tile horizontally and vertically.
 *
 * Expands to plain statements and relies on these names being in scope at
 * the call site: bgcnt, width, height, scrnBase, charBase, hofs, vofs,
 * vramptr, vram8ptr, bgy, bgx, tileIdx, tmpTileIdx, tileChar, tileY, palnum,
 * x, y, a, b, scrnbuf.
 *
 * NOTE(review): charBase uses only two bits of BGxCNT (bits 2-3); confirm
 * whether the target hardware defines a wider character-base field.
 */
#define GPUscanTile(bg,lcd) \
    bgcnt=DSio[(lcd)*0x800+REG_BG0CNT+(bg)].data; \
    /* BGxCNT bits 14-15: map size in pixels */ \
    switch((bgcnt>>14)&3) \
    { \
        case 0: width=256; height=256; break; \
        case 1: width=512; height=256; break; \
        case 2: width=256; height=512; break; \
        case 3: width=512; height=512; break; \
    } \
    scrnBase=((bgcnt>>8)&0x1F)*0x400; \
    charBase=((bgcnt>>2)&0x3)*0x4000; \
    hofs=DSio[(lcd)*0x800+REG_BG0HOFS+(bg)*2].data; \
    vofs=DSio[(lcd)*0x800+REG_BG0VOFS+(bg)*2].data; \
    vramptr=VRAM+(VRAMmap[(lcd)*128]<<13); \
    vram8ptr=VRAM8+(VRAMmap[(lcd)*128]<<14); \
    /* Map row for this scanline; identical for both colour depths */ \
    bgy=((curline[(lcd)]+vofs)&(height-1))/8; \
    tileIdx=scrnBase+(((bgy&31)*32)); \
    switch((bgcnt>>14)&3) \
    { \
        case 2: if(bgy>=32) tileIdx += 32*32; break; \
        case 3: if(bgy>=32) tileIdx += 32*32*2; break; \
    } \
    if(bgcnt&0x0080) \
    { \
        /* 8-bit tiles: 8 bytes per tile row */ \
        tileY=((curline[(lcd)]+vofs)&7)*8; \
        for(a=0;a<256;a++) \
        { \
            bgx=((a+hofs)&(width-1))/8; \
            tmpTileIdx=tileIdx+((bgx&31)); \
            if(bgx>=32) tmpTileIdx += 32*32; \
            tileChar=vramptr[tmpTileIdx]; \
            x=(a+hofs)&7; y=tileY; \
            if(tileChar&0x0400) x=7-x; \
            if(tileChar&0x0800) y=56-y; \
            b=vram8ptr[charBase+((tileChar&0x03FF)*64)+x+y]; \
            if(b) scrnbuf[4][a]=BGPAL[b]; \
            /* indexed by screen column, not by colour index */ \
            scrnbuf[(bg)][a]=BGPAL[b]; \
        } \
    } else { \
        /* 4-bit tiles: 4 bytes per tile row, two pixels per byte */ \
        tileY=((curline[(lcd)]+vofs)&7)*4; \
        for(a=0;a<256;a++) \
        { \
            bgx=((a+hofs)&(width-1))/8; \
            tmpTileIdx=tileIdx+((bgx&31)); \
            if(bgx>=32) tmpTileIdx += 32*32; \
            tileChar=vramptr[tmpTileIdx]; \
            x=(a+hofs)&7; y=tileY; \
            if(tileChar&0x0400) x=7-x; \
            if(tileChar&0x0800) y=28-y; \
            b=vram8ptr[charBase+((tileChar&0x03FF)*32)+(x/2)+y]; \
            if(x&1) b>>=4; b&=15; \
            palnum=((tileChar>>12)&15)*16; \
            if(b) scrnbuf[4][a]=BGPAL[b+palnum]; \
            /* indexed by screen column, not by colour index */ \
            scrnbuf[(bg)][a]=BGPAL[b+palnum]; \
        } \
    }

/*
 * GPUscanMode0(lcd): scanline renderer for display mode 0 (tiled layers).
 *
 * - prioset[n] is set to (BGnCNT & 3)*4 + n for the four backgrounds.
 * - priobuf is reset to 255 and the composite line scrnbuf[4] is filled
 *   with BGPAL[0].
 * - BG0 and then BG1 are drawn with GPUscanTile; BG2 and BG3 are not drawn
 *   by this macro.
 * - The composite line (256 pixels, 512 bytes) is copied to
 *   screen + (1-lcd)*256*192 + voff.
 *
 * Expands to plain statements; needs prioset, priobuf, scrnbuf, a, screen,
 * voff and everything GPUscanTile uses in scope at the call site.
 * The expansion ends on the memcpy line: there is deliberately no trailing
 * line continuation, so the macro no longer depends on the next source line
 * being blank.
 */
#define GPUscanMode0(lcd) \
    prioset[0]=(DSio[(lcd)*0x800+REG_BG0CNT].data&3)*4+0; \
    prioset[1]=(DSio[(lcd)*0x800+REG_BG1CNT].data&3)*4+1; \
    prioset[2]=(DSio[(lcd)*0x800+REG_BG2CNT].data&3)*4+2; \
    prioset[3]=(DSio[(lcd)*0x800+REG_BG3CNT].data&3)*4+3; \
    for(a=0;a<256;a++) \
    { \
        priobuf[a]=255; \
        scrnbuf[4][a]=BGPAL[0]; \
    } \
    \
    GPUscanTile(0,(lcd)); GPUscanTile(1,(lcd)); \
    memcpy(screen+(1-(lcd))*256*192+voff, scrnbuf[4], 512);

/*
 * GPUscanMode2(lcd): scanline renderer for display mode 2.
 *
 * Draws BG2 only, and only when DISPCNT bit 10 is set.  The layer is sampled
 * through a pair of running coordinates:
 *   - bg2x/bg2y are static per-frame reference points, reloaded from
 *     BG2XL/BG2XH and BG2YL/BG2YH on line 0 and advanced by BG2PB/BG2PD after
 *     each rendered line;
 *   - bg2xint/bg2yint start at the reference point and are advanced by
 *     BG2PA/BG2PC per sampled pixel.
 * BG2CNT bits 14-15 select the map size (16, 32, 64 or 128 tiles per side),
 * bit 13 (0x2000) bypasses the bounds test, and bit 6 (0x0040) enables
 * mosaic: mosx is the horizontal block width, and lines are skipped while
 * the mospy[2] counter has not passed mosy.
 * Pixels are written to scrnbuf[2] (always) and scrnbuf[4] (non-zero colour
 * index only); finally 480 bytes of scrnbuf[2] are copied to screen+voff.
 *
 * Expands to plain statements without a trailing semicolon.
 *
 * NOTE(review): the bg2xint/bg2yint increments are inside the bounds test,
 * so once a sample fails the test the coordinates stop advancing for the
 * rest of the line - confirm whether that is intended.
 * NOTE(review): prioset[3] is computed but BG3 is never drawn here.
 * NOTE(review): this macro uses a 240-pixel line and screen+voff, whereas
 * GPUscanMode0 uses 256 pixels and a per-LCD screen offset - confirm.
 */
#define GPUscanMode2(lcd) \
    prioset[2]=(REG(BG2CNT)&3)*4+2; \
    prioset[3]=(REG(BG3CNT)&3)*4+3; \
    for(a=0;a<240;a++) priobuf[a]=255; \
    for(a=0;a<240;a++) screen[voff+a]=BGPAL[0]; \
    \
    if(REG(DISPCNT)&0x0400) \
    { \
        if(!curline[(lcd)]) \
	{ \
	    bg2x=REG(BG2XL)+(REG(BG2XH)<<16); \
            bg2y=REG(BG2YL)+(REG(BG2YH)<<16); \
        } \
        switch(REG(BG2CNT)>>14) \
        { \
            case 0: tilemsk=15;  tileshft=4; tilesize=128;  break; \
            case 1: tilemsk=31;  tileshft=5; tilesize=256;  break; \
            case 2: tilemsk=63;  tileshft=6; tilesize=512;  break; \
            case 3: tilemsk=127; tileshft=7; tilesize=1024; break; \
        } \
        bg2xint=bg2x; bg2yint=bg2y; skip=0; \
        if(REG(BG2CNT)&0x0040) \
        { \
            mosx=(REG(MOSAIC)&0x000F)+1; \
            mosy=((REG(MOSAIC)&0x00F0)>>4); \
            mospx=mosx; if(!curline[(lcd)]) mospy[2]=mosy; \
            mospy[2]++; if(mospy[2]<=mosy) skip=1; \
            else mospy[2]=0; \
        } else mosx=1; \
        if(!skip) \
        { \
        for(a=0;a<240;a+=mosx) \
        { \
            if((REG(BG2CNT)&0x2000) || \
               ((bg2xint<=tilesize*256) && \
               (bg2yint<=tilesize*256))) \
            { \
                scrnline=((REG(BG2CNT)&0x1F00)<<3)+           /*BBBBB-----------*/ \
                         (((bg2yint>>11)&tilemsk)<<tileshft)+ /*--------VVVV----*/ \
                         ((bg2xint>>11)&tilemsk);             /*------------HHHH*/ \
                charoff=((REG(BG2CNT)&0x000C)<<12)+ /*CC--------------*/ \
                        VRAM8[scrnline]*64+         /*--TTTTTTTT------*/ \
                        ((bg2yint&0x0700)>>5)+      /*----------VVV---*/ \
                        ((bg2xint&0x0700)>>8);      /*-------------HHH*/ \
                if(prioset[2]<priobuf[a]) \
                { \
                    x=VRAM8[charoff]; \
                    if(x) scrnbuf[4][a]=BGPAL[x]; \
		    scrnbuf[2][a]=BGPAL[x]; \
                    priobuf[a]=prioset[2]; \
                } \
                bg2xint+=REG(BG2PA); \
                bg2yint+=REG(BG2PC); \
            } \
        } \
        for(b=0;b<240;b++) scrnbuf[2][b]=scrnbuf[2][(b/mosx)*mosx]; \
        bg2x+=REG(BG2PB); \
        bg2y+=REG(BG2PD); \
        } \
    } \
    memcpy(screen+voff, scrnbuf[2], 480)

/*
 * GPUscanMode3(lcd): scanline renderer for display mode 3 (direct-colour
 * bitmap).
 *
 * Copies 240 halfwords from VRAM[voff..] into scrnbuf[4] and then to
 * screen+voff.  When BG2CNT bit 6 is set, mosaic is applied: only every
 * mosx-th pixel is fetched and then repeated across its block, and lines
 * are skipped (the previous contents of scrnbuf[4] are re-used) while the
 * mospy[2] counter has not passed mosy.
 *
 * Expands to plain statements; needs skip, mosx, mosy, mospx, mospy, b,
 * scrnbuf, voff, screen and curline in scope at the call site.
 * The expansion ends on the memcpy line: there is deliberately no trailing
 * line continuation, so the macro no longer depends on the next source line
 * being blank.
 */
#define GPUscanMode3(lcd) \
    skip=0; \
    if(REG(BG2CNT)&0x0040) \
    { \
        mosx=(REG(MOSAIC)&0x000F)+1; \
        mosy=((REG(MOSAIC)&0x00F0)>>4); \
        mospx=mosx; if(!curline[(lcd)]) mospy[2]=mosy; \
        mospy[2]++; if(mospy[2]<=mosy) skip=1; \
        else mospy[2]=0; \
    } else mosx=1; \
    if(!skip) \
    { \
        /* fetch one pixel per mosaic block, then smear it across the block */ \
        for(b=0;b<240;b+=mosx) scrnbuf[4][b]=VRAM[voff+b]; \
        for(b=0;b<240;b++) scrnbuf[4][b]=scrnbuf[4][(b/mosx)*mosx]; \
    } \
    memcpy(screen+voff, scrnbuf[4], 480);

/*
 * GPUscanMode4(lcd): scanline renderer for display mode 4 (8-bit paletted
 * bitmap).
 *
 * Reads 240 bytes from VRAM8 starting at base and looks each one up in
 * BGPAL.  base is voff, plus 0xA000 when DISPCNT bit 4 is set (second
 * frame).  The result goes to scrnbuf[4] and then to screen+voff.
 * When BG2CNT bit 6 is set, mosaic is applied: only every mosx-th pixel is
 * fetched and then repeated across its block, and lines are skipped (the
 * previous contents of scrnbuf[4] are re-used) while the mospy[2] counter
 * has not passed mosy.
 *
 * Expands to plain statements; needs base, skip, mosx, mosy, mospx, mospy,
 * b, scrnbuf, voff, screen and curline in scope at the call site.
 * The expansion ends on the memcpy line: there is deliberately no trailing
 * line continuation, so the macro no longer depends on the next source line
 * being blank.
 */
#define GPUscanMode4(lcd) \
    base=((REG(DISPCNT)&0x0010)?0xA000:0)+voff; skip=0; \
    if(REG(BG2CNT)&0x0040) \
    { \
        mosx=(REG(MOSAIC)&0x000F)+1; \
        mosy=((REG(MOSAIC)&0x00F0)>>4); \
        mospx=mosx; if(!curline[(lcd)]) mospy[2]=mosy; \
        mospy[2]++; if(mospy[2]<=mosy) skip=1; \
        else mospy[2]=0; \
    } else mosx=1; \
    if(!skip) \
    { \
        /* fetch one pixel per mosaic block, then smear it across the block */ \
        for(b=0;b<240;b+=mosx) scrnbuf[4][b]=BGPAL[VRAM8[base+b]]; \
        for(b=0;b<240;b++) scrnbuf[4][b]=scrnbuf[4][(b/mosx)*mosx]; \
    } \
    memcpy(screen+voff, scrnbuf[4], 480);

/*
 * GPUscanMode5(lcd): scanline renderer for display mode 5 (small
 * direct-colour bitmap).
 *
 * Clears 240 pixels at screen+voff.  For lines 16..143 it then fetches a
 * 160-pixel row from VRAM (row stride 160 halfwords; second frame at 0x5000
 * when DISPCNT bit 4 is set) into scrnbuf[4] and copies those 160 pixels
 * (320 bytes) to column 40 of the line.  Mosaic handling matches the other
 * bitmap modes (BG2CNT bit 6, MOSAIC register, mospy[2] line counter).
 *
 * Only the 160 rendered pixels are copied.  Copying a full 480 bytes would
 * also transfer scrnbuf[4][160..239], which this macro never writes, and
 * would run past the 240 pixels cleared above.
 *
 * Expands to plain statements; needs base, skip, mosx, mosy, mospx, mospy,
 * b, scrnbuf, voff, screen and curline in scope at the call site.
 *
 * NOTE(review): the "line 0" reload of mospy[2] can never trigger here
 * because it sits inside the curline >= 16 window - confirm intent.
 */
#define GPUscanMode5(lcd) \
    memset(screen+voff,0,480); \
    if(curline[(lcd)]>=16 && curline[(lcd)]<144) \
    { \
        base=((REG(DISPCNT)&0x0010)?0x5000:0)+(curline[(lcd)]-16)*160; skip=0; \
        if(REG(BG2CNT)&0x0040) \
        { \
            mosx=(REG(MOSAIC)&0x000F)+1; \
            mosy=((REG(MOSAIC)&0x00F0)>>4); \
            mospx=mosx; if(!curline[(lcd)]) mospy[2]=mosy; \
            mospy[2]++; if(mospy[2]<=mosy) skip=1; \
            else mospy[2]=0; \
        } else mosx=1; \
        if(!skip) \
        { \
            for(b=0;b<160;b+=mosx) scrnbuf[4][b]=VRAM[base+b]; \
            for(b=0;b<160;b++) scrnbuf[4][b]=scrnbuf[4][(b/mosx)*mosx]; \
        } \
        /* 160 pixels * 2 bytes */ \
        memcpy(screen+voff+40, scrnbuf[4], 320); \
    }

/*
 * GPUscanline_LCD0: render the current scanline of LCD 0 into `screen` and
 * enter horizontal blank.
 *
 * For lines 0..191 the low two bits of the REG_GREENSWAP register select the
 * source:
 *   2      - copy 512 bytes straight out of VRAM (located through VRAMmap);
 *   1 or 3 - run the renderer chosen by DISPCNT bits 0-2 (the GPUscanModeN
 *            macros; values 1, 6 and 7 draw nothing);
 *   0      - nothing is drawn.
 * After a visible line, INT_HBLANK is fired when STAT_INTHBL is set in
 * DISPSTAT and DMAcheck(DMA_TIMEHBL) is called.  STAT_HBLANK is set in
 * DISPSTAT on every call, visible line or not.
 *
 * screen: destination pixel buffer; LCD `lcd` is written at an offset of
 *         (1-lcd)*256*192 pixels by the VRAM-copy and mode-0 paths.
 *
 * Most locals exist only because the GPUscanModeN/GPUscanTile macros expand
 * in this scope and refer to them by name.  The scrnbuf, priobuf, mospy and
 * bg2x/bg2y statics carry state from one scanline to the next.
 */
void GPUscanline_LCD0(u16 *screen)
{
    char str[80];
    int x,y,a,b; int lcd=0;
    u32 base, voff, soff, skip=0;
    static u16 priobuf[256], scrnbuf[5][256], prioset[4];
    u32 scrnline, scrnbase, charbase, charline, charoff, palnum, xorc;

    u16 bgcnt; int width,height,hofs,vofs,bgy,bgx;
    u32 tileIdx,tmpTileIdx,tileChar,tileY; u32 scrnBase,charBase;
    u32 mosx, mosy, mospx, mosval; static int mospy[4]={0};
    u8 *vram8ptr; u16 *vramptr;

    int tilemsk, tilesize, tileshft;
    static u32 bg2x, bg3x, bg2y, bg3y;
    u32 bg2xint, bg3xint, bg2yint, bg3yint;

    /* Sprite bookkeeping: only the disabled sprite loop below reads these. */
    int sprcyc=0, sprnum=0, sprtotcyc=(DSio[lcd*0x800+REG_DISPCNT].data&0x0020)?1536:2130;
    int sprxdim, sprydim, sprxpos, sprypos;
    u32 sprbase, sprramcnt, sprline;

    if(curline[lcd]<192)
    {
        /* Pixel offset of this line inside a 256-pixel-wide frame. */
        voff=curline[lcd]*256;
        switch(DSio[lcd*0x800+REG_GREENSWAP].data&3)
        {
            case 2:
                /* NOTE(review): this path scales VRAMmap entries by 0x4000
                   halfwords, while GPUscanTile scales them by 1<<13
                   halfwords - confirm which page size is correct. */
                memcpy(screen+(1-lcd)*256*192+voff, VRAM+VRAMmap[(lcd*128+(voff>>14))&1023]*0x4000+(voff&0x3FFF), 512);
                break;
            case 1: case 3: switch(DSio[lcd*0x800+REG_DISPCNT].data&7)
            {
                case 0: sprbase=0x10000; sprramcnt=1023; GPUscanMode0(lcd); break;
                case 2: sprbase=0x10000; sprramcnt=1023; GPUscanMode2(lcd); break;
                case 3: sprbase=0x14000; sprramcnt=511;  GPUscanMode3(lcd); break;
                case 4: sprbase=0x14000; sprramcnt=511;  GPUscanMode4(lcd); break;
                case 5: sprbase=0x14000; sprramcnt=511;  GPUscanMode5(lcd); break;
            }
            break;
        }
        /* Sprite rendering is disabled: the loop below is commented out. */
/*        while(sprcyc<sprtotcyc && sprnum<128)
        {
	    sprxdim=sprsize[((OAM0&0xC000)>>12)|
		            ((OAM1&0xC000)>>14)][0];
	    sprydim=sprsize[((OAM0&0xC000)>>12)|
		            ((OAM1&0xC000)>>14)][1];
	    sprypos=OAM0&255;
	    sprxpos=OAM1&511;

	    if(sprypos<=curline &&
	       (sprypos+sprydim)>curline)
	    {
	        scrnline=(OAM2&sprramcnt)+(((curline-sprypos)>>3)*(sprydim>>3));
		soff=0;
	        charline=(curline-sprypos)&7;
	        if(OAM0&0x2000)
	        {
                    charoff=sprbase+scrnline*32+charline*8;
                    for(a=0;a<sprxdim;a++)
                    {
                        x=VRAM8[charoff+soff];
                        if(x) screen[voff+sprxpos+a]=OBJPAL[x];
                        soff++; if(soff>=8)
                        {
                            scrnline++;
                            charoff=sprbase+scrnline*32+charline*8;
                            soff&=7;
                        }
                        sprcyc++;
                    }
	        }
	        else
	        {
                    charoff=sprbase+scrnline*32+charline*4;
                    palnum=(OAM2&0xF000)>>8;
                    for(a=0;a<sprxdim;a++)
                    {
                        if(a&1) x=VRAM8[charoff+(soff>>1)]>>4;
                        else    x=VRAM8[charoff+(soff>>1)]&15;
                        if(x) screen[voff+sprxpos+a]=OBJPAL[palnum+x];
                        soff++; if(soff>=8)
                        {
                            scrnline++;
                            charoff=sprbase+scrnline*32+charline*4;
                            soff&=7;
                        }
                        sprcyc++;
                    }
                }
	    }
	    sprnum++;
        }
*/        if(DSio[lcd*0x800+REG_DISPSTAT].data&STAT_INTHBL) IntFire(INT_HBLANK);
        DMAcheck(DMA_TIMEHBL);
    }
    /* Flag horizontal blank; GPUclearHBL_LCD0() clears it again. */
    DSio[lcd*0x800+REG_DISPSTAT].data|=STAT_HBLANK;
}

void GPUclearHBL_LCD0()
{
    int lcd=0;
    DSio[lcd*0x800+REG_DISPSTAT].data&=(0xFFFF-STAT_HBLANK);
    curline[lcd]++; if(curline[lcd]>262) curline[lcd]=0;
    DSio[lcd*0x800+REG_VCOUNT].data=curline[lcd];
    switch(curline[lcd])
    {
        case 0: DSio[lcd*0x800+REG_DISPSTAT].data&=(0xFFFF-STAT_VBLANK); break;
        case 193:
	   DSio[lcd*0x800+REG_DISPSTAT].data|=STAT_VBLANK;
           if(DSio[lcd*0x800+REG_DISPSTAT].data&STAT_INTVBL) IntFire(INT_VBLANK);
           DMAcheck(DMA_TIMEVBL);
	   break;
    }
    if(curline[lcd]==(DSio[lcd*0x800+REG_DISPSTAT].data>>6))
    {
        DSio[lcd*0x800+REG_DISPSTAT].data|=STAT_VCOUNT;
        if(DSio[lcd*0x800+REG_DISPSTAT].data&STAT_INTVCT) IntFire(INT_VCOUNT);
    }
    else
        DSio[lcd*0x800+REG_DISPSTAT].data&=(0xFFFF-STAT_VCOUNT);
}

/*
 * GPUscanline_LCD1: render the current scanline of LCD 1 into `screen` and
 * enter horizontal blank.
 *
 * For lines 0..191 the low two bits of the REG_GREENSWAP register (read at
 * LCD 1's register offset, lcd*0x800) select the source:
 *   2      - copy 512 bytes straight out of VRAM (located through VRAMmap);
 *   1 or 3 - run the renderer chosen by DISPCNT bits 0-2 (the GPUscanModeN
 *            macros; values 1, 6 and 7 draw nothing);
 *   0      - nothing is drawn.
 * After a visible line, INT_HBLANK is fired when STAT_INTHBL is set in
 * DISPSTAT and DMAcheck(DMA_TIMEHBL) is called.  STAT_HBLANK is set in
 * DISPSTAT on every call, visible line or not.
 *
 * screen: destination pixel buffer; LCD `lcd` is written at an offset of
 *         (1-lcd)*256*192 pixels by the VRAM-copy and mode-0 paths, i.e. at
 *         offset 0 for this LCD.
 *
 * Most locals exist only because the GPUscanModeN/GPUscanTile macros expand
 * in this scope and refer to them by name.  The scrnbuf, priobuf, mospy and
 * bg2x/bg2y statics carry state from one scanline to the next and are
 * separate from the ones in GPUscanline_LCD0.
 */
void GPUscanline_LCD1(u16 *screen)
{
    char str[80];
    int x,y,a,b; int lcd=1;
    u32 base, voff, soff, skip=0;
    static u16 priobuf[256], scrnbuf[5][256], prioset[4];
    u32 scrnline, scrnbase, charbase, charline, charoff, palnum, xorc;

    u16 bgcnt; int width,height,hofs,vofs,bgy,bgx;
    u32 tileIdx,tmpTileIdx,tileChar,tileY; u32 scrnBase,charBase;
    u32 mosx, mosy, mospx, mosval; static int mospy[4]={0};
    u8 *vram8ptr; u16 *vramptr;

    int tilemsk, tilesize, tileshft;
    static u32 bg2x, bg3x, bg2y, bg3y;
    u32 bg2xint, bg3xint, bg2yint, bg3yint;

    /* Sprite bookkeeping: only the disabled sprite loop below reads these. */
    int sprcyc=0, sprnum=0, sprtotcyc=(DSio[lcd*0x800+REG_DISPCNT].data&0x0020)?1536:2130;
    int sprxdim, sprydim, sprxpos, sprypos;
    u32 sprbase, sprramcnt, sprline;

    if(curline[lcd]<192)
    {
        /* Pixel offset of this line inside a 256-pixel-wide frame. */
        voff=curline[lcd]*256;
        switch(DSio[lcd*0x800+REG_GREENSWAP].data&3)
        {
            case 2:
                /* NOTE(review): this path scales VRAMmap entries by 0x4000
                   halfwords, while GPUscanTile scales them by 1<<13
                   halfwords - confirm which page size is correct. */
                memcpy(screen+(1-lcd)*256*192+voff, VRAM+VRAMmap[(lcd*128+(voff>>14))&1023]*0x4000+(voff&0x3FFF), 512);
                break;
            case 1: case 3: switch(DSio[lcd*0x800+REG_DISPCNT].data&7)
            {
                case 0: sprbase=0x10000; sprramcnt=1023; GPUscanMode0(lcd); break;
                case 2: sprbase=0x10000; sprramcnt=1023; GPUscanMode2(lcd); break;
                case 3: sprbase=0x14000; sprramcnt=511;  GPUscanMode3(lcd); break;
                case 4: sprbase=0x14000; sprramcnt=511;  GPUscanMode4(lcd); break;
                case 5: sprbase=0x14000; sprramcnt=511;  GPUscanMode5(lcd); break;
            }
            break;
        }
        /* Sprite rendering is disabled: the loop below is commented out. */
/*        while(sprcyc<sprtotcyc && sprnum<128)
        {
	    sprxdim=sprsize[((OAM0&0xC000)>>12)|
		            ((OAM1&0xC000)>>14)][0];
	    sprydim=sprsize[((OAM0&0xC000)>>12)|
		            ((OAM1&0xC000)>>14)][1];
	    sprypos=OAM0&255;
	    sprxpos=OAM1&511;

	    if(sprypos<=curline &&
	       (sprypos+sprydim)>curline)
	    {
	        scrnline=(OAM2&sprramcnt)+(((curline-sprypos)>>3)*(sprydim>>3));
		soff=0;
	        charline=(curline-sprypos)&7;
	        if(OAM0&0x2000)
	        {
                    charoff=sprbase+scrnline*32+charline*8;
                    for(a=0;a<sprxdim;a++)
                    {
                        x=VRAM8[charoff+soff];
                        if(x) screen[voff+sprxpos+a]=OBJPAL[x];
                        soff++; if(soff>=8)
                        {
                            scrnline++;
                            charoff=sprbase+scrnline*32+charline*8;
                            soff&=7;
                        }
                        sprcyc++;
                    }
	        }
	        else
	        {
                    charoff=sprbase+scrnline*32+charline*4;
                    palnum=(OAM2&0xF000)>>8;
                    for(a=0;a<sprxdim;a++)
                    {
                        if(a&1) x=VRAM8[charoff+(soff>>1)]>>4;
                        else    x=VRAM8[charoff+(soff>>1)]&15;
                        if(x) screen[voff+sprxpos+a]=OBJPAL[palnum+x];
                        soff++; if(soff>=8)
                        {
                            scrnline++;
                            charoff=sprbase+scrnline*32+charline*4;
                            soff&=7;
                        }
                        sprcyc++;
                    }
                }
	    }
	    sprnum++;
        }
*/        if(DSio[lcd*0x800+REG_DISPSTAT].data&STAT_INTHBL) IntFire(INT_HBLANK);
        DMAcheck(DMA_TIMEHBL);
    }
    /* Flag horizontal blank; GPUclearHBL_LCD1() clears it again. */
    DSio[lcd*0x800+REG_DISPSTAT].data|=STAT_HBLANK;
}

void GPUclearHBL_LCD1()
{
    int lcd=1;
    DSio[lcd*0x800+REG_DISPSTAT].data&=(0xFFFF-STAT_HBLANK);
    curline[lcd]++; if(curline[lcd]>262) curline[lcd]=0;
    DSio[lcd*0x800+REG_VCOUNT].data=curline[lcd];
    switch(curline[lcd])
    {
        case 0: DSio[lcd*0x800+REG_DISPSTAT].data&=(0xFFFF-STAT_VBLANK); break;
        case 193:
           framecount++;
	   DSio[lcd*0x800+REG_DISPSTAT].data|=STAT_VBLANK;
           if(DSio[lcd*0x800+REG_DISPSTAT].data&STAT_INTVBL) IntFire(INT_VBLANK);
           DMAcheck(DMA_TIMEVBL);
	   break;
    }
    if(curline[lcd]==(DSio[lcd*0x800+REG_DISPSTAT].data>>6))
    {
        DSio[lcd*0x800+REG_DISPSTAT].data|=STAT_VCOUNT;
        if(DSio[lcd*0x800+REG_DISPSTAT].data&STAT_INTVCT) IntFire(INT_VCOUNT);
    }
    else
        DSio[lcd*0x800+REG_DISPSTAT].data&=(0xFFFF-STAT_VCOUNT);
}

/*
 * GPUpalcol: print the red/green/blue components of background palette
 * entry `idx` as hex text into the debug buffer.
 *
 * pbuf: 128-pixel-wide debug surface; the text goes on the row starting at
 *       pixel offset 128*136, which is cleared first.
 * idx:  index into BGPAL; it is not range-checked here.
 */
void GPUpalcol(u16 *pbuf, int idx)
{
    char str[80];
    u16 *textrow=pbuf+128*136;
    u16 col=BGPAL[idx];

    /* Three 5-bit fields packed into the low 15 bits: red lowest. */
    u8 r=col&0x1F;
    u8 g=(col>>5)&0x1F;
    u8 b=(col>>10)&0x1F;

    dbgOutClear(textrow,128,8);
    sprintf(str,"R:%02Xh  G:%02Xh  B:%02Xh",r,g,b);
    dbgOut(textrow,128,8,str,0,0,0x7FFF);
}

/*
 * GPUscanline: render the current scanline of both LCDs into `screen`,
 * LCD 0 first and then LCD 1.  Each per-LCD routine also sets its own
 * HBLANK status flag.
 */
void GPUscanline(u16 *screen)
{
    GPUscanline_LCD0(screen);
    GPUscanline_LCD1(screen);
}

/*
 * GPUclearHBL: end horizontal blank on both LCDs and advance each one to its
 * next scanline, LCD 0 first and then LCD 1.
 */
void GPUclearHBL()
{
    GPUclearHBL_LCD0();
    GPUclearHBL_LCD1();
}

/*
 * GPUpal: draw the background palette into a debug surface.
 *
 * After writing the "BGPAL" label through dbgOut, the 256 entries of BGPAL
 * are painted as a 16x16 grid of 8x8-pixel swatches.  Entry row*16+col is
 * painted at pixel (col*8, row*8) of the 128-pixel-wide buffer, so the grid
 * fills the first 128 rows.
 *
 * pbuf: destination surface, 128 pixels per row.
 */
void GPUpal(u16 *pbuf)
{
    u32 row, col, py, px;

    dbgOut(pbuf,128,144,"BGPAL",0,0,0x7FFF);

    for(row=0;row<16;row++)
    {
        for(col=0;col<16;col++)
        {
            /* Top-left pixel of this swatch and the colour that fills it. */
            u16 *cell=pbuf+(row*8)*128+col*8;
            u16 colour=BGPAL[row*16+col];

            for(py=0;py<8;py++)
            {
                for(px=0;px<8;px++)
                {
                    cell[py*128+px]=colour;
                }
            }
        }
    }
}

/*** EOF:gpu.c ***********************************************************/

