#include "disp.h"
#include "hw/CortexEmu/lcdIface.h"
#include "printf.h"
#include "memmap.h"
#include <MemoryMgr.h>
#include <string.h>
#include "heap.h"
#include "dal.h"
#include "kal.h"


//colour lookup table already converted to the 16-bit packing produced by dispSetClut()
// (top 5 bits of r, top 6 bits of g, top 5 bits of b); read by dispManualUpdate()
static uint16_t mCurClutComputed[256] = {0,};
//task handle filled in by KALTaskCreate() in dispRequestUpdate(); 0 means "not created yet"
static uint32_t mRefreshThread = 0;
//display size as read from the LCD unit's width/height registers in dispDrvInit()
static uint16_t mDispW, mDispH;
//current bits per pixel; set to 4 by dispDrvInit(), changed by dispSetDepth()
static uint8_t mCurDepth;
//base of the framebuffer memory chosen in dispDrvInit() (hardwired VRAM or kheapAlloc())
static void* mFb;


//pixel order inside a byte for depths below 8bpp: nonzero = low bits hold the first pixel,
// zero = high bits hold the first pixel. Reported to the caller of dispDrvInit() via
// indexedFmtIsLEP and used by dispManualUpdate() when expanding indexed pixels.
#define USE_LE_INDEXED_FORMAT							0


//Body of the screen refresh task started by dispRequestUpdate(): forever calls
// halDisplayRefreshManual() and then KALTaskDelay(100) (delay units are defined by KAL).
// The task argument is not used.
//XXX: this is not clever on real hw where we do not need a thread!
static void halDisplayRefreshThread(void* unused)
{
	(void)unused;

	for (;;) {
		halDisplayRefreshManual();
		KALTaskDelay(100);
	}
}

//Set (on == true) or clear (on == false) bit 0 of the LCD unit's power config register.
// All other bits of the register are written back unchanged.
static void dispPrvPower(bool on)
{
	volatile uint32_t *powerCfg = (volatile uint32_t*)(LCD_UNIT_BASE + LCD_OFST_POWER_CFG);
	uint32_t val = *powerCfg;		//single read ...

	val = on ? (val | 1) : (val &~ 1);

	*powerCfg = val;				//... followed by a single write, as with |= / &=
}

//Power up the LCD unit, read its geometry, pick the framebuffer memory and report the
// display parameters to the caller.
//
// wP, hP             - receive width and height read from the LCD unit (also cached in mDispW/mDispH)
// densityP           - receives the value of the LCD unit's density register
// realDpiP           - always receives 72
// supportedDepthMapP - receives a bitmap with bit (depth - 1) set for depths 1, 2 and 4
// framebufferP       - receives the framebuffer base (also cached in mFb)
// indexedFmtIsLEP    - receives USE_LE_INDEXED_FORMAT
//
//Returns true on success. Returns false only when the framebuffer cannot be allocated
// (non-hardwired VRAM builds). An unexpected stride or too-small hardwired VRAM ends in fatal().
bool dispDrvInit(uint16_t *wP, uint16_t* hP, uint16_t* densityP, uint16_t *realDpiP, uint16_t *supportedDepthMapP, void** framebufferP, bool *indexedFmtIsLEP)
{
	uint32_t width, height, fbBytes;
	
	dispPrvPower(true);
	
	//register reads are kept in the order: width, height, density, stride
	width = *(volatile uint32_t*)(LCD_UNIT_BASE + LCD_OFST_WIDTH);
	*wP = width;
	mDispW = (uint16_t)width;
	
	height = *(volatile uint32_t*)(LCD_UNIT_BASE + LCD_OFST_HEIGHT);
	*hP = height;
	mDispH = (uint16_t)height;
	
	*densityP = *(volatile uint32_t*)(LCD_UNIT_BASE + LCD_OFST_DENSITY);
	
	//only a stride of exactly two bytes per pixel is handled
	if (*(volatile uint32_t*)(LCD_UNIT_BASE + LCD_OFST_STRIDE) != width * 2)
		fatal("sorry - unsupported stride\n");
	
	logi("DISP is %u x %u, %u ppi\n", *wP, *hP, *densityP);
	
	//one 16bpp image for real HW and at least 8 bpp for other data
	fbBytes = width * height * 3;
	
	#if CPU_HARDWIRED_VRAM_ADDR
		if (CPU_HARDWIRED_VRAM_SIZE < fbBytes)
			fatal("VRAM is too small %u < %u\n", CPU_HARDWIRED_VRAM_SIZE, fbBytes);
		else
			mFb = (void*)CPU_HARDWIRED_VRAM_ADDR;
	#else
		mFb = kheapAlloc(fbBytes);
		if (!mFb) {
			loge("Cannot alloc fb\n");
			return false;
		}
	#endif
	
	//depths 1, 2 and 4 bpp, each stored at bit (depth - 1)
	*supportedDepthMapP = (1 << (1 - 1)) | (1 << (2 - 1)) | (1 << (4 - 1));
	*framebufferP = mFb;
	*indexedFmtIsLEP = USE_LE_INDEXED_FORMAT;
	
	mCurDepth = 4;
	*realDpiP = 72; //why not?
	
	return true;
}

//Load colour table entries into mCurClutComputed, converting each to the 16-bit packing
// used by dispManualUpdate() (top 5 bits of r, top 6 bits of g, top 5 bits of b).
//
// firstIdx   - table slot of entries[0]; subsequent entries go to consecutive slots.
//              The special value -1 means each entry is stored at its own .idx instead.
// numEntries - number of elements in entries[]
// entries    - source colours
//
//Requests that do not fit in the table (or have firstIdx below -1) are ignored as a whole.
void dispSetClut(int32_t firstIdx, uint32_t numEntries, const struct PalmClutEntry *entries)
{
	const uint32_t clutSize = sizeof(mCurClutComputed) / sizeof(*mCurClutComputed);
	uint32_t i;
	
	if (!numEntries || !entries)
		return;
	
	if (firstIdx < -1)
		return;
	
	if (firstIdx == -1) {
		
		if (numEntries > clutSize)
			return;
	}
	else {
		
		//compare against the room that is left rather than computing "numEntries + firstIdx":
		// that sum is unsigned 32-bit and wraps for huge numEntries, which would pass the
		// check and then write far beyond the table
		if ((uint32_t)firstIdx >= clutSize || numEntries > clutSize - (uint32_t)firstIdx)
			return;
	}

	for (i = 0; i < numEntries; i++) {
		
		uint32_t where = (firstIdx == -1) ? entries[i].idx : i + firstIdx;
		uint32_t r = entries[i].r;
		uint32_t g = entries[i].g;
		uint32_t b = entries[i].b;
		
		//the type of .idx is declared elsewhere; skip the entry if it could ever name a
		// slot outside the table (a no-op if .idx is an 8-bit field)
		if (where >= clutSize)
			continue;
		
		mCurClutComputed[where] = ((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3);
	}
}

//Record the bits-per-pixel value that dispManualUpdate() will use from now on.
// The value is narrowed to 8 bits when stored; no validation is done here.
void dispSetDepth(uint32_t depth)
{
	const uint8_t newDepth = (uint8_t)depth;
	
	mCurDepth = newDepth;
}

//Produce a 16-bit-per-pixel image for the LCD unit from the current framebuffer contents
// and ask the LCD unit to redraw from it.
//
// - at 16bpp the framebuffer itself is handed to the hardware
// - at 8bpp and below every pixel is looked up in mCurClutComputed and written to a second
//   image that lives in the same allocation, right after the indexed image
//
//Nothing is done when there is no framebuffer yet, or when the current depth is one this
// routine cannot expand (0, or above 8 but not 16).
void dispManualUpdate(void)
{
	const uint32_t numPixels = (uint32_t)mDispW * mDispH;
	const uint8_t depth = mCurDepth;	//read once so the offset and the expansion use the same value
	uint16_t *hwrFb;
	uint32_t i;

	//depth 0 would never advance the per-byte loop below (it would write forever);
	// depths above 8 other than 16 would shift by a negative amount
	if (!mFb || !depth || (depth > 8 && depth != 16))
		return;

	if (depth == 16)
		hwrFb = (uint16_t*)mFb;
	else {
		const uint8_t *src = (const uint8_t*)mFb;
		uint16_t *dst;
		
		//the converted image starts where the indexed image ends
		hwrFb = (uint16_t*)mFb + (numPixels * depth / 8 / sizeof(uint16_t));
		dst = hwrFb;
		
		if (depth == 8) {
			
			const uint32_t unrolledEnd = numPixels &~ (uint32_t)7;
			
			for (i = 0; i < unrolledEnd; i += 8) {		//this is very slow, but ok for now //XXX
				
				*dst++ = mCurClutComputed[*src++];
				*dst++ = mCurClutComputed[*src++];
				*dst++ = mCurClutComputed[*src++];
				*dst++ = mCurClutComputed[*src++];
				*dst++ = mCurClutComputed[*src++];
				*dst++ = mCurClutComputed[*src++];
				*dst++ = mCurClutComputed[*src++];
				*dst++ = mCurClutComputed[*src++];
			}
			
			//leftover pixels when the pixel count is not a multiple of 8; without this the
			// unrolled loop would read and write past the end of both images
			for (; i < numPixels; i++)
				*dst++ = mCurClutComputed[*src++];
		}
		else {
			
			uint32_t bitOfst;
			
			for (i = 0; i < numPixels;) {		//this is very slow, but ok for now //XXX
				
				uint8_t clr = *src++;
				
				//"i < numPixels" in the inner loops stops mid-byte at the last pixel
				if (USE_LE_INDEXED_FORMAT) {
					
					for (bitOfst = 0; bitOfst < 8 && i < numPixels; bitOfst += depth, clr >>= depth, i++) {	//lowbits come first
						
						*dst++ = mCurClutComputed[clr & ((1 << depth) - 1)];
					}
				}
				else {
					
					for (bitOfst = 0; bitOfst < 8 && i < numPixels; bitOfst += depth, clr <<= depth, i++) {	//high bits come first
						
						*dst++ = mCurClutComputed[clr >> (8 - depth)];
					}
				}
			}
		}
	}
	
	*(volatile uint32_t*)(LCD_UNIT_BASE + LCD_OFST_DATA_BASE) = (uintptr_t)hwrFb;
	*(volatile uint32_t*)(LCD_UNIT_BASE + LCD_OFST_REDRAW) = 0;
}

//Brightness control: currently a stub that ignores the requested value.
void dispSetBri(uint8_t bri)
{
	(void)bri;	//todo, if possible!
}

//Contrast control: currently a stub that ignores the requested value.
// (The parameter keeps the name "bri" from the existing signature.)
void dispSetContrast(uint8_t bri)
{
	(void)bri;	//todo, if possible!
}

//Backlight switch request. The backlight is not switchable here, so the request is
// ignored and false is always returned.
bool dispSetBacklight(bool on)
{
	(void)on;
	
	return false;
}

//Report the backlight state: always true, i.e. always on.
bool dispGetBacklight(void)
{
	const bool alwaysOn = true;
	
	return alwaysOn;
}

void dispRequestUpdate(void)
{
	Err e;
	
	if (!mRefreshThread) {	//need init?
		
		static const struct KalTaskCreateInfo tci = {.codeToRun = halDisplayRefreshThread, .stackSz = 1024, .prio = 1, .tag = CREATE_4CC('_','s','r','f'), };	//very high prio
		
		//make sure enough of the system i sup to do this (we need MemMgr)
		e = KALTaskCreate((uint32_t*)&mRefreshThread, &tci);
		if (e)
			fatal("Cannot create screen refresh thread\n");
		
		e = KALTaskStart(mRefreshThread, NULL);
		if (e)
			fatal("Cannot start screen refresh thread\n");
	}
}

//Put the display to sleep: pause the refresh task (if one was ever created) and then
// clear the LCD unit's power bit.
void dispSleep(void)
{
	//mRefreshThread stays 0 until dispRequestUpdate() has created the task; do not hand
	// that placeholder to KAL as if it were a task handle
	if (mRefreshThread)
		KALTaskSuspend(mRefreshThread);
	
	dispPrvPower(false);
}

//Wake the display: set the LCD unit's power bit and then let the refresh task (if one
// was ever created) run again.
void dispWake(void)
{
	dispPrvPower(true);
	
	//mRefreshThread stays 0 until dispRequestUpdate() has created the task; do not hand
	// that placeholder to KAL as if it were a task handle
	if (mRefreshThread)
		KALTaskResume(mRefreshThread);
}
