/***************************************************************************
 *                       StreamTemplate.cpp  -  description
 *                               -------------------
 *  begin                : Tue March 1 10:40:21 BST 2003
 *  copyright            : (C) 2002 by Dmitri Skachkov
 *  email                : d_skachkov@yahoo.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/


#include "Stream.h"

// Creates a stream with no file attached.
//
// The text stream and both link tables start out null, a decoder object is
// allocated, and the defaults are "ISO 8859-1" encoding and HTML format.
// The link-cache state and the line/paragraph/tag-change flags are cleared
// as well: other methods (findLinks, getInLinkPosition, checkNewLine,
// checkNewParagraph) read them, and nothing else sets them before the first
// successful openFile().
Stream::Stream()
{
    textstream = 0;
    tempLinks = 0;
    links = 0;
    linksCount = 0;
    linksFound = false;
    decoder = new toUnicode();
    recode = false;
    encoding = "ISO 8859-1";
    format = FORMAT_HTML;
    fileOpened = false;
    newLine = false;
    newParagraph = false;
    tagChanged = false;
}

// Puts the current tag state back to "no markup active": black text, empty
// href/base strings, heading level 0 and every element flag cleared, then
// marks the tag state as changed.
//
// tag.li is cleared here too; getTag() sets it for <li>, so leaving it out
// would let a list-item flag survive into the next document.
void Stream::resetTags()
{
    tag.color = Qt::black;
    tag.href = "";
    tag.base = "";
    tag.h = 0;
    tag.p = tag.br = tag.hr = false;
    tag.ul = tag.ol = tag.dl = false;
    tag.li = false;
    tag.pre = tag.bq = tag.address = false;
    tag.em = tag.cite = tag.var = tag.strong = tag.code = tag.samp = tag.kbd = false;
    tag.dfn = tag.del = false;
    tag.small = tag.i = tag.b = tag.tt = tag.big = false;
    tag.strike = tag.u = false;
    tag.a = tag.link = tag.html = tag.head = tag.title = tag.body = false;
    tag.abbr = tag.acronym = tag.div = tag.img = false;
    tagChanged = true;
}

// Selects the character encoding used for subsequent reads.
//
// e: encoding name; an empty string selects "ISO 8859-1".
//
// The name is always remembered in `encoding`. If no text stream exists yet
// nothing else happens (openFile() calls this again once a stream exists).
// Otherwise one of three paths is taken:
//   - "UTF-8" is handled by the text stream itself;
//   - if the decoder reports a non-negative Index after setCodec() - taken
//     here to mean it has a table for this encoding (TODO confirm against
//     toUnicode) - characters are recoded through it in readNextChar();
//   - anything else is delegated to a QTextCodec looked up by name.
void Stream::setEncoding(QString e)
{
    if (e.length() == 0) e = "ISO 8859-1";
    recode = false;
    encoding = e;
    if (!textstream) return;

    if (encoding == "UTF-8")
    {
        textstream->setEncoding(QTextStream::UnicodeUTF8);
        return;
    }

    decoder->setCodec(e);
    if (decoder->Index > -1)
    {
        recode = true;
        return;
    }

    // codecForName() returns a null pointer for names it does not know, and
    // a null codec must not be handed to setCodec(). Fall back to Latin-1,
    // the same default an empty name selects.
    QTextCodec *codec = QTextCodec::codecForName(encoding);
    if (codec)
        textstream->setCodec(codec);
    else
        textstream->setEncoding(QTextStream::Latin1);
}

// Reads one character from the text stream and runs it through the markup
// filter.
//
// Returns a null QChar at end of input, or when the character was consumed
// as markup (in which case tagChanged is set). Otherwise returns the
// character, passed through the decoder when recoding is enabled.
// tagChanged and newLine are cleared on entry.
//
// An earlier, disabled variant looped here to swallow runs of consecutive
// tags (stopping at an image or at end of input); it is not active.
QChar Stream::readNextChar()
{
    tagChanged = false;
    newLine = false;

    if (textstream->atEnd())
        return QChar();

    textstream->operator>>(c);
    // A character whose int conversion equals 0xFFFFFFA0 is replaced by a
    // plain space.
    if (int(c) == int(0xFFFFFFA0))
        c = ' ';

    c = isTag(c);
    if (c.isNull())
    {
        tagChanged = true;
        return QChar();
    }

    if (recode)
        c = decoder->getQChar(c);
    return c;
}

// Filters one input character according to the document format.
//
// For every format other than HTML the character is returned untouched.
// For HTML:
//   - a newline becomes a space, except inside <pre>, where it raises
//     newLine and yields a null QChar;
//   - '<' and '&' are handed to getTag(), whose result is returned;
//   - any other character is returned as is.
QChar Stream::isTag(QChar t)
{
    if (!format) return t;
    if (format != FORMAT_HTML) return t;

    if (t == '\n')
    {
        if (!tag.pre) return ' ';
        newLine = true;
        return QChar();
    }

    if ((t == '<') || (t == '&'))
        return getTag(t);

    return t;
}

QChar Stream::getTag(QChar t)
{
    if (t == '&')
    {
	tempPos = getPosition();
	c = QChar();
	textstream->operator>>(tc);
	switch (tc)
	{
	    case 'l':
		textstream->operator>>(tc);
		if (tc == 't') c= '<';
		break;
	    case 'g':
		textstream->operator>>(tc);
		if (tc == 't') c = '>';
		break;
	    case 'a':
		textstream->operator>>(tc);
		if (tc == 'm')
		{
		    textstream->operator>>(tc);
		    if (tc == 'p') c = '&';
		}
		break;
	    case 'q':
		textstream->operator>>(tc);
		if (tc == 'u')
		{
		    textstream->operator>>(tc);
		    if (tc == 'o')
		    {
			textstream->operator>>(tc);
			if (tc == 't') c = '"';
		    }
		}
		break;
	    case 'n':
		textstream->operator>>(tc);
		if (tc == 'b')
		{
		    textstream->operator>>(tc);
		    if (tc == 's')
		    {
			textstream->operator>>(tc);
			if (tc == 'p') c = ' ';
		    }
		}
		break;
	    default:
		break;
	}
	if (!c.isNull())
	{
	    textstream->operator>>(tc);
	    if (tc == ';') return c;
	}
	setPosition(tempPos);
	return t;
    } else {
	//newParagraph = false;
	tagString = "";
	while (1)
	{
	    if (atEnd()) break;
	    //tempPos = getPosition();
	    textstream->operator>>(tc);
	    if (tc == '>') break;
	    tagString.append(tc);
	}
	//setPosition(tempPos);
	tagString = tagString.stripWhiteSpace();
	tagStringLc = tagString.lower();
	tagStart = true;
	if (tagStringLc.at(0) == '/')
	{
	    tagStart = false;
	    tagStringLc.remove(0,1);
	    tagString.remove(0,1);
	    tagName = tagStringLc;
	} else {
	    tagName = tagStringLc;
	    tagName.replace(QRegExp(" .*$"),"");
	}
	if (tagName == "p")
	{
	    //printf("<p>");
	    tag.p = tagStart;
	    newParagraph = tagStart;
	    newLine = true;
	} else if (tagName == "br")
	{
	   tag.br = tagStart;
	   newLine = true;
	} else if (tagName == "hr")
	{
	    newParagraph = true;
	    newLine = true;
	} else if (tagName == "ul")
	{
	    tag.ul = tagStart;
	} else if (tagName == "li")
	{
	    //newLine = true;
	    tag.li = tagStart;
	} else if (tagName == "ol")
	{
	    tag.ol = tagStart;
	} else if (tagName == "dl")
	{
	    tag.dl = tagStart;
	} else if (tagName == "pre")
	{
	    tag.pre = tagStart;
	} else if (tagName == "strong")
	{
	    tag.strong = tagStart;
	} else if (tagName == "i")
	{
	    tag.i = tagStart;
	} else if (tagName == "b")
	{
	    tag.b = tagStart;
	} else if (tagName == "tt")
	{
	    tag.tt = tagStart;
	} else if (tagName == "big")
	{
	    tag.big = tagStart;
	} else if (tagName == "small")
	{
	    tag.small = tagStart;
	} else if (tagName == "em")
	{
	    tag.em = tagStart;
	} else if (tagName == "dfn")
	{
	    tag.dfn = tagStart;
	} else if (tagName == "samp")
	{
	    tag.samp = tagStart;
	} else if (tagName == "kbd")
	{
	    tag.kbd = tagStart;
	} else if (tagName == "var")
	{
	    tag.var = tagStart;
	} else if (tagName == "cite")
	{
	    tag.cite = tagStart;
	} else if (tagName == "del")
	{
	    tag.del = tagStart;
	} else if ((tagName == "s") || (tagName == "strike"))
	{
	    tag.strike = tagStart;
	} else if (tagName == "u")
	{
	    tag.u = tagStart;
	} else if (tagName == "a")
	{
	    tag.a = tagStart;
	    tag.href="";
	    if (tagStringLc.find(QRegExp("href="),0)>-1)
	    {
		int i;
		i = tagStringLc.find(QRegExp("href="),0) + 5;
		while((i<tagString.length()) && (!tagString.at(i).isSpace()))
		{
		    tag.href.append(tagString.at(i++));
		}
		(tag.href.stripWhiteSpace());
		if (tag.href.at(0) == '\"') tag.href = tag.href.mid(1);
		if (tag.href.at(tag.href.length()-1) == '\"') tag.href= tag.href.left(tag.href.length()-1);
		//printf("href=\"");
		//printf(tag.href);
		//printf("\"\n");
	    }
	} else if (tagName == "link")
	{
	    tag.link = tagStart;
	} else if (tagName == "tag")
	{
	    tag.html = tagStart;
	} else if (tagName == "head")
	{
	    tag.head = tagStart;
	} else if (tagName == "title")
	{
	    tag.title = tagStart;
	} else if (tagName == "body")
	{
	    tag.body = tagStart;
	} else if (tagName == "div")
	{
	    tag.div = tagStart;
	    newLine = tagStart;
	} else if (tagName.find(QRegExp("^h[1-6]$")) > -1)
	{
	    //newParagraph = tagStart;
	    newLine = true;
	    if (tagStart)
	    {
		newParagraph = true;
		tag.h = int((7 - tagName.at(1).digitValue())/2+1);
	    } else {
		//newLine = true;
		tag.h = 0;
	    }
	} else if (tagName == "code")
	{
	    tag.code = tagStart;
	} else if (tagName == "img")
	{
	    tag.imgAlt = "";
	    tag.imgSrc = "";
	    int src;
	    int alt;
	    tag.img = tagStart;
	    src = tagStringLc.find(QRegExp("src="));
	    alt = tagStringLc.find(QRegExp("alt="));
	    if (alt>-1)
	    {
		alt += 4;
		while (1)
		{
		    if ((alt>=tagString.length()) || (tagString.at(alt).isSpace())) break;
		    tag.imgAlt.append(tagString.at(alt));
		    ++alt;
		}
		
	    }
	    if (src>-1)
	    {
		src += 4;
		while (1)
		{
		    if ((src>=tagString.length()) || (tagString.at(src).isSpace())) break;
		    tag.imgSrc.append(tagString.at(src));
		    ++src;
		}
	    }
	    (tag.imgSrc.stripWhiteSpace());
	    if (tag.imgSrc.at(0) == '\"') tag.imgSrc = tag.imgSrc.mid(1);
	    if (tag.imgSrc.at(tag.imgSrc.length()-1) == '\"') tag.imgSrc= tag.imgSrc.left(tag.imgSrc.length()-1);
	    (tag.imgAlt.stripWhiteSpace());
	    if (tag.imgAlt.at(0) == '\"') tag.imgAlt = tag.imgAlt.mid(1);
	    if (tag.imgAlt.at(tag.imgAlt.length()-1) == '\"') tag.imgAlt= tag.imgAlt.left(tag.imgAlt.length()-1);
	} else {
	}
	//printf("<");
	//printf(tagName);
	//printf(">");
	return QChar();	
    }
}

// Steps the file position back by two (clamped at 0) and reads one
// character from there, recoding it when recoding is enabled.
// Returns a null QChar when the file position is already at 0.
// No markup filtering is applied on this path.
QChar Stream::readPrevChar()
{
    int target = file.at() - 2;
    if (target < -1)
        return QChar();
    if (target < 0)
        target = 0;

    file.at(target);
    textstream->operator>>(c);
    return c = (recode ? decoder->getQChar(c) : c);
}

// Peeks at the character at the current file position: the position is
// saved in `position`, one character is read from the text stream, and the
// file position is put back. The character is returned as read; neither
// markup filtering nor recoding is applied here.
QChar Stream::curChar()
{
    position = file.at();
    textstream->operator>>(c);
    file.at(position);
    return c;
}

// Reads the next word via readNextChar() and returns it (also left in the
// member `word`).
//
// stripLeadingSpaces: when true, whitespace in front of the word is skipped,
//   except while inside <pre>.
//
// Returns a null QString when already at end of input. Returns an empty
// word when the first thing read is markup (readNextChar() yields null); the
// position then stays after that markup.
//
// A word ends at whitespace, at markup, or at end of input. The terminating
// character is not consumed: the file position is rewound to just before
// it. When markup ends the word, the tag state is restored from the
// snapshot taken on entry and tagChanged is cleared, so the markup takes
// effect only when it is read again.
QString Stream::readWordForward(bool stripLeadingSpaces)
{
    QChar ch;
    int resume;
    tempTag = tag;

    if (atEnd())
        return QString();

    word = "";
    for (;;)
    {
        resume = getPosition();
        if (atEnd()) break;

        ch = readNextChar();
        if (ch.isNull())
        {
            // Markup before any word character: keep it consumed.
            resume = getPosition();
            break;
        }
        if (ch.isSpace() && stripLeadingSpaces && !tag.pre)
            continue;

        // First character of the word found; collect the rest.
        for (;;)
        {
            word.append (ch);
            resume = getPosition();
            if (atEnd()) break;
            ch = readNextChar();
            if (ch.isSpace()) break;
            if (ch.isNull())
            {
                tagChanged = false;
                tag = tempTag;
                break;
            }
        }
        break;
    }

    setPosition(resume);
    return word;
}

// Placeholder for reading the word before the current position.
// Not implemented: always returns a null QString and does not touch the
// stream.
QString Stream::readWordBack()
{
    return QString();
}

// Returns the current position of the underlying file, as reported by
// file.at().
int Stream::getPosition()
{
    return file.at();
}

// Moves the underlying file to position `pos` via file.at(pos).
// The value is passed through unchecked.
void Stream::setPosition(int pos)
{
    file.at(pos);
}

// Returns the size of the underlying file, as reported by file.size().
int Stream::getSize()
{
    return file.size();
}

// Moves the underlying file back to position 0.
// Tag state and the line/paragraph flags are left as they are.
void Stream::rewind()
{
    file.at(0);
}

// Moves the underlying file to the position equal to its size, i.e. just
// past the last byte.
void Stream::gotoEnd()
{
    file.at(file.size());
}

bool Stream::openFile(const QString & filepath)
{
    QFile f;
    if ( !QFile::exists( filepath ) ) return false;
    f.setName( filepath ); 
    if (!f.open( IO_ReadOnly ))
    {
	return false;
    }
    fileOpened = true;
    f.close();
    if (file.isOpen()) file.close();
    file.setName( filepath ); 
    file.open( IO_ReadOnly );
    docSize = file.size();
    numberOfPages = int(docSize/2000) + 1;
    newParagraph = false;
    newLine = false;
    if (textstream) textstream->~QTextStream();
    textstream = new QTextStream(&file);
    resetTags();
    setEncoding(encoding);
    if (links) delete [] links;
    links = 0;
    linksFound = false;
    return true;
}

// Releases the text stream (if any) and closes the underlying file.
//
// The stream is allocated with new in openFile(), so it is released with
// delete; calling only its destructor would leak the allocation. Deleting a
// null pointer is a no-op, so no check is needed.
void Stream::closeFile()
{
    delete textstream;
    textstream = 0;
    file.close();
}

// Reports whether the underlying file is at its end, as given by
// file.atEnd().
bool Stream::atEnd()
{
    return file.atEnd();
}

// Same test as checkNewParagraph(), but non-consuming with respect to the
// newParagraph flag: when the test succeeds the flag is raised again, so a
// following checkNewParagraph() in HTML mode still sees it.
// Returns the result of checkNewParagraph().
bool Stream::checkEndParagraph()
{
    const bool atBoundary = checkNewParagraph();
    if (atBoundary)
        newParagraph = true;
    return atBoundary;
}

bool Stream::checkNewParagraph()
{
    int pos;
    bool r;
    QChar c;
    r = false;
    pos = getPosition();
    if (atEnd())
    {
	r = true;
    } else if (format == FORMAT_HTML)
    {
	if (newParagraph)
	{
	    newParagraph = false;
	    return true;
	}
	return false;
    } else {
	while (!atEnd())
	{
	    //c = readNextChar();
	    textstream->operator>>(c);
	    if (c == '\n')
	    {
		pos = getPosition();
		r = true;
		break;
	    }
	    if (!c.isSpace()) break;
	}
    }
    setPosition(pos);
    return r;
}

bool Stream::checkNewLine()
{
    int pos;
    bool r;
    QChar ch;
    r = false;
    pos = getPosition();
    if (atEnd())
    {
	r = true;
    } else if (format == FORMAT_HTML)
    {
	if (newLine)
	{
	    newLine = false;
	    return true;
	}
	return false;
    } else {
	while (!atEnd())
	{
	    //c = readNextChar();
	    textstream->operator>>(ch);
	    if (ch == '\n')
	    {
		pos = getPosition();
		r = true;
		break;
	    }
	    if (!ch.isSpace()) break;
	}
    }
    setPosition(pos);
    return r;
}

void Stream::findLinks()
{
    if (linksFound) return;
    if (!textstream) return;
    //printf("Looking for links\n");
    int p;
    QChar ch;
    QString tag;
    linksCount = 0;
    tempLinks = new linksProps[500];
    p = getPosition();
    setPosition(0);
    if (format == FORMAT_HTML)
    {
	while (!atEnd())
	{
	    textstream->operator>>(ch);
	    if (ch != '<') continue;
	    tag = "";
	    while (1)
	    {
		if (atEnd()) break;
		textstream->operator>>(ch);
		if (ch == '>')
		{
		    if (tag.lower().find(QRegExp("^a\\s+name\\s*=\\s*\".+\""))>-1)
		    {
			tempLinks[linksCount].position = getPosition();
			tempLinks[linksCount].name = "";
			for (unsigned int i = (tag.find(QRegExp("\"")) + 1); i<tag.length();i++)
			{
			    if ((ch = tag.at(i)) == '"') break;
			    tempLinks[linksCount].name.append(ch);
			}
			++linksCount;
		    }
		    break;
		} else {
		    tag.append(ch);
		}
	    }
	}
	if (linksCount>0)
	{
	    if (links) delete [] links;
	    links = new linksProps[linksCount];
	    for (int i = 0; i<linksCount;i++)
	    {
		links[i] = tempLinks[i];
		//printf("%4d: %7d ",i,links[i].position);
		//printf(links[i].name);
		//printf("\n");
	    }
	}
    }
    delete [] tempLinks;
    tempLinks = 0;
    linksFound = true;
    setPosition(p);
}

// Looks up the named anchor `l` in the current document.
//
// Builds the anchor table on first use. Returns the recorded file position
// of the first anchor whose name equals `l`, or -1 when the table could not
// be built (for example because no file is open), is empty, the format is
// not HTML, or no anchor matches.
int Stream::getInLinkPosition(QString l)
{
    if (!linksFound) findLinks();

    // findLinks() returns early without touching the table when there is no
    // text stream, so make sure there really is something to search.
    if (!linksFound || !links || linksCount <= 0) return -1;
    if (format != FORMAT_HTML) return -1;

    for (int i = 0; i<linksCount;i++)
    {
        if (l != links[i].name) continue;
        return links[i].position;
    }
    return -1;
}

// Returns the directory part of the current file's path, obtained from a
// QFileInfo built on `file`. The `true` argument to dirPath() asks for the
// absolute form of the path.
QString Stream::getFileDir()
{
    QFileInfo fi(file);
    return fi.dirPath(true);
}

// Returns the 1-based page number of the current position, counting 2000
// position units per page (the same divisor openFile() uses for
// numberOfPages).
int Stream::getPageNumber()
{
    return int(getPosition()/2000) + 1;
}
