###########################################################################
# Copyright (c) 1998, Jeffrey Glen Rennie
# All rights reserved.
###########################################################################
namespace eval filter {
    variable scaryChars
    set scaryChars [list \
	    dollar \
	    beginComment \
	    endComment \
	    lessThan \
	    greaterThan \
	    openBrace \
	    closeBrace \
	    beginScript \
	    endScript \
	    backSlash ]

    set scaryProcs [list \
	    cd exec exit fconfigure file glob load open pwd socket source ]

    set isSafe 0

    struct::Declare filter:: Buffer Mode ParseResult CharMap \
	    TheFilter Url

    set theFilterFile "theFiltr.tcl"
}

proc filter::ModifyToken { chatToken } {
    regexp {[0-9]+$} $chatToken number
    set result Filter__$number
}

###########################################################################
# http::filter
# Description:
#    filter calls htmlParse to remove the ads
# args:
#    varText: the name of the variable containing the sequential html
# snippet.  filter modifies the contents of the variable in place.
# returns:
#    nothing

proc filter::Initialize { chatToken url filterMode } {
    set token [ModifyToken $chatToken]
    variable $token
    upvar 0 $token state
    variable scaryChars

    set state [Make]
    SetMode state $filterMode
    SetUrl state $url
    SetCharMap state [mapchar::GenerateMap scaryChars]
    if { $filterMode == "filter" } {
	SetTheFilter state [SelectFilter $url]
    } else {
	SetTheFilter state [SelectBlocker $url]
    }
}

###########################################################################
# This function should only be called once, not repeatedly on the smae text.
# It looks for chars > 128, and replaces them with their html macro for
# the same char.
proc filter::PresubSpecialChars { arrMap text } {
    array set map $arrMap
    foreach char [array names map] {
	binary scan $map($char)\0x00 s macro
	regsub -all $map($char) $text \\&#$macro text
    }
    set text
}

proc filter::Filter { chatToken varText } {
    set token [ModifyToken $chatToken]
    variable $token
    upvar 0 $token state
    variable filterInterp
    set filterMode [GetMode state]

    # little trick here.  If we are in blocking mode, then we don't
    # even have to touch the bit stream.
    if { $filterMode == "block" } {
	upvar $varText origText
	set text $origText
    } else { 
	# it's in filter mode 
	upvar $varText text
    }

    set buffer [GetBuffer state]
    append buffer [PresubSpecialChars [GetCharMap state] $text]
    set buffer [SubSpecialChars [GetCharMap state] $buffer]
    set lstSplitBuffer [Regroup $buffer]
    if [catch {
	interp eval filterInterp "
	array set map \{[GetCharMap state]\}
	set url \{[GetUrl state]\}
	set token $token
	"
	set parseResult [HtmlParse [lindex $lstSplitBuffer 0] \
		[GetTheFilter state]]
    } errMsg ] {
	dbg::puts filter "Error while HtmlParse: $errMsg\n\
		[interp eval filterInterp \
		{set errorInfo }]"
	set text [lindex $lstSplitBuffer 0]
    } else {
	if { $filterMode == "block" } {
	    set text [lindex $lstSplitBuffer 0]
	    foreach url $parseResult {
		AddBlockedURL $url
	    }
	} else {
	    set text $parseResult
	}
    }
    set text [UndoSubSpecialChars [GetCharMap state] $text]
    SetBuffer state [lindex $lstSplitBuffer 1]
}

proc filter::Clean { chatToken } {
    set token [ModifyToken $chatToken]
    variable $token
    upvar 0 $token state
    catch { unset state }
}

proc filter::Finish { chatToken } {
    dbg::puts filter "filter::Finish $chatToken"
    set token [ModifyToken $chatToken]
    variable $token
    upvar 0 $token state

    set result [Flush $token]
    append result [GetBuffer state]
    UndoSubSpecialChars [GetCharMap state] $result
}

proc filter::blockTag { token origTagname slash options args} {
    variable $token
    upvar 0 $token state
    variable filterInterp

    if [llength $args] {
	set text [lindex $args 0]
    } else {
	set text {}
    }
    
    set tagname [string toupper [string trim $origTagname]]
    if { $tagname != "" } {
	if [ catch {
	    AddBlockedURL [interp eval filterInterp \
		    [GetTheFilter state] [list $token [GetUrl state] \
		    $tagname $slash $options $text \
		    [GetCharMap state]]]
	} err ] {
	    dbg::puts filter "error while blocking tag: $err"
	}
    }
}

proc filter::AddBlockedURL { url } {
    set url [RemoveArgsFromUrl $url]
    variable blockedURLs
    if { $url != "" } {
	dbg::puts filter ">>>>>>>>>>>>>>>>>>>> Blocking $url"
	if [info exists blockedURLs($url) ] {
	    incr blockedURLs($url)
	} else {
	    set blockedURLs($url) 1
	}
	urlbase::AddBadURL $url
    }
}

proc filter::RemoveBlockedURL { url } {
    variable blockedURLs
    set result 0
    if { $url != "" && [info exists blockedURLs($url)] } {
	set result 1
	incr blockedURLs($url) -1
	if { $blockedURLs($url) == 0 } {
	    unset blockedURLs($url)
	}
    }
    set result
}

proc filter::RebuildTag { tagname slash options text } {
    if { [string trim $options] == "" } {
	set result "<$slash$tagname>$text"
    } else {
	set result "<$slash$tagname $options>$text"
    }
}

proc filter::filterTag { token origTagname slash options args} {
    variable $token
    upvar 0 $token state
    variable filterInterp

    if [llength $args] {
	set text [lindex $args 0]
    } else {
	set text {}
    }
    
    set tagname [string toupper [string trim $origTagname]]
    if { $tagname != "" } {
	set parseResult [GetParseResult state]
	if [catch { 
	    append parseResult [interp eval filterInterp \
		    [GetTheFilter state] [list $token [GetUrl state] \
		    $tagname $slash $options $text \
		    [GetCharMap state]]]
	} errMessage] {
	    dbg::puts filter "Error filtering tag: $errMessage"
	    append parseResult [RebuildTag $origTagname $slash $options $text]
	}
	SetParseResult state $parseResult
    }
}

proc filter::Flush { token } {
    variable $token
    upvar 0 $token state
    variable filterInterp

    set endOfPage {}
    if [catch {
	set endOfPage [interp eval filterInterp \
		[list [GetTheFilter state]Finish $fToken]]
    } err ] {
    }
    UndoSubSpecialChars [GetCharMap state] $endOfPage
}

###########################################################################
# subSpecialChars
# text: an html snippet
# returns: 
#  The html text modified so that all special characters (characters
# with tcl meaning) are removed and hex values above 0x80 replaces
# them. 

proc filter::SubSpecialChars { arrMap text } {

    set w " \t\r\n"					;# white space
    array set map $arrMap

    # sub out all $ because they do nasty things to us
    regsub -all \\$ $text $map(dollar) text

    # take care of < and > inside of comments
    regsub -all -- "<!--" $text $map(beginComment) text
    regsub -all -- "<--!" $text $map(beginComment) text
    regsub -all -- "--\[$w]*>" $text $map(endComment) text
    while { [regsub -all -- "($map(beginComment)\[^$map(endComment)<\]*)(<)" \
	    $text \\1$map(lessThan) text] } {}
    while { [regsub -all -- "($map(beginComment)\[^$map(endComment)>\]*)(>)" \
	    $text \\1$map(greaterThan) text] } {}

    regsub -all $map(beginComment) $text <!-- text
    regsub -all $map(endComment) $text --> text

    # change script and /script tags into one large script tag
    set scriptTag "(<\[$w]*)script(\[$w]*)"
    set endScriptTag "(<\[$w]*)/script(\[^>]*>)"
    regsub -all -nocase $scriptTag $text \\1$map(beginScript)\\2 text
    regsub -all -nocase $endScriptTag $text \\1$map(endScript)\\2 text
    while { [regsub -all -- "($map(beginScript)\[^$map(endScript)<\]*)(<)" \
	    $text \\1$map(lessThan) text] } {}
    while { [regsub -all -- "($map(beginScript)\[^$map(endScript)>\]*)(>)" \
	    $text \\1$map(greaterThan) text] } {}

    regsub -all $map(beginScript) $text SCRIPT text
    regsub -all $map(endScript) $text /SCRIPT text

    # and handle other tcl-dangerous characters
    while { [regsub -all -- "\\\{" $text $map(openBrace) text] } {}
    while { [regsub -all -- "\\\}" $text $map(closeBrace) text] } {}
    while { [regsub -all -- "\\\\" $text $map(backSlash) text] } {}

    set text
}

###########################################################################
# undoSubSpecialChars
# text: an html snippet that has been returned by subSpecialChars
# returns: an html snippet where the < and > characters have been
# substituted back in for the place-holder put in by subSpecialChars

proc filter::UndoSubSpecialChars { arrMap text } {

    array set map $arrMap

    regsub -all $map(lessThan) $text < text
    regsub -all $map(greaterThan) $text > text
    regsub -all $map(openBrace) $text \{ text
    regsub -all $map(closeBrace) $text \} text
    regsub -all $map(backSlash) $text \\ text
    regsub -all $map(dollar) $text {$} text
    set text
}

###########################################################################
# regroup text
# args:
#    text: html document snippet, beginning with a tag.  Must be
# result of a subSpecialChars call.
# regroup splits the html text into two pieces.  The first piece
# is a complete "block" of html, upto but not including the last 
# tag and all text after it.  The second piece is the left over.
# text: an html snippet
#    returns: a list of two strings
# 
proc filter::Regroup { text } {
    # look for an unterminated tag
    if [ regexp -indices {(<[^>]*)$} $text match] {
	set index [lindex $match 0]
	list [string range $text 0 [expr { $index - 1}]] \
		[string range $text $index end]
	# look for the last complete tag
    } elseif [regexp -indices {(<[^>]*>[^<]*)$} $text match] {
	set index [lindex $match 0]
	list [string range $text 0 [expr { $index - 1}]] \
		[string range $text $index end]
    } else {
	list $text {}
    }
}

proc filter::TestRegroup { text } {
    variable scaryChars
    set arrMap [mapchar::GenerateMap scaryChars]
    set text [SubSpecialChars $arrMap $text]
    set lSplit [Regroup $text]
    set result [list [UndoSubSpecialChars $arrMap [lindex $lSplit 0]] \
	    [UndoSubSpecialChars $arrMap [lindex $lSplit 1]]]	    
}

proc filter::LoadFilterTable { filename} {
    variable filterTable
    variable blockerTable
    variable filterInterp
    
    if {! [interp exists filterInterp] } {
	interp create -safe filterInterp
	interp hide filterInterp puts
	interp hide filterInterp close
	interp hide filterInterp read
	interp hide filterInterp gets
    }
    set fileHandle [open $filename r]
    set fileContents [read $fileHandle]
    catch { unset filterTable }
    interp eval filterInterp $fileContents
    array set filterTable [interp eval filterInterp {set theFilterTable}]
    array set blockerTable [interp eval filterInterp {set theBlockerTable}]
    close $fileHandle
}
###########################################################################
# htmlParse
# Description:
#    htmlParse calls cmd for every tag in the document.
# args:
#    html: The raw html code.  html should have been passed to
# subSpecialChars before being passed to htmlParse to avoid tcl
# attaching special meaning to the characters.
#    cmd: the command to be called for each tag
proc filter::HtmlParse {html cmd {start {}}} {
    variable filterInterp

    # Map braces and backslashes into HTML entities
    regsub -all \{ $html {\&ob;} html
    regsub -all \} $html {\&cb;} html
    regsub -all {\\} $html {\&bsl;} html

    # This pattern matches the parts of an HTML tag
    set w " \t\r\n"					;# white space
    set exp <(/?)(\[^$w>]+)(\[$w]*\[^>]*)>

    # This generates a call to cmd with HTML tag parts
    # \1 is the leading /, if any
    # \2 is the HTML tag name
    # \3 is the parameters to the tag, if any
    # The curly braces at either end group of all the text
    # after the HTML tag, which becomes the last arg to $cmd.
#    set sub "\} \ncatch \{\n$cmd {\\2} {\\1} {\\3} \} \{"
    set sub "\}\n$cmd {\\2} {\\1} {\\3} \{"
    regsub -all $exp $html $sub html

    # This balances the curly braces,
    # and calls $cmd with $start as a pseudo-tag 
    # at the beginning and end of the script.
#     interp eval filterInterp {set parseResult {}}
#     interp eval filterInterp "catch \{\n $cmd {$start} {} {} {$html} \}"
#     interp eval filterInterp "catch \{\n $cmd {$start} / {} {} \}"
#     interp eval filterInterp {set parseResult}

     interp eval filterInterp {set parseResult {}}
     global log
#     puts $log "$cmd {$start} {} {} {$html}"
     interp eval filterInterp "$cmd {$start} {} {} {$html}"
     interp eval filterInterp "$cmd {$start} / {} {}"
     interp eval filterInterp {set parseResult}
}

proc filter::UrlRoot { url } {
    if { ![regexp (.*)/\$ $url x root] && ![regexp (.*/).*\$ $url x root] } {
	set root ""
    }
    set root
}    

proc filter::SelectFilter { url } {
    variable filterTable
    variable filterInterp

    # remove beginning http:// if it exists
    set url [string tolower $url]
    regsub ^http:// $url {} url
    while {! [info exists filter]} {
	if [info exists filterTable($url)] {
	    set filter $filterTable($url)
	} elseif { $url == "" } {
	    error "No default filter!"
	} 
	set url [UrlRoot $url]
    }
    set filter
}    

proc filter::SelectBlocker { url } {
    variable blockerTable
    variable filterInterp
    
    # remove beginning http:// if it exists
    set url [string tolower $url]
    regsub ^http:// $url {} url
    while {! [info exists blocker]} {
	if [info exists blockerTable($url)] {
	    set blocker $blockerTable($url)
	} elseif { $url == "" } {
	    error "No default bocker!"
	} 
	set url [UrlRoot $url]
    }
    set blocker
}    

proc filter::ShouldBlockUrl { url blockingKnownAdURLs} {
    set result 0
    set url [RemoveArgsFromUrl $url]
    if [RemoveBlockedURL $url]  {
	set result 1
    } elseif $blockingKnownAdURLs {
	variable filterInterp
	catch { 
	    set result [interp eval filterInterp [list isKnownAdURL $url]]
	}
	if { !$result && [urlbase::IsBadURL $url] } {
	    set result 1
	}
    }
    set result 
}

proc filter::RemoveArgsFromUrl { url } {
    regsub {\?.*$} $url {} url
    set url
}

filter::LoadFilterTable $filter::theFilterFile
###########################################################################
# Copyright (c) 1998, Jeffrey Glen Rennie
# All rights reserved.
###########################################################################
