// =============================================================================
//
//      --- kvi_antispam.cpp ---
//
//   This file is part of the KVIrc IRC client distribution
//   Copyright (C) 1999-2000 Szymon Stefanek (stefanek@tin.it)
//
//   This program is FREE software. You can redistribute it and/or
//   modify it under the terms of the GNU General Public License
//   as published by the Free Software Foundation; either version 2
//   of the License, or (at your option) any later version.
//
//   This program is distributed in the HOPE that it will be USEFUL,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//   See the GNU General Public License for more details.
//
//   You should have received a copy of the GNU General Public License
//   along with this program. If not, write to the Free Software Foundation,
//   Inc, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// =============================================================================

#define _KVI_DEBUG_CHECK_RANGE_
#define _KVI_DEBUG_CLASS_NAME_ "KviAntiSpam"

#define _KVI_ANTISPAM_CPP_

#include "kvi_antispam.h"
#include "kvi_string.h"

// Funky research results:

// [05:10:48] <Nataly19> :2http://www.extrababes.com5Go ahead punk, make my day!
// [05:11:04] <treeman>  :-`` -= for mp3's go to http://members.xoom.com/treemansmp3/start.htm
//                      =- ```-
// [05:10:51] <Suzi48498>:P*** **** squirms like CRAZY when **** licks between her...
// [05:12:11] <dea_>     :Hi my name is **** and I just posted pics of myself and
//						other hot girls on my website at http://133.16.114.32/~guest/
// 						tell me what you think!

//
// Yes, it is really annoying when I join a channel and
// I am flooded with all those messages.
// On large-newbie-high-traffic channels I get lagged
// with the Query windows popping up with all that stuff.
// What can I do? Ignore queries?
// Mmmmh
//
// kvi_mayBeSpamMsg : try to guess if text may be a spam message.
// Ideas:
// - A spam message is generally a single PRIVMSG <mynick> :<text>
//   so this function should be (and is) called when
//   a PRIVMSG is received from a person that has no QUERY
//   window open yet.
// - 95% of spam messages contain a URL inside (HTTP, FTP)
// - The other 5% contain words like auto-msg, msg me or query <nickname>
// - There are some really common words:
//   free, mp3, sex, teen, porn, pics, girls, babe, pass, user...
//

bool kvi_mayBeSpamMsg(const char *text)
{
	KviStr tmp(text);
	// Step 1.
	// Check for the URL or a "/QUERY|/MSG" inside the message
	bool bHasUrl = false;
	if( tmp.findFirstIdx("http", false) != -1 )
		bHasUrl = true;
	else if( tmp.findFirstIdx("ftp", false) != -1 )
		bHasUrl = true;
	else if( tmp.findFirstIdx("www", false) != -1 )
		bHasUrl = true;
	else if( tmp.findFirstIdx("auto", false) != -1 )
		bHasUrl = true;
	else if( tmp.findFirstIdx("join #", false) != -1 )
		bHasUrl = true;
	if( !bHasUrl ) return false;

	// Step 2.
	// Find one of the common words

	// Just a minor change. This allows to easily add more "banned" words.
	// Maybe there should be a config dialog for that? Some people like to get
	// porno pics, but do not like mp3's for example... ;-)
	// -- Kristoff

	const char *spam_words[] = {
		"free", "sex", "teen", "pics", "script", "photo", "girl",
		"babes", "pussy", "mp3", "porn", "fuck", "pass", "user",
		"msg", "suck", "britney", "spears", "site", "join", "mediadriven"
	};

	for( unsigned int i = 0; i < sizeof(spam_words) / sizeof(spam_words[0]); i++ ) {
		if( tmp.findFirstIdx(spam_words[i], false) != -1 )
			return true;
	}

	return false;
}
