Bug 506397 - "Support multiple spam corpus files" [r=Standard8 sr=bienvenu]
This commit is contained in:
Родитель
de3a2097df
Коммит
996ab47df3
|
@ -45,6 +45,7 @@ interface nsIMsgFilterHitNotify;
|
|||
interface nsIMsgWindow;
|
||||
interface nsIMsgDBHdr;
|
||||
interface nsIStreamListener;
|
||||
interface nsILocalFile;
|
||||
|
||||
/**
|
||||
* This interface is still very much under development, and is not yet stable.
|
||||
|
@ -145,7 +146,7 @@ interface nsIMsgTraitDetailListener : nsISupports
|
|||
[array, size_is(tokenCount)] in unsigned long runningPercents);
|
||||
};
|
||||
|
||||
[scriptable, uuid(EDB05079-3F8A-46a6-A596-E7FD8E12216B)]
|
||||
[scriptable, uuid(8EA5BBCA-F735-4d43-8541-D203D8E2FF2F)]
|
||||
interface nsIJunkMailPlugin : nsIMsgFilterPlugin
|
||||
{
|
||||
/**
|
||||
|
@ -307,15 +308,80 @@ interface nsIJunkMailPlugin : nsIMsgFilterPlugin
|
|||
in nsIMsgTraitDetailListener aListener,
|
||||
[optional] in nsIMsgWindow aMsgWindow);
|
||||
|
||||
/**
|
||||
* Gives information on token and message count information in the
|
||||
* training data corpus
|
||||
*
|
||||
* @param aTrait trait id (may be null)
|
||||
* @param aMessageCount count of messages that have been trained with aTrait
|
||||
*
|
||||
* @return token count for all traits
|
||||
*/
|
||||
|
||||
unsigned long corpusCounts(in unsigned long aTrait, out unsigned long aMessageCount);
|
||||
};
|
||||
|
||||
/**
|
||||
* The nsIMsgCorpus interface manages a corpus of mail data used for
|
||||
* statistical analysis of messages.
|
||||
*/
|
||||
[scriptable, uuid(70BAD26F-DFD4-41bd-8FAB-4C09B9C1E845)]
|
||||
interface nsIMsgCorpus : nsISupports
|
||||
{
|
||||
/**
|
||||
* Clear the corpus data for a trait id.
|
||||
*
|
||||
* @param aTrait trait id
|
||||
*/
|
||||
void clearTrait(in unsigned long aTrait);
|
||||
|
||||
/**
|
||||
* Update corpus data from a file.
|
||||
*
|
||||
* @param aFile the file with the data, in the format:
|
||||
*
|
||||
* Format of the trait file for version 1:
|
||||
* [0xFCA93601] (the 01 is the version)
|
||||
* for each trait to write:
|
||||
* [id of trait to write] (0 means end of list)
|
||||
* [number of messages per trait]
|
||||
* for each token with non-zero count
|
||||
* [count]
|
||||
* [length of word]word
|
||||
*
|
||||
* @param aIsAdd should the data be added, or removed? True if
|
||||
* adding, false if removing.
|
||||
*
|
||||
* @param aRemapCount number of items in the parallel arrays aFromTraits,
|
||||
* aToTraits. These arrays allow conversion of the
|
||||
* trait id stored in the file (which may be originated
|
||||
* externally) to the trait id used in the local corpus
|
||||
* (which is defined locally using nsIMsgTraitService, and
|
||||
* mapped by that interface to a globally unique trait
|
||||
* id string).
|
||||
*
|
||||
* @param aFromTraits array of trait ids used in aFile. If aFile contains
|
||||
* trait ids that are not in this array, they are not
|
||||
* remapped, but assumed to be local trait ids.
|
||||
*
|
||||
* @param aToTraits array of trait ids, corresponding to elements of
|
||||
* aFromTraits, that represent the local trait ids to
|
||||
* be used in storing data from aFile into the local corpus.
|
||||
*/
|
||||
void updateData(in nsILocalFile aFile, in boolean aIsAdd,
|
||||
[optional] in unsigned long aRemapCount,
|
||||
[optional, array, size_is(aRemapCount)] in unsigned long aFromTraits,
|
||||
[optional, array, size_is(aRemapCount)] in unsigned long aToTraits);
|
||||
|
||||
/**
|
||||
* Get the corpus count for a token, where the token is given as a string.
|
||||
*
|
||||
* @param aWord string of characters representing the token
|
||||
* @param aTrait trait id
|
||||
*
|
||||
* @return count of that token in the corpus
|
||||
*
|
||||
*/
|
||||
unsigned long getTokenCount(in AUTF8String aWord, in unsigned long aTrait);
|
||||
|
||||
/**
|
||||
* Gives information on token and message count information in the
|
||||
* training data corpus.
|
||||
*
|
||||
* @param aTrait trait id (may be null)
|
||||
* @param aMessageCount count of messages that have been trained with aTrait
|
||||
*
|
||||
* @return token count for all traits
|
||||
*/
|
||||
|
||||
unsigned long corpusCounts(in unsigned long aTrait, out unsigned long aMessageCount);
|
||||
};
|
||||
|
|
|
@ -49,7 +49,7 @@
|
|||
|
||||
#include "nsISupports.idl"
|
||||
|
||||
[scriptable, uuid(e3e47690-a676-12d6-81c9-00308646b737)]
|
||||
[scriptable, uuid(2CB15FB0-A912-40d3-8882-F2765C75655F)]
|
||||
interface nsIMsgTraitService : nsISupports
|
||||
{
|
||||
/**
|
||||
|
@ -172,4 +172,35 @@ interface nsIMsgTraitService : nsISupports
|
|||
void getEnabledIndices(out unsigned long count,
|
||||
[array, size_is(count)] out unsigned long proIndices,
|
||||
[array, size_is(count)] out unsigned long antiIndices);
|
||||
|
||||
/**
|
||||
* Add a trait as an alias of another trait. An alias is a trait whose
|
||||
* counts will be combined with the aliased trait. This allows multiple sets
|
||||
* of corpus data to be used to provide information on a single message
|
||||
* characteristic, while allowing each individual set of corpus data to
|
||||
* retain its own identity.
|
||||
*
|
||||
* @param aTraitIndex the internal identifier for the aliased trait
|
||||
* @param aTraitAlias the internal identifier for the alias to add
|
||||
*/
|
||||
void addAlias(in unsigned long aTraitIndex, in unsigned long aTraitAlias);
|
||||
|
||||
/**
|
||||
* Removes a trait as an alias of another trait.
|
||||
*
|
||||
* @param aTraitIndex the internal identifier for the aliased trait
|
||||
* @param aTraitAlias the internal identifier for the alias to remove
|
||||
*/
|
||||
void removeAlias(in unsigned long aTraitIndex, in unsigned long aTraitAlias);
|
||||
|
||||
/**
|
||||
* Get an array of trait aliases for a trait index, if any
|
||||
*
|
||||
* @param aTraitIndex the internal identifier for the aliased trait
|
||||
* @param aLength length of array of aliases
|
||||
* @param aAliases array of internal identifiers for aliases
|
||||
*/
|
||||
void getAliases(in unsigned long aTraitIndex, out unsigned long aLength,
|
||||
[retval, array, size_is(aLength)] out unsigned long aAliases);
|
||||
|
||||
};
|
||||
|
|
|
@ -165,6 +165,64 @@ nsMsgTraitService.prototype =
|
|||
aAntiIndices.value = antiIndices;
|
||||
return;
|
||||
},
|
||||
|
||||
addAlias: function addAlias(aTraitIndex, aTraitAliasIndex)
|
||||
{
|
||||
let aliasesString = "";
|
||||
try {
|
||||
aliasesString = traitsBranch.getCharPref("aliases." + aTraitIndex);
|
||||
}
|
||||
catch (e) {}
|
||||
let aliases;
|
||||
if (aliasesString.length)
|
||||
aliases = aliasesString.split(",");
|
||||
else
|
||||
aliases = [];
|
||||
if (aliases.indexOf(aTraitAliasIndex.toString()) == -1)
|
||||
{
|
||||
aliases.push(aTraitAliasIndex);
|
||||
traitsBranch.setCharPref("aliases." + aTraitIndex, aliases.join());
|
||||
}
|
||||
},
|
||||
|
||||
removeAlias: function removeAlias(aTraitIndex, aTraitAliasIndex)
|
||||
{
|
||||
let aliasesString = "";
|
||||
try {
|
||||
aliasesString = traitsBranch.getCharPref("aliases." + aTraitIndex);
|
||||
}
|
||||
catch (e) {
|
||||
return;
|
||||
}
|
||||
let aliases;
|
||||
if (aliasesString.length)
|
||||
aliases = aliasesString.split(",");
|
||||
else
|
||||
aliases = [];
|
||||
let location;
|
||||
if ((location = aliases.indexOf(aTraitAliasIndex.toString())) != -1)
|
||||
{
|
||||
aliases.splice(location, 1);
|
||||
traitsBranch.setCharPref("aliases." + aTraitIndex, aliases.join());
|
||||
}
|
||||
},
|
||||
|
||||
getAliases: function getAliases(aTraitIndex, aLength)
|
||||
{
|
||||
let aliasesString = "";
|
||||
try {
|
||||
aliasesString = traitsBranch.getCharPref("aliases." + aTraitIndex);
|
||||
}
|
||||
catch (e) {}
|
||||
|
||||
let aliases;
|
||||
if (aliasesString.length)
|
||||
aliases = aliasesString.split(",");
|
||||
else
|
||||
aliases = [];
|
||||
aLength.value = aliases.length;
|
||||
return aliases;
|
||||
},
|
||||
};
|
||||
|
||||
// initialization
|
||||
|
|
|
@ -90,6 +90,30 @@ function run_test()
|
|||
do_check_eq(proArray.value[1], proIndex);
|
||||
do_check_eq(antiArray.value[1], antiIndex);
|
||||
|
||||
// check of aliases
|
||||
// add three random aliases
|
||||
ts.addAlias(1, 501);
|
||||
ts.addAlias(1, 502);
|
||||
ts.addAlias(1, 601);
|
||||
let aliases = ts.getAliases(1, {});
|
||||
do_check_eq(aliases[0], 501);
|
||||
do_check_eq(aliases[1], 502);
|
||||
do_check_eq(aliases[2], 601);
|
||||
|
||||
// remove the middle one
|
||||
ts.removeAlias(1, 502);
|
||||
aliases = ts.getAliases(1, {});
|
||||
do_check_eq(aliases.length, 2);
|
||||
do_check_eq(aliases[0], 501);
|
||||
do_check_eq(aliases[1], 601);
|
||||
|
||||
// try to add an existing value
|
||||
ts.addAlias(1, 501);
|
||||
aliases = ts.getAliases(1, {});
|
||||
do_check_eq(aliases.length, 2);
|
||||
do_check_eq(aliases[0], 501);
|
||||
do_check_eq(aliases[1], 601);
|
||||
|
||||
// now let's make sure this got saved in preferences
|
||||
do_check_eq(proId, traitsBranch.getCharPref("id." + proIndex));
|
||||
do_check_eq(proName, traitsBranch.getCharPref("name." + proIndex));
|
||||
|
|
|
@ -87,6 +87,7 @@
|
|||
#include "nsIncompleteGamma.h"
|
||||
#include <math.h>
|
||||
#include <prmem.h>
|
||||
#include "nsIMsgTraitService.h"
|
||||
|
||||
static PRLogModuleInfo *BayesianFilterLogModule = nsnull;
|
||||
|
||||
|
@ -1201,7 +1202,8 @@ NS_IMETHODIMP TokenStreamListener::OnStopRequest(nsIRequest *aRequest, nsISuppor
|
|||
|
||||
/* Implementation file */
|
||||
|
||||
NS_IMPL_ISUPPORTS2(nsBayesianFilter, nsIMsgFilterPlugin, nsIJunkMailPlugin)
|
||||
NS_IMPL_ISUPPORTS3(nsBayesianFilter, nsIMsgFilterPlugin,
|
||||
nsIJunkMailPlugin, nsIMsgCorpus)
|
||||
|
||||
nsBayesianFilter::nsBayesianFilter()
|
||||
: mTrainingDataDirty(PR_FALSE)
|
||||
|
@ -1298,7 +1300,7 @@ public:
|
|||
PRUint32 aNumMessagesToClassify,
|
||||
const char **aMessageURIs)
|
||||
: mFilter(aFilter),
|
||||
mSupports(aFilter),
|
||||
mJunkMailPlugin(aFilter),
|
||||
mJunkListener(aJunkListener),
|
||||
mTraitListener(aTraitListener),
|
||||
mDetailListener(aDetailListener),
|
||||
|
@ -1321,7 +1323,7 @@ public:
|
|||
PRUint32 aNumMessagesToClassify,
|
||||
const char **aMessageURIs)
|
||||
: mFilter(aFilter),
|
||||
mSupports(aFilter),
|
||||
mJunkMailPlugin(aFilter),
|
||||
mJunkListener(aJunkListener),
|
||||
mTraitListener(nsnull),
|
||||
mDetailListener(nsnull),
|
||||
|
@ -1378,7 +1380,7 @@ public:
|
|||
|
||||
private:
|
||||
nsBayesianFilter* mFilter;
|
||||
nsCOMPtr<nsISupports> mSupports;
|
||||
nsCOMPtr<nsIJunkMailPlugin> mJunkMailPlugin;
|
||||
nsCOMPtr<nsIJunkMailClassificationListener> mJunkListener;
|
||||
nsCOMPtr<nsIMsgTraitClassificationListener> mTraitListener;
|
||||
nsCOMPtr<nsIMsgTraitDetailListener> mDetailListener;
|
||||
|
@ -1490,6 +1492,14 @@ void nsBayesianFilter::classifyMessage(
|
|||
nsAutoTArray<PRUint32, kTraitAutoCapacity> numProMessages;
|
||||
// anti message counts per trait index
|
||||
nsAutoTArray<PRUint32, kTraitAutoCapacity> numAntiMessages;
|
||||
// array of pro aliases per trait index
|
||||
nsAutoTArray<PRUint32*, kTraitAutoCapacity > proAliasArrays;
|
||||
// number of pro aliases per trait index
|
||||
nsAutoTArray<PRUint32, kTraitAutoCapacity > proAliasesLengths;
|
||||
// array of anti aliases per trait index
|
||||
nsAutoTArray<PRUint32*, kTraitAutoCapacity> antiAliasArrays;
|
||||
// number of anti aliases per trait index
|
||||
nsAutoTArray<PRUint32, kTraitAutoCapacity > antiAliasesLengths;
|
||||
// construct the outgoing listener arrays
|
||||
nsAutoTArray<PRUint32, kTraitAutoCapacity> traits;
|
||||
nsAutoTArray<PRUint32, kTraitAutoCapacity> percents;
|
||||
|
@ -1499,14 +1509,64 @@ void nsBayesianFilter::classifyMessage(
|
|||
percents.SetCapacity(traitCount);
|
||||
numProMessages.SetCapacity(traitCount);
|
||||
numAntiMessages.SetCapacity(traitCount);
|
||||
proAliasesLengths.SetCapacity(traitCount);
|
||||
antiAliasesLengths.SetCapacity(traitCount);
|
||||
proAliasArrays.SetCapacity(traitCount);
|
||||
antiAliasArrays.SetCapacity(traitCount);
|
||||
}
|
||||
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsIMsgTraitService> traitService(do_GetService("@mozilla.org/msg-trait-service;1", &rv));
|
||||
if (NS_FAILED(rv))
|
||||
{
|
||||
NS_ERROR("Failed to get trait service");
|
||||
PR_LOG(BayesianFilterLogModule, PR_LOG_ERROR, ("Failed to get trait service"));
|
||||
}
|
||||
|
||||
// get aliases and message counts for the pro and anti traits
|
||||
for (PRUint32 traitIndex = 0; traitIndex < traitCount; traitIndex++)
|
||||
{
|
||||
numProMessages.AppendElement(
|
||||
mCorpus.getMessageCount(aProTraits[traitIndex]));
|
||||
numAntiMessages.AppendElement(
|
||||
mCorpus.getMessageCount(aAntiTraits[traitIndex]));
|
||||
nsresult rv;
|
||||
|
||||
// pro trait
|
||||
PRUint32 proAliasesLength = 0;
|
||||
PRUint32* proAliases = nsnull;
|
||||
PRUint32 proTrait = aProTraits[traitIndex];
|
||||
if (traitService)
|
||||
{
|
||||
rv = traitService->GetAliases(proTrait, &proAliasesLength, &proAliases);
|
||||
if (NS_FAILED(rv))
|
||||
{
|
||||
NS_ERROR("trait service failed to get aliases");
|
||||
PR_LOG(BayesianFilterLogModule, PR_LOG_ERROR, ("trait service failed to get aliases"));
|
||||
}
|
||||
}
|
||||
proAliasesLengths.AppendElement(proAliasesLength);
|
||||
proAliasArrays.AppendElement(proAliases);
|
||||
PRUint32 proMessageCount = mCorpus.getMessageCount(proTrait);
|
||||
for (PRUint32 aliasIndex = 0; aliasIndex < proAliasesLength; aliasIndex++)
|
||||
proMessageCount += mCorpus.getMessageCount(proAliases[aliasIndex]);
|
||||
numProMessages.AppendElement(proMessageCount);
|
||||
|
||||
// anti trait
|
||||
PRUint32 antiAliasesLength = 0;
|
||||
PRUint32* antiAliases = nsnull;
|
||||
PRUint32 antiTrait = aAntiTraits[traitIndex];
|
||||
if (traitService)
|
||||
{
|
||||
rv = traitService->GetAliases(antiTrait, &antiAliasesLength, &antiAliases);
|
||||
if (NS_FAILED(rv))
|
||||
{
|
||||
NS_ERROR("trait service failed to get aliases");
|
||||
PR_LOG(BayesianFilterLogModule, PR_LOG_ERROR, ("trait service failed to get aliases"));
|
||||
}
|
||||
}
|
||||
antiAliasesLengths.AppendElement(antiAliasesLength);
|
||||
antiAliasArrays.AppendElement(antiAliases);
|
||||
PRUint32 antiMessageCount = mCorpus.getMessageCount(antiTrait);
|
||||
for (PRUint32 aliasIndex = 0; aliasIndex < antiAliasesLength; aliasIndex++)
|
||||
antiMessageCount += mCorpus.getMessageCount(antiAliases[aliasIndex]);
|
||||
numAntiMessages.AppendElement(antiMessageCount);
|
||||
}
|
||||
|
||||
for (PRUint32 i = 0; i < tokenCount; ++i)
|
||||
|
@ -1517,10 +1577,17 @@ void nsBayesianFilter::classifyMessage(
|
|||
continue;
|
||||
for (PRUint32 traitIndex = 0; traitIndex < traitCount; traitIndex++)
|
||||
{
|
||||
double proCount =
|
||||
static_cast<double>(mCorpus.getTraitCount(t, aProTraits[traitIndex]));
|
||||
double antiCount =
|
||||
static_cast<double>(mCorpus.getTraitCount(t, aAntiTraits[traitIndex]));
|
||||
PRUint32 iProCount = mCorpus.getTraitCount(t, aProTraits[traitIndex]);
|
||||
// add in any counts for aliases to proTrait
|
||||
for (PRUint32 aliasIndex = 0; aliasIndex < proAliasesLengths[traitIndex]; aliasIndex++)
|
||||
iProCount += mCorpus.getTraitCount(t, proAliasArrays[traitIndex][aliasIndex]);
|
||||
double proCount = static_cast<double>(iProCount);
|
||||
|
||||
PRUint32 iAntiCount = mCorpus.getTraitCount(t, aAntiTraits[traitIndex]);
|
||||
// add in any counts for aliases to antiTrait
|
||||
for (PRUint32 aliasIndex = 0; aliasIndex < antiAliasesLengths[traitIndex]; aliasIndex++)
|
||||
iAntiCount += mCorpus.getTraitCount(t, antiAliasArrays[traitIndex][aliasIndex]);
|
||||
double antiCount = static_cast<double>(iAntiCount);
|
||||
|
||||
double prob, denom;
|
||||
// Prevent a divide by zero error by setting defaults for prob
|
||||
|
@ -1715,6 +1782,12 @@ void nsBayesianFilter::classifyMessage(
|
|||
traits.AppendElement(aProTraits[traitIndex]);
|
||||
percents.AppendElement(proPercent);
|
||||
}
|
||||
|
||||
// free aliases arrays returned from XPCOM
|
||||
if (proAliasesLengths[traitIndex])
|
||||
NS_Free(proAliasArrays[traitIndex]);
|
||||
if (antiAliasesLengths[traitIndex])
|
||||
NS_Free(antiAliasArrays[traitIndex]);
|
||||
}
|
||||
|
||||
if (aTraitListener)
|
||||
|
@ -1903,7 +1976,7 @@ public:
|
|||
nsTArray<PRUint32>& aNewClassifications,
|
||||
nsIJunkMailClassificationListener* aJunkListener,
|
||||
nsIMsgTraitClassificationListener* aTraitListener)
|
||||
: mFilter(filter), mSupports(filter), mJunkListener(aJunkListener),
|
||||
: mFilter(filter), mJunkMailPlugin(filter), mJunkListener(aJunkListener),
|
||||
mTraitListener(aTraitListener),
|
||||
mOldClassifications(aOldClassifications),
|
||||
mNewClassifications(aNewClassifications)
|
||||
|
@ -1920,7 +1993,7 @@ public:
|
|||
|
||||
private:
|
||||
nsBayesianFilter* mFilter;
|
||||
nsCOMPtr<nsISupports> mSupports;
|
||||
nsCOMPtr<nsIJunkMailPlugin> mJunkMailPlugin;
|
||||
nsCOMPtr<nsIJunkMailClassificationListener> mJunkListener;
|
||||
nsCOMPtr<nsIMsgTraitClassificationListener> mTraitListener;
|
||||
nsTArray<PRUint32> mOldClassifications;
|
||||
|
@ -2122,6 +2195,8 @@ NS_IMETHODIMP nsBayesianFilter::DetailMessage(const char *aMsgURI,
|
|||
return tokenizeMessage(aMsgURI, aMsgWindow, analyzer);
|
||||
}
|
||||
|
||||
// nsIMsgCorpus implementation
|
||||
|
||||
NS_IMETHODIMP nsBayesianFilter::CorpusCounts(PRUint32 aTrait,
|
||||
PRUint32 *aMessageCount,
|
||||
PRUint32 *aTokenCount)
|
||||
|
@ -2137,6 +2212,33 @@ NS_IMETHODIMP nsBayesianFilter::CorpusCounts(PRUint32 aTrait,
|
|||
return NS_ERROR_FAILURE;
|
||||
}
|
||||
|
||||
/**
 * Clear the corpus data for a trait id by forwarding the request to the
 * mCorpus member.
 *
 * @param aTrait  trait id
 */
NS_IMETHODIMP nsBayesianFilter::ClearTrait(PRUint32 aTrait)
{
  nsresult rv = mCorpus.ClearTrait(aTrait);
  return rv;
}
|
||||
|
||||
/**
 * Add or remove corpus data read from a file by forwarding all arguments,
 * unchanged, to the mCorpus member.
 */
NS_IMETHODIMP
nsBayesianFilter::UpdateData(nsILocalFile *aFile,
                             PRBool aIsAdd,
                             PRUint32 aRemapCount,
                             PRUint32 *aFromTraits,
                             PRUint32 *aToTraits)
{
  nsresult rv = mCorpus.UpdateData(aFile, aIsAdd, aRemapCount,
                                   aFromTraits, aToTraits);
  return rv;
}
|
||||
|
||||
/**
 * Report the corpus count of a token for a trait.
 *
 * @param aWord   characters of the token to look up
 * @param aTrait  trait id
 * @param aCount  [out] count returned by the corpus for that token and trait
 */
NS_IMETHODIMP
nsBayesianFilter::GetTokenCount(const nsACString &aWord,
                                PRUint32 aTrait,
                                PRUint32 *aCount)
{
  NS_ENSURE_ARG_POINTER(aCount);

  // Look the word up in the corpus, then ask the corpus for its count
  // under aTrait and hand that straight back to the caller.
  CorpusToken* token = mCorpus.get(PromiseFlatCString(aWord).get());
  *aCount = mCorpus.getTraitCount(token, aTrait);
  return NS_OK;
}
|
||||
|
||||
/* Corpus Store */
|
||||
|
||||
/*
|
||||
|
@ -2262,7 +2364,8 @@ PRBool CorpusStore::writeTokens(FILE* stream, PRBool shrink, PRUint32 aTraitId)
|
|||
return PR_TRUE;
|
||||
}
|
||||
|
||||
PRBool CorpusStore::readTokens(FILE* stream, PRInt64 fileSize, PRUint32 aTraitId)
|
||||
PRBool CorpusStore::readTokens(FILE* stream, PRInt64 fileSize,
|
||||
PRUint32 aTraitId, PRBool aIsAdd)
|
||||
{
|
||||
PRUint32 tokenCount;
|
||||
if (readUInt32(stream, &tokenCount) != 1)
|
||||
|
@ -2302,7 +2405,10 @@ PRBool CorpusStore::readTokens(FILE* stream, PRInt64 fileSize, PRUint32 aTraitId
|
|||
break;
|
||||
fpos += size;
|
||||
buffer[size] = '\0';
|
||||
add(buffer, aTraitId, count);
|
||||
if (aIsAdd)
|
||||
add(buffer, aTraitId, count);
|
||||
else
|
||||
remove(buffer, aTraitId, count);
|
||||
}
|
||||
|
||||
delete[] buffer;
|
||||
|
@ -2483,8 +2589,8 @@ void CorpusStore::readTrainingData()
|
|||
(memcmp(cookie, kMagicCookie, sizeof(cookie)) == 0) &&
|
||||
(readUInt32(stream, &goodMessageCount) == 1) &&
|
||||
(readUInt32(stream, &junkMessageCount) == 1) &&
|
||||
readTokens(stream, fileSize, kGoodTrait) &&
|
||||
readTokens(stream, fileSize, kJunkTrait))) {
|
||||
readTokens(stream, fileSize, kGoodTrait, PR_TRUE) &&
|
||||
readTokens(stream, fileSize, kJunkTrait, PR_TRUE))) {
|
||||
NS_WARNING("failed to read training data.");
|
||||
PR_LOG(BayesianFilterLogModule, PR_LOG_ERROR, ("failed to read training data."));
|
||||
}
|
||||
|
@ -2508,39 +2614,9 @@ void CorpusStore::readTrainingData()
|
|||
if (NS_FAILED(rv) || !exists)
|
||||
return;
|
||||
|
||||
rv = mTraitFile->OpenANSIFileDesc("rb", &stream);
|
||||
rv = UpdateData(mTraitFile, PR_TRUE, 0, nsnull, nsnull);
|
||||
|
||||
if (NS_FAILED(rv))
|
||||
return;
|
||||
|
||||
rv = mTraitFile->GetFileSize(&fileSize);
|
||||
if (NS_FAILED(rv))
|
||||
return;
|
||||
|
||||
PRBool error;
|
||||
|
||||
while(1) // break on error or done
|
||||
{
|
||||
if (error = (fread(cookie, sizeof(cookie), 1, stream) != 1))
|
||||
break;
|
||||
|
||||
if (error = memcmp(cookie, kTraitCookie, sizeof(cookie)))
|
||||
break;
|
||||
|
||||
PRUint32 trait;
|
||||
while ( !(error = (readUInt32(stream, &trait) != 1)) && trait)
|
||||
{
|
||||
PRUint32 count;
|
||||
if (error = (readUInt32(stream, &count) != 1))
|
||||
break;
|
||||
|
||||
setMessageCount(trait, count);
|
||||
|
||||
if (error = !readTokens(stream, fileSize, trait))
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (error)
|
||||
{
|
||||
NS_WARNING("failed to read training data.");
|
||||
PR_LOG(BayesianFilterLogModule, PR_LOG_ERROR, ("failed to read training data."));
|
||||
|
@ -2684,3 +2760,87 @@ void CorpusStore::setMessageCount(PRUint32 aTraitId, PRUint32 aCount)
|
|||
mMessageCounts[index] = aCount;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Add (or remove) the trait data stored in aFile to (from) the corpus.
 *
 * The file must begin with the kTraitCookie marker, followed by a list of
 * trait records terminated by a trait id of 0. Each record holds the trait
 * id, the number of messages for that trait, and the token data consumed
 * by readTokens().
 *
 * @param aFile        file with the trait data
 * @param aIsAdd       PR_TRUE to add the file's counts, PR_FALSE to subtract
 *                     them (message counts are clamped at 0)
 * @param aRemapCount  number of entries in aFromTraits / aToTraits
 * @param aFromTraits  trait ids as they appear in the file
 * @param aToTraits    local trait ids to use instead; ids not listed in
 *                     aFromTraits are used as-is
 *
 * @return NS_OK on success; the failure code from opening the file; or
 *         NS_ERROR_FAILURE for any size, read or format error.
 */
nsresult
CorpusStore::UpdateData(nsILocalFile *aFile,
                        PRBool aIsAdd,
                        PRUint32 aRemapCount,
                        PRUint32 *aFromTraits,
                        PRUint32 *aToTraits)
{
  NS_ENSURE_ARG_POINTER(aFile);
  if (aRemapCount)
  {
    NS_ENSURE_ARG_POINTER(aFromTraits);
    NS_ENSURE_ARG_POINTER(aToTraits);
  }

  FILE* stream;
  nsresult rv = aFile->OpenANSIFileDesc("rb", &stream);
  NS_ENSURE_SUCCESS(rv, rv);

  PRInt64 fileSize;
  rv = aFile->GetFileSize(&fileSize);

  // Always initialized, so the final check never reads an indeterminate
  // value when the file size could not be obtained.
  PRBool error = NS_FAILED(rv) ? PR_TRUE : PR_FALSE;

  if (!error)
  {
    // The file must start with the trait-file cookie.
    char cookie[4];
    if (fread(cookie, sizeof(cookie), 1, stream) != 1)
      error = PR_TRUE;
    else if (memcmp(cookie, kTraitCookie, sizeof(cookie)) != 0)
      error = PR_TRUE;
  }

  while (!error)
  {
    PRUint32 fileTrait;
    if (readUInt32(stream, &fileTrait) != 1)
    {
      error = PR_TRUE;
      break;
    }
    if (!fileTrait) // a trait id of 0 marks the end of the list
      break;

    PRUint32 count;
    if (readUInt32(stream, &count) != 1)
    {
      error = PR_TRUE;
      break;
    }

    // Remap the trait id from the file to the local id. The whole table is
    // scanned, so if aFromTraits lists an id more than once the last
    // matching entry is the one that takes effect.
    PRUint32 localTrait = fileTrait;
    for (PRUint32 i = 0; i < aRemapCount; i++)
    {
      if (aFromTraits[i] == fileTrait)
        localTrait = aToTraits[i];
    }

    // Update the message count, never letting a removal go below zero.
    PRUint32 messageCount = getMessageCount(localTrait);
    if (aIsAdd)
      messageCount += count;
    else if (count > messageCount)
      messageCount = 0;
    else
      messageCount -= count;
    setMessageCount(localTrait, messageCount);

    if (!readTokens(stream, fileSize, localTrait, aIsAdd))
      error = PR_TRUE;
  }

  fclose(stream);
  return error ? NS_ERROR_FAILURE : NS_OK;
}
|
||||
|
||||
/**
 * Remove all counts (message and token) recorded for a trait id.
 *
 * @param aTrait  trait id whose counts are cleared
 *
 * @return NS_OK always
 */
nsresult CorpusStore::ClearTrait(PRUint32 aTrait)
{
  // clear message counts
  setMessageCount(aTrait, 0);

  // clear token counts: for every token, subtract whatever count it
  // currently reports for aTrait
  TokenEnumeration tokens = getTokens();
  while (tokens.hasMoreTokens())
  {
    CorpusToken* token = static_cast<CorpusToken*>(tokens.nextToken());
    PRInt32 wordCount = static_cast<PRInt32>(getTraitCount(token, aTrait));
    updateTrait(token, aTrait, -wordCount);
  }
  return NS_OK;
}
|
||||
|
|
|
@ -277,6 +277,49 @@ public:
|
|||
*/
|
||||
PRUint32 getTraitCount(CorpusToken *token, PRUint32 aTraitId);
|
||||
|
||||
/**
|
||||
* Add (or remove) data from a particular file to the corpus data.
|
||||
*
|
||||
* @param aFile the file with the data, in the format:
|
||||
*
|
||||
* Format of the trait file for version 1:
|
||||
* [0xFCA93601] (the 01 is the version)
|
||||
* for each trait to write:
|
||||
* [id of trait to write] (0 means end of list)
|
||||
* [number of messages per trait]
|
||||
* for each token with non-zero count
|
||||
* [count]
|
||||
* [length of word]word
|
||||
*
|
||||
* @param aIsAdd should the data be added, or removed? PR_TRUE if adding,
|
||||
* else removing.
|
||||
*
|
||||
* @param aRemapCount number of items in the parallel arrays aFromTraits,
|
||||
* aToTraits. These arrays allow conversion of the
|
||||
* trait id stored in the file (which may be originated
|
||||
* externally) to the trait id used in the local corpus
|
||||
* (which is defined locally using nsIMsgTraitService).
|
||||
*
|
||||
* @param aFromTraits array of trait ids used in aFile. If aFile contains
|
||||
* trait ids that are not in this array, they are not
|
||||
* remapped, but assumed to be local trait ids.
|
||||
*
|
||||
* @param aToTraits array of trait ids, corresponding to elements of
|
||||
* aFromTraits, that represent the local trait ids to be
|
||||
* used in storing data from aFile into the local corpus.
|
||||
*
|
||||
*/
|
||||
nsresult UpdateData(nsILocalFile *aFile, PRBool aIsAdd,
|
||||
PRUint32 aRemapCount, PRUint32 *aFromTraits,
|
||||
PRUint32 *aToTraits);
|
||||
|
||||
/**
|
||||
* remove all counts (message and tokens) for a trait id
|
||||
*
|
||||
* @param aTrait trait id for the trait to remove
|
||||
*/
|
||||
nsresult ClearTrait(PRUint32 aTrait);
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
|
@ -291,8 +334,16 @@ protected:
|
|||
|
||||
/**
|
||||
* read token strings from the data file
|
||||
*
|
||||
* @param stream file stream with token data
|
||||
* @param fileSize file size
|
||||
* @param aTraitId id for the trait whose counts will be read
|
||||
* @param aIsAdd true to add the counts, false to remove them
|
||||
*
|
||||
* @return true if successful, false if error
|
||||
*/
|
||||
PRBool readTokens(FILE* stream, PRInt64 fileSize, PRUint32 aTraitId);
|
||||
PRBool readTokens(FILE* stream, PRInt64 fileSize, PRUint32 aTraitId,
|
||||
PRBool aIsAdd);
|
||||
|
||||
/**
|
||||
* write token strings to the data file
|
||||
|
@ -326,11 +377,12 @@ protected:
|
|||
// the corresponding trait ID
|
||||
};
|
||||
|
||||
class nsBayesianFilter : public nsIJunkMailPlugin {
|
||||
class nsBayesianFilter : public nsIJunkMailPlugin, nsIMsgCorpus {
|
||||
public:
|
||||
NS_DECL_ISUPPORTS
|
||||
NS_DECL_NSIMSGFILTERPLUGIN
|
||||
NS_DECL_NSIJUNKMAILPLUGIN
|
||||
NS_DECL_NSIMSGCORPUS
|
||||
|
||||
nsBayesianFilter();
|
||||
virtual ~nsBayesianFilter();
|
||||
|
|
Двоичные данные
mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat
Normal file
Двоичные данные
mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat
Normal file
Двоичный файл не отображается.
|
@ -0,0 +1,6 @@
|
|||
From - Sat Jan 26 08:43:42 2008
|
||||
Subject: test1
|
||||
Content-Type: text/plain; charset=iso-8859-1
|
||||
|
||||
important
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
From - Sat Jan 26 08:43:42 2008
|
||||
Subject: test2
|
||||
Content-Type: text/plain; charset=iso-8859-1
|
||||
|
||||
work
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
From - Sat Jan 26 08:43:42 2008
|
||||
Subject: test3
|
||||
Content-Type: text/plain; charset=iso-8859-1
|
||||
|
||||
very important work
|
||||
|
Двоичные данные
mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat
Normal file
Двоичные данные
mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat
Normal file
Двоичный файл не отображается.
|
@ -520,10 +520,11 @@ function startCommand()
|
|||
case kCounts:
|
||||
// test counts
|
||||
let msgCount = {};
|
||||
let tokenCount = nsIJunkMailPlugin.corpusCounts(null, {});
|
||||
nsIJunkMailPlugin.corpusCounts(kJunkTrait, msgCount);
|
||||
let nsIMsgCorpus = nsIJunkMailPlugin.QueryInterface(Ci.nsIMsgCorpus);
|
||||
let tokenCount = nsIMsgCorpus.corpusCounts(null, {});
|
||||
nsIMsgCorpus.corpusCounts(kJunkTrait, msgCount);
|
||||
let junkCount = msgCount.value;
|
||||
nsIJunkMailPlugin.corpusCounts(kGoodTrait, msgCount);
|
||||
nsIMsgCorpus.corpusCounts(kGoodTrait, msgCount);
|
||||
let goodCount = msgCount.value;
|
||||
print("tokenCount, junkCount, goodCount is " + tokenCount, junkCount, goodCount);
|
||||
do_check_eq(tokenCount, gTest.tokenCount);
|
||||
|
|
|
@ -0,0 +1,178 @@
|
|||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is mozilla.org code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Kent James <kent@caspia.com>.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2009
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
// Tests corpus management functions using nsIMsgCorpus
|
||||
|
||||
var msgCorpus =
|
||||
Cc["@mozilla.org/messenger/filter-plugin;1?name=bayesianfilter"]
|
||||
.getService(Ci.nsIMsgCorpus);
|
||||
|
||||
// tokens found in the test corpus file. trait 1001 was trained with
|
||||
// 2 messages, and trait 1003 with 1.
|
||||
|
||||
var tokenData = [
|
||||
// [traitid, count, token]
|
||||
[1001, 0, "iDoNotExist"],
|
||||
[1001, 1, "linecount"],
|
||||
[1001, 2, "envelope-to:kenttest@caspia.com"],
|
||||
[1003, 0, "iAlsoDoNotExist"],
|
||||
[1003, 0, "isjunk"], // in 1001 but not 1003
|
||||
[1003, 1, "linecount"],
|
||||
[1003, 1, "subject:test"],
|
||||
[1003, 1, "envelope-to:kenttest@caspia.com"],
|
||||
]
|
||||
|
||||
// list of tests
|
||||
|
||||
// Queue of test steps. run_test() removes and calls them front to back, and
// the steps are cumulative: each one relies on the corpus state left behind
// by the steps before it.
var gTests =
[
  // load the corpus file once and check the resulting counts
  function checkLoadOnce() {
    let fileName = "msgCorpus.dat";
    let file = do_get_file("resources/" + fileName);
    // second argument true: the file's counts are added (checkRemove passes
    // false to take them out again)
    msgCorpus.updateData(file, true);

    // check message counts
    let messageCount = {};
    msgCorpus.corpusCounts(1001, messageCount);
    do_check_eq(2, messageCount.value);
    msgCorpus.corpusCounts(1003, messageCount);
    do_check_eq(1, messageCount.value);

    // check token counts; the index is declared with let so that it does not
    // become an implicit global
    for (let i = 0; i < tokenData.length; i++) {
      let id = tokenData[i][0];
      let count = tokenData[i][1];
      let word = tokenData[i][2];
      do_check_eq(count, msgCorpus.getTokenCount(word, id));
    }
  },

  // load the same file a second time: all counts are expected to double
  function checkLoadTwice() {
    let fileName = "msgCorpus.dat";
    let file = do_get_file("resources/" + fileName);
    msgCorpus.updateData(file, true);

    // check message counts
    let messageCount = {};
    msgCorpus.corpusCounts(1001, messageCount);
    do_check_eq(4, messageCount.value);
    msgCorpus.corpusCounts(1003, messageCount);
    do_check_eq(2, messageCount.value);

    for (let i = 0; i < tokenData.length; i++) {
      let id = tokenData[i][0];
      let count = 2 * tokenData[i][1];
      let word = tokenData[i][2];
      do_check_eq(count, msgCorpus.getTokenCount(word, id));
    }
  },

  // remap the ids in the file to different local ids
  function loadWithRemap() {
    let fileName = "msgCorpus.dat";
    let file = do_get_file("resources/" + fileName);
    // file ids 1001 and 1003 are stored under local ids 1 and 3
    msgCorpus.updateData(file, true, 2, [1001, 1003], [1, 3]);

    for (let i = 0; i < tokenData.length; i++) {
      // local id = file id - 1000, matching the remap arrays above
      let id = tokenData[i][0] - 1000;
      let count = tokenData[i][1];
      let word = tokenData[i][2];
      do_check_eq(count, msgCorpus.getTokenCount(word, id));
    }
  },

  // test removing data
  function checkRemove() {
    let fileName = "msgCorpus.dat";
    let file = do_get_file("resources/" + fileName);
    msgCorpus.updateData(file, false);

    // check message counts: the file was loaded twice under its own ids and
    // is removed once here, so the single-load values are expected again
    let messageCount = {};
    msgCorpus.corpusCounts(1001, messageCount);
    do_check_eq(2, messageCount.value);
    msgCorpus.corpusCounts(1003, messageCount);
    do_check_eq(1, messageCount.value);

    for (let i = 0; i < tokenData.length; i++) {
      let id = tokenData[i][0];
      let count = tokenData[i][1];
      let word = tokenData[i][2];
      do_check_eq(count, msgCorpus.getTokenCount(word, id));
    }
  },

  // test clearing a trait
  function checkClear() {
    let messageCountObject = {};

    // Disabled debugging aid: dump the message counts before clearing.
    // msgCorpus.corpusCounts(1001, messageCountObject);
    // let v1001 = messageCountObject.value;
    // msgCorpus.corpusCounts(1003, messageCountObject);
    // let v1003 = messageCountObject.value;
    // dump("pre-clear value " + v1001 + " " + v1003 + "\n");

    msgCorpus.clearTrait(1001);

    // check that the message count is zero
    msgCorpus.corpusCounts(1001, messageCountObject);
    do_check_eq(0, messageCountObject.value);

    // but the other trait should still have counts
    msgCorpus.corpusCounts(1003, messageCountObject);
    do_check_eq(1, messageCountObject.value);

    // check that token count was cleared for 1001 only
    for (let i = 0; i < tokenData.length; i++) {
      let id = tokenData[i][0];
      let count = tokenData[i][1];
      let word = tokenData[i][2];
      do_check_eq(id == 1001 ? 0 : count, msgCorpus.getTokenCount(word, id));
    }
  }
];
|
||||
|
||||
// main test
|
||||
/**
 * xpcshell entry point: marks the test as pending, calls every function
 * queued in gTests in order (removing each from the queue as it goes), and
 * then reports the test as finished.
 */
function run_test()
{
  do_test_pending();

  // Take the next step off the front of the queue and call it right away;
  // the loop ends once the queue is empty.
  while (gTests.length > 0)
  {
    let nextStep = gTests.shift();
    nextStep();
  }

  // no more commands, all done
  do_test_finished();
}
|
|
@ -0,0 +1,194 @@
|
|||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is mozilla.org code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Kent James <kent@caspia.com>.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2009
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
// Tests bayes trait analysis with aliases. Adapted from test_traits.js
|
||||
|
||||
/*
|
||||
* These tests rely on data stored in a file, with the same format as traits.dat,
|
||||
* that was trained in the following manner. There are two training messages,
|
||||
* included here as files aliases1.eml and aliases2.eml. aliases.dat was trained on
|
||||
* each of these messages, for different trait indices, as follows, with
|
||||
* columns showing the training count for each trait index:
|
||||
*
|
||||
* file count(1001) count(1005) count(1007) count(1009)
|
||||
*
|
||||
* aliases1.eml 1 0 2 0
|
||||
* aliases2.eml 0 1 0 1
|
||||
*
|
||||
* There is also a third email file, aliases3.eml, which combines tokens
|
||||
* from aliases1.eml and aliases2.eml
|
||||
*
|
||||
* The goal here is to demonstrate that traits 1001 and 1007, and traits
|
||||
* 1005 and 1009, can be combined using aliases. We classify messages with
|
||||
* trait 1001 as the PRO trait, and 1005 as the ANTI trait.
|
||||
*
|
||||
* With these characteristics, I've run a trait analysis without aliases, and
|
||||
* determined that the following are the correct percentage results from the
|
||||
* analysis for each message. "Train11" means that the training was 1 pro count
|
||||
* from aliases1.eml, and 1 anti count from aliases2.eml. "Train32" is 3 pro counts,
|
||||
* and 2 anti counts.
|
||||
*
|
||||
* percentage
|
||||
* file Train11 Train32
|
||||
*
|
||||
* aliases1.eml 92 98
|
||||
* aliases2.eml 8 3
|
||||
* aliases3.eml 50 53
|
||||
*/
|
||||
|
||||
// The bayesian filter plugin service, requested through nsIJunkMailPlugin.
const nsIJunkMailPlugin =
  Cc["@mozilla.org/messenger/filter-plugin;1?name=bayesianfilter"].getService(Ci.nsIJunkMailPlugin);

// The trait service, used below to read, add and remove trait aliases.
const traitService =
  Cc["@mozilla.org/msg-trait-service;1"].getService(Ci.nsIMsgTraitService);

// Trait ids handed to the classifier as the pro and anti traits.
const kProTrait = 1001;
const kAntiTrait = 1005;

// Trait ids that individual tests register as aliases of the pro and anti
// traits respectively.
const kProAlias = 1007;
const kAntiAlias = 1009;
|
||||
|
||||
// The test currently being run; set by startCommand() and read by the
// classification listener.
var gTest;

// Queue of tests to attempt. Each element "test" has these properties:
//
//   test.fileName:    file containing message to test
//   test.proAliases:  array of aliases for the pro trait
//   test.antiAliases: array of aliases for the anti trait
//   test.percent:     expected results from the classifier
var tests =
[
  // without aliases: the "Train11" percentages
  { fileName: "aliases1.eml", proAliases: [], antiAliases: [], percent: 92 },
  { fileName: "aliases2.eml", proAliases: [], antiAliases: [], percent: 8 },
  { fileName: "aliases3.eml", proAliases: [], antiAliases: [], percent: 50 },

  // with one alias on each side: the "Train32" percentages
  { fileName: "aliases1.eml", proAliases: [kProAlias], antiAliases: [kAntiAlias], percent: 98 },
  { fileName: "aliases2.eml", proAliases: [kProAlias], antiAliases: [kAntiAlias], percent: 3 },
  { fileName: "aliases3.eml", proAliases: [kProAlias], antiAliases: [kAntiAlias], percent: 53 }
];
|
||||
|
||||
// main test
|
||||
/**
 * xpcshell entry point: sets up the local mail account, feeds the alias
 * training data into the corpus, marks the test as pending and starts the
 * first queued test.
 */
function run_test()
{
  loadLocalMailAccount();

  // load in the aliases trait testing file, through the plugin's
  // nsIMsgCorpus interface
  let corpus = nsIJunkMailPlugin.QueryInterface(Ci.nsIMsgCorpus);
  let trainingFile = do_get_file("resources/aliases.dat");
  corpus.updateData(trainingFile, true);

  // The test stays pending until startCommand() finds the queue empty.
  do_test_pending();
  startCommand();
}
|
||||
|
||||
var listener =
{
  // nsIMsgTraitClassificationListener implementation

  /**
   * Called with the classification result for a message. Checks the first
   * reported percentage against the expectation of the active test (gTest)
   * and then starts the next test.
   *
   * @param aMsgURI      URI of the classified message; a falsy value is
   *                     treated as the end-of-batch signal and ignored
   * @param aTraitCount  unused here; presumably the length of the two
   *                     arrays - confirm against the listener IDL
   * @param aTraits      unused here
   * @param aPercents    array whose first element is compared with
   *                     gTest.percent
   */
  onMessageTraitsClassified: function(aMsgURI, aTraitCount, aTraits, aPercents)
  {
    //print("Message URI is " + aMsgURI);
    if (!aMsgURI)
      return; // ignore end-of-batch signal

    do_check_eq(aPercents[0], gTest.percent);

    // All done, start the next test
    startCommand();
  },
};
|
||||
|
||||
// start the next test command
|
||||
/**
 * Starts the next queued test, or finishes the run when the queue is empty.
 *
 * Takes the next entry off the front of tests, stores it in gTest, replaces
 * the aliases registered for the pro and anti traits with the ones the test
 * asks for, and requests classification of the test's message. The result
 * is delivered to listener.
 */
function startCommand()
{
  // Do we have more commands?
  if (tests.length == 0)
  {
    // no, all done
    do_test_finished();
    return;
  }

  gTest = tests.shift();

  // remove any existing aliases; both lists are read before anything is
  // removed, and each list is consumed from its end
  let stalePro = traitService.getAliases(kProTrait, {});
  let staleAnti = traitService.getAliases(kAntiTrait, {});
  for (let alias = stalePro.pop(); alias; alias = stalePro.pop())
    traitService.removeAlias(kProTrait, alias);
  for (let alias = staleAnti.pop(); alias; alias = staleAnti.pop())
    traitService.removeAlias(kAntiTrait, alias);

  // add new aliases; this empties the alias arrays of the test entry
  for (let alias = gTest.proAliases.pop(); alias; alias = gTest.proAliases.pop())
    traitService.addAlias(kProTrait, alias);
  for (let alias = gTest.antiAliases.pop(); alias; alias = gTest.antiAliases.pop())
    traitService.addAlias(kAntiTrait, alias);

  // classify message
  let proTraits = [kProTrait];
  let antiTraits = [kAntiTrait];
  nsIJunkMailPlugin.classifyTraitsInMessage(
    getSpec(gTest.fileName), // in string aMsgURI
    proTraits.length,        // length of traits arrays
    proTraits,               // in array aProTraits,
    antiTraits,              // in array aAntiTraits
    listener);               // in nsIMsgTraitClassificationListener aTraitListener
    //null,                  // [optional] in nsIMsgWindow aMsgWindow
    //null,                  // [optional] in nsIJunkMailClassificationListener aJunkListener
}
|
Загрузка…
Ссылка в новой задаче