Skip to content

Commit

Permalink
NMI evaluation fixed for the full match case
Browse files Browse the repository at this point in the history
  • Loading branch information
luav committed Sep 4, 2017
1 parent 01c1196 commit 9457540
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 21 deletions.
4 changes: 3 additions & 1 deletion include/interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -539,8 +539,10 @@ class Collection: public NodeBaseI {
//! \param cn2 const CollectionT& - second collection
//! \param expbase=false bool - use ln (exp base) or log2 (Shannon entropy, bits)
//! for the information measuring
//! \param verbose=false bool - perform additional verification and print details
//! \return RawNmi - resulting NMI
static RawNmi nmi(const CollectionT& cn1, const CollectionT& cn2, bool expbase=false);
static RawNmi nmi(const CollectionT& cn1, const CollectionT& cn2, bool expbase=false
, bool verbose=false);
protected:
// F1-related functions ----------------------------------------------------
//! \brief Average of the maximal matches (by F1 or partial probabilities)
Expand Down
35 changes: 20 additions & 15 deletions include/interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,21 +446,26 @@ Probs Collection<Count>::gmatches(const CollectionT& cn, bool prob) const
}

template <typename Count>
RawNmi Collection<Count>::nmi(const CollectionT& cn1, const CollectionT& cn2, bool expbase)
RawNmi Collection<Count>::nmi(const CollectionT& cn1, const CollectionT& cn2, bool expbase, bool verbose)
{
RawNmi rnmi1;
if(!cn1.clsnum() || !cn2.clsnum())
return rnmi1;

rnmi1 = cn1.nmi(cn2, expbase);
#if VALIDATE >= 2
// Check NMI value for the inverse order of collections
auto rnmi2 = cn2.nmi(cn1, expbase);
fprintf(stderr, "nmi(), mi1: %G, mi2: %G, dmi: %G\n", rnmi1.mi, rnmi2.mi
, rnmi1.mi - rnmi2.mi);
assert(equal(rnmi1.mi, rnmi2.mi, (cn1.clsnum() + cn2.clsnum()) / 2)
&& "nmi(), rnmi is not symmetric");
#endif // VALIDATE
#if VALIDATE >= 1
#if VALIDATE < 2
if(verbose)
#endif // VALIDATE 2
{
// Check NMI value for the inverse order of collections
auto rnmi2 = cn2.nmi(cn1, expbase);
fprintf(stderr, "nmi(), mi1: %G, mi2: %G, dmi: %G\n", rnmi1.mi, rnmi2.mi
, rnmi1.mi - rnmi2.mi);
assert(equal(rnmi1.mi, rnmi2.mi, (cn1.clsnum() + cn2.clsnum()) / 2)
&& "nmi(), rnmi is not symmetric, most likely overlaps are present and not considered but this implementation");
}
#endif // VALIDATE 1
return rnmi1;
}

Expand Down Expand Up @@ -494,7 +499,7 @@ RawNmi Collection<Count>::nmi(const CollectionT& cn, bool expbase) const
//if(!val)
// return 0;
const AccProb prob = val / capacity;
return prob * clog(prob);
return !equal<Prob>(prob, 1) ? prob * clog(prob) : -1;
};

AccProb h12 = 0; // Accumulated mutual probability over the matrix, I(cn1, cn2) in exp base
Expand Down Expand Up @@ -563,7 +568,7 @@ RawNmi Collection<Count>::nmi(const CollectionT& cn, bool expbase) const
#endif // TRACING_CLSMM_
}
#if VALIDATE >= 2
fprintf(stderr, "nmi(), psum: %G\n", psum);
fprintf(stderr, "nmi(), psum: %G, h12: %G\n", psum, h12);
assert(equal(psum, 1., cmmsum) && "nmi(), total probability of the matrix should be 1");
#endif // VALIDATE

Expand Down Expand Up @@ -740,8 +745,8 @@ auto Collection<Count>::evalconts(const CollectionT& cn, ClustersMatching* pclsm
if(m_overlaps
&& !(equal<AccCont>(m_contsum - econt1, ndsnum(), ndsnum())
&& equal<AccCont>(cn.m_contsum - econt2, cn.ndsnum(), cn.ndsnum()))) { // consum equals to the number of nodes for the overlapping case
fprintf(stderr, "evalconts(), c1csum: %.3G (- %.3G), nds1num: %u"
", c2csum: %.3G (- %.3G), nds2num: %u, cmmsum: %.3G\n"
fprintf(stderr, "evalconts(), c1csum: %.3G (- %.3G lacked), nds1num: %u"
", c2csum: %.3G (- %.3G lacked), nds2num: %u, cmmsum: %.3G\n"
, AccProb(m_contsum), AccProb(econt1), ndsnum()
, AccProb(cn.m_contsum), AccProb(econt2), cn.ndsnum(), AccProb(cmmsum));
assert(0 && "evalconts(), consum validation failed");
Expand All @@ -752,13 +757,13 @@ auto Collection<Count>::evalconts(const CollectionT& cn, ClustersMatching* pclsm
if((m_ndshash == cn.m_ndshash && m_ndshash && m_ndshash && (!match1 || !match2)) // The same node base
|| (m_ndshash != cn.m_ndshash && (!match1 && !match2)) // Distinct node base
) { // Note: cmmsum should match to either of the sums
fprintf(stderr, "evalconts(), c1csum: %.3G (- %.3G), c2csum: %.3G (- %.3G), cmmsum: %.3G\n"
fprintf(stderr, "evalconts(), c1csum: %.3G (- %.3G lacked), c2csum: %.3G (- %.3G lacked), cmmsum: %.3G\n"
, AccProb(m_contsum), AccProb(econt1), AccProb(cn.m_contsum)
, AccProb(econt2), AccProb(cmmsum));
throw domain_error("nmi(), rows accumulation is invalid");
}
#if TRACE >= 2
fprintf(stderr, "evalconts(), c1csum: %.3G (- %.3G), c2csum: %.3G (- %.3G), cmmsum: %.3G\n"
fprintf(stderr, "evalconts(), c1csum: %.3G (- %.3G lacked), c2csum: %.3G (- %.3G lacked), cmmsum: %.3G\n"
, AccProb(m_contsum), AccProb(econt1), AccProb(cn.m_contsum)
, AccProb(econt2), AccProb(cmmsum));
#endif // TRACE
Expand Down
17 changes: 15 additions & 2 deletions shared/fileio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,9 @@ void ensureDir(const string& dir)
+= "' already exists as a non-directory path\n");
}

void parseCnlHeader(NamedFileWrapper& fcls, StringBuffer& line, size_t& clsnum, size_t& ndsnum) {
void parseCnlHeader(NamedFileWrapper& fcls, StringBuffer& line, size_t& clsnum
, size_t& ndsnum, bool verbose)
{
//! Parse count value
//! \return - id value of 0 in case of parsing errors
auto parseCount = []() noexcept -> size_t {
Expand Down Expand Up @@ -251,8 +253,19 @@ void parseCnlHeader(NamedFileWrapper& fcls, StringBuffer& line, size_t& clsnum,
fprintf(stderr, "parseCnlHeader(), nodes: %lu\n", ndsnum);
#endif // TRACE
} else {
fprintf(stderr, "WARNING parseCnlHeader(), the header parsing is omitted"
#if TRACE >= 1
#if TRACE < 2
if(verbose)
#endif // TRACE 2
fprintf(
#if TRACE >= 2
stderr
#else
stdout
#endif // TRACE 2
, "WARNING parseCnlHeader(), the header parsing is omitted"
" because of the unexpected attribute: %s\n", tok);
#endif // TRACE 1
break;
}
} while((tok = strtok(nullptr, attrnameDelim)) && attrs < 2);
Expand Down
6 changes: 4 additions & 2 deletions shared/fileio.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,10 @@ void ensureDir(const string& dir);
//! \param line StringBuffer& - processing line (string, header) being read from the file
//! \param[out] clsnum size_t& - resulting number of clusters if specified, 0 in case of parsing errors
//! \param[out] ndsnum size_t& - resulting number of nodes if specified, 0 in case of parsing errors
//! \param verbose=false bool - print information about the header parsing issue to the stdout
//! \return void
void parseCnlHeader(NamedFileWrapper& fcls, StringBuffer& line, size_t& clsnum, size_t& ndsnum);
void parseCnlHeader(NamedFileWrapper& fcls, StringBuffer& line, size_t& clsnum
, size_t& ndsnum, bool verbose=false);

//! \brief Load nodes from the CNL file with optional filtering by the cluster size
//!
Expand Down Expand Up @@ -349,7 +351,7 @@ unordered_set<Id> loadNodes(NamedFileWrapper& file, float membership
StringBuffer line; // Reading line
// Parse header and read the number of clusters if specified
// Note: line includes terminating '\n'
parseCnlHeader(file, line, clsnum, ndsnum);
parseCnlHeader(file, line, clsnum, ndsnum, verbose);

// Estimate the number of nodes in the file if not specified
if(!ndsnum) {
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ int main(int argc, char **argv)
// Note: evaluation of overlapping F1 after NMI allows to reuse some
// calculations, for other cases the order of evaluations does not matter
if(args_info.nmi_given) {
auto rnmi = Collection::nmi(cn1, cn2, args_info.ln_flag);
auto rnmi = Collection::nmi(cn1, cn2, args_info.ln_flag, args_info.detailed_flag);
// Set NMI to NULL if collections have no any mutual information
if(!rnmi.mi) // Note: strict ! is fine here
rnmi.h1 = rnmi.h2 = 1;
Expand Down

0 comments on commit 9457540

Please sign in to comment.