diff --git a/coverage/index.html b/.coverage-tmp.html similarity index 100% rename from coverage/index.html rename to .coverage-tmp.html diff --git a/coverage/coverage_html.js b/coverage/coverage_html.js deleted file mode 100755 index f55e371..0000000 --- a/coverage/coverage_html.js +++ /dev/null @@ -1,604 +0,0 @@ -// Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 -// For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt - -// Coverage.py HTML report browser code. -/*jslint browser: true, sloppy: true, vars: true, plusplus: true, maxerr: 50, indent: 4 */ -/*global coverage: true, document, window, $ */ - -coverage = {}; - -// General helpers -function debounce(callback, wait) { - let timeoutId = null; - return function(...args) { - clearTimeout(timeoutId); - timeoutId = setTimeout(() => { - callback.apply(this, args); - }, wait); - }; -}; - -function checkVisible(element) { - const rect = element.getBoundingClientRect(); - const viewBottom = Math.max(document.documentElement.clientHeight, window.innerHeight); - const viewTop = 30; - return !(rect.bottom < viewTop || rect.top >= viewBottom); -} - -function on_click(sel, fn) { - const elt = document.querySelector(sel); - if (elt) { - elt.addEventListener("click", fn); - } -} - -// Helpers for table sorting -function getCellValue(row, column = 0) { - const cell = row.cells[column] - if (cell.childElementCount == 1) { - const child = cell.firstElementChild - if (child instanceof HTMLTimeElement && child.dateTime) { - return child.dateTime - } else if (child instanceof HTMLDataElement && child.value) { - return child.value - } - } - return cell.innerText || cell.textContent; -} - -function rowComparator(rowA, rowB, column = 0) { - let valueA = getCellValue(rowA, column); - let valueB = getCellValue(rowB, column); - if (!isNaN(valueA) && !isNaN(valueB)) { - return valueA - valueB - } - return valueA.localeCompare(valueB, undefined, {numeric: true}); -} - -function 
sortColumn(th) { - // Get the current sorting direction of the selected header, - // clear state on other headers and then set the new sorting direction - const currentSortOrder = th.getAttribute("aria-sort"); - [...th.parentElement.cells].forEach(header => header.setAttribute("aria-sort", "none")); - if (currentSortOrder === "none") { - th.setAttribute("aria-sort", th.dataset.defaultSortOrder || "ascending"); - } else { - th.setAttribute("aria-sort", currentSortOrder === "ascending" ? "descending" : "ascending"); - } - - const column = [...th.parentElement.cells].indexOf(th) - - // Sort all rows and afterwards append them in order to move them in the DOM - Array.from(th.closest("table").querySelectorAll("tbody tr")) - .sort((rowA, rowB) => rowComparator(rowA, rowB, column) * (th.getAttribute("aria-sort") === "ascending" ? 1 : -1)) - .forEach(tr => tr.parentElement.appendChild(tr) ); -} - -// Find all the elements with data-shortcut attribute, and use them to assign a shortcut key. -coverage.assign_shortkeys = function () { - document.querySelectorAll("[data-shortcut]").forEach(element => { - document.addEventListener("keypress", event => { - if (event.target.tagName.toLowerCase() === "input") { - return; // ignore keypress from search filter - } - if (event.key === element.dataset.shortcut) { - element.click(); - } - }); - }); -}; - -// Create the events for the filter box. -coverage.wire_up_filter = function () { - // Cache elements. - const table = document.querySelector("table.index"); - const table_body_rows = table.querySelectorAll("tbody tr"); - const no_rows = document.getElementById("no_rows"); - - // Observe filter keyevents. 
- document.getElementById("filter").addEventListener("input", debounce(event => { - // Keep running total of each metric, first index contains number of shown rows - const totals = new Array(table.rows[0].cells.length).fill(0); - // Accumulate the percentage as fraction - totals[totals.length - 1] = { "numer": 0, "denom": 0 }; - - // Hide / show elements. - table_body_rows.forEach(row => { - if (!row.cells[0].textContent.includes(event.target.value)) { - // hide - row.classList.add("hidden"); - return; - } - - // show - row.classList.remove("hidden"); - totals[0]++; - - for (let column = 1; column < totals.length; column++) { - // Accumulate dynamic totals - cell = row.cells[column] - if (column === totals.length - 1) { - // Last column contains percentage - const [numer, denom] = cell.dataset.ratio.split(" "); - totals[column]["numer"] += parseInt(numer, 10); - totals[column]["denom"] += parseInt(denom, 10); - } else { - totals[column] += parseInt(cell.textContent, 10); - } - } - }); - - // Show placeholder if no rows will be displayed. - if (!totals[0]) { - // Show placeholder, hide table. - no_rows.style.display = "block"; - table.style.display = "none"; - return; - } - - // Hide placeholder, show table. - no_rows.style.display = null; - table.style.display = null; - - const footer = table.tFoot.rows[0]; - // Calculate new dynamic sum values based on visible rows. - for (let column = 1; column < totals.length; column++) { - // Get footer cell element. - const cell = footer.cells[column]; - - // Set value into dynamic footer cell element. - if (column === totals.length - 1) { - // Percentage column uses the numerator and denominator, - // and adapts to the number of decimal places. - const match = /\.([0-9]+)/.exec(cell.textContent); - const places = match ? 
match[1].length : 0; - const { numer, denom } = totals[column]; - cell.dataset.ratio = `${numer} ${denom}`; - // Check denom to prevent NaN if filtered files contain no statements - cell.textContent = denom - ? `${(numer * 100 / denom).toFixed(places)}%` - : `${(100).toFixed(places)}%`; - } else { - cell.textContent = totals[column]; - } - } - })); - - // Trigger change event on setup, to force filter on page refresh - // (filter value may still be present). - document.getElementById("filter").dispatchEvent(new Event("input")); -}; - -coverage.INDEX_SORT_STORAGE = "COVERAGE_INDEX_SORT_2"; - -// Loaded on index.html -coverage.index_ready = function () { - coverage.assign_shortkeys(); - coverage.wire_up_filter(); - document.querySelectorAll("[data-sortable] th[aria-sort]").forEach( - th => th.addEventListener("click", e => sortColumn(e.target)) - ); - - // Look for a localStorage item containing previous sort settings: - const stored_list = localStorage.getItem(coverage.INDEX_SORT_STORAGE); - - if (stored_list) { - const {column, direction} = JSON.parse(stored_list); - const th = document.querySelector("[data-sortable]").tHead.rows[0].cells[column]; - th.setAttribute("aria-sort", direction === "ascending" ? 
"descending" : "ascending"); - th.click() - } - - // Watch for page unload events so we can save the final sort settings: - window.addEventListener("unload", function () { - const th = document.querySelector('[data-sortable] th[aria-sort="ascending"], [data-sortable] [aria-sort="descending"]'); - if (!th) { - return; - } - localStorage.setItem(coverage.INDEX_SORT_STORAGE, JSON.stringify({ - column: [...th.parentElement.cells].indexOf(th), - direction: th.getAttribute("aria-sort"), - })); - }); - - on_click(".button_prev_file", coverage.to_prev_file); - on_click(".button_next_file", coverage.to_next_file); - - on_click(".button_show_hide_help", coverage.show_hide_help); -}; - -// -- pyfile stuff -- - -coverage.LINE_FILTERS_STORAGE = "COVERAGE_LINE_FILTERS"; - -coverage.pyfile_ready = function () { - // If we're directed to a particular line number, highlight the line. - var frag = location.hash; - if (frag.length > 2 && frag[1] === 't') { - document.querySelector(frag).closest(".n").classList.add("highlight"); - coverage.set_sel(parseInt(frag.substr(2), 10)); - } else { - coverage.set_sel(0); - } - - on_click(".button_toggle_run", coverage.toggle_lines); - on_click(".button_toggle_mis", coverage.toggle_lines); - on_click(".button_toggle_exc", coverage.toggle_lines); - on_click(".button_toggle_par", coverage.toggle_lines); - - on_click(".button_next_chunk", coverage.to_next_chunk_nicely); - on_click(".button_prev_chunk", coverage.to_prev_chunk_nicely); - on_click(".button_top_of_page", coverage.to_top); - on_click(".button_first_chunk", coverage.to_first_chunk); - - on_click(".button_prev_file", coverage.to_prev_file); - on_click(".button_next_file", coverage.to_next_file); - on_click(".button_to_index", coverage.to_index); - - on_click(".button_show_hide_help", coverage.show_hide_help); - - coverage.filters = undefined; - try { - coverage.filters = localStorage.getItem(coverage.LINE_FILTERS_STORAGE); - } catch(err) {} - - if (coverage.filters) { - coverage.filters = 
JSON.parse(coverage.filters); - } - else { - coverage.filters = {run: false, exc: true, mis: true, par: true}; - } - - for (cls in coverage.filters) { - coverage.set_line_visibilty(cls, coverage.filters[cls]); - } - - coverage.assign_shortkeys(); - coverage.init_scroll_markers(); - coverage.wire_up_sticky_header(); - - // Rebuild scroll markers when the window height changes. - window.addEventListener("resize", coverage.build_scroll_markers); -}; - -coverage.toggle_lines = function (event) { - const btn = event.target.closest("button"); - const category = btn.value - const show = !btn.classList.contains("show_" + category); - coverage.set_line_visibilty(category, show); - coverage.build_scroll_markers(); - coverage.filters[category] = show; - try { - localStorage.setItem(coverage.LINE_FILTERS_STORAGE, JSON.stringify(coverage.filters)); - } catch(err) {} -}; - -coverage.set_line_visibilty = function (category, should_show) { - const cls = "show_" + category; - const btn = document.querySelector(".button_toggle_" + category); - if (btn) { - if (should_show) { - document.querySelectorAll("#source ." + category).forEach(e => e.classList.add(cls)); - btn.classList.add(cls); - } - else { - document.querySelectorAll("#source ." + category).forEach(e => e.classList.remove(cls)); - btn.classList.remove(cls); - } - } -}; - -// Return the nth line div. -coverage.line_elt = function (n) { - return document.getElementById("t" + n)?.closest("p"); -}; - -// Set the selection. b and e are line numbers. -coverage.set_sel = function (b, e) { - // The first line selected. - coverage.sel_begin = b; - // The next line not selected. - coverage.sel_end = (e === undefined) ? 
b+1 : e; -}; - -coverage.to_top = function () { - coverage.set_sel(0, 1); - coverage.scroll_window(0); -}; - -coverage.to_first_chunk = function () { - coverage.set_sel(0, 1); - coverage.to_next_chunk(); -}; - -coverage.to_prev_file = function () { - window.location = document.getElementById("prevFileLink").href; -} - -coverage.to_next_file = function () { - window.location = document.getElementById("nextFileLink").href; -} - -coverage.to_index = function () { - location.href = document.getElementById("indexLink").href; -} - -coverage.show_hide_help = function () { - const helpCheck = document.getElementById("help_panel_state") - helpCheck.checked = !helpCheck.checked; -} - -// Return a string indicating what kind of chunk this line belongs to, -// or null if not a chunk. -coverage.chunk_indicator = function (line_elt) { - const classes = line_elt?.className; - if (!classes) { - return null; - } - const match = classes.match(/\bshow_\w+\b/); - if (!match) { - return null; - } - return match[0]; -}; - -coverage.to_next_chunk = function () { - const c = coverage; - - // Find the start of the next colored chunk. - var probe = c.sel_end; - var chunk_indicator, probe_line; - while (true) { - probe_line = c.line_elt(probe); - if (!probe_line) { - return; - } - chunk_indicator = c.chunk_indicator(probe_line); - if (chunk_indicator) { - break; - } - probe++; - } - - // There's a next chunk, `probe` points to it. - var begin = probe; - - // Find the end of this chunk. - var next_indicator = chunk_indicator; - while (next_indicator === chunk_indicator) { - probe++; - probe_line = c.line_elt(probe); - next_indicator = c.chunk_indicator(probe_line); - } - c.set_sel(begin, probe); - c.show_selection(); -}; - -coverage.to_prev_chunk = function () { - const c = coverage; - - // Find the end of the prev colored chunk. 
- var probe = c.sel_begin-1; - var probe_line = c.line_elt(probe); - if (!probe_line) { - return; - } - var chunk_indicator = c.chunk_indicator(probe_line); - while (probe > 1 && !chunk_indicator) { - probe--; - probe_line = c.line_elt(probe); - if (!probe_line) { - return; - } - chunk_indicator = c.chunk_indicator(probe_line); - } - - // There's a prev chunk, `probe` points to its last line. - var end = probe+1; - - // Find the beginning of this chunk. - var prev_indicator = chunk_indicator; - while (prev_indicator === chunk_indicator) { - probe--; - if (probe <= 0) { - return; - } - probe_line = c.line_elt(probe); - prev_indicator = c.chunk_indicator(probe_line); - } - c.set_sel(probe+1, end); - c.show_selection(); -}; - -// Returns 0, 1, or 2: how many of the two ends of the selection are on -// the screen right now? -coverage.selection_ends_on_screen = function () { - if (coverage.sel_begin === 0) { - return 0; - } - - const begin = coverage.line_elt(coverage.sel_begin); - const end = coverage.line_elt(coverage.sel_end-1); - - return ( - (checkVisible(begin) ? 1 : 0) - + (checkVisible(end) ? 1 : 0) - ); -}; - -coverage.to_next_chunk_nicely = function () { - if (coverage.selection_ends_on_screen() === 0) { - // The selection is entirely off the screen: - // Set the top line on the screen as selection. 
- - // This will select the top-left of the viewport - // As this is most likely the span with the line number we take the parent - const line = document.elementFromPoint(0, 0).parentElement; - if (line.parentElement !== document.getElementById("source")) { - // The element is not a source line but the header or similar - coverage.select_line_or_chunk(1); - } else { - // We extract the line number from the id - coverage.select_line_or_chunk(parseInt(line.id.substring(1), 10)); - } - } - coverage.to_next_chunk(); -}; - -coverage.to_prev_chunk_nicely = function () { - if (coverage.selection_ends_on_screen() === 0) { - // The selection is entirely off the screen: - // Set the lowest line on the screen as selection. - - // This will select the bottom-left of the viewport - // As this is most likely the span with the line number we take the parent - const line = document.elementFromPoint(document.documentElement.clientHeight-1, 0).parentElement; - if (line.parentElement !== document.getElementById("source")) { - // The element is not a source line but the header or similar - coverage.select_line_or_chunk(coverage.lines_len); - } else { - // We extract the line number from the id - coverage.select_line_or_chunk(parseInt(line.id.substring(1), 10)); - } - } - coverage.to_prev_chunk(); -}; - -// Select line number lineno, or if it is in a colored chunk, select the -// entire chunk -coverage.select_line_or_chunk = function (lineno) { - var c = coverage; - var probe_line = c.line_elt(lineno); - if (!probe_line) { - return; - } - var the_indicator = c.chunk_indicator(probe_line); - if (the_indicator) { - // The line is in a highlighted chunk. - // Search backward for the first line. 
- var probe = lineno; - var indicator = the_indicator; - while (probe > 0 && indicator === the_indicator) { - probe--; - probe_line = c.line_elt(probe); - if (!probe_line) { - break; - } - indicator = c.chunk_indicator(probe_line); - } - var begin = probe + 1; - - // Search forward for the last line. - probe = lineno; - indicator = the_indicator; - while (indicator === the_indicator) { - probe++; - probe_line = c.line_elt(probe); - indicator = c.chunk_indicator(probe_line); - } - - coverage.set_sel(begin, probe); - } - else { - coverage.set_sel(lineno); - } -}; - -coverage.show_selection = function () { - // Highlight the lines in the chunk - document.querySelectorAll("#source .highlight").forEach(e => e.classList.remove("highlight")); - for (let probe = coverage.sel_begin; probe < coverage.sel_end; probe++) { - coverage.line_elt(probe).querySelector(".n").classList.add("highlight"); - } - - coverage.scroll_to_selection(); -}; - -coverage.scroll_to_selection = function () { - // Scroll the page if the chunk isn't fully visible. - if (coverage.selection_ends_on_screen() < 2) { - const element = coverage.line_elt(coverage.sel_begin); - coverage.scroll_window(element.offsetTop - 60); - } -}; - -coverage.scroll_window = function (to_pos) { - window.scroll({top: to_pos, behavior: "smooth"}); -}; - -coverage.init_scroll_markers = function () { - // Init some variables - coverage.lines_len = document.querySelectorAll('#source > p').length; - - // Build html - coverage.build_scroll_markers(); -}; - -coverage.build_scroll_markers = function () { - const temp_scroll_marker = document.getElementById('scroll_marker') - if (temp_scroll_marker) temp_scroll_marker.remove(); - // Don't build markers if the window has no scroll bar. 
- if (document.body.scrollHeight <= window.innerHeight) { - return; - } - - const marker_scale = window.innerHeight / document.body.scrollHeight; - const line_height = Math.min(Math.max(3, window.innerHeight / coverage.lines_len), 10); - - let previous_line = -99, last_mark, last_top; - - const scroll_marker = document.createElement("div"); - scroll_marker.id = "scroll_marker"; - document.getElementById('source').querySelectorAll( - 'p.show_run, p.show_mis, p.show_exc, p.show_exc, p.show_par' - ).forEach(element => { - const line_top = Math.floor(element.offsetTop * marker_scale); - const line_number = parseInt(element.querySelector(".n a").id.substr(1)); - - if (line_number === previous_line + 1) { - // If this solid missed block just make previous mark higher. - last_mark.style.height = `${line_top + line_height - last_top}px`; - } else { - // Add colored line in scroll_marker block. - last_mark = document.createElement("div"); - last_mark.id = `m${line_number}`; - last_mark.classList.add("marker"); - last_mark.style.height = `${line_height}px`; - last_mark.style.top = `${line_top}px`; - scroll_marker.append(last_mark); - last_top = line_top; - } - - previous_line = line_number; - }); - - // Append last to prevent layout calculation - document.body.append(scroll_marker); -}; - -coverage.wire_up_sticky_header = function () { - const header = document.querySelector('header'); - const header_bottom = ( - header.querySelector('.content h2').getBoundingClientRect().top - - header.getBoundingClientRect().top - ); - - function updateHeader() { - if (window.scrollY > header_bottom) { - header.classList.add('sticky'); - } else { - header.classList.remove('sticky'); - } - } - - window.addEventListener('scroll', updateHeader); - updateHeader(); -}; - -document.addEventListener("DOMContentLoaded", () => { - if (document.body.classList.contains("indexfile")) { - coverage.index_ready(); - } else { - coverage.pyfile_ready(); - } -}); diff --git a/coverage/covindex.html 
b/coverage/covindex.html deleted file mode 100755 index cc8d4e9..0000000 --- a/coverage/covindex.html +++ /dev/null @@ -1,119 +0,0 @@ - - - - - Coverage report - - - - - -
-
-

Coverage report: - 100.00% -

- -
- -
-

- coverage.py v6.5.0, - created at 2023-11-14 10:53 +0100 -

-
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Modulestatementsmissingexcludedbranchespartialcoverage
src\cognitivefactory\features_maximization_metric\fmc.py12000800100.00%
tests\test_fmc.py1760000100.00%
Total29600800100.00%
-

- No items found using the specified filter. -

-
- - - diff --git a/coverage/d_16e4f0e538c2fa43_fmc_py.html b/coverage/d_16e4f0e538c2fa43_fmc_py.html deleted file mode 100755 index 7cb0407..0000000 --- a/coverage/d_16e4f0e538c2fa43_fmc_py.html +++ /dev/null @@ -1,917 +0,0 @@ - - - - - Coverage for src\cognitivefactory\features_maximization_metric\fmc.py: 100.00% - - - - - -
-
-

- Coverage for src\cognitivefactory\features_maximization_metric\fmc.py: - 100.00% -

- -

- 120 statements   - - - - -

-

- « prev     - ^ index     - » next -       - coverage.py v6.5.0, - created at 2023-11-14 10:53 +0100 -

- -
-
-
-

1# -*- coding: utf-8 -*- 

-

2 

-

3""" 

-

4* Name: cognitivefactory.features_maximization_metric.fmc 

-

5* Description: Implementation of Features Maximization Metrics. 

-

6* Author: Erwan SCHILD 

-

7* Created: 23/11/2022 

-

8* Licence: CeCILL-C License v1.0 (https://cecill.info/licences.fr.html) 

-

9""" 

-

10 

-

11# ============================================================================== 

-

12# IMPORTS : 

-

13# ============================================================================== 

-

14 

-

15from typing import Dict, List, Literal, Optional, Tuple 

-

16 

-

17from scipy.sparse import csr_matrix 

-

18from sklearn.metrics.cluster import homogeneity_completeness_v_measure 

-

19 

-

20# ============================================================================== 

-

21# FEATURES MAXIMIZATION METRIC 

-

22# ============================================================================== 

-

23 

-

24 

-

25class FeaturesMaximizationMetric: 

-

26 r""" 

-

27 This class implements the ***Features Maximization Metric***. 

-

28 It's a dataset modelization based on vectors features and data labels: 

-

29 for each couple `(feature, classe)`, it gives a score (called **F-Measure**) that describe the power of identification and distinction of the feature for this classe. 

-

30 

-

31 This metric is computed by applying the following steps: 

-

32 

-

33 1. Compute the ***Features F-Measure*** metric (based on ***Features Recall*** and ***Features Predominance*** metrics). 

-

34 

-

35 > (a) The ***Features Recall*** `FR[f][c]` for a given class `c` and a given feature `f` is the ratio between 

-

36 > the sum of the vectors weights of the feature `f` for data in class `c` 

-

37 > and the sum of all vectors weights of feature `f` for all data. 

-

38 > It answers the question: "_Can the feature `f` distinguish the class `c` from other classes `c'` ?_" 

-

39 

-

40 > (b) The ***Features Predominance*** `FP[f][c]` for a given class `c` and a given feature `f` is the ratio between 

-

41 > the sum of the vectors weights of the feature `f` for data in class `c` 

-

42 > and the sum of all vectors weights of all feature `f'` for data in class `c`. 

-

43 > It answers the question: "_Can the feature `f` better identify the class `c` than the other features `f'` ?_" 

-

44 

-

45 > (c) The ***Features F-Measure*** `FM[f][c]` for a given class `c` and a given feature `f` is 

-

46 > the harmonic mean of the ***Features Recall*** (a) and the ***Features Predominance*** (c). 

-

47 > It answers the question: "_How much information does the feature `f` contain about the class `c` ?_" 

-

48 

-

49 2. Compute the ***Features Selection*** (based on ***F-Measure Overall Average*** comparison). 

-

50 

-

51 > (d) The ***F-Measure Overall Average*** is the average of ***Features F-Measure*** (c) for all classes `c` and for all features `f`. 

-

52 > It answers the question: "_What are the mean of information contained by features in all classes ?_" 

-

53 

-

54 > (e) A feature `f` is ***Selected*** if and only if it exist at least one class `c` for which the ***Features F-Measure*** (c) `FM[f][c]` is bigger than the ***F-Measure Overall Average*** (d). 

-

55 > It answers the question: "_What are the features which contain more information than the mean of information in the dataset ?_" 

-

56 

-

57 > (f) A Feature `f` is ***Deleted*** if and only if the ***Features F-Measure*** (c) `FM[f][c]` is always lower than the ***F-Measure Overall Average*** (d) for each class `c`. 

-

58 > It answers the question: "_What are the features which do not contain more information than the mean of information in the dataset ?_" 

-

59 

-

60 3. Compute the ***Features Contrast*** and ***Features Activation*** (based on ***F-Measure Marginal Averages*** comparison). 

-

61 

-

62 > (g) The ***F-Measure Marginal Averages*** for a given feature `f` is the average of ***Features F-Measure*** (c) for all classes `c` and for the given feature `f`. 

-

63 > It answers the question: "_What are the mean of information contained by the feature `f` in all classes ?_" 

-

64 

-

65 > (h) The ***Features Contrast*** `FC[f][c]` for a given class `c` and a given selected feature `f` is the ratio between 

-

66 > the ***Features F-Measure*** (c) `FM[f][c]` 

-

67 > and the ***F-Measure Marginal Averages*** (g) for selected feature f 

-

68 > put to the power of an ***Amplification Factor***. 

-

69 > It answers the question: "_How relevant is the feature `f` to distinguish the class `c` ?_" 

-

70 

-

71 > (i) A selected Feature `f` is ***Active*** for a given class `c` if and only if the ***Features Contrast*** (h) `FC[f][c]` is bigger than `1.0`. 

-

72 > It answers the question : "_For which classes a selected feature `f` is relevant ?_" 

-

73 

-

74 In order to ***evaluate it according to a reference***, a FMC modelization is represented by the Features Activation of its vector features, 

-

75 and a similarity score to the reference is computed, based on common metrics on clustering (homogeneity, completeness, v_measure). 

-

76 

-

77 Attributes: 

-

78 data_vectors (csr_matrix): The sparse matrix representing the vector of each data (i.e. `data_vectors[d,f]` is the weight of data `d` for feature `f`). 

-

79 data_classes (List[str]): The list representing the class of each data (i.e. `data_classes[d]` is the class of data `d`). 

-

80 list_of_possible_features (List[str]): The list of existing vectors features. 

-

81 list_of_possible_classes (List[str]): The list of existing data classes. 

-

82 amplification_factor (int): The positive integer called "amplification factor" aimed at emphasize the feature contrast. Usually at `1`. 

-

83 features_frecall (Dict[str, Dict[str, float]]): The computation of *Features Recall* (_Can the feature `f` distinguish the class `c` from other classes `l'` ?_). 

-

84 features_fpredominance (Dict[str, Dict[str, float]]): The computation of *Features Predominance* (_Can the feature `f` better identify the class `c` than the other features `f'` ?_). 

-

85 features_fmeasure (Dict[str, Dict[str, float]]): The computation of *Features F-Measure* (_How much information does the feature `f` contain about the class `c` ?_). 

-

86 features_overall_average (float): The computation of *Overall Average of Features F-Measure* (_What are the mean of information contained by features in all classes ?_). 

-

87 features_selection (Dict[str, bool]): The computation of *Features Selected* (_What are the features which contain more information than the mean of information in the dataset ?_). 

-

88 features_marginal_averages (Dict[str, float]): The computation of *Marginal Averages of Features F-Measure* (_What are the mean of information contained by the feature `f` in all classes ?_). 

-

89 features_contrast (Dict[str, Dict[str, float]]): The computation of *Features Contrast* (_How important is the feature `f` to distinguish the class `c` ?_). 

-

90 features_activation (Dict[str, Dict[str, bool]]): The computation of *Features Activation* (_For which classes a selected feature `f` is relevant ?_). 

-

91 

-

92 Example: 

-

93 - Basic usecase: "_What are the physical characteristics that most distinguish men from women ?_" 

-

94 ```python 

-

95 

-

96 # Problem to solve. 

-

97 print(">> What are the physical characteristics that most distinguish men from women ?") 

-

98 

-

99 ### 

-

100 ### Python dependencies. 

-

101 ### 

-

102 

-

103 from cognitivefactory.features_maximization_metric.fmc import FeaturesMaximizationMetric 

-

104 from scipy.sparse import csr_matrix 

-

105 from typing import List 

-

106 

-

107 ### 

-

108 ### Data. 

-

109 ### 

-

110 

-

111 # Define people characteristics that will be studied. 

-

112 characteristics_studied: List[str] = [ 

-

113 "Shoes size", 

-

114 "Hair size", 

-

115 "Nose size", 

-

116 ] 

-

117 

-

118 # Get people characteristics. 

-

119 people_characteristics: csr_matrix = csr_matrix( 

-

120 [ 

-

121 [9, 5, 5], 

-

122 [9, 10, 5], 

-

123 [9, 20, 6], 

-

124 [5, 15, 5], 

-

125 [6, 25, 6], 

-

126 [5, 25, 5], 

-

127 ] 

-

128 ) 

-

129 

-

130 # Get people genders. 

-

131 people_genders: List[str] = [ 

-

132 "Man", 

-

133 "Man", 

-

134 "Man", 

-

135 "Woman", 

-

136 "Woman", 

-

137 "Woman", 

-

138 ] 

-

139 

-

140 ### 

-

141 ### Feature Maximization Metrics. 

-

142 ### 

-

143 

-

144 # Main computation. 

-

145 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

146 data_vectors=people_characteristics, 

-

147 data_classes=people_genders, 

-

148 list_of_possible_features=characteristics_studied, 

-

149 amplification_factor=1, 

-

150 ) 

-

151 

-

152 ### 

-

153 ### Analysis 1: Delete characteristics that aren't relevant. 

-

154 ### 

-

155 

-

156 print( 

-

157 "\n", 

-

158 "1. Which characteristic seems not relevant to distinguish men from women ?", 

-

159 ) 

-

160 for characteristic in characteristics_studied: 

-

161 if not fmc_computer.features_selection[characteristic]: 

-

162 print( 

-

163 " - '{0}' seems not relevant.".format(characteristic) 

-

164 ) 

-

165 

-

166 ### 

-

167 ### Analysis 2: Describe gender by relevant characteristics. 

-

168 ### 

-

169 

-

170 print( 

-

171 "\n", 

-

172 "2. According to remaining characteristics:", 

-

173 ) 

-

174 for gender in sorted(set(people_genders)): 

-

175 print( 

-

176 " - Which characteristic seems important to recognize a '{0}' ?".format(gender) 

-

177 ) 

-

178 

-

179 for characteristic in fmc_computer.get_most_active_features_by_a_classe( 

-

180 classe=gender, 

-

181 ): 

-

182 print( 

-

183 " - '{0}' seems important (fmeasure of '{1:.2f}', contrast of '{2:.2f}').".format( 

-

184 characteristic, 

-

185 fmc_computer.features_fmeasure[characteristic][gender], 

-

186 fmc_computer.features_contrast[characteristic][gender], 

-

187 ) 

-

188 ) 

-

189 ``` 

-

190 

-

191 References: 

-

192 - Features Maximization Metric: `Lamirel, J.-C., Cuxac, P., & Hajlaoui, K. (2016). A Novel Approach to Feature Selection Based on Quality Estimation Metrics. In Advances in Knowledge Discovery and Management (pp. 121–140). Springer International Publishing. https://doi.org/10.1007/978-3-319-45763-5_7` 

-

193 """ 

-

194 

-

195 # ========================================================================================= 

-

196 # INITIALIZATION 

-

197 # ========================================================================================= 

-

198 

-

199 def __init__( 

-

200 self, 

-

201 data_vectors: csr_matrix, 

-

202 data_classes: List[str], 

-

203 list_of_possible_features: List[str], 

-

204 amplification_factor: int = 1, 

-

205 verbose: bool = False, 

-

206 ): 

-

207 """ 

-

208 The constructor for `FeaturesMaximizationMetric` class. 

-

209 It applies the several steps of ***Feature Maximization***: 

-

210 1. Compute the ***Features F-Measure*** metric (based on ***Features Recall*** and ***Features Predominance*** metrics). 

-

211 2. Compute the ***Features Selection*** (based on ***F-Measure Overall Average*** comparison). 

-

212 3. Compute the ***Features Contrast*** and ***Features Activation*** (based on ***F-Measure Marginal Averages*** comparison). 

-

213 

-

214 Args: 

-

215 data_vectors (scipy.sparse.csr_matrix): A sparse matrix representing the vector of each data (i.e. `data_vectors[d,f]` is the weight of data `d` for feature `f`). 

-

216 data_classes (List[str]): A list representing the class of each data (i.e. `data_classes[d]` is the class of data `d`). 

-

217 list_of_possible_features (List[str]): A list of existing vectors features. 

-

218 amplification_factor (int, optional): A positive integer called "amplification factor" aimed at emphasize the feature contrast. Defaults to `1`. 

-

219 verbose (bool): An option to display progress status of computations. Defaults to `False`. 

-

220 

-

221 Raises: 

-

222 ValueError: if `data_vectors` and `data_classes` have inconsistent shapes. 

-

223 ValueError: if `data_vectors` and `list_of_possible_features` have inconsistent shapes. 

-

224 ValueError: if `amplification_factor` is not a positive integer. 

-

225 """ 

-

226 

-

227 ### 

-

228 ### Check parameters. 

-

229 ### 

-

230 

-

231 # Display progress status if requested. 

-

232 if verbose: 

-

233 print("`FeaturesMaximizationMetric.__init__`", ":", "Check parameters.") 

-

234 

-

235 # Check data size. 

-

236 if data_vectors.shape[0] != len(data_classes): 

-

237 raise ValueError( 

-

238 "The vectors `data_vectors` and the list of classes `data_classes` have inconsistent shapes (currently: '{0}' vs '{1}').".format( 

-

239 data_vectors.shape[0], 

-

240 len(data_classes), 

-

241 ) 

-

242 ) 

-

243 

-

244 # Check features size. 

-

245 if data_vectors.shape[1] != len(list_of_possible_features): 

-

246 raise ValueError( 

-

247 "The vectors `data_vectors` and the list of features `list_of_possible_features` have inconsistent shapes (currently: '{0}' vs '{1}').".format( 

-

248 data_vectors.shape[1], 

-

249 len(list_of_possible_features), 

-

250 ) 

-

251 ) 

-

252 

-

253 # Check amplification factor. 

-

254 if (not isinstance(amplification_factor, int)) or amplification_factor < 1: 

-

255 raise ValueError( 

-

256 "The amplification factor `amplification_factor` has to be a positive integer (currently: '{0}').".format( 

-

257 amplification_factor, 

-

258 ) 

-

259 ) 

-

260 

-

261 ### 

-

262 ### Store parameters. 

-

263 ### 

-

264 

-

265 # Display progress status if requested. 

-

266 if verbose: 

-

267 print("`FeaturesMaximizationMetric.__init__`", ":", "Store parameters.") 

-

268 

-

269 # Store data information. 

-

270 self.data_vectors: csr_matrix = data_vectors 

-

271 self.data_classes: List[str] = data_classes 

-

272 # Store features and classes lists. 

-

273 self.list_of_possible_features: List[str] = list_of_possible_features 

-

274 self.list_of_possible_classes: List[str] = sorted(set(data_classes)) 

-

275 # Store amplification factor. 

-

276 self.amplification_factor: int = amplification_factor 

-

277 

-

278 ### 

-

279 ### Compute Features Maximization Metric. 

-

280 ### 

-

281 

-

282 # Display progress status if requested. 

-

283 if verbose: 

-

284 print("`FeaturesMaximizationMetric.__init__`", ":", "Start computations.") 

-

285 

-

286 # 1. Compute the *Features F-Measure* metric (based on *Features Recall* and *Features Predominance* metrics). 

-

287 

-

288 # Display progress status if requested. 

-

289 if verbose: 

-

290 print("`FeaturesMaximizationMetric.__init__`", ":", "Compute Features F-Measure.") 

-

291 

-

292 # Initialize variables. 

-

293 self.features_frecall: Dict[str, Dict[str, float]] 

-

294 self.features_fpredominance: Dict[str, Dict[str, float]] 

-

295 self.features_fmeasure: Dict[str, Dict[str, float]] 

-

296 # Compute variables. 

-

297 self._compute_features_frecall_fpredominance_fmeasure() 

-

298 

-

299 # 2. Perform a *Features Selection* (based on *F-Measure Overall Average* comparison). 

-

300 

-

301 # Display progress status if requested. 

-

302 if verbose: 

-

303 print("`FeaturesMaximizationMetric.__init__`", ":", "Compute Features Selection.") 

-

304 

-

305 # Initialize variables. 

-

306 self.features_overall_average: float 

-

307 self.features_selection: Dict[str, bool] 

-

308 # Compute variables. 

-

309 self._compute_features_selection() 

-

310 

-

311 # 3. Compute the *Features Contrast* and *Features Activation* (based on *F-Measure Marginal Averages* comparison). 

-

312 

-

313 # Display progress status if requested. 

-

314 if verbose: 

-

315 print("`FeaturesMaximizationMetric.__init__`", ":", "Compute Features Contrast.") 

-

316 

-

317 # Initialize variables. 

-

318 self.features_marginal_averages: Dict[str, float] 

-

319 self.features_contrast: Dict[str, Dict[str, float]] 

-

320 self.features_activation: Dict[str, Dict[str, bool]] 

-

321 # Compute variables. 

-

322 self._compute_features_contrast_and_activation() 

-

323 

-

324 # Display progress status if requested. 

-

325 if verbose: 

-

326 print("`FeaturesMaximizationMetric.__init__`", ":", "Computations done.") 

-

327 

-

328 # ============================================================================== 

-

329 # COMPUTE FEATURES F-MEASURE 

-

330 # ============================================================================== 

-

331 

-

332 def _compute_features_frecall_fpredominance_fmeasure( 

-

333 self, 

-

334 ) -> None: 

-

335 """ 

-

336 Compute: 

-

337 (a) the ***Features Recall*** (cf. `self.features_frecall`), 

-

338 (b) the ***Features Predominance*** (cf. `self.features_fpredominance`), and 

-

339 (c) the ***Features F-Measure*** (cf. `self.features_fmeasure`). 

-

340 """ 

-

341 

-

342 ### 

-

343 ### Temporary computations. 

-

344 ### 

-

345 

-

346 # Temporary variable used to store sums of all vectors weights for a given feature `f` and a given class `c`. 

-

347 # Needed for both Features Recall and Features Predominance computations. 

-

348 sum_by_feature_and_classe: Dict[str, Dict[str, float]] = { 

-

349 feature: {classe: 0.0 for classe in self.list_of_possible_classes} 

-

350 for feature in self.list_of_possible_features 

-

351 } 

-

352 

-

353 # Temporary variable used to store sums of all vectors weights for a given feature `f` and all classes. 

-

354 # Needed for Features Recall computation. 

-

355 sum_by_features: Dict[str, float] = {feature: 0.0 for feature in self.list_of_possible_features} 

-

356 

-

357 # Temporary variable used to store sums of all vectors weights for all features and a given class `c`. 

-

358 # Needed for Features Predominance computation. 

-

359 sum_by_classe: Dict[str, float] = {classe: 0.0 for classe in self.list_of_possible_classes} 

-

360 

-

361 # Index used to get non zero elements in the sparse matrix weights. 

-

362 indices_x, indices_y = self.data_vectors.nonzero() 

-

363 

-

364 # Browse non zero weights in vectors to compute all the needed sums. 

-

365 for index in range(self.data_vectors.nnz): 

-

366 # Get needed information (data, class/ classe, feature, vectors weight) 

-

367 data_index: int = indices_x[index] 

-

368 data_classe: str = self.data_classes[data_index] 

-

369 feature_index: int = indices_y[index] 

-

370 data_feature: str = self.list_of_possible_features[feature_index] 

-

371 weight: float = self.data_vectors[data_index, feature_index] # TODO: check if np.nan ? 

-

372 

-

373 # Update the several sums. 

-

374 sum_by_feature_and_classe[data_feature][data_classe] += weight 

-

375 sum_by_features[data_feature] += weight 

-

376 sum_by_classe[data_classe] += weight 

-

377 

-

378 ### 

-

379 ### Features F-Measure computation. 

-

380 ### 

-

381 

-

382 # Compute Features Recall. 

-

383 self.features_frecall = { 

-

384 feature: { 

-

385 classe: ( 

-

386 0.0 # TODO: set to np.nan ? 

-

387 if sum_by_features[feature] == 0 

-

388 else sum_by_feature_and_classe[feature][classe] / sum_by_features[feature] 

-

389 ) 

-

390 for classe in self.list_of_possible_classes 

-

391 } 

-

392 for feature in self.list_of_possible_features 

-

393 } 

-

394 

-

395 # Compute Features Predominance. 

-

396 self.features_fpredominance = { 

-

397 feature: { 

-

398 classe: ( 

-

399 0.0 # TODO: set to np.nan ? 

-

400 if sum_by_classe[classe] == 0 

-

401 else sum_by_feature_and_classe[feature][classe] / sum_by_classe[classe] 

-

402 ) 

-

403 for classe in self.list_of_possible_classes 

-

404 } 

-

405 for feature in self.list_of_possible_features 

-

406 } 

-

407 

-

408 # Compute Features F-Measure. 

-

409 self.features_fmeasure = { 

-

410 feature: { 

-

411 classe: ( 

-

412 0.0 # TODO: set to np.nan ? 

-

413 if (self.features_frecall[feature][classe] + self.features_fpredominance[feature][classe] == 0) 

-

414 else ( 

-

415 2 

-

416 * (self.features_frecall[feature][classe] * self.features_fpredominance[feature][classe]) 

-

417 / (self.features_frecall[feature][classe] + self.features_fpredominance[feature][classe]) 

-

418 ) 

-

419 ) 

-

420 for classe in self.list_of_possible_classes 

-

421 } 

-

422 for feature in self.list_of_possible_features 

-

423 } 

-

424 

-

425 # ============================================================================================= 

-

426 # COMPUTE FEATURES SELECTION 

-

427 # ============================================================================================= 

-

428 

-

429 def _compute_features_selection( 

-

430 self, 

-

431 ) -> None: 

-

432 """ 

-

433 Compute: 

-

434 (d) the ***F-Measure Overall Average*** (cf. `self.features_overall_average`), and 

-

435 (e) the ***Features Selected*** (cf. `self.features_selection`). 

-

436 """ 

-

437 

-

438 ### 

-

439 ### Features F-Measure Overall Average computation. 

-

440 ### 

-

441 

-

442 # Temporary variable used to store the overall sum in order to compute the overall average of Features F-Measure. 

-

443 overall_sum: float = 0.0 

-

444 nb_overall: int = 0 

-

445 

-

446 # For each feature... 

-

447 for feature1 in self.list_of_possible_features: 

-

448 # For each classe... 

-

449 for classe1 in self.list_of_possible_classes: 

-

450 # Update the overall sum and count. 

-

451 overall_sum += self.features_fmeasure[feature1][classe1] 

-

452 nb_overall += 1 

-

453 

-

454 # Compute the overall average of Features F-Measure. 

-

455 self.features_overall_average = 0.0 if nb_overall == 0 else overall_sum / nb_overall # TODO: set to np.nan ? 

-

456 

-

457 ### 

-

458 ### Features Selection computation. 

-

459 ### 

-

460 

-

461 # Temporary variable used store the selected features. 

-

462 self.features_selection = {} 

-

463 

-

464 # Browse features to determine the selected ones. 

-

465 for feature2 in self.list_of_possible_features: 

-

466 # Set default state of selection. 

-

467 self.features_selection[feature2] = False 

-

468 

-

469 # For each feature, browse class to find one for which the Features F-Measure is bigger than the overall average. 

-

470 for classe2 in self.list_of_possible_classes: 

-

471 # Check that the Feature F-Measure is bigger than the overall average. 

-

472 if self.features_fmeasure[feature2][classe2] > self.features_overall_average: 

-

473 # Approve the selection and then break the loop. 

-

474 self.features_selection[feature2] = True 

-

475 break 

-

476 

-

477 # ============================================================================================= 

-

478 # COMPUTE FEATURES CONSTRAST AND ACTIVATION 

-

479 # ============================================================================================= 

-

480 

-

481 def _compute_features_contrast_and_activation( 

-

482 self, 

-

483 ) -> None: 

-

484 """ 

-

485 Compute: 

-

486 (g) The ***F-Measure Marginal Averages*** (cf. `self.features_marginal_averages`), and 

-

487 (h) The ***Features Contrast*** (cf. `self.features_contrast`). 

-

488 (i) the ***Features Activation*** (cf. `self.features_activation`). 

-

489 """ 

-

490 

-

491 ### 

-

492 ### Features F-Measure Marginal computation. 

-

493 ### 

-

494 

-

495 # Initialize the marginal average of Features F-Measure. 

-

496 self.features_marginal_averages = {} 

-

497 

-

498 # Browse features to compute the averages. 

-

499 for feature1 in self.list_of_possible_features: 

-

500 # Temporary variable used to store the marginal sum in order to compute the marginal average of Features F-Measure over the current feature. 

-

501 sum_marginal: float = 0.0 

-

502 nb_marginal: int = 0 

-

503 

-

504 # Update the marginal sum of Features F-Measure over the current feature. 

-

505 for classe1 in self.list_of_possible_classes: 

-

506 sum_marginal += self.features_fmeasure[feature1][classe1] 

-

507 nb_marginal += 1 

-

508 

-

509 # Compute the marginal averages of Features F-Measure over the current feature. 

-

510 self.features_marginal_averages[feature1] = ( 

-

511 0.0 if nb_marginal == 0 else sum_marginal / nb_marginal 

-

512 ) # TODO: set to np.nan ? 

-

513 

-

514 ### 

-

515 ### Features Contrast computation. 

-

516 ### 

-

517 

-

518 # Temporary variable used to store the contrast of a feature for a class. 

-

519 self.features_contrast = { 

-

520 feature2: { 

-

521 classe2: ( 

-

522 0.0 # TODO: set to np.nan ? 

-

523 if (self.features_selection[feature2] is False or self.features_marginal_averages[feature2] == 0) 

-

524 else (self.features_fmeasure[feature2][classe2] / self.features_marginal_averages[feature2]) 

-

525 ** self.amplification_factor 

-

526 ) 

-

527 for classe2 in self.list_of_possible_classes 

-

528 } 

-

529 for feature2 in self.list_of_possible_features 

-

530 } 

-

531 

-

532 ### 

-

533 ### Features Activation computation. 

-

534 ### 

-

535 

-

536 # Temporary variable used store the features activation. 

-

537 self.features_activation = { 

-

538 feature3: { 

-

539 classe3: bool( 

-

540 self.features_selection[feature3] is True and self.features_contrast[feature3][classe3] > 1 

-

541 ) 

-

542 for classe3 in self.list_of_possible_classes 

-

543 } 

-

544 for feature3 in self.list_of_possible_features 

-

545 } 

-

546 

-

547 # ============================================================================================= 

-

548 # GET: MOST ACTIVATED CLASSES FOR A FEATURE 

-

549 # ============================================================================================= 

-

550 

-

551 def get_most_activated_classes_by_a_feature( 

-

552 self, 

-

553 feature: str, 

-

554 activation_only: bool = True, 

-

555 sort_by: Literal["contrast", "fmeasure"] = "contrast", 

-

556 max_number: Optional[int] = None, 

-

557 ) -> List[str]: 

-

558 """ 

-

559 Get the list of classes for which the requested feature is the most relevant. 

-

560 

-

561 Args: 

-

562 feature (str): The feature to analyze. 

-

563 sort_by (Literal["contrast", "fmeasure"]): The sort criterion for the list of classes. Defaults to `"contrast"`. 

-

564 activation_only (bool): The option to get only activated classes. Defaults to `True`. 

-

565 max_number (Optional[int]): The maximum number of classes to return. Defaults to `None`. 

-

566 

-

567 Raises: 

-

568 ValueError: if `feature` is not in `self.list_of_possible_features`. 

-

569 ValueError: if `sort_by` is not in `{"contrast", "fmeasure"}`. 

-

570 

-

571 Returns: 

-

572 List[str]: The list of classes for which the requested feature is the most relevant. 

-

573 """ 

-

574 

-

575 ### 

-

576 ### Check parameters. 

-

577 ### 

-

578 

-

579 # Check parameter `feature`. 

-

580 if feature not in self.list_of_possible_features: 

-

581 raise ValueError( 

-

582 "The requested feature `'{0}'` is unknown.".format( 

-

583 feature, 

-

584 ) 

-

585 ) 

-

586 

-

587 # Check parameter `sort_by`. 

-

588 if sort_by not in {"contrast", "fmeasure"}: 

-

589 raise ValueError( 

-

590 "The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').".format( 

-

591 sort_by, 

-

592 ) 

-

593 ) 

-

594 

-

595 ### 

-

596 ### Compute the requested list. 

-

597 ### 

-

598 

-

599 # Define list of possible results (classe + contrast/fmeasure). 

-

600 list_of_possible_results: List[Tuple[float, str]] = [ 

-

601 ( 

-

602 # 0: the metric: contrast or fmeasure. 

-

603 ( 

-

604 self.features_contrast[feature][classe] 

-

605 if sort_by == "contrast" 

-

606 else self.features_fmeasure[feature][classe] 

-

607 ), 

-

608 # 1: the classe. 

-

609 classe, 

-

610 ) 

-

611 for classe in self.list_of_possible_classes 

-

612 if (activation_only is False or self.features_activation[feature][classe] is True) 

-

613 ] 

-

614 

-

615 # Return top classes sorted by requested metric. 

-

616 return [ 

-

617 activated_classe 

-

618 for _, activated_classe in sorted( 

-

619 list_of_possible_results, 

-

620 reverse=True, 

-

621 ) 

-

622 ][:max_number] 

-

623 

-

624 # ============================================================================================= 

-

625 # GET: MOST ACTIVATED FEATURES FOR A CLASSE 

-

626 # ============================================================================================= 

-

627 

-

628 def get_most_active_features_by_a_classe( 

-

629 self, 

-

630 classe: str, 

-

631 activation_only: bool = True, 

-

632 sort_by: Literal["contrast", "fmeasure"] = "contrast", 

-

633 max_number: Optional[int] = None, 

-

634 ) -> List[str]: 

-

635 """ 

-

636 Get the list of features which are the most relevant for the requested classe. 

-

637 

-

638 Args: 

-

639 classe (str): The classe to analyze. 

-

640 sort_by (Literal["contrast", "fmeasure"]): The sort criterion for the list of features. Defaults to `"contrast"`. 

-

641 activation_only (bool): The option to get only active features. Defaults to `True`. 

-

642 max_number (Optional[int]): The maximum number of features to return. Defaults to `None`. 

-

643 

-

644 Raises: 

-

645 ValueError: if `classe` is not in `self.list_of_possible_classes`. 

-

646 ValueError: if `sort_by` is not in `{"contrast", "fmeasure"}`. 

-

647 

-

648 Returns: 

-

649 List[str]: The list of features which are the most relevant for the requested classe. 

-

650 """ 

-

651 

-

652 ### 

-

653 ### Check parameters. 

-

654 ### 

-

655 

-

656 # Check parameter `feature`. 

-

657 if classe not in self.list_of_possible_classes: 

-

658 raise ValueError( 

-

659 "The requested classe `'{0}'` is unknown.".format( 

-

660 classe, 

-

661 ) 

-

662 ) 

-

663 

-

664 # Check parameter `sort_by`. 

-

665 if sort_by not in {"contrast", "fmeasure"}: 

-

666 raise ValueError( 

-

667 "The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').".format( 

-

668 sort_by, 

-

669 ) 

-

670 ) 

-

671 

-

672 ### 

-

673 ### Compute the requested list. 

-

674 ### 

-

675 

-

676 # Define list of possible results (feature + contrast/fmeasure). 

-

677 list_of_possible_results: List[Tuple[float, str]] = [ 

-

678 ( 

-

679 # 0: the metric: contrast or fmeasure. 

-

680 ( 

-

681 self.features_contrast[feature][classe] 

-

682 if sort_by == "contrast" 

-

683 else self.features_fmeasure[feature][classe] 

-

684 ), 

-

685 # 1: the feature. 

-

686 feature, 

-

687 ) 

-

688 for feature in self.list_of_possible_features 

-

689 if (activation_only is False or self.features_activation[feature][classe] is True) 

-

690 ] 

-

691 

-

692 # Return top features sorted by requested metric. 

-

693 return [ 

-

694 active_feature 

-

695 for _, active_feature in sorted( 

-

696 list_of_possible_results, 

-

697 reverse=True, 

-

698 ) 

-

699 ][:max_number] 

-

700 

-

701 # ============================================================================================= 

-

702 # COMPARE: WITH AN OTHER FMC 

-

703 # ============================================================================================= 

-

704 

-

705 def compare( 

-

706 self, 

-

707 fmc_reference: "FeaturesMaximizationMetric", 

-

708 rounded: Optional[int] = None, 

-

709 ) -> Tuple[float, float, float]: 

-

710 """ 

-

711 Gives a similarity score in agreement with a reference FMC modelization. 

-

712 The similarity score computation is based on common metrics on clustering (homogeneity, completeness, v_measure), 

-

713 where each FMC modelization is represented by the Features Activation of their vector features. 

-

714 In order to be able to compute these similarity, data classes can be different, but vector features must be the same in both FMC modelization. 

-

715 

-

716 

-

717 Args: 

-

718 fmc_reference (FeaturesMaximizationMetric): Another Features Maximization modelization used as reference for the comparison. 

-

719 rounded (Optional[int]): The option to round the result to counter log approximation. Defaults to `None`. 

-

720 

-

721 Raises: 

-

722 ValueError: if `list_of_possible_features` are different. 

-

723 

-

724 Returns: 

-

725 Tuple[float, float, float]: Computation of homogeneity, completeness and v_measure. 

-

726 """ 

-

727 

-

728 ### 

-

729 ### Check parameters. 

-

730 ### 

-

731 

-

732 # Check list_of_possible_features equality. 

-

733 if self.list_of_possible_features != fmc_reference.list_of_possible_features: 

-

734 list_of_in_excess_features: List[str] = [ 

-

735 feature 

-

736 for feature in self.list_of_possible_features 

-

737 if feature not in fmc_reference.list_of_possible_features 

-

738 ] 

-

739 list_of_missing_features: List[str] = [ 

-

740 feature 

-

741 for feature in fmc_reference.list_of_possible_features 

-

742 if feature not in self.list_of_possible_features 

-

743 ] 

-

744 raise ValueError( 

-

745 "The list of features `list_of_possible_features` must be the same for both FMC modelization. +: {0}, -: {1}".format( 

-

746 str(list_of_in_excess_features), str(list_of_missing_features) 

-

747 ) 

-

748 ) 

-

749 

-

750 ### 

-

751 ### Format Features Activation as classification label of features. 

-

752 ### 

-

753 

-

754 # Initialize 

-

755 list_of_self_features_activations: List[str] = [] 

-

756 list_of_reference_features_activations: List[str] = [] 

-

757 

-

758 # Define default value if feature not activated. 

-

759 # NB: we can't set a fixed value in case this value is in the list of possible classes... 

-

760 # Example: can't set `""` or `"None"` in case self.list_of_possible_classes==["A", ""] and fmc_reference.list_of_possible_classes==["B", "None"]. 

-

761 default_label_if_feature_not_activated: str = "NOT_ACTIVATED:{possible_classe}".format( 

-

762 possible_classe=self.list_of_possible_classes + fmc_reference.list_of_possible_classes 

-

763 ) 

-

764 

-

765 # Browse activated features toà compare Features Activation. 

-

766 for feature in fmc_reference.list_of_possible_features: 

-

767 # Get Features Activation. 

-

768 list_of_most_activated_classes_for_feature_in_self: List[ 

-

769 str 

-

770 ] = self.get_most_activated_classes_by_a_feature( 

-

771 feature=feature, 

-

772 ) 

-

773 list_of_most_activated_classes_for_feature_in_reference: List[ 

-

774 str 

-

775 ] = fmc_reference.get_most_activated_classes_by_a_feature( 

-

776 feature=feature, 

-

777 ) 

-

778 

-

779 # TODO: Skip if feature is not activated in both modelization. 

-

780 if ( 

-

781 len(list_of_most_activated_classes_for_feature_in_self) != 1 

-

782 and len(list_of_most_activated_classes_for_feature_in_reference) != 1 

-

783 ): 

-

784 continue 

-

785 

-

786 # Format Feature Activation as classification label. Set to `-1` if not activated. 

-

787 list_of_self_features_activations.append( 

-

788 list_of_most_activated_classes_for_feature_in_self[0] 

-

789 if len(list_of_most_activated_classes_for_feature_in_self) == 1 

-

790 else default_label_if_feature_not_activated 

-

791 ) 

-

792 list_of_reference_features_activations.append( 

-

793 list_of_most_activated_classes_for_feature_in_reference[0] 

-

794 if len(list_of_most_activated_classes_for_feature_in_reference) == 1 

-

795 else default_label_if_feature_not_activated 

-

796 ) 

-

797 

-

798 ### 

-

799 ### Compute FMC modelizations similarity. 

-

800 ### 

-

801 

-

802 # Compute standard metrics for clustering. 

-

803 homogeneity: float 

-

804 completeness: float 

-

805 v_measure: float 

-

806 homogeneity, completeness, v_measure = homogeneity_completeness_v_measure( 

-

807 labels_pred=list_of_self_features_activations, 

-

808 labels_true=list_of_reference_features_activations, 

-

809 ) 

-

810 

-

811 # Round the results. 

-

812 if rounded is not None: 

-

813 homogeneity = round(homogeneity, rounded) 

-

814 completeness = round(completeness, rounded) 

-

815 v_measure = round(v_measure, rounded) 

-

816 

-

817 # Return values. 

-

818 return homogeneity, completeness, v_measure 

-
- - - diff --git a/coverage/d_a44f0ac069e85531_test_fmc_py.html b/coverage/d_a44f0ac069e85531_test_fmc_py.html deleted file mode 100755 index 4bbdb47..0000000 --- a/coverage/d_a44f0ac069e85531_test_fmc_py.html +++ /dev/null @@ -1,1901 +0,0 @@ - - - - - Coverage for tests\test_fmc.py: 100.00% - - - - - -
-
-

- Coverage for tests\test_fmc.py: - 100.00% -

- -

- 176 statements   - - - - -

-

- « prev     - ^ index     - » next -       - coverage.py v6.5.0, - created at 2023-11-14 10:53 +0100 -

- -
-
-
-

1# -*- coding: utf-8 -*- 

-

2 

-

3""" 

-

4* Name: cognitivefactory/features_maximization_metric/tests/test_py 

-

5* Description: Unittests for the `cognitivefactory.features_maximization_metric.fmc` module. 

-

6* Author: Erwan Schild 

-

7* Created: 23/11/2022 

-

8* Licence: CeCILL (https://cecill.info/licences.fr.html) 

-

9""" 

-

10 

-

11# ============================================================================== 

-

12# IMPORT PYTHON DEPENDENCIES 

-

13# ============================================================================== 

-

14 

-

15import re 

-

16from typing import List 

-

17 

-

18import pytest 

-

19from scipy.sparse import csr_matrix 

-

20 

-

21from cognitivefactory.features_maximization_metric.fmc import FeaturesMaximizationMetric 

-

22 

-

23# ============================================================================== 

-

24# test_FeaturesMaximizationMetric_init_error_data_size 

-

25# ============================================================================== 

-

26 

-

27 

-

28def test_FeaturesMaximizationMetric_init_error_data_size(): 

-

29 """ 

-

30 In `FeaturesMaximizationMetric`, test `__init__` method with data size error. 

-

31 """ 

-

32 

-

33 # Invalid case with inconsistencies in number of data. 

-

34 with pytest.raises( 

-

35 ValueError, 

-

36 match=re.escape( 

-

37 "The vectors `data_vectors` and the list of classes `data_classes` have inconsistent shapes (currently: '3' vs '5')." 

-

38 ), 

-

39 ): 

-

40 FeaturesMaximizationMetric( 

-

41 data_vectors=csr_matrix( 

-

42 [ 

-

43 [11, 12], 

-

44 [21, 22], 

-

45 [31, 32], 

-

46 ] 

-

47 ), 

-

48 data_classes=[ 

-

49 "classe_data_1", 

-

50 "classe_data_2", 

-

51 "classe_data_3", 

-

52 "classe_data_4", 

-

53 "classe_data_5", 

-

54 ], 

-

55 list_of_possible_features=["feature_1", "feature_2"], 

-

56 ) 

-

57 

-

58 

-

59# ============================================================================== 

-

60# test_FeaturesMaximizationMetric_init_error_features_size 

-

61# ============================================================================== 

-

62 

-

63 

-

64def test_FeaturesMaximizationMetric_init_error_features_size(): 

-

65 """ 

-

66 In `FeaturesMaximizationMetric`, test `__init__` method with features size error. 

-

67 """ 

-

68 

-

69 # Invalid case with inconsistencies in number of features. 

-

70 with pytest.raises( 

-

71 ValueError, 

-

72 match=re.escape( 

-

73 "The vectors `data_vectors` and the list of features `list_of_possible_features` have inconsistent shapes (currently: '4' vs '2')." 

-

74 ), 

-

75 ): 

-

76 FeaturesMaximizationMetric( 

-

77 data_vectors=csr_matrix( 

-

78 [ 

-

79 [11, 12, 13, 14], 

-

80 [21, 22, 23, 24], 

-

81 [31, 32, 33, 34], 

-

82 [41, 42, 43, 44], 

-

83 [51, 52, 53, 54], 

-

84 ] 

-

85 ), 

-

86 data_classes=[ 

-

87 "classe_data_1", 

-

88 "classe_data_2", 

-

89 "classe_data_3", 

-

90 "classe_data_4", 

-

91 "classe_data_5", 

-

92 ], 

-

93 list_of_possible_features=["feature_1", "feature_2"], 

-

94 ) 

-

95 

-

96 

-

97# ============================================================================== 

-

98# test_FeaturesMaximizationMetric_init_error_amplification 

-

99# ============================================================================== 

-

100 

-

101 

-

102def test_FeaturesMaximizationMetric_init_error_amplification(): 

-

103 """ 

-

104 In `FeaturesMaximizationMetric`, test `__init__` method with amplification factor error. 

-

105 """ 

-

106 

-

107 # Invalid case with not integer value. 

-

108 with pytest.raises( 

-

109 ValueError, 

-

110 match=re.escape( 

-

111 "The amplification factor `amplification_factor` has to be a positive integer (currently: 'a')." 

-

112 ), 

-

113 ): 

-

114 FeaturesMaximizationMetric( 

-

115 data_vectors=csr_matrix( 

-

116 [ 

-

117 [11, 12], 

-

118 [21, 22], 

-

119 [31, 32], 

-

120 [41, 42], 

-

121 ] 

-

122 ), 

-

123 data_classes=[ 

-

124 "classe_A", 

-

125 "classe_A", 

-

126 "classe_B", 

-

127 "classe_B", 

-

128 ], 

-

129 list_of_possible_features=["feature_1", "feature_2"], 

-

130 amplification_factor="a", 

-

131 ) 

-

132 

-

133 # Invalid case with not positive integer value. 

-

134 with pytest.raises( 

-

135 ValueError, 

-

136 match=re.escape( 

-

137 "The amplification factor `amplification_factor` has to be a positive integer (currently: '-1')." 

-

138 ), 

-

139 ): 

-

140 FeaturesMaximizationMetric( 

-

141 data_vectors=csr_matrix( 

-

142 [ 

-

143 [11, 12], 

-

144 [21, 22], 

-

145 [31, 32], 

-

146 [41, 42], 

-

147 ] 

-

148 ), 

-

149 data_classes=[ 

-

150 "classe_A", 

-

151 "classe_A", 

-

152 "classe_B", 

-

153 "classe_B", 

-

154 ], 

-

155 list_of_possible_features=["feature_1", "feature_2"], 

-

156 amplification_factor=-1, 

-

157 ) 

-

158 

-

159 

-

160# ============================================================================== 

-

161# test_FeaturesMaximizationMetric_default_values 

-

162# ============================================================================== 

-

163 

-

164 

-

165def test_FeaturesMaximizationMetric_init_default_values(capsys): 

-

166 """ 

-

167 In `FeaturesMaximizationMetric`, test `__init__` method with default values. 

-

168 

-

169 Args: 

-

170 capsys: Fixture capturing the system output. 

-

171 """ 

-

172 

-

173 ### 

-

174 ### Data. 

-

175 ### 

-

176 

-

177 # Define people characteristics that will be studied. 

-

178 characteristics_studied: List[str] = [ 

-

179 "Shoes size", 

-

180 "Hair size", 

-

181 "Nose size", 

-

182 ] 

-

183 

-

184 # Get people characteristics. 

-

185 people_characteristics: csr_matrix = csr_matrix( 

-

186 [ 

-

187 [9, 5, 5], 

-

188 [9, 10, 5], 

-

189 [9, 20, 6], 

-

190 [5, 15, 5], 

-

191 [6, 25, 6], 

-

192 [5, 25, 5], 

-

193 ] 

-

194 ) 

-

195 

-

196 # Get people genders. 

-

197 people_genders: List[str] = [ 

-

198 "Man", 

-

199 "Man", 

-

200 "Man", 

-

201 "Woman", 

-

202 "Woman", 

-

203 "Woman", 

-

204 ] 

-

205 

-

206 ### 

-

207 ### Feature Maximization Metrics. 

-

208 ### 

-

209 

-

210 # Computation. 

-

211 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

212 data_vectors=people_characteristics, 

-

213 data_classes=people_genders, 

-

214 list_of_possible_features=characteristics_studied, 

-

215 ) 

-

216 

-

217 # Check verbose output. 

-

218 assert capsys.readouterr().out == "" 

-

219 

-

220 ### 

-

221 ### Check parameters. 

-

222 ### 

-

223 

-

224 # Check data vectors (by checking the difference is never True). 

-

225 vectors_differences = ( 

-

226 fmc_computer.data_vectors 

-

227 != csr_matrix( 

-

228 [ 

-

229 [9, 5, 5], 

-

230 [9, 10, 5], 

-

231 [9, 20, 6], 

-

232 [5, 15, 5], 

-

233 [6, 25, 6], 

-

234 [5, 25, 5], 

-

235 ] 

-

236 ) 

-

237 ).todense() 

-

238 assert not vectors_differences.any() 

-

239 

-

240 # Check data classes. 

-

241 assert fmc_computer.data_classes == [ 

-

242 "Man", 

-

243 "Man", 

-

244 "Man", 

-

245 "Woman", 

-

246 "Woman", 

-

247 "Woman", 

-

248 ] 

-

249 

-

250 # Check list of possible features. 

-

251 assert fmc_computer.list_of_possible_features == [ 

-

252 "Shoes size", 

-

253 "Hair size", 

-

254 "Nose size", 

-

255 ] 

-

256 

-

257 # Check list of possible classes. 

-

258 assert fmc_computer.list_of_possible_classes == [ 

-

259 "Man", 

-

260 "Woman", 

-

261 ] 

-

262 

-

263 # Check amplification factor. 

-

264 assert fmc_computer.amplification_factor == 1 

-

265 

-

266 ### 

-

267 ### Check computations. 

-

268 ### 

-

269 

-

270 # Check Features F-Measure. 

-

271 assert fmc_computer.features_fmeasure == { 

-

272 "Shoes size": { 

-

273 "Man": 0.4462809917355372, 

-

274 "Woman": 0.2285714285714286, 

-

275 }, 

-

276 "Hair size": { 

-

277 "Man": 0.3932584269662921, 

-

278 "Woman": 0.6598984771573604, 

-

279 }, 

-

280 "Nose size": { 

-

281 "Man": 0.29090909090909095, 

-

282 "Woman": 0.24806201550387597, 

-

283 }, 

-

284 } 

-

285 

-

286 # Check Features Overall Average. 

-

287 assert fmc_computer.features_overall_average == 0.3778300718072642 # noqa: WPS459 

-

288 

-

289 # Check Features Selection. 

-

290 assert fmc_computer.features_selection == { 

-

291 "Shoes size": True, 

-

292 "Hair size": True, 

-

293 "Nose size": False, 

-

294 } 

-

295 

-

296 # Check Features Marginal Average. 

-

297 assert fmc_computer.features_marginal_averages == { 

-

298 "Shoes size": 0.3374262101534829, 

-

299 "Hair size": 0.5265784520618262, 

-

300 "Nose size": 0.2694855532064835, 

-

301 } 

-

302 

-

303 # Check Features Contrast. 

-

304 assert fmc_computer.features_contrast == { 

-

305 "Shoes size": { 

-

306 "Man": 1.3226032190342898, 

-

307 "Woman": 0.6773967809657103, 

-

308 }, 

-

309 "Hair size": { 

-

310 "Man": 0.7468183049011644, 

-

311 "Woman": 1.2531816950988357, 

-

312 }, 

-

313 "Nose size": { 

-

314 "Man": 0.0, # 1.07949791 

-

315 "Woman": 0.0, # 0.92050209 

-

316 }, 

-

317 } 

-

318 

-

319 # Check Features Activation. 

-

320 assert fmc_computer.features_activation == { 

-

321 "Shoes size": { 

-

322 "Man": True, 

-

323 "Woman": False, 

-

324 }, 

-

325 "Hair size": { 

-

326 "Man": False, 

-

327 "Woman": True, 

-

328 }, 

-

329 "Nose size": { 

-

330 "Man": False, 

-

331 "Woman": False, 

-

332 }, 

-

333 } 

-

334 

-

335 

-

336# ============================================================================== 

-

337# test_FeaturesMaximizationMetric_verbose_output 

-

338# ============================================================================== 

-

339 

-

340 

-

341def test_FeaturesMaximizationMetric_verbose_output(capsys): 

-

342 """ 

-

343 In `FeaturesMaximizationMetric`, test `__init__` method with verbose output . 

-

344 

-

345 Args: 

-

346 capsys: Fixture capturing the system output. 

-

347 """ 

-

348 

-

349 ### 

-

350 ### Data. 

-

351 ### 

-

352 

-

353 # Define people characteristics that will be studied. 

-

354 characteristics_studied: List[str] = [ 

-

355 "Shoes size", 

-

356 "Hair size", 

-

357 "Nose size", 

-

358 ] 

-

359 

-

360 # Get people characteristics. 

-

361 people_characteristics: csr_matrix = csr_matrix( 

-

362 [ 

-

363 [9, 5, 5], 

-

364 [9, 10, 5], 

-

365 [9, 20, 6], 

-

366 [5, 15, 5], 

-

367 [6, 25, 6], 

-

368 [5, 25, 5], 

-

369 ] 

-

370 ) 

-

371 

-

372 # Get people genders. 

-

373 people_genders: List[str] = [ 

-

374 "Man", 

-

375 "Man", 

-

376 "Man", 

-

377 "Woman", 

-

378 "Woman", 

-

379 "Woman", 

-

380 ] 

-

381 

-

382 ### 

-

383 ### Feature Maximization Metrics. 

-

384 ### 

-

385 

-

386 # Computation. 

-

387 FeaturesMaximizationMetric( 

-

388 data_vectors=people_characteristics, 

-

389 data_classes=people_genders, 

-

390 list_of_possible_features=characteristics_studied, 

-

391 verbose=True, 

-

392 ) 

-

393 

-

394 # Check verbose output. 

-

395 assert capsys.readouterr().out.split("\n") == [ 

-

396 "`FeaturesMaximizationMetric.__init__` : Check parameters.", 

-

397 "`FeaturesMaximizationMetric.__init__` : Store parameters.", 

-

398 "`FeaturesMaximizationMetric.__init__` : Start computations.", 

-

399 "`FeaturesMaximizationMetric.__init__` : Compute Features F-Measure.", 

-

400 "`FeaturesMaximizationMetric.__init__` : Compute Features Selection.", 

-

401 "`FeaturesMaximizationMetric.__init__` : Compute Features Contrast.", 

-

402 "`FeaturesMaximizationMetric.__init__` : Computations done.", 

-

403 "", 

-

404 ] 

-

405 

-

406 

-

407# ============================================================================== 

-

408# test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_error_feature 

-

409# ============================================================================== 

-

410 

-

411 

-

412def test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_error_feature(): 

-

413 """ 

-

414 In `FeaturesMaximizationMetric`, test `get_most_activated_classes_by_a_feature` method with unknown feature. 

-

415 """ 

-

416 

-

417 ### 

-

418 ### Data. 

-

419 ### 

-

420 

-

421 # Define people characteristics that will be studied. 

-

422 characteristics_studied: List[str] = [ 

-

423 "Shoes size", 

-

424 "Hair size", 

-

425 "Nose size", 

-

426 ] 

-

427 

-

428 # Get people characteristics. 

-

429 people_characteristics: csr_matrix = csr_matrix( 

-

430 [ 

-

431 [9, 5, 5], 

-

432 [9, 10, 5], 

-

433 [9, 20, 6], 

-

434 [5, 15, 5], 

-

435 [6, 25, 6], 

-

436 [5, 25, 5], 

-

437 ] 

-

438 ) 

-

439 

-

440 # Get people genders. 

-

441 people_genders: List[str] = [ 

-

442 "Man", 

-

443 "Man", 

-

444 "Man", 

-

445 "Woman", 

-

446 "Woman", 

-

447 "Woman", 

-

448 ] 

-

449 

-

450 ### 

-

451 ### Feature Maximization Metrics. 

-

452 ### 

-

453 

-

454 # Computation. 

-

455 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

456 data_vectors=people_characteristics, 

-

457 data_classes=people_genders, 

-

458 list_of_possible_features=characteristics_studied, 

-

459 ) 

-

460 

-

461 ### 

-

462 ### Get with bad parameters. 

-

463 ### 

-

464 

-

465 # Invalid case with unknown feature. 

-

466 with pytest.raises( 

-

467 ValueError, 

-

468 match=re.escape("The requested feature `'UNKNOWN'` is unknown."), 

-

469 ): 

-

470 fmc_computer.get_most_activated_classes_by_a_feature( 

-

471 feature="UNKNOWN", 

-

472 ) 

-

473 

-

474 

-

475# ============================================================================== 

-

476# test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_error_sort_by 

-

477# ============================================================================== 

-

478 

-

479 

-

480def test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_error_sort_by(): 

-

481 """ 

-

482 In `FeaturesMaximizationMetric`, test `get_most_activated_classes_by_a_feature` method with unknown sort option. 

-

483 """ 

-

484 

-

485 ### 

-

486 ### Data. 

-

487 ### 

-

488 

-

489 # Define people characteristics that will be studied. 

-

490 characteristics_studied: List[str] = [ 

-

491 "Shoes size", 

-

492 "Hair size", 

-

493 "Nose size", 

-

494 ] 

-

495 

-

496 # Get people characteristics. 

-

497 people_characteristics: csr_matrix = csr_matrix( 

-

498 [ 

-

499 [9, 5, 5], 

-

500 [9, 10, 5], 

-

501 [9, 20, 6], 

-

502 [5, 15, 5], 

-

503 [6, 25, 6], 

-

504 [5, 25, 5], 

-

505 ] 

-

506 ) 

-

507 

-

508 # Get people genders. 

-

509 people_genders: List[str] = [ 

-

510 "Man", 

-

511 "Man", 

-

512 "Man", 

-

513 "Woman", 

-

514 "Woman", 

-

515 "Woman", 

-

516 ] 

-

517 

-

518 ### 

-

519 ### Feature Maximization Metrics. 

-

520 ### 

-

521 

-

522 # Computation. 

-

523 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

524 data_vectors=people_characteristics, 

-

525 data_classes=people_genders, 

-

526 list_of_possible_features=characteristics_studied, 

-

527 ) 

-

528 

-

529 ### 

-

530 ### Get with bad parameters. 

-

531 ### 

-

532 

-

533 # Invalid case with invalid sort option. 

-

534 with pytest.raises( 

-

535 ValueError, 

-

536 match=re.escape( 

-

537 "The sort option factor `sort_by` has to be in the following values: `{'contrast', 'fmeasure'}` (currently: 'UNKNOWN')." 

-

538 ), 

-

539 ): 

-

540 fmc_computer.get_most_activated_classes_by_a_feature( 

-

541 feature="Shoes size", 

-

542 sort_by="UNKNOWN", 

-

543 ) 

-

544 

-

545 

-

546# ============================================================================== 

-

547# test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_default 

-

548# ============================================================================== 

-

549 

-

550 

-

551def test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_default(): 

-

552 """ 

-

553 In `FeaturesMaximizationMetric`, test `get_most_activated_classes_by_a_feature` method. 

-

554 """ 

-

555 

-

556 ### 

-

557 ### Data. 

-

558 ### 

-

559 

-

560 # Define people characteristics that will be studied. 

-

561 characteristics_studied: List[str] = [ 

-

562 "Shoes size", 

-

563 "Hair size", 

-

564 "Nose size", 

-

565 ] 

-

566 

-

567 # Get people characteristics. 

-

568 people_characteristics: csr_matrix = csr_matrix( 

-

569 [ 

-

570 [9, 5, 5], 

-

571 [9, 10, 5], 

-

572 [9, 20, 6], 

-

573 [5, 15, 5], 

-

574 [6, 25, 6], 

-

575 [5, 25, 5], 

-

576 ] 

-

577 ) 

-

578 

-

579 # Get people genders. 

-

580 people_genders: List[str] = [ 

-

581 "Man", 

-

582 "Man", 

-

583 "Man", 

-

584 "Woman", 

-

585 "Woman", 

-

586 "Woman", 

-

587 ] 

-

588 

-

589 ### 

-

590 ### Feature Maximization Metrics. 

-

591 ### 

-

592 

-

593 # Computation. 

-

594 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

595 data_vectors=people_characteristics, 

-

596 data_classes=people_genders, 

-

597 list_of_possible_features=characteristics_studied, 

-

598 ) 

-

599 

-

600 ### 

-

601 ### Get with correct parameters. 

-

602 ### 

-

603 

-

604 # Default case. 

-

605 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

606 feature="Shoes size", 

-

607 ) == ["Man"] 

-

608 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

609 feature="Hair size", 

-

610 ) == ["Woman"] 

-

611 assert ( 

-

612 fmc_computer.get_most_activated_classes_by_a_feature( 

-

613 feature="Nose size", 

-

614 ) 

-

615 == [] # noqa: WPS520 (falsy constant) 

-

616 ) 

-

617 

-

618 

-

619# ============================================================================== 

-

620# test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_only_activated_sorted_by_contrast 

-

621# ============================================================================== 

-

622 

-

623 

-

624def test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_only_activated_sorted_by_constrast(): 

-

625 """ 

-

626 In `FeaturesMaximizationMetric`, test `get_most_activated_classes_by_a_feature` method, only activated, sorted by contrast. 

-

627 """ 

-

628 

-

629 ### 

-

630 ### Data. 

-

631 ### 

-

632 

-

633 # Define people characteristics that will be studied. 

-

634 characteristics_studied: List[str] = [ 

-

635 "Shoes size", 

-

636 "Hair size", 

-

637 "Nose size", 

-

638 ] 

-

639 

-

640 # Get people characteristics. 

-

641 people_characteristics: csr_matrix = csr_matrix( 

-

642 [ 

-

643 [9, 5, 5], 

-

644 [9, 10, 5], 

-

645 [9, 20, 6], 

-

646 [5, 15, 5], 

-

647 [6, 25, 6], 

-

648 [5, 25, 5], 

-

649 ] 

-

650 ) 

-

651 

-

652 # Get people genders. 

-

653 people_genders: List[str] = [ 

-

654 "Man", 

-

655 "Man", 

-

656 "Man", 

-

657 "Woman", 

-

658 "Woman", 

-

659 "Woman", 

-

660 ] 

-

661 

-

662 ### 

-

663 ### Feature Maximization Metrics. 

-

664 ### 

-

665 

-

666 # Computation. 

-

667 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

668 data_vectors=people_characteristics, 

-

669 data_classes=people_genders, 

-

670 list_of_possible_features=characteristics_studied, 

-

671 ) 

-

672 

-

673 ### 

-

674 ### Get with correct parameters. 

-

675 ### 

-

676 

-

677 # Case with only activated, sort by contrast. 

-

678 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

679 feature="Shoes size", 

-

680 activation_only=True, 

-

681 sort_by="contrast", 

-

682 max_number=None, 

-

683 ) == ["Man"] 

-

684 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

685 feature="Hair size", 

-

686 activation_only=True, 

-

687 sort_by="contrast", 

-

688 max_number=None, 

-

689 ) == ["Woman"] 

-

690 assert ( 

-

691 fmc_computer.get_most_activated_classes_by_a_feature( 

-

692 feature="Nose size", 

-

693 activation_only=True, 

-

694 sort_by="contrast", 

-

695 max_number=None, 

-

696 ) 

-

697 == [] # noqa: WPS520 (falsy constant) 

-

698 ) 

-

699 

-

700 

-

701# ============================================================================== 

-

702# test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_sorted_by_contrast 

-

703# ============================================================================== 

-

704 

-

705 

-

706def test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_sorted_by_contrast(): 

-

707 """ 

-

708 In `FeaturesMaximizationMetric`, test `get_most_activated_classes_by_a_feature` method, sorted_by_contrast. 

-

709 """ 

-

710 

-

711 ### 

-

712 ### Data. 

-

713 ### 

-

714 

-

715 # Define people characteristics that will be studied. 

-

716 characteristics_studied: List[str] = [ 

-

717 "Shoes size", 

-

718 "Hair size", 

-

719 "Nose size", 

-

720 ] 

-

721 

-

722 # Get people characteristics. 

-

723 people_characteristics: csr_matrix = csr_matrix( 

-

724 [ 

-

725 [9, 5, 5], 

-

726 [9, 10, 5], 

-

727 [9, 20, 6], 

-

728 [5, 15, 5], 

-

729 [6, 25, 6], 

-

730 [5, 25, 5], 

-

731 ] 

-

732 ) 

-

733 

-

734 # Get people genders. 

-

735 people_genders: List[str] = [ 

-

736 "Man", 

-

737 "Man", 

-

738 "Man", 

-

739 "Woman", 

-

740 "Woman", 

-

741 "Woman", 

-

742 ] 

-

743 

-

744 ### 

-

745 ### Feature Maximization Metrics. 

-

746 ### 

-

747 

-

748 # Computation. 

-

749 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

750 data_vectors=people_characteristics, 

-

751 data_classes=people_genders, 

-

752 list_of_possible_features=characteristics_studied, 

-

753 ) 

-

754 

-

755 ### 

-

756 ### Get with correct parameters. 

-

757 ### 

-

758 

-

759 # Case with all, sort by contrast. 

-

760 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

761 feature="Shoes size", 

-

762 activation_only=False, 

-

763 sort_by="contrast", 

-

764 max_number=None, 

-

765 ) == ["Man", "Woman"] 

-

766 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

767 feature="Hair size", 

-

768 activation_only=False, 

-

769 sort_by="contrast", 

-

770 max_number=None, 

-

771 ) == ["Woman", "Man"] 

-

772 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

773 feature="Nose size", 

-

774 activation_only=False, 

-

775 sort_by="contrast", 

-

776 max_number=None, 

-

777 ) == ["Woman", "Man"] 

-

778 

-

779 

-

780# ============================================================================== 

-

781# test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_sorted_by_fmeasure 

-

782# ============================================================================== 

-

783 

-

784 

-

785def test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_sorted_by_fmeasure(): 

-

786 """ 

-

787 In `FeaturesMaximizationMetric`, test `get_most_activated_classes_by_a_feature` method, sorted by fmeasure. 

-

788 """ 

-

789 

-

790 ### 

-

791 ### Data. 

-

792 ### 

-

793 

-

794 # Define people characteristics that will be studied. 

-

795 characteristics_studied: List[str] = [ 

-

796 "Shoes size", 

-

797 "Hair size", 

-

798 "Nose size", 

-

799 ] 

-

800 

-

801 # Get people characteristics. 

-

802 people_characteristics: csr_matrix = csr_matrix( 

-

803 [ 

-

804 [9, 5, 5], 

-

805 [9, 10, 5], 

-

806 [9, 20, 6], 

-

807 [5, 15, 5], 

-

808 [6, 25, 6], 

-

809 [5, 25, 5], 

-

810 ] 

-

811 ) 

-

812 

-

813 # Get people genders. 

-

814 people_genders: List[str] = [ 

-

815 "Man", 

-

816 "Man", 

-

817 "Man", 

-

818 "Woman", 

-

819 "Woman", 

-

820 "Woman", 

-

821 ] 

-

822 

-

823 ### 

-

824 ### Feature Maximization Metrics. 

-

825 ### 

-

826 

-

827 # Computation. 

-

828 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

829 data_vectors=people_characteristics, 

-

830 data_classes=people_genders, 

-

831 list_of_possible_features=characteristics_studied, 

-

832 ) 

-

833 

-

834 ### 

-

835 ### Get with correct parameters. 

-

836 ### 

-

837 

-

838 # Case with all, sort by fmeasure. 

-

839 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

840 feature="Shoes size", 

-

841 activation_only=False, 

-

842 sort_by="fmeasure", 

-

843 max_number=None, 

-

844 ) == ["Man", "Woman"] 

-

845 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

846 feature="Hair size", 

-

847 activation_only=False, 

-

848 sort_by="fmeasure", 

-

849 max_number=None, 

-

850 ) == ["Woman", "Man"] 

-

851 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

852 feature="Nose size", 

-

853 activation_only=False, 

-

854 sort_by="fmeasure", 

-

855 max_number=None, 

-

856 ) == ["Man", "Woman"] 

-

857 

-

858 

-

859# ============================================================================== 

-

860# test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_sorted_by_fmeasure_limited_to_1 

-

861# ============================================================================== 

-

862 

-

863 

-

864def test_FeaturesMaximizationMetric_get_most_activated_classes_by_a_feature_sorted_by_fmeasure_limited_to_1(): 

-

865 """ 

-

866 In `FeaturesMaximizationMetric`, test `get_most_activated_classes_by_a_feature` method, sorted by fmeasure, limited to 1. 

-

867 """ 

-

868 

-

869 ### 

-

870 ### Data. 

-

871 ### 

-

872 

-

873 # Define people characteristics that will be studied. 

-

874 characteristics_studied: List[str] = [ 

-

875 "Shoes size", 

-

876 "Hair size", 

-

877 "Nose size", 

-

878 ] 

-

879 

-

880 # Get people characteristics. 

-

881 people_characteristics: csr_matrix = csr_matrix( 

-

882 [ 

-

883 [9, 5, 5], 

-

884 [9, 10, 5], 

-

885 [9, 20, 6], 

-

886 [5, 15, 5], 

-

887 [6, 25, 6], 

-

888 [5, 25, 5], 

-

889 ] 

-

890 ) 

-

891 

-

892 # Get people genders. 

-

893 people_genders: List[str] = [ 

-

894 "Man", 

-

895 "Man", 

-

896 "Man", 

-

897 "Woman", 

-

898 "Woman", 

-

899 "Woman", 

-

900 ] 

-

901 

-

902 ### 

-

903 ### Feature Maximization Metrics. 

-

904 ### 

-

905 

-

906 # Computation. 

-

907 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

908 data_vectors=people_characteristics, 

-

909 data_classes=people_genders, 

-

910 list_of_possible_features=characteristics_studied, 

-

911 ) 

-

912 

-

913 ### 

-

914 ### Get with correct parameters. 

-

915 ### 

-

916 

-

917 # Case with all, sort by fmeasure, limit to 1. 

-

918 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

919 feature="Shoes size", 

-

920 activation_only=False, 

-

921 sort_by="fmeasure", 

-

922 max_number=1, 

-

923 ) == ["Man"] 

-

924 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

925 feature="Hair size", 

-

926 activation_only=False, 

-

927 sort_by="fmeasure", 

-

928 max_number=1, 

-

929 ) == ["Woman"] 

-

930 assert fmc_computer.get_most_activated_classes_by_a_feature( 

-

931 feature="Nose size", 

-

932 activation_only=False, 

-

933 sort_by="fmeasure", 

-

934 max_number=1, 

-

935 ) == ["Man"] 

-

936 

-

937 

-

938# ============================================================================== 

-

939# test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_error_classe 

-

940# ============================================================================== 

-

941 

-

942 

-

943def test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_error_classe(): 

-

944 """ 

-

945 In `FeaturesMaximizationMetric`, test `get_most_active_features_by_a_classe` method with unknown classe. 

-

946 """ 

-

947 

-

948 ### 

-

949 ### Data. 

-

950 ### 

-

951 

-

952 # Define people characteristics that will be studied. 

-

953 characteristics_studied: List[str] = [ 

-

954 "Shoes size", 

-

955 "Hair size", 

-

956 "Nose size", 

-

957 ] 

-

958 

-

959 # Get people characteristics. 

-

960 people_characteristics: csr_matrix = csr_matrix( 

-

961 [ 

-

962 [9, 5, 5], 

-

963 [9, 10, 5], 

-

964 [9, 20, 6], 

-

965 [5, 15, 5], 

-

966 [6, 25, 6], 

-

967 [5, 25, 5], 

-

968 ] 

-

969 ) 

-

970 

-

971 # Get people genders. 

-

972 people_genders: List[str] = [ 

-

973 "Man", 

-

974 "Man", 

-

975 "Man", 

-

976 "Woman", 

-

977 "Woman", 

-

978 "Woman", 

-

979 ] 

-

980 

-

981 ### 

-

982 ### Feature Maximization Metrics. 

-

983 ### 

-

984 

-

985 # Computation. 

-

986 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

987 data_vectors=people_characteristics, 

-

988 data_classes=people_genders, 

-

989 list_of_possible_features=characteristics_studied, 

-

990 ) 

-

991 

-

992 ### 

-

993 ### Get with bad parameters. 

-

994 ### 

-

995 

-

996 # Invalid case with unknown feature. 

-

997 with pytest.raises( 

-

998 ValueError, 

-

999 match=re.escape("The requested classe `'UNKNOWN'` is unknown."), 

-

1000 ): 

-

1001 fmc_computer.get_most_active_features_by_a_classe( 

-

1002 classe="UNKNOWN", 

-

1003 ) 

-

1004 

-

1005 

-

1006# ============================================================================== 

-

1007# test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_error_sort_by 

-

1008# ============================================================================== 

-

1009 

-

1010 

-

1011def test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_error_sort_by(): 

-

1012 """ 

-

1013 In `FeaturesMaximizationMetric`, test `get_most_active_features_by_a_classe` method with unknown sort option. 

-

1014 """ 

-

1015 

-

1016 ### 

-

1017 ### Data. 

-

1018 ### 

-

1019 

-

1020 # Define people characteristics that will be studied. 

-

1021 characteristics_studied: List[str] = [ 

-

1022 "Shoes size", 

-

1023 "Hair size", 

-

1024 "Nose size", 

-

1025 ] 

-

1026 

-

1027 # Get people characteristics. 

-

1028 people_characteristics: csr_matrix = csr_matrix( 

-

1029 [ 

-

1030 [9, 5, 5], 

-

1031 [9, 10, 5], 

-

1032 [9, 20, 6], 

-

1033 [5, 15, 5], 

-

1034 [6, 25, 6], 

-

1035 [5, 25, 5], 

-

1036 ] 

-

1037 ) 

-

1038 

-

1039 # Get people genders. 

-

1040 people_genders: List[str] = [ 

-

1041 "Man", 

-

1042 "Man", 

-

1043 "Man", 

-

1044 "Woman", 

-

1045 "Woman", 

-

1046 "Woman", 

-

1047 ] 

-

1048 

-

1049 ### 

-

1050 ### Feature Maximization Metrics. 

-

1051 ### 

-

1052 

-

1053 # Computation. 

-

1054 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1055 data_vectors=people_characteristics, 

-

1056 data_classes=people_genders, 

-

1057 list_of_possible_features=characteristics_studied, 

-

1058 ) 

-

1059 

-

1060 ### 

-

1061 ### Get with bad parameters. 

-

1062 ### 

-

1063 

-

1064 # Invalid case with invalid sort option. 

-

1065 with pytest.raises( 

-

1066 ValueError, 

-

1067 match=re.escape( 

-

1068 "The sort option factor `sort_by` has to be in the following values: `{'contrast', 'fmeasure'}` (currently: 'UNKNOWN')." 

-

1069 ), 

-

1070 ): 

-

1071 fmc_computer.get_most_active_features_by_a_classe( 

-

1072 classe="Man", 

-

1073 sort_by="UNKNOWN", 

-

1074 ) 

-

1075 

-

1076 

-

1077# ============================================================================== 

-

1078# test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_default 

-

1079# ============================================================================== 

-

1080 

-

1081 

-

1082def test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_default(): 

-

1083 """ 

-

1084 In `FeaturesMaximizationMetric`, test `get_most_active_features_by_a_classe` method. 

-

1085 """ 

-

1086 

-

1087 ### 

-

1088 ### Data. 

-

1089 ### 

-

1090 

-

1091 # Define people characteristics that will be studied. 

-

1092 characteristics_studied: List[str] = [ 

-

1093 "Shoes size", 

-

1094 "Hair size", 

-

1095 "Nose size", 

-

1096 ] 

-

1097 

-

1098 # Get people characteristics. 

-

1099 people_characteristics: csr_matrix = csr_matrix( 

-

1100 [ 

-

1101 [9, 5, 5], 

-

1102 [9, 10, 5], 

-

1103 [9, 20, 6], 

-

1104 [5, 15, 5], 

-

1105 [6, 25, 6], 

-

1106 [5, 25, 5], 

-

1107 ] 

-

1108 ) 

-

1109 

-

1110 # Get people genders. 

-

1111 people_genders: List[str] = [ 

-

1112 "Man", 

-

1113 "Man", 

-

1114 "Man", 

-

1115 "Woman", 

-

1116 "Woman", 

-

1117 "Woman", 

-

1118 ] 

-

1119 

-

1120 ### 

-

1121 ### Feature Maximization Metrics. 

-

1122 ### 

-

1123 

-

1124 # Computation. 

-

1125 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1126 data_vectors=people_characteristics, 

-

1127 data_classes=people_genders, 

-

1128 list_of_possible_features=characteristics_studied, 

-

1129 ) 

-

1130 

-

1131 ### 

-

1132 ### Get with correct parameters. 

-

1133 ### 

-

1134 

-

1135 # Default case. 

-

1136 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1137 classe="Man", 

-

1138 ) == ["Shoes size"] 

-

1139 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1140 classe="Woman", 

-

1141 ) == ["Hair size"] 

-

1142 

-

1143 

-

1144# ============================================================================== 

-

1145# test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_only_activated_sorted_by_contrast 

-

1146# ============================================================================== 

-

1147 

-

1148 

-

1149def test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_only_activated_sorted_by_contrast(): 

-

1150 """ 

-

1151 In `FeaturesMaximizationMetric`, test `get_most_active_features_by_a_classe` method, only activated, sort by contrast. 

-

1152 """ 

-

1153 

-

1154 ### 

-

1155 ### Data. 

-

1156 ### 

-

1157 

-

1158 # Define people characteristics that will be studied. 

-

1159 characteristics_studied: List[str] = [ 

-

1160 "Shoes size", 

-

1161 "Hair size", 

-

1162 "Nose size", 

-

1163 ] 

-

1164 

-

1165 # Get people characteristics. 

-

1166 people_characteristics: csr_matrix = csr_matrix( 

-

1167 [ 

-

1168 [9, 5, 5], 

-

1169 [9, 10, 5], 

-

1170 [9, 20, 6], 

-

1171 [5, 15, 5], 

-

1172 [6, 25, 6], 

-

1173 [5, 25, 5], 

-

1174 ] 

-

1175 ) 

-

1176 

-

1177 # Get people genders. 

-

1178 people_genders: List[str] = [ 

-

1179 "Man", 

-

1180 "Man", 

-

1181 "Man", 

-

1182 "Woman", 

-

1183 "Woman", 

-

1184 "Woman", 

-

1185 ] 

-

1186 

-

1187 ### 

-

1188 ### Feature Maximization Metrics. 

-

1189 ### 

-

1190 

-

1191 # Computation. 

-

1192 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1193 data_vectors=people_characteristics, 

-

1194 data_classes=people_genders, 

-

1195 list_of_possible_features=characteristics_studied, 

-

1196 ) 

-

1197 

-

1198 ### 

-

1199 ### Get with correct parameters. 

-

1200 ### 

-

1201 

-

1202 # Case with only activated, sort by contrast. 

-

1203 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1204 classe="Man", 

-

1205 activation_only=True, 

-

1206 sort_by="contrast", 

-

1207 max_number=None, 

-

1208 ) == ["Shoes size"] 

-

1209 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1210 classe="Woman", 

-

1211 activation_only=True, 

-

1212 sort_by="contrast", 

-

1213 max_number=None, 

-

1214 ) == ["Hair size"] 

-

1215 

-

1216 

-

1217# ============================================================================== 

-

1218# test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_sorted_by_contrast 

-

1219# ============================================================================== 

-

1220 

-

1221 

-

1222def test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_sorted_by_contrast(): 

-

1223 """ 

-

1224 In `FeaturesMaximizationMetric`, test `get_most_active_features_by_a_classe` method, sorted by contrast. 

-

1225 """ 

-

1226 

-

1227 ### 

-

1228 ### Data. 

-

1229 ### 

-

1230 

-

1231 # Define people characteristics that will be studied. 

-

1232 characteristics_studied: List[str] = [ 

-

1233 "Shoes size", 

-

1234 "Hair size", 

-

1235 "Nose size", 

-

1236 ] 

-

1237 

-

1238 # Get people characteristics. 

-

1239 people_characteristics: csr_matrix = csr_matrix( 

-

1240 [ 

-

1241 [9, 5, 5], 

-

1242 [9, 10, 5], 

-

1243 [9, 20, 6], 

-

1244 [5, 15, 5], 

-

1245 [6, 25, 6], 

-

1246 [5, 25, 5], 

-

1247 ] 

-

1248 ) 

-

1249 

-

1250 # Get people genders. 

-

1251 people_genders: List[str] = [ 

-

1252 "Man", 

-

1253 "Man", 

-

1254 "Man", 

-

1255 "Woman", 

-

1256 "Woman", 

-

1257 "Woman", 

-

1258 ] 

-

1259 

-

1260 ### 

-

1261 ### Feature Maximization Metrics. 

-

1262 ### 

-

1263 

-

1264 # Computation. 

-

1265 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1266 data_vectors=people_characteristics, 

-

1267 data_classes=people_genders, 

-

1268 list_of_possible_features=characteristics_studied, 

-

1269 ) 

-

1270 

-

1271 ### 

-

1272 ### Get with correct parameters. 

-

1273 ### 

-

1274 

-

1275 # Case with all, sort by contrast. 

-

1276 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1277 classe="Man", 

-

1278 activation_only=False, 

-

1279 sort_by="contrast", 

-

1280 max_number=None, 

-

1281 ) == ["Shoes size", "Hair size", "Nose size"] 

-

1282 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1283 classe="Woman", 

-

1284 activation_only=False, 

-

1285 sort_by="contrast", 

-

1286 max_number=None, 

-

1287 ) == ["Hair size", "Shoes size", "Nose size"] 

-

1288 

-

1289 

-

1290# ============================================================================== 

-

1291# test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_sorted_by_fmeasure 

-

1292# ============================================================================== 

-

1293 

-

1294 

-

1295def test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_sorted_by_fmeasure(): 

-

1296 """ 

-

1297 In `FeaturesMaximizationMetric`, test `get_most_active_features_by_a_classe` method, sorted by fmeasure. 

-

1298 """ 

-

1299 

-

1300 ### 

-

1301 ### Data. 

-

1302 ### 

-

1303 

-

1304 # Define people characteristics that will be studied. 

-

1305 characteristics_studied: List[str] = [ 

-

1306 "Shoes size", 

-

1307 "Hair size", 

-

1308 "Nose size", 

-

1309 ] 

-

1310 

-

1311 # Get people characteristics. 

-

1312 people_characteristics: csr_matrix = csr_matrix( 

-

1313 [ 

-

1314 [9, 5, 5], 

-

1315 [9, 10, 5], 

-

1316 [9, 20, 6], 

-

1317 [5, 15, 5], 

-

1318 [6, 25, 6], 

-

1319 [5, 25, 5], 

-

1320 ] 

-

1321 ) 

-

1322 

-

1323 # Get people genders. 

-

1324 people_genders: List[str] = [ 

-

1325 "Man", 

-

1326 "Man", 

-

1327 "Man", 

-

1328 "Woman", 

-

1329 "Woman", 

-

1330 "Woman", 

-

1331 ] 

-

1332 

-

1333 ### 

-

1334 ### Feature Maximization Metrics. 

-

1335 ### 

-

1336 

-

1337 # Computation. 

-

1338 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1339 data_vectors=people_characteristics, 

-

1340 data_classes=people_genders, 

-

1341 list_of_possible_features=characteristics_studied, 

-

1342 ) 

-

1343 

-

1344 ### 

-

1345 ### Get with correct parameters. 

-

1346 ### 

-

1347 

-

1348 # Case with all, sort by fmeasure. 

-

1349 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1350 classe="Man", 

-

1351 activation_only=False, 

-

1352 sort_by="fmeasure", 

-

1353 max_number=None, 

-

1354 ) == ["Shoes size", "Hair size", "Nose size"] 

-

1355 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1356 classe="Woman", 

-

1357 activation_only=False, 

-

1358 sort_by="fmeasure", 

-

1359 max_number=None, 

-

1360 ) == ["Hair size", "Nose size", "Shoes size"] 

-

1361 

-

1362 

-

1363# ============================================================================== 

-

1364# test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_sorted_by_fmeasure_limited_to_1 

-

1365# ============================================================================== 

-

1366 

-

1367 

-

1368def test_FeaturesMaximizationMetric_get_most_active_features_by_a_classe_sorted_by_fmeasure_limited_to_1(): 

-

1369 """ 

-

1370 In `FeaturesMaximizationMetric`, test `get_most_active_features_by_a_classe` method, sorted by fmeasure, limited to 1. 

-

1371 """ 

-

1372 

-

1373 ### 

-

1374 ### Data. 

-

1375 ### 

-

1376 

-

1377 # Define people characteristics that will be studied. 

-

1378 characteristics_studied: List[str] = [ 

-

1379 "Shoes size", 

-

1380 "Hair size", 

-

1381 "Nose size", 

-

1382 ] 

-

1383 

-

1384 # Get people characteristics. 

-

1385 people_characteristics: csr_matrix = csr_matrix( 

-

1386 [ 

-

1387 [9, 5, 5], 

-

1388 [9, 10, 5], 

-

1389 [9, 20, 6], 

-

1390 [5, 15, 5], 

-

1391 [6, 25, 6], 

-

1392 [5, 25, 5], 

-

1393 ] 

-

1394 ) 

-

1395 

-

1396 # Get people genders. 

-

1397 people_genders: List[str] = [ 

-

1398 "Man", 

-

1399 "Man", 

-

1400 "Man", 

-

1401 "Woman", 

-

1402 "Woman", 

-

1403 "Woman", 

-

1404 ] 

-

1405 

-

1406 ### 

-

1407 ### Feature Maximization Metrics. 

-

1408 ### 

-

1409 

-

1410 # Computation. 

-

1411 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1412 data_vectors=people_characteristics, 

-

1413 data_classes=people_genders, 

-

1414 list_of_possible_features=characteristics_studied, 

-

1415 ) 

-

1416 

-

1417 ### 

-

1418 ### Get with correct parameters. 

-

1419 ### 

-

1420 

-

1421 # Case with all, sort by fmeasure, limit to 1. 

-

1422 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1423 classe="Man", 

-

1424 activation_only=False, 

-

1425 sort_by="fmeasure", 

-

1426 max_number=1, 

-

1427 ) == ["Shoes size"] 

-

1428 assert fmc_computer.get_most_active_features_by_a_classe( 

-

1429 classe="Woman", 

-

1430 activation_only=False, 

-

1431 sort_by="fmeasure", 

-

1432 max_number=1, 

-

1433 ) == ["Hair size"] 

-

1434 

-

1435 

-

1436# ============================================================================== 

-

1437# test_FeaturesMaximizationMetric_compare_itself 

-

1438# ============================================================================== 

-

1439 

-

1440 

-

1441def test_FeaturesMaximizationMetric_compare_itself(): 

-

1442 """ 

-

1443 In `FeaturesMaximizationMetric`, test `compare` method on itself. 

-

1444 """ 

-

1445 

-

1446 ### 

-

1447 ### Data. 

-

1448 ### 

-

1449 

-

1450 # Define people characteristics that will be studied. 

-

1451 characteristics_studied: List[str] = [ 

-

1452 "Shoes size", 

-

1453 "Hair size", 

-

1454 "Nose size", 

-

1455 ] 

-

1456 

-

1457 # Get people characteristics. 

-

1458 people_characteristics: csr_matrix = csr_matrix( 

-

1459 [ 

-

1460 [9, 5, 5], 

-

1461 [9, 10, 5], 

-

1462 [9, 20, 6], 

-

1463 [5, 15, 5], 

-

1464 [6, 25, 6], 

-

1465 [5, 25, 5], 

-

1466 ] 

-

1467 ) 

-

1468 

-

1469 # Get people genders. 

-

1470 people_genders: List[str] = [ 

-

1471 "Man", 

-

1472 "Man", 

-

1473 "Man", 

-

1474 "Woman", 

-

1475 "Woman", 

-

1476 "Woman", 

-

1477 ] 

-

1478 

-

1479 ### 

-

1480 ### Feature Maximization Metrics. 

-

1481 ### 

-

1482 

-

1483 # Computation. 

-

1484 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1485 data_vectors=people_characteristics, 

-

1486 data_classes=people_genders, 

-

1487 list_of_possible_features=characteristics_studied, 

-

1488 ) 

-

1489 

-

1490 ### 

-

1491 ### Comparison. 

-

1492 ### 

-

1493 

-

1494 assert fmc_computer.compare( 

-

1495 fmc_reference=fmc_computer, 

-

1496 ) == (1.0, 1.0, 1.0) 

-

1497 

-

1498 

-

1499# ============================================================================== 

-

1500# test_FeaturesMaximizationMetric_compare_error_list_of_possible_features 

-

1501# ============================================================================== 

-

1502 

-

1503 

-

1504def test_FeaturesMaximizationMetric_compare_error_list_of_possible_features(): 

-

1505 """ 

-

1506 In `FeaturesMaximizationMetric`, test `compare` method with error in list_of_possible_features. 

-

1507 """ 

-

1508 

-

1509 ### 

-

1510 ### Data. 

-

1511 ### 

-

1512 

-

1513 # Define people characteristics that will be studied. 

-

1514 characteristics_studied_1: List[str] = [ 

-

1515 "Shoes size", 

-

1516 "Hair size", 

-

1517 "Nose size", 

-

1518 ] 

-

1519 characteristics_studied_2: List[str] = [ 

-

1520 "Shoes size", 

-

1521 "Hair size", 

-

1522 "Tongue size", 

-

1523 "Ear size", 

-

1524 ] 

-

1525 

-

1526 # Get people characteristics. 

-

1527 people_characteristics_1: csr_matrix = csr_matrix( 

-

1528 [ 

-

1529 [9, 5, 5], 

-

1530 [9, 10, 5], 

-

1531 [9, 20, 6], 

-

1532 [5, 15, 5], 

-

1533 [6, 25, 6], 

-

1534 [5, 25, 5], 

-

1535 ] 

-

1536 ) 

-

1537 people_characteristics_2: csr_matrix = csr_matrix( 

-

1538 [ 

-

1539 [9, 5, 4, 7], 

-

1540 [9, 10, 3, 6], 

-

1541 [9, 20, 5, 7], 

-

1542 [5, 15, 5, 7], 

-

1543 [6, 25, 4, 7], 

-

1544 [5, 25, 3, 6], 

-

1545 ] 

-

1546 ) 

-

1547 

-

1548 # Get people genders. 

-

1549 people_genders: List[str] = [ 

-

1550 "Man", 

-

1551 "Man", 

-

1552 "Man", 

-

1553 "Woman", 

-

1554 "Woman", 

-

1555 "Woman", 

-

1556 ] 

-

1557 

-

1558 ### 

-

1559 ### Feature Maximization Metrics. 

-

1560 ### 

-

1561 

-

1562 # Computation. 

-

1563 fmc_computer_1: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1564 data_vectors=people_characteristics_1, 

-

1565 data_classes=people_genders, 

-

1566 list_of_possible_features=characteristics_studied_1, 

-

1567 ) 

-

1568 

-

1569 # Computation. 

-

1570 fmc_computer_2: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1571 data_vectors=people_characteristics_2, 

-

1572 data_classes=people_genders, 

-

1573 list_of_possible_features=characteristics_studied_2, 

-

1574 ) 

-

1575 

-

1576 ### 

-

1577 ### Comparison. 

-

1578 ### 

-

1579 

-

1580 # Invalid case with inconsistencies in list_of_possible_features. 

-

1581 with pytest.raises( 

-

1582 ValueError, 

-

1583 match=re.escape( 

-

1584 "The list of features `list_of_possible_features` must be the same for both FMC modelization. +: ['Nose size'], -: ['Tongue size', 'Ear size']" 

-

1585 ), 

-

1586 ): 

-

1587 fmc_computer_1.compare( 

-

1588 fmc_reference=fmc_computer_2, 

-

1589 ) 

-

1590 

-

1591 

-

1592# ============================================================================== 

-

1593# test_FeaturesMaximizationMetric_compare_default 

-

1594# ============================================================================== 

-

1595 

-

1596 

-

1597def test_FeaturesMaximizationMetric_compare_default(): 

-

1598 """ 

-

1599 In `FeaturesMaximizationMetric`, test `compare` method. 

-

1600 """ 

-

1601 

-

1602 ### 

-

1603 ### Data. 

-

1604 ### 

-

1605 

-

1606 # Define people characteristics that will be studied. 

-

1607 characteristics_studied: List[str] = [ 

-

1608 "Shoes size", 

-

1609 "Hair size", 

-

1610 "Nose size", 

-

1611 ] 

-

1612 

-

1613 # Get people characteristics. 

-

1614 people_characteristics: csr_matrix = csr_matrix( 

-

1615 [ 

-

1616 [9, 5, 5], 

-

1617 [9, 10, 5], 

-

1618 [9, 10, 6], 

-

1619 [9, 15, 6], 

-

1620 [9, 20, 6], 

-

1621 [5, 15, 5], 

-

1622 [6, 20, 6], 

-

1623 [5, 25, 5], 

-

1624 [6, 25, 5], 

-

1625 [5, 25, 5], 

-

1626 [3, 5, 15], 

-

1627 [4, 10, 15], 

-

1628 [2, 5, 15], 

-

1629 [3, 10, 14], 

-

1630 [3, 15, 14], 

-

1631 ] 

-

1632 ) 

-

1633 

-

1634 # Get people genders. 

-

1635 people_genders_1: List[str] = [ 

-

1636 "Man", 

-

1637 "Man", 

-

1638 "Man", 

-

1639 "Man", 

-

1640 "Man", 

-

1641 "Woman", 

-

1642 "Woman", 

-

1643 "Woman", 

-

1644 "Woman", 

-

1645 "Woman", 

-

1646 "Child", 

-

1647 "Child", 

-

1648 "Child", 

-

1649 "Child", 

-

1650 "Child", 

-

1651 ] 

-

1652 people_genders_2: List[str] = [ 

-

1653 "0", 

-

1654 "0", 

-

1655 "0", 

-

1656 "0", 

-

1657 "0", 

-

1658 "0", 

-

1659 "0", 

-

1660 "0", 

-

1661 "0", 

-

1662 "0", 

-

1663 "1", 

-

1664 "1", 

-

1665 "1", 

-

1666 "1", 

-

1667 "1", 

-

1668 ] 

-

1669 

-

1670 ### 

-

1671 ### Feature Maximization Metrics. 

-

1672 ### 

-

1673 

-

1674 # Computation. 

-

1675 fmc_reference: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1676 data_vectors=people_characteristics, 

-

1677 data_classes=people_genders_1, 

-

1678 list_of_possible_features=characteristics_studied, 

-

1679 ) 

-

1680 

-

1681 # Computation. 

-

1682 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1683 data_vectors=people_characteristics, 

-

1684 data_classes=people_genders_2, 

-

1685 list_of_possible_features=characteristics_studied, 

-

1686 ) 

-

1687 

-

1688 ### 

-

1689 ### Comparison. 

-

1690 ### 

-

1691 

-

1692 # Default case. 

-

1693 assert fmc_computer.compare( 

-

1694 fmc_reference=fmc_reference, 

-

1695 ) == (0.5793801642856952, 1.0, 0.7336804366512111) 

-

1696 

-

1697 

-

1698# ============================================================================== 

-

1699# test_FeaturesMaximizationMetric_compare_rounded 

-

1700# ============================================================================== 

-

1701 

-

1702 

-

1703def test_FeaturesMaximizationMetric_compare_rounded(): 

-

1704 """ 

-

1705 In `FeaturesMaximizationMetric`, test `compare` method with rounded option. 

-

1706 """ 

-

1707 

-

1708 ### 

-

1709 ### Data. 

-

1710 ### 

-

1711 

-

1712 # Define people characteristics that will be studied. 

-

1713 characteristics_studied: List[str] = [ 

-

1714 "Shoes size", 

-

1715 "Hair size", 

-

1716 "Nose size", 

-

1717 ] 

-

1718 

-

1719 # Get people characteristics. 

-

1720 people_characteristics: csr_matrix = csr_matrix( 

-

1721 [ 

-

1722 [9, 5, 5], 

-

1723 [9, 10, 5], 

-

1724 [9, 10, 6], 

-

1725 [9, 15, 6], 

-

1726 [9, 20, 6], 

-

1727 [5, 15, 5], 

-

1728 [6, 20, 6], 

-

1729 [5, 25, 5], 

-

1730 [6, 25, 5], 

-

1731 [5, 25, 5], 

-

1732 [3, 5, 15], 

-

1733 [4, 10, 15], 

-

1734 [2, 5, 15], 

-

1735 [3, 10, 14], 

-

1736 [3, 15, 14], 

-

1737 ] 

-

1738 ) 

-

1739 

-

1740 # Get people genders. 

-

1741 people_genders_1: List[str] = [ 

-

1742 "Man", 

-

1743 "Man", 

-

1744 "Man", 

-

1745 "Man", 

-

1746 "Man", 

-

1747 "Woman", 

-

1748 "Woman", 

-

1749 "Woman", 

-

1750 "Woman", 

-

1751 "Woman", 

-

1752 "Child", 

-

1753 "Child", 

-

1754 "Child", 

-

1755 "Child", 

-

1756 "Child", 

-

1757 ] 

-

1758 people_genders_2: List[str] = [ 

-

1759 "0", 

-

1760 "0", 

-

1761 "0", 

-

1762 "0", 

-

1763 "0", 

-

1764 "0", 

-

1765 "0", 

-

1766 "0", 

-

1767 "0", 

-

1768 "0", 

-

1769 "1", 

-

1770 "1", 

-

1771 "1", 

-

1772 "1", 

-

1773 "1", 

-

1774 ] 

-

1775 

-

1776 ### 

-

1777 ### Feature Maximization Metrics. 

-

1778 ### 

-

1779 

-

1780 # Computation. 

-

1781 fmc_reference: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1782 data_vectors=people_characteristics, 

-

1783 data_classes=people_genders_1, 

-

1784 list_of_possible_features=characteristics_studied, 

-

1785 ) 

-

1786 

-

1787 # Computation. 

-

1788 fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric( 

-

1789 data_vectors=people_characteristics, 

-

1790 data_classes=people_genders_2, 

-

1791 list_of_possible_features=characteristics_studied, 

-

1792 ) 

-

1793 

-

1794 ### 

-

1795 ### Comparison. 

-

1796 ### 

-

1797 

-

1798 # Rounded case. 

-

1799 assert fmc_computer.compare( 

-

1800 fmc_reference=fmc_reference, 

-

1801 rounded=4, 

-

1802 ) == (0.5794, 1.0, 0.7337) 

-
- - - diff --git a/coverage/favicon_32.png b/coverage/favicon_32.png deleted file mode 100755 index 8649f04..0000000 Binary files a/coverage/favicon_32.png and /dev/null differ diff --git a/coverage/keybd_closed.png b/coverage/keybd_closed.png deleted file mode 100755 index ba119c4..0000000 Binary files a/coverage/keybd_closed.png and /dev/null differ diff --git a/coverage/keybd_open.png b/coverage/keybd_open.png deleted file mode 100755 index a8bac6c..0000000 Binary files a/coverage/keybd_open.png and /dev/null differ diff --git a/coverage/status.json b/coverage/status.json deleted file mode 100755 index d9154e5..0000000 --- a/coverage/status.json +++ /dev/null @@ -1 +0,0 @@ -{"format":2,"version":"6.5.0","globals":"12aeed8cf25d60a2a7cbc6193ebda365","files":{"d_16e4f0e538c2fa43_fmc_py":{"hash":"73d92bcc5f939b04160b3cb3cd74467a","index":{"nums":[2,1,120,0,0,80,0,0],"html_filename":"d_16e4f0e538c2fa43_fmc_py.html","relative_filename":"src\\cognitivefactory\\features_maximization_metric\\fmc.py"}},"d_a44f0ac069e85531_test_fmc_py":{"hash":"a5411567fe720312a98b206a10785db5","index":{"nums":[2,1,176,0,0,0,0,0],"html_filename":"d_a44f0ac069e85531_test_fmc_py.html","relative_filename":"tests\\test_fmc.py"}}}} \ No newline at end of file diff --git a/coverage/style.css b/coverage/style.css deleted file mode 100755 index 4292ce9..0000000 --- a/coverage/style.css +++ /dev/null @@ -1,311 +0,0 @@ -@charset "UTF-8"; -/* Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 */ -/* For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt */ -/* Don't edit this .css file. Edit the .scss file instead! 
*/ -html, body, h1, h2, h3, p, table, td, th { margin: 0; padding: 0; border: 0; font-weight: inherit; font-style: inherit; font-size: 100%; font-family: inherit; vertical-align: baseline; } - -body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; font-size: 1em; background: #fff; color: #000; } - -@media (prefers-color-scheme: dark) { body { background: #1e1e1e; } } - -@media (prefers-color-scheme: dark) { body { color: #eee; } } - -html > body { font-size: 16px; } - -a:active, a:focus { outline: 2px dashed #007acc; } - -p { font-size: .875em; line-height: 1.4em; } - -table { border-collapse: collapse; } - -td { vertical-align: top; } - -table tr.hidden { display: none !important; } - -p#no_rows { display: none; font-size: 1.2em; } - -a.nav { text-decoration: none; color: inherit; } - -a.nav:hover { text-decoration: underline; color: inherit; } - -.hidden { display: none; } - -header { background: #f8f8f8; width: 100%; z-index: 2; border-bottom: 1px solid #ccc; } - -@media (prefers-color-scheme: dark) { header { background: black; } } - -@media (prefers-color-scheme: dark) { header { border-color: #333; } } - -header .content { padding: 1rem 3.5rem; } - -header h2 { margin-top: .5em; font-size: 1em; } - -header p.text { margin: .5em 0 -.5em; color: #666; font-style: italic; } - -@media (prefers-color-scheme: dark) { header p.text { color: #aaa; } } - -header.sticky { position: fixed; left: 0; right: 0; height: 2.5em; } - -header.sticky .text { display: none; } - -header.sticky h1, header.sticky h2 { font-size: 1em; margin-top: 0; display: inline-block; } - -header.sticky .content { padding: 0.5rem 3.5rem; } - -header.sticky .content p { font-size: 1em; } - -header.sticky ~ #source { padding-top: 6.5em; } - -main { position: relative; z-index: 1; } - -footer { margin: 1rem 3.5rem; } - -footer .content { padding: 0; color: #666; font-style: italic; } - -@media (prefers-color-scheme: dark) { footer 
.content { color: #aaa; } } - -#index { margin: 1rem 0 0 3.5rem; } - -h1 { font-size: 1.25em; display: inline-block; } - -#filter_container { float: right; margin: 0 2em 0 0; } - -#filter_container input { width: 10em; padding: 0.2em 0.5em; border: 2px solid #ccc; background: #fff; color: #000; } - -@media (prefers-color-scheme: dark) { #filter_container input { border-color: #444; } } - -@media (prefers-color-scheme: dark) { #filter_container input { background: #1e1e1e; } } - -@media (prefers-color-scheme: dark) { #filter_container input { color: #eee; } } - -#filter_container input:focus { border-color: #007acc; } - -header button { font-family: inherit; font-size: inherit; border: 1px solid; border-radius: .2em; color: inherit; padding: .1em .5em; margin: 1px calc(.1em + 1px); cursor: pointer; border-color: #ccc; } - -@media (prefers-color-scheme: dark) { header button { border-color: #444; } } - -header button:active, header button:focus { outline: 2px dashed #007acc; } - -header button.run { background: #eeffee; } - -@media (prefers-color-scheme: dark) { header button.run { background: #373d29; } } - -header button.run.show_run { background: #dfd; border: 2px solid #00dd00; margin: 0 .1em; } - -@media (prefers-color-scheme: dark) { header button.run.show_run { background: #373d29; } } - -header button.mis { background: #ffeeee; } - -@media (prefers-color-scheme: dark) { header button.mis { background: #4b1818; } } - -header button.mis.show_mis { background: #fdd; border: 2px solid #ff0000; margin: 0 .1em; } - -@media (prefers-color-scheme: dark) { header button.mis.show_mis { background: #4b1818; } } - -header button.exc { background: #f7f7f7; } - -@media (prefers-color-scheme: dark) { header button.exc { background: #333; } } - -header button.exc.show_exc { background: #eee; border: 2px solid #808080; margin: 0 .1em; } - -@media (prefers-color-scheme: dark) { header button.exc.show_exc { background: #333; } } - -header button.par { background: #ffffd5; } - 
-@media (prefers-color-scheme: dark) { header button.par { background: #650; } } - -header button.par.show_par { background: #ffa; border: 2px solid #bbbb00; margin: 0 .1em; } - -@media (prefers-color-scheme: dark) { header button.par.show_par { background: #650; } } - -#help_panel, #source p .annotate.long { display: none; position: absolute; z-index: 999; background: #ffffcc; border: 1px solid #888; border-radius: .2em; color: #333; padding: .25em .5em; } - -#source p .annotate.long { white-space: normal; float: right; top: 1.75em; right: 1em; height: auto; } - -#help_panel_wrapper { float: right; position: relative; } - -#keyboard_icon { margin: 5px; } - -#help_panel_state { display: none; } - -#help_panel { top: 25px; right: 0; padding: .75em; border: 1px solid #883; color: #333; } - -#help_panel .keyhelp p { margin-top: .75em; } - -#help_panel .legend { font-style: italic; margin-bottom: 1em; } - -.indexfile #help_panel { width: 25em; } - -.pyfile #help_panel { width: 18em; } - -#help_panel_state:checked ~ #help_panel { display: block; } - -kbd { border: 1px solid black; border-color: #888 #333 #333 #888; padding: .1em .35em; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-weight: bold; background: #eee; border-radius: 3px; } - -#source { padding: 1em 0 1em 3.5rem; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; } - -#source p { position: relative; white-space: pre; } - -#source p * { box-sizing: border-box; } - -#source p .n { float: left; text-align: right; width: 3.5rem; box-sizing: border-box; margin-left: -3.5rem; padding-right: 1em; color: #999; } - -@media (prefers-color-scheme: dark) { #source p .n { color: #777; } } - -#source p .n.highlight { background: #ffdd00; } - -#source p .n a { margin-top: -4em; padding-top: 4em; text-decoration: none; color: #999; } - -@media (prefers-color-scheme: dark) { #source p .n a { color: #777; } } - -#source p .n a:hover { text-decoration: underline; color: #999; } - -@media 
(prefers-color-scheme: dark) { #source p .n a:hover { color: #777; } } - -#source p .t { display: inline-block; width: 100%; box-sizing: border-box; margin-left: -.5em; padding-left: 0.3em; border-left: 0.2em solid #fff; } - -@media (prefers-color-scheme: dark) { #source p .t { border-color: #1e1e1e; } } - -#source p .t:hover { background: #f2f2f2; } - -@media (prefers-color-scheme: dark) { #source p .t:hover { background: #282828; } } - -#source p .t:hover ~ .r .annotate.long { display: block; } - -#source p .t .com { color: #008000; font-style: italic; line-height: 1px; } - -@media (prefers-color-scheme: dark) { #source p .t .com { color: #6a9955; } } - -#source p .t .key { font-weight: bold; line-height: 1px; } - -#source p .t .str { color: #0451a5; } - -@media (prefers-color-scheme: dark) { #source p .t .str { color: #9cdcfe; } } - -#source p.mis .t { border-left: 0.2em solid #ff0000; } - -#source p.mis.show_mis .t { background: #fdd; } - -@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t { background: #4b1818; } } - -#source p.mis.show_mis .t:hover { background: #f2d2d2; } - -@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t:hover { background: #532323; } } - -#source p.run .t { border-left: 0.2em solid #00dd00; } - -#source p.run.show_run .t { background: #dfd; } - -@media (prefers-color-scheme: dark) { #source p.run.show_run .t { background: #373d29; } } - -#source p.run.show_run .t:hover { background: #d2f2d2; } - -@media (prefers-color-scheme: dark) { #source p.run.show_run .t:hover { background: #404633; } } - -#source p.exc .t { border-left: 0.2em solid #808080; } - -#source p.exc.show_exc .t { background: #eee; } - -@media (prefers-color-scheme: dark) { #source p.exc.show_exc .t { background: #333; } } - -#source p.exc.show_exc .t:hover { background: #e2e2e2; } - -@media (prefers-color-scheme: dark) { #source p.exc.show_exc .t:hover { background: #3c3c3c; } } - -#source p.par .t { border-left: 0.2em solid #bbbb00; } - 
-#source p.par.show_par .t { background: #ffa; } - -@media (prefers-color-scheme: dark) { #source p.par.show_par .t { background: #650; } } - -#source p.par.show_par .t:hover { background: #f2f2a2; } - -@media (prefers-color-scheme: dark) { #source p.par.show_par .t:hover { background: #6d5d0c; } } - -#source p .r { position: absolute; top: 0; right: 2.5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } - -#source p .annotate { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; color: #666; padding-right: .5em; } - -@media (prefers-color-scheme: dark) { #source p .annotate { color: #ddd; } } - -#source p .annotate.short:hover ~ .long { display: block; } - -#source p .annotate.long { width: 30em; right: 2.5em; } - -#source p input { display: none; } - -#source p input ~ .r label.ctx { cursor: pointer; border-radius: .25em; } - -#source p input ~ .r label.ctx::before { content: "▶ "; } - -#source p input ~ .r label.ctx:hover { background: #e8f4ff; color: #666; } - -@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { background: #0f3a42; } } - -@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { color: #aaa; } } - -#source p input:checked ~ .r label.ctx { background: #d0e8ff; color: #666; border-radius: .75em .75em 0 0; padding: 0 .5em; margin: -.25em 0; } - -@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { background: #056; } } - -@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { color: #aaa; } } - -#source p input:checked ~ .r label.ctx::before { content: "▼ "; } - -#source p input:checked ~ .ctxs { padding: .25em .5em; overflow-y: scroll; max-height: 10.5em; } - -#source p label.ctx { color: #999; display: inline-block; padding: 0 .5em; font-size: .8333em; } - -@media (prefers-color-scheme: dark) { #source p label.ctx { 
color: #777; } } - -#source p .ctxs { display: block; max-height: 0; overflow-y: hidden; transition: all .2s; padding: 0 .5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; white-space: nowrap; background: #d0e8ff; border-radius: .25em; margin-right: 1.75em; } - -@media (prefers-color-scheme: dark) { #source p .ctxs { background: #056; } } - -#source p .ctxs span { display: block; text-align: right; } - -#index { font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 0.875em; } - -#index table.index { margin-left: -.5em; } - -#index td, #index th { text-align: right; width: 5em; padding: .25em .5em; border-bottom: 1px solid #eee; } - -@media (prefers-color-scheme: dark) { #index td, #index th { border-color: #333; } } - -#index td.name, #index th.name { text-align: left; width: auto; } - -#index th { font-style: italic; color: #333; cursor: pointer; } - -@media (prefers-color-scheme: dark) { #index th { color: #ddd; } } - -#index th:hover { background: #eee; } - -@media (prefers-color-scheme: dark) { #index th:hover { background: #333; } } - -#index th[aria-sort="ascending"], #index th[aria-sort="descending"] { white-space: nowrap; background: #eee; padding-left: .5em; } - -@media (prefers-color-scheme: dark) { #index th[aria-sort="ascending"], #index th[aria-sort="descending"] { background: #333; } } - -#index th[aria-sort="ascending"]::after { font-family: sans-serif; content: " ↑"; } - -#index th[aria-sort="descending"]::after { font-family: sans-serif; content: " ↓"; } - -#index td.name a { text-decoration: none; color: inherit; } - -#index tr.total td, #index tr.total_dynamic td { font-weight: bold; border-top: 1px solid #ccc; border-bottom: none; } - -#index tr.file:hover { background: #eee; } - -@media (prefers-color-scheme: dark) { #index tr.file:hover { background: #333; } } - -#index tr.file:hover td.name { text-decoration: underline; color: inherit; } - 
-#scroll_marker { position: fixed; z-index: 3; right: 0; top: 0; width: 16px; height: 100%; background: #fff; border-left: 1px solid #eee; will-change: transform; } - -@media (prefers-color-scheme: dark) { #scroll_marker { background: #1e1e1e; } } - -@media (prefers-color-scheme: dark) { #scroll_marker { border-color: #333; } } - -#scroll_marker .marker { background: #ccc; position: absolute; min-height: 3px; width: 100%; } - -@media (prefers-color-scheme: dark) { #scroll_marker .marker { background: #444; } } diff --git a/credits/index.html b/credits/index.html index c02428a..7d49224 100755 --- a/credits/index.html +++ b/credits/index.html @@ -1303,7 +1303,7 @@

Development dependenciesmkdocstrings Automatic documentation from sources, for MkDocs. [python]>=0.18 -0.23.0 +0.24.0 ISC @@ -1569,7 +1569,7 @@

Development dependenciestypes-markdown Typing stubs for Markdown >=3.3 -3.5.0.1 +3.5.0.3 Apache-2.0 license diff --git a/reference/cognitivefactory/features_maximization_metric/index.html b/reference/cognitivefactory/features_maximization_metric/index.html index bc4b949..1906f37 100755 --- a/reference/cognitivefactory/features_maximization_metric/index.html +++ b/reference/cognitivefactory/features_maximization_metric/index.html @@ -811,8 +811,17 @@

features_maximization_metric

-

cognitivefactory-features-maximization-metric package.

-

Implementation of Features Maximization Metric, an unbiased metric aimed at estimate the quality of an unsupervised classification.

+ +

One module is available:

+ diff --git a/search/search_index.json b/search/search_index.json index 6d7065a..9984cee 100755 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Features Maximization Metric","text":"

Implementation of Features Maximization Metric, an unbiased metric aimed at estimate the quality of an unsupervised classification.

"},{"location":"#quick-description","title":"Quick description","text":"

Features Maximization (FMC) is a features selection method described in Lamirel, J.-C., Cuxac, P., & Hajlaoui, K. (2016). A Novel Approach to Feature Selection Based on Quality Estimation Metrics. In Advances in Knowledge Discovery and Management (pp. 121\u2013140). Springer International Publishing. https://doi.org/10.1007/978-3-319-45763-5_7.

This metric is computed by applying the following steps:

  1. Compute the Features F-Measure metric (based on Features Recall and Features Predominance metrics).

    (a) The Features Recall FR[f][c] for a given class c and a given feature f is the ratio between the sum of the vectors weights of the feature f for data in class c and the sum of all vectors weights of feature f for all data. It answers the question: \"Can the feature f distinguish the class c from other classes c' ?\"

    (b) The Features Predominance FP[f][c] for a given class c and a given feature f is the ratio between the sum of the vectors weights of the feature f for data in class c and the sum of all vectors weights of all feature f' for data in class c. It answers the question: \"Can the feature f better identify the class c than the other features f' ?\"

    (c) The Features F-Measure FM[f][c] for a given class c and a given feature f is the harmonic mean of the Features Recall (a) and the Features Predominance (c). It answers the question: \"How much information does the feature f contain about the class c ?\"

  2. Compute the Features Selection (based on F-Measure Overall Average comparison).

    (d) The F-Measure Overall Average is the average of Features F-Measure (c) for all classes c and for all features f. It answers the question: \"What are the mean of information contained by features in all classes ?\"

    (e) A feature f is Selected if and only if it exist at least one class c for which the Features F-Measure (c) FM[f][c] is bigger than the F-Measure Overall Average (d). It answers the question: \"What are the features which contain more information than the mean of information in the dataset ?\"

    (f) A Feature f is Deleted if and only if the Features F-Measure (c) FM[f][c] is always lower than the F-Measure Overall Average (d) for each class c. It answers the question: \"What are the features which do not contain more information than the mean of information in the dataset ?\"

  3. Compute the Features Contrast and Features Activation (based on F-Measure Marginal Averages comparison).

    (g) The F-Measure Marginal Averages for a given feature f is the average of Features F-Measure (c) for all classes c and for the given feature f. It answers the question: \"What is the mean of information contained by the feature f in all classes ?\"

    (h) The Features Contrast FC[f][c] for a given class c and a given selected feature f is the ratio between the Features F-Measure (c) FM[f][c] and the F-Measure Marginal Averages (g) for selected feature f put to the power of an Amplification Factor. It answers the question: \"How relevant is the feature f to distinguish the class c ?\"

    (i) A selected Feature f is Active for a given class c if and only if the Features Contrast (h) FC[f][c] is bigger than 1.0. It answers the question: \"For which classes is a selected feature f relevant ?\"

This metric is an efficient method to:

"},{"location":"#documentation","title":"Documentation","text":""},{"location":"#installation","title":"Installation","text":"

Features Maximization Metric requires Python 3.8 or above.

To install with pip:

# install package\npython3 -m pip install cognitivefactory-features-maximization-metric\n

To install with pipx:

# install pipx\npython3 -m pip install --user pipx\n\n# install package\npipx install --python python3 cognitivefactory-features-maximization-metric\n
"},{"location":"#development","title":"Development","text":"

To work on this project or contribute to it, please read:

"},{"location":"#references","title":"References","text":""},{"location":"#how-to-cite","title":"How to cite","text":"

Schild, E. (2023). cognitivefactory/features-maximization-metric. Zenodo. https://doi.org/10.5281/zenodo.7646382.

"},{"location":"changelog/","title":"Changelog","text":"

All notable changes to this project will be documented in this file.

The format is based on Keep a Changelog and this project adheres to Semantic Versioning.

"},{"location":"changelog/#100-2023-11-14","title":"1.0.0 - 2023-11-14","text":"

Compare with 0.1.1

"},{"location":"changelog/#build","title":"Build","text":""},{"location":"changelog/#code-refactoring","title":"Code Refactoring","text":""},{"location":"changelog/#011-2023-02-16","title":"0.1.1 - 2023-02-16","text":"

Compare with 0.1.0

"},{"location":"changelog/#build_1","title":"Build","text":""},{"location":"changelog/#010-2023-02-16","title":"0.1.0 - 2023-02-16","text":"

Compare with first commit

"},{"location":"changelog/#build_2","title":"Build","text":""},{"location":"changelog/#features","title":"Features","text":""},{"location":"code_of_conduct/","title":"Contributor Covenant Code of Conduct","text":""},{"location":"code_of_conduct/#our-pledge","title":"Our Pledge","text":"

In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.

"},{"location":"code_of_conduct/#our-standards","title":"Our Standards","text":"

Examples of behavior that contributes to creating a positive environment include:

Examples of unacceptable behavior by participants include:

"},{"location":"code_of_conduct/#our-responsibilities","title":"Our Responsibilities","text":"

Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.

"},{"location":"code_of_conduct/#scope","title":"Scope","text":"

This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.

"},{"location":"code_of_conduct/#enforcement","title":"Enforcement","text":"

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at erwan.schild@e-i.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.

"},{"location":"code_of_conduct/#attribution","title":"Attribution","text":"

This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at http://contributor-covenant.org/version/1/4

"},{"location":"contributing/","title":"Contributing","text":"

Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given.

"},{"location":"contributing/#environment-setup","title":"Environment setup","text":"

Nothing easier! Follow the instructions below.

Note

We STRONGLY recommend using a Linux distribution for Python development (Windows sometimes leads to obscure compatibility errors...)

  1. Install Git to version and track our software changes.

    • On Windows, use the official installer: Git-for-Windows.

    • On Linux, simply use your package manager.

    Note

    Git-for-Windows doesn't provide the command make. In following step, use pdm instead.

  2. Install Python as the programming language for this project.

    • On Windows, use the official installer: Python Releases for Windows.

    • On Linux, simply use your package manager.

    Note

    You can also use pyenv.

    # install pyenv\ngit clone https://github.com/pyenv/pyenv ~/.pyenv\n\n# setup pyenv (you should also put these three lines in .bashrc or similar)\nexport PATH=\"${HOME}/.pyenv/bin:${PATH}\"\nexport PYENV_ROOT=\"${HOME}/.pyenv\"\neval \"$(pyenv init -)\"\n\n# install Python 3.8\npyenv install 3.8\n\n# make it available globally\npyenv global system 3.8\n
  3. Fork and clone the repository:

    git clone https://github.com/cognitivefactory/features-maximization-metric/\ncd features-maximization-metric\n
  4. Install the dependencies of the project with:

    cd features-maximization-metric\nmake setup # on Linux\npdm install # on Windows\n

    Note

    If it fails for some reason (especially on Windows), you'll need to install pipx and pdm manually.

    You can install them with:

    python3 -m pip install --user pipx\npipx install pdm\n

    Now you can try running make setup again, or simply pdm install.

Your project is now ready and dependencies are installed.

"},{"location":"contributing/#available-template-tasks","title":"Available template tasks","text":"

This project uses duty to run tasks. A Makefile is also provided. To run a task, use make TASK on Linux and pdm run duty TASK on Windows.

To show the available template task:

make help # on Linux\npdm run duty --list # on Windows\n

The Makefile will try to run certain tasks on multiple Python versions. If for some reason you don't want to run the task on multiple Python versions, you can do one of the following:

  1. export PYTHON_VERSIONS=: this will run the task with only the current Python version
  2. run the task directly with pdm run duty TASK

The Makefile detects if a virtual environment is activated, so make/pdm will work the same with the virtualenv activated or not.

"},{"location":"contributing/#development-journey","title":"Development journey","text":"

As usual:

  1. create a new branch: git checkout -b feature-or-bugfix-name
  2. edit the code and/or the documentation

If you updated the documentation or the project dependencies:

  1. run make docs-regen
  2. run make docs-serve, go to http://localhost:8000 and check that everything looks good

Before committing:

  1. run make format to auto-format the code
  2. run make check to check everything (fix any warning)
  3. run make test to run the tests (fix any issue)
  4. follow our commit message convention

If you are unsure about how to fix or ignore a warning, just let the continuous integration fail, and we will help you during review.

Don't bother updating the changelog, we will take care of this.

"},{"location":"contributing/#commit-message-convention","title":"Commit message convention","text":"

Commits messages must follow the Angular style:

<type>[(scope)]: Subject\n\n[Body]\n

Scope and body are optional. Type can be:

Subject (and body) must be valid Markdown. If you write a body, please add issues references at the end:

Body.\n\nReferences: #10, #11.\nFixes #15.\n
"},{"location":"contributing/#pull-requests-guidelines","title":"Pull requests guidelines","text":"

Link to any related issue in the Pull Request message.

During review, we recommend using fixups:

# SHA is the SHA of the commit you want to fix\ngit commit --fixup=SHA\n

Once all the changes are approved, you can squash your commits:

git rebase -i --autosquash master\n

And force-push:

git push -f\n

If this seems all too complicated, you can push or force-push each new commit, and we will squash them ourselves if needed, before merging.

"},{"location":"credits/","title":"Credits","text":"

These projects were used to build cognitivefactory-features-maximization-metric. Thank you!

python | pdm | copier-pdm

"},{"location":"credits/#exec-1--runtime-dependencies","title":"Runtime dependencies","text":"Project Summary Version (accepted) Version (last resolved) License joblib Lightweight pipelining with Python functions >=1.1.1 1.2.0 BSD numpy Fundamental package for array computing in Python >=1.22.2 1.24.4 BSD-3-Clause scikit-learn A set of python modules for machine learning and data mining >=0.24.1 1.3.2 new BSD scipy Fundamental algorithms for scientific computing in Python >=1.7.3 1.9.3 BSD License threadpoolctl threadpoolctl >=2.0.0 3.1.0 BSD-3-Clause"},{"location":"credits/#exec-1--development-dependencies","title":"Development dependencies","text":"Project Summary Version (accepted) Version (last resolved) License ansimarkup Produce colored terminal text with an xml-like markup ~=1.4 1.5.0 Revised BSD License astor Read/rewrite/write Python ASTs >=0.8 0.8.1 BSD-3-Clause attrs Classes Without Boilerplate >=19.2.0 22.1.0 MIT autoflake Removes unused imports and unused variables >=1.4 1.7.8 MIT babel Internationalization utilities ~=2.10 2.13.1 BSD-3-Clause bandit Security oriented static analyser for python code. >=1.7.3 1.7.4 Apache-2.0 license black The uncompromising code formatter. >=21.10b0 23.11.0 MIT certifi Python package for providing Mozilla's CA Bundle. >=2022.12.7 2023.7.22 MPL-2.0 charset-normalizer The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. <3,>=2 2.1.1 MIT click Composable command line interface toolkit >=8.0.0 8.1.3 BSD-3-Clause colorama Cross-platform colored terminal text. ; platform_system == \"Windows\" 0.4.6 BSD License coverage Code coverage measurement for Python [toml]>=5.2.1 6.5.0 Apache 2.0 darglint A utility for ensuring Google-style docstrings stay up to date with the source code. >=1.8 1.8.1 MIT dparse A parser for Python dependency files >=0.6.2 0.6.2 MIT license duty A simple task runner. 
>=0.7 1.1.0 ISC exceptiongroup Backport of PEP 654 (exception groups) >=1.0.0rc8; python_version < \"3.11\" 1.0.4 MIT License execnet execnet: rapid multi-Python deployment >=1.1 1.9.0 MIT failprint Run a command, print its output only if it fails. !=1.0.0,>=0.11 1.0.2 ISC flake8 the modular source code checker: pep8 pyflakes and co >=4.0 5.0.4 MIT flake8-bandit Automated security testing with bandit and flake8. >=2.1 4.1.1 MIT flake8-black flake8 plugin to call black as a code style validator >=0.2 0.3.6 MIT flake8-bugbear A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle. >=21.9 23.3.12 MIT flake8-builtins Check for python builtins being used as variables or parameters >=1.5 2.2.0 GNU General Public License v2 (GPLv2) flake8-comprehensions A flake8 plugin to help you write better list/set/dict comprehensions. >=3.7 3.14.0 MIT flake8-docstrings Extension for flake8 which uses pydocstyle to check docstrings >=1.6 1.7.0 MIT flake8-plugin-utils The package provides base classes and utils for flake8 plugin writing <2.0.0,>=1.3.2 1.3.2 MIT flake8-polyfill Polyfill package for Flake8 plugins >=1.0.2 1.0.2 MIT flake8-pytest-style A flake8 plugin checking common style issues or inconsistencies with pytest-based tests. >=1.5 1.7.2 MIT flake8-string-format string format checker, plugin for flake8 >=0.3 0.3.0 MIT License flake8-tidy-imports A flake8 plugin that helps you write tidier imports. >=4.5 4.10.0 MIT flake8-variables-names A flake8 extension that helps to make more readable variables names >=0.0 0.0.6 MIT ghp-import Copy your docs directly to the gh-pages branch. >=1.0 2.1.0 Apache Software License git-changelog Automatic Changelog generator using Jinja2 templates. 
>=0.4,<1.0 0.6.0 ISC gitdb Git Object Database <5,>=4.0.1 4.0.10 BSD License gitpython GitPython is a Python library used to interact with Git repositories >=3.1.30 3.1.40 BSD griffe Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API. >=0.24 0.24.1 ISC idna Internationalized Domain Names in Applications (IDNA) <4,>=2.5 3.4 BSD License importlib-metadata Read metadata from Python packages >=4.3; python_version < \"3.10\" 4.13.0 Apache Software License iniconfig iniconfig: brain-dead simple config-ini parsing 1.1.1 MIT License isort A Python utility / library to sort Python imports. >=5.10 5.12.0 MIT jinja2 A very fast and expressive template engine. <4,>=2.11 3.1.2 BSD-3-Clause markdown Python implementation of Markdown. <4.0.0,>=3.3.3 3.3.7 BSD License markdown-callouts Markdown extension: a classier syntax for admonitions >=0.2 0.3.0 MIT markdown-exec Utilities to execute code blocks in Markdown files. >=0.5 1.7.0 ISC markupsafe Safely add untrusted strings to HTML/XML markup. >=2.0 2.1.1 BSD-3-Clause mccabe McCabe checker, plugin for flake8 <0.8.0,>=0.7.0 0.7.0 Expat license mergedeep A deep merge function for \ud83d\udc0d. >=1.3.4 1.3.4 MIT License mkdocs Project documentation with Markdown. >=1.3 1.5.3 BSD License mkdocs-coverage MkDocs plugin to integrate your coverage HTML report into your site. >=0.2 1.0.0 ISC mkdocs-gen-files MkDocs plugin to programmatically generate documentation pages during the build >=0.3 0.5.0 MIT License mkdocs-literate-nav MkDocs plugin to specify the navigation in Markdown instead of YAML >=0.4 0.6.1 MIT License mkdocs-material Documentation that simply works >=7.3 9.4.8 MIT License mkdocs-material-extensions Extension pack for Python Markdown and MkDocs Material. 
~=1.3 1.3 MIT License mkdocs-section-index MkDocs plugin to allow clickable sections that lead to an index page >=0.3 0.3.8 MIT License mkdocstrings Automatic documentation from sources, for MkDocs. [python]>=0.18 0.23.0 ISC mkdocstrings-python A Python handler for mkdocstrings. >=0.5.2 0.8.2 ISC mypy Optional static typing for Python >=0.910 1.7.0 MIT mypy-extensions Type system extensions for programs checked with the mypy type checker. >=0.4.3 1.0.0 MIT License packaging Core utilities for Python packages >=22.0 23.0 BSD License paginate Divides large result sets into pages for easier browsing ~=0.5 0.5.6 MIT pathspec Utility library for gitignore style pattern matching of file paths. >=0.9.0 0.11.2 Mozilla Public License 2.0 (MPL 2.0) pbr Python Build Reasonableness !=2.1.0,>=2.0.0 5.11.0 Apache Software License pep8-naming Check PEP-8 naming conventions, plugin for flake8 >=0.12 0.13.3 Expat license platformdirs A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\". >=2 2.5.4 MIT License pluggy plugin and hook calling mechanisms for python <2.0,>=0.12 1.0.0 MIT ptyprocess Run a subprocess in a pseudo terminal ~=0.6; sys_platform != \"win32\" 0.7.0 ? pycodestyle Python style guide checker <2.10.0,>=2.9.0 2.9.1 Expat license pydocstyle Python docstring style checker >=2.1 6.1.1 MIT pyflakes passive checker of Python programs <3,>=1.1.0 2.5.0 MIT pygments Pygments is a syntax highlighting package written in Python. ~=2.16 2.16.1 BSD-2-Clause pymdown-extensions Extension pack for Python Markdown. >=9 10.4 MIT License pytest pytest: simple powerful testing with Python >=6.2 7.4.3 MIT pytest-cov Pytest plugin for measuring coverage. >=3.0 4.1.0 MIT pytest-randomly Pytest plugin to randomly order tests and control random.seed. 
>=3.10 3.15.0 MIT pytest-xdist pytest xdist plugin for distributed testing, most importantly across multiple CPUs >=2.4 3.4.0 MIT python-dateutil Extensions to the standard Python datetime module >=2.8.1 2.8.2 Dual License pytz World timezone definitions, modern and historical >=2015.7; python_version < \"3.9\" 2023.3.post1 MIT pyyaml YAML parser and emitter for Python >=5.1 6.0 MIT pyyaml-env-tag A custom YAML tag for referencing environment variables in YAML files. >=0.1 0.1 MIT License regex Alternative regular expression module, to replace re. >=2022.4 2022.10.31 Apache Software License requests Python HTTP for Humans. ~=2.26 2.28.1 Apache 2.0 ruamel-yaml ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order >=0.17.21 0.17.21 MIT license ruamel-yaml-clib C version of reader, parser and emitter for ruamel.yaml derived from libyaml >=0.2.6; platform_python_implementation == \"CPython\" and python_version < \"3.11\" 0.2.7 MIT safety Checks installed dependencies for known vulnerabilities and licenses. >=2 2.3.4 MIT license semver Python helper for Semantic Versioning (http://semver.org/) ~=2.13 2.13.0 BSD setuptools Easily download, build, install, upgrade, and uninstall Python packages >=19.3 65.6.3 MIT License six Python 2 and 3 compatibility utilities >=1.5 1.16.0 MIT smmap A pure Python implementation of a sliding window memory map manager <6,>=3.0.1 5.0.0 BSD snowballstemmer This package provides 29 stemmers for 28 languages generated from Snowball algorithms. 
2.2.0 BSD-3-Clause stevedore Manage dynamic plugins for Python applications >=1.20.0 3.5.2 Apache Software License toml Python Library for Tom's Obvious, Minimal Language >=0.10 0.10.2 MIT tomli A lil' TOML parser >=2.0.1; python_version < \"3.11\" 2.0.1 MIT License types-markdown Typing stubs for Markdown >=3.3 3.5.0.1 Apache-2.0 license types-toml Typing stubs for toml >=0.10 0.10.8.7 Apache-2.0 license typing-extensions Backported and Experimental Type Hints for Python 3.7+ >=4.0.1; python_version < \"3.11\" 4.4.0 Python Software Foundation License urllib3 HTTP library with thread-safe connection pooling, file post, and more. <1.27,>=1.21.1 1.26.13 MIT watchdog Filesystem events monitoring >=2.0 2.1.9 Apache License 2.0 wps-light The strictest and most opinionated python linter ever (lighter fork). >=0.15 0.16.1 MIT zipp Backport of pathlib-compatible object wrapper for zip files >=0.5 3.10.0 MIT License"},{"location":"license/","title":"CeCILL-C FREE SOFTWARE LICENSE AGREEMENT","text":""},{"location":"license/#notice","title":"Notice","text":"

This Agreement is a Free Software license agreement that is the result of discussions between its authors in order to ensure compliance with the two main principles guiding its drafting:

The authors of the CeCILL-C license are:

CeCILL stands for Ce(a) C(nrs) I(nria) L(ogiciel) L(ibre)

"},{"location":"license/#preamble","title":"Preamble","text":"

The purpose of this Free Software license agreement is to grant users the right to modify and re-use the software governed by this license.

The exercising of this right is conditional upon the obligation to make available to the community the modifications made to the source code of the software so as to contribute to its evolution.

In consideration of access to the source code and the rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors only have limited liability.

In this respect, the risks associated with loading, using, modifying and/or developing or reproducing the software by the user are brought to the user's attention, given its Free Software status, which may make it complicated to use, with the result that its use is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the suitability of the software as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions of security. This Agreement may be freely reproduced and published, provided it is not altered, and that no provisions are either added or removed herefrom.

This Agreement may apply to any or all software for which the holder of the economic rights decides to submit the use thereof to its provisions.

"},{"location":"license/#article-1-definitions","title":"Article 1 - DEFINITIONS","text":"

For the purpose of this Agreement, when the following expressions commence with a capital letter, they shall have the following meaning:

Agreement: means this license agreement, and its possible subsequent versions and annexes.

Software: means the software in its Object Code and/or Source Code form and, where applicable, its documentation, \"as is\" when the Licensee accepts the Agreement.

Initial Software: means the Software in its Source Code and possibly its Object Code form and, where applicable, its documentation, \"as is\" when it is first distributed under the terms and conditions of the Agreement.

Modified Software: means the Software modified by at least one Integrated Contribution.

Source Code: means all the Software's instructions and program lines to which access is required so as to modify the Software.

Object Code: means the binary files originating from the compilation of the Source Code.

Holder: means the holder(s) of the economic rights over the Initial Software.

Licensee: means the Software user(s) having accepted the Agreement.

Contributor: means a Licensee having made at least one Integrated Contribution.

Licensor: means the Holder, or any other individual or legal entity, who distributes the Software under the Agreement.

Integrated Contribution: means any or all modifications, corrections, translations, adaptations and/or new functions integrated into the Source Code by any or all Contributors.

Related Module: means a set of sources files including their documentation that, without modification to the Source Code, enables supplementary functions or services in addition to those offered by the Software.

Derivative Software: means any combination of the Software, modified or not, and of a Related Module.

Parties: mean both the Licensee and the Licensor.

These expressions may be used both in singular and plural form.

"},{"location":"license/#article-2-purpose","title":"Article 2 - PURPOSE","text":"

The purpose of the Agreement is the grant by the Licensor to the Licensee of a non-exclusive, transferable and worldwide license for the Software as set forth in Article 5 hereinafter for the whole term of the protection granted by the rights over said Software.

"},{"location":"license/#article-3-acceptance","title":"Article 3 - ACCEPTANCE","text":"

3.1 The Licensee shall be deemed as having accepted the terms and conditions of this Agreement upon the occurrence of the first of the following events:

3.2 One copy of the Agreement, containing a notice relating to the characteristics of the Software, to the limited warranty, and to the fact that its use is restricted to experienced users has been provided to the Licensee prior to its acceptance as set forth in Article 3.1 hereinabove, and the Licensee hereby acknowledges that it has read and understood it.

"},{"location":"license/#article-4-effective-date-and-term","title":"Article 4 - EFFECTIVE DATE AND TERM","text":""},{"location":"license/#41-effective-date","title":"4.1 EFFECTIVE DATE","text":"

The Agreement shall become effective on the date when it is accepted by the Licensee as set forth in Article 3.1.

"},{"location":"license/#42-term","title":"4.2 TERM","text":"

The Agreement shall remain in force for the entire legal term of protection of the economic rights over the Software.

"},{"location":"license/#article-5-scope-of-rights-granted","title":"Article 5 - SCOPE OF RIGHTS GRANTED","text":"

The Licensor hereby grants to the Licensee, who accepts, the following rights over the Software for any or all use, and for the term of the Agreement, on the basis of the terms and conditions set forth hereinafter.

Besides, if the Licensor owns or comes to own one or more patents protecting all or part of the functions of the Software or of its components, the Licensor undertakes not to enforce the rights granted by these patents against successive Licensees using, exploiting or modifying the Software. If these patents are transferred, the Licensor undertakes to have the transferees subscribe to the obligations set forth in this paragraph.

"},{"location":"license/#51-right-of-use","title":"5.1 RIGHT OF USE","text":"

The Licensee is authorized to use the Software, without any limitation as to its fields of application, with it being hereinafter specified that this comprises:

  1. permanent or temporary reproduction of all or part of the Software by any or all means and in any or all form.

  2. loading, displaying, running, or storing the Software on any or all medium.

  3. entitlement to observe, study or test its operation so as to determine the ideas and principles behind any or all constituent elements of said Software. This shall apply when the Licensee carries out any or all loading, displaying, running, transmission or storage operation as regards the Software, that it is entitled to carry out hereunder.

"},{"location":"license/#52-right-of-modification","title":"5.2 RIGHT OF MODIFICATION","text":"

The right of modification includes the right to translate, adapt, arrange, or make any or all modifications to the Software, and the right to reproduce the resulting software. It includes, in particular, the right to create a Derivative Software.

The Licensee is authorized to make any or all modification to the Software provided that it includes an explicit notice that it is the author of said modification and indicates the date of the creation thereof.

"},{"location":"license/#53-right-of-distribution","title":"5.3 RIGHT OF DISTRIBUTION","text":"

In particular, the right of distribution includes the right to publish, transmit and communicate the Software to the general public on any or all medium, and by any or all means, and the right to market, either in consideration of a fee, or free of charge, one or more copies of the Software by any means.

The Licensee is further authorized to distribute copies of the modified or unmodified Software to third parties according to the terms and conditions set forth hereinafter.

"},{"location":"license/#531-distribution-of-software-without-modification","title":"5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION","text":"

The Licensee is authorized to distribute true copies of the Software in Source Code or Object Code form, provided that said distribution complies with all the provisions of the Agreement and is accompanied by:

  1. a copy of the Agreement,

  2. a notice relating to the limitation of both the Licensor's warranty and liability as set forth in Article 8 and Article 9,

and that, in the event that only the Object Code of the Software is redistributed, the Licensee allows effective access to the full Source Code of the Software at a minimum during the entire period of its distribution of the Software, it being understood that the additional cost of acquiring the Source Code shall not exceed the cost of transferring the data.

"},{"location":"license/#532-distribution-of-modified-software","title":"5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE","text":"

When the Licensee makes an Integrated Contribution to the Software, the terms and conditions for the distribution of the resulting Modified Software become subject to all the provisions of this Agreement.

The Licensee is authorized to distribute the Modified Software, in source code or object code form, provided that said distribution complies with all the provisions of the Agreement and is accompanied by:

  1. a copy of the Agreement,

  2. a notice relating to the limitation of both the Licensor's warranty and liability as set forth in Article 8 and Article 9,

and that, in the event that only the object code of the Modified Software is redistributed, the Licensee allows effective access to the full source code of the Modified Software at a minimum during the entire period of its distribution of the Modified Software, it being understood that the additional cost of acquiring the source code shall not exceed the cost of transferring the data.

"},{"location":"license/#533-distribution-of-derivative-software","title":"5.3.3 DISTRIBUTION OF DERIVATIVE SOFTWARE","text":"

When the Licensee creates Derivative Software, this Derivative Software may be distributed under a license agreement other than this Agreement, subject to compliance with the requirement to include a notice concerning the rights over the Software as defined in Article 6.4. In the event the creation of the Derivative Software required modification of the Source Code, the Licensee undertakes that:

  1. the resulting Modified Software will be governed by this Agreement,

  2. the Integrated Contributions in the resulting Modified Software will be clearly identified and documented,

  3. the Licensee will allow effective access to the source code of the Modified Software, at a minimum during the entire period of distribution of the Derivative Software, such that such modifications may be carried over in a subsequent version of the Software; it being understood that the additional cost of purchasing the source code of the Modified Software shall not exceed the cost of transferring the data.

"},{"location":"license/#534-compatibility-with-the-cecill-license","title":"5.3.4 COMPATIBILITY WITH THE CeCILL LICENSE","text":"

When a Modified Software contains an Integrated Contribution subject to the CeCILL license agreement, or when a Derivative Software contains a Related Module subject to the CeCILL license agreement, the provisions set forth in the third item of Article 6.4 are optional.

"},{"location":"license/#article-6-intellectual-property","title":"Article 6 - INTELLECTUAL PROPERTY","text":""},{"location":"license/#61-over-the-initial-software","title":"6.1 OVER THE INITIAL SOFTWARE","text":"

The Holder owns the economic rights over the Initial Software. Any or all use of the Initial Software is subject to compliance with the terms and conditions under which the Holder has elected to distribute its work and no one shall be entitled to modify the terms and conditions for the distribution of said Initial Software.

The Holder undertakes that the Initial Software will remain ruled at least by this Agreement, for the duration set forth in Article 4.2.

"},{"location":"license/#62-over-the-integrated-contributions","title":"6.2 OVER THE INTEGRATED CONTRIBUTIONS","text":"

The Licensee who develops an Integrated Contribution is the owner of the intellectual property rights over this Contribution as defined by applicable law.

"},{"location":"license/#63-over-the-related-modules","title":"6.3 OVER THE RELATED MODULES","text":"

The Licensee who develops a Related Module is the owner of the intellectual property rights over this Related Module as defined by applicable law and is free to choose the type of agreement that shall govern its distribution under the conditions defined in Article 5.3.3.

"},{"location":"license/#64-notice-of-rights","title":"6.4 NOTICE OF RIGHTS","text":"

The Licensee expressly undertakes:

  1. not to remove, or modify, in any manner, the intellectual property notices attached to the Software;

  2. to reproduce said notices, in an identical manner, in the copies of the Software modified or not;

  3. to ensure that use of the Software, its intellectual property notices and the fact that it is governed by the Agreement is indicated in a text that is easily accessible, specifically from the interface of any Derivative Software.

The Licensee undertakes not to directly or indirectly infringe the intellectual property rights of the Holder and/or Contributors on the Software and to take, where applicable, vis-\u00e0-vis its staff, any and all measures required to ensure respect of said intellectual property rights of the Holder and/or Contributors.

"},{"location":"license/#article-7-related-services","title":"Article 7 - RELATED SERVICES","text":"

7.1 Under no circumstances shall the Agreement oblige the Licensor to provide technical assistance or maintenance services for the Software.

However, the Licensor is entitled to offer this type of services. The terms and conditions of such technical assistance, and/or such maintenance, shall be set forth in a separate instrument. Only the Licensor offering said maintenance and/or technical assistance services shall incur liability therefor.

7.2 Similarly, any Licensor is entitled to offer to its licensees, under its sole responsibility, a warranty, that shall only be binding upon itself, for the redistribution of the Software and/or the Modified Software, under terms and conditions that it is free to decide. Said warranty, and the financial terms and conditions of its application, shall be subject of a separate instrument executed between the Licensor and the Licensee.

"},{"location":"license/#article-8-liability","title":"Article 8 - LIABILITY","text":"

8.1 Subject to the provisions of Article 8.2, the Licensee shall be entitled to claim compensation for any direct loss it may have suffered from the Software as a result of a fault on the part of the relevant Licensor, subject to providing evidence thereof.

8.2 The Licensor's liability is limited to the commitments made under this Agreement and shall not be incurred as a result of in particular:

In particular the Parties expressly agree that any or all pecuniary or business loss (i.e. loss of data, loss of profits, operating loss, loss of customers or orders, opportunity cost, any disturbance to business activities) or any or all legal proceedings instituted against the Licensee by a third party, shall constitute consequential loss and shall not provide entitlement to any or all compensation from the Licensor.

"},{"location":"license/#article-9-warranty","title":"Article 9 - WARRANTY","text":"

9.1 The Licensee acknowledges that the scientific and technical state-of-the-art when the Software was distributed did not enable all possible uses to be tested and verified, nor for the presence of possible defects to be detected. In this respect, the Licensee's attention has been drawn to the risks associated with loading, using, modifying and/or developing and reproducing the Software which are reserved for experienced users.

The Licensee shall be responsible for verifying, by any or all means, the suitability of the product for its requirements, its good working order, and for ensuring that it shall not cause damage to either persons or properties.

9.2 The Licensor hereby represents, in good faith, that it is entitled to grant all the rights over the Software (including in particular the rights set forth in Article 5).

9.3 The Licensee acknowledges that the Software is supplied \"as is\" by the Licensor without any other express or tacit warranty, other than that provided for in Article 9.2 and, in particular, without any warranty as to its commercial value, its secured, safe, innovative or relevant nature.

Specifically, the Licensor does not warrant that the Software is free from any error, that it will operate without interruption, that it will be compatible with the Licensee's own equipment and software configuration, nor that it will meet the Licensee's requirements.

9.4 The Licensor does not either expressly or tacitly warrant that the Software does not infringe any third party intellectual property right relating to a patent, software or any other property right. Therefore, the Licensor disclaims any and all liability towards the Licensee arising out of any or all proceedings for infringement that may be instituted in respect of the use, modification and redistribution of the Software. Nevertheless, should such proceedings be instituted against the Licensee, the Licensor shall provide it with technical and legal assistance for its defense. Such technical and legal assistance shall be decided on a case-by-case basis between the relevant Licensor and the Licensee pursuant to a memorandum of understanding. The Licensor disclaims any and all liability as regards the Licensee's use of the name of the Software. No warranty is given as regards the existence of prior rights over the name of the Software or as regards the existence of a trademark.

"},{"location":"license/#article-10-termination","title":"Article 10 - TERMINATION","text":"

10.1 In the event of a breach by the Licensee of its obligations hereunder, the Licensor may automatically terminate this Agreement thirty (30) days after notice has been sent to the Licensee and has remained ineffective.

10.2 A Licensee whose Agreement is terminated shall no longer be authorized to use, modify or distribute the Software. However, any licenses that it may have granted prior to termination of the Agreement shall remain valid subject to their having been granted in compliance with the terms and conditions hereof.

"},{"location":"license/#article-11-miscellaneous","title":"Article 11 - MISCELLANEOUS","text":"

11.1 EXCUSABLE EVENTS Neither Party shall be liable for any or all delay, or failure to perform the Agreement, that may be attributable to an event of force majeure, an act of God or an outside cause, such as defective functioning or interruptions of the electricity or telecommunications networks, network paralysis following a virus attack, intervention by government authorities, natural disasters, water damage, earthquakes, fire, explosions, strikes and labor unrest, war, etc.

11.2 Any failure by either Party, on one or more occasions, to invoke one or more of the provisions hereof, shall under no circumstances be interpreted as being a waiver by the interested Party of its right to invoke said provision(s) subsequently.

11.3 The Agreement cancels and replaces any or all previous agreements, whether written or oral, between the Parties and having the same purpose, and constitutes the entirety of the agreement between said Parties concerning said purpose. No supplement or modification to the terms and conditions hereof shall be effective as between the Parties unless it is made in writing and signed by their duly authorized representatives.

11.4 In the event that one or more of the provisions hereof were to conflict with a current or future applicable act or legislative text, said act or legislative text shall prevail, and the Parties shall make the necessary amendments so as to comply with said act or legislative text. All other provisions shall remain effective. Similarly, invalidity of a provision of the Agreement, for any reason whatsoever, shall not cause the Agreement as a whole to be invalid.

11.5 LANGUAGE The Agreement is drafted in both French and English and both versions are deemed authentic.

"},{"location":"license/#article-12-new-versions-of-the-agreement","title":"Article 12 - NEW VERSIONS OF THE AGREEMENT","text":"

12.1 Any person is authorized to duplicate and distribute copies of this Agreement.

12.2 So as to ensure coherence, the wording of this Agreement is protected and may only be modified by the authors of the License, who reserve the right to periodically publish updates or new versions of the Agreement, each with a separate number. These subsequent versions may address new issues encountered by Free Software.

12.3 Any Software distributed under a given version of the Agreement may only be subsequently distributed under the same version of the Agreement or a subsequent version.

"},{"location":"license/#article-13-governing-law-and-jurisdiction","title":"Article 13 - GOVERNING LAW AND JURISDICTION","text":"

13.1 The Agreement is governed by French law. The Parties agree to endeavor to seek an amicable solution to any disagreements or disputes that may arise during the performance of the Agreement.

13.2 Failing an amicable solution within two (2) months as from their occurrence, and unless emergency proceedings are necessary, the disagreements or disputes shall be referred to the Paris Courts having jurisdiction, by the more diligent Party.

Version 1.0 dated 2006-09-05.

"},{"location":"usage/","title":"Usage","text":""},{"location":"usage/#basic-usecase-what-are-the-physical-characteristics-that-most-distinguish-men-from-women","title":"Basic usecase: \"What are the physical characteristics that most distinguish men from women?\"","text":"
  1. Load Python dependencies:

    ###\n### Python dependencies.\n###\n\nfrom cognitivefactory.features_maximization_metric.fmc import FeaturesMaximizationMetric\nfrom scipy.sparse import csr_matrix\nfrom typing import List\n

  2. Define problem data:

    ###\n### Data.\n###\n\n# Define people characteristics that will be studied.\ncharacteristics_studied: List[str] = [\n    \"Shoes size\",\n    \"Hair size\",\n    \"Nose size\",\n]\n\n# Get people characteristics.\npeople_characteristics: csr_matrix = csr_matrix(\n    [\n        [9, 5, 5],\n        [9, 10, 5],\n        [9, 20, 6],\n        [5, 15, 5],\n        [6, 25, 6],\n        [5, 25, 5],\n    ]\n)\n\n# Get people genders.\npeople_genders: List[str] = [\n    \"Man\",\n    \"Man\",\n    \"Man\",\n    \"Woman\",\n    \"Woman\",\n    \"Woman\",\n]\n

  3. Modelize the problem:

    ###\n### Feature Maximization Metrics.\n###\n\n# Main computation.\nfmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric(\n    data_vectors=people_characteristics,\n    data_classes=people_genders,\n    list_of_possible_features=characteristics_studied,\n    amplification_factor=1,\n)\n

  4. Determine relevant characteristics:

    ###\n### Analysis 1: Delete characteristics that aren't relevant.\n###\n\nprint(\n    \"\\n\",\n    \"1. Which characteristic seems not relevant to distinguish men from women ?\",\n)\nfor characteristic in characteristics_studied:\n    if not fmc_computer.features_selection[characteristic]:\n        print(\n            \"    - '{0}' seems not relevant.\".format(characteristic)\n        )\n
    1. Which characteristic seems not relevant to distinguish men from women ?\n    - 'Nose size' seems not relevant.\n

  5. Describe gender by relevant characteristics.:

    ###\n### Analysis 2: Describe gender by relevant characteristics.\n###\n\nprint(\n    \"\\n\",\n    \"2. According to remaining characteristics:\",\n)\nfor gender in sorted(set(people_genders)):\n    print(\n        \"    - Which characteristic seems important to recognize a '{0}' ?\".format(gender)\n    )\n\n    for characteristic in fmc_computer.get_most_active_features_by_a_classe(\n        classe=gender,\n    ):\n        print(\n            \"        - '{0}' seems important (fmeasure of '{1:.2f}', contrast of '{2:.2f}').\".format(\n                characteristic,\n                fmc_computer.features_fmeasure[characteristic][gender],\n                fmc_computer.features_contrast[characteristic][gender],\n            )\n        )\n
    2. According to remaining characteristics:\n    - Which characteristic seems important to recognize a 'Man' ?\n        - 'Shoes size' seems important (fmeasure of '0.45', contrast of '1.32').\n    - Which characteristic seems important to recognize a 'Woman' ?\n        - 'Hair size' seems important (fmeasure of '0.66', contrast of '1.25').\n

"},{"location":"reference/SUMMARY/","title":"SUMMARY","text":""},{"location":"reference/cognitivefactory/features_maximization_metric/","title":"features_maximization_metric","text":"

cognitivefactory-features-maximization-metric package.

Implementation of Features Maximization Metric, an unbiased metric aimed at estimating the quality of an unsupervised classification.

"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/","title":"fmc","text":""},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric","title":"FeaturesMaximizationMetric","text":"

This class implements the Features Maximization Metric. It's a dataset modelization based on vectors features and data labels: for each couple (feature, classe), it gives a score (called F-Measure) that describes the power of identification and distinction of the feature for this classe.

This metric is computed by applying the following steps:
  1. Compute the Features F-Measure metric (based on Features Recall and Features Predominance metrics).

    (a) The Features Recall FR[f][c] for a given class c and a given feature f is the ratio between the sum of the vectors weights of the feature f for data in class c and the sum of all vectors weights of feature f for all data. It answers the question: \"Can the feature f distinguish the class c from other classes c' ?\"

    (b) The Features Predominance FP[f][c] for a given class c and a given feature f is the ratio between the sum of the vectors weights of the feature f for data in class c and the sum of all vectors weights of all feature f' for data in class c. It answers the question: \"Can the feature f better identify the class c than the other features f' ?\"

    (c) The Features F-Measure FM[f][c] for a given class c and a given feature f is the harmonic mean of the Features Recall (a) and the Features Predominance (b). It answers the question: \"How much information does the feature f contain about the class c ?\"

  2. Compute the Features Selection (based on F-Measure Overall Average comparison).

    (d) The F-Measure Overall Average is the average of Features F-Measure (c) for all classes c and for all features f. It answers the question: \"What are the mean of information contained by features in all classes ?\"

    (e) A feature f is Selected if and only if there exists at least one class c for which the Features F-Measure (c) FM[f][c] is bigger than the F-Measure Overall Average (d). It answers the question: \"What are the features which contain more information than the mean of information in the dataset ?\"

    (f) A Feature f is Deleted if and only if the Features F-Measure (c) FM[f][c] is always lower than the F-Measure Overall Average (d) for each class c. It answers the question: \"What are the features which do not contain more information than the mean of information in the dataset ?\"

  3. Compute the Features Contrast and Features Activation (based on F-Measure Marginal Averages comparison).

    (g) The F-Measure Marginal Averages for a given feature f is the average of Features F-Measure (c) for all classes c and for the given feature f. It answers the question: \"What are the mean of information contained by the feature f in all classes ?\"

    (h) The Features Contrast FC[f][c] for a given class c and a given selected feature f is the ratio between the Features F-Measure (c) FM[f][c] and the F-Measure Marginal Averages (g) for selected feature f put to the power of an Amplification Factor. It answers the question: \"How relevant is the feature f to distinguish the class c ?\"

    (i) A selected Feature f is Active for a given class c if and only if the Features Contrast (h) FC[f][c] is bigger than 1.0. It answers the question: \"For which classes a selected feature f is relevant ?\"

In order to evaluate it according to a reference, a FMC modelization is represented by the Features Activation of its vector features, and a similarity score to the reference is computed, based on common metrics on clustering (homogeneity, completeness, v_measure).

Attributes:

Name Type Description data_vectors csr_matrix

The sparse matrix representing the vector of each data (i.e. data_vectors[d,f] is the weight of data d for feature f).

data_classes List[str]

The list representing the class of each data (i.e. data_classes[d] is the class of data d).

list_of_possible_features List[str]

The list of existing vectors features.

list_of_possible_classes List[str]

The list of existing data classes.

amplification_factor int

The positive integer called \"amplification factor\" aimed at emphasizing the feature contrast. Usually at 1.

features_frecall Dict[str, Dict[str, float]]

The computation of Features Recall (Can the feature f distinguish the class c from other classes c' ?).

features_fpredominance Dict[str, Dict[str, float]]

The computation of Features Predominance (Can the feature f better identify the class c than the other features f' ?).

features_fmeasure Dict[str, Dict[str, float]]

The computation of Features F-Measure (How much information does the feature f contain about the class c ?).

features_overall_average float

The computation of Overall Average of Features F-Measure (What are the mean of information contained by features in all classes ?).

features_selection Dict[str, bool]

The computation of Features Selected (What are the features which contain more information than the mean of information in the dataset ?).

features_marginal_averages Dict[str, float]

The computation of Marginal Averages of Features F-Measure (What are the mean of information contained by the feature f in all classes ?).

features_contrast Dict[str, Dict[str, float]]

The computation of Features Contrast (How relevant is the feature f to distinguish the class c ?).

features_activation Dict[str, Dict[str, bool]]

The computation of Features Activation (For which classes a selected feature f is relevant ?).

Example References Source code in cognitivefactory\\features_maximization_metric\\fmc.py
class FeaturesMaximizationMetric:\n    r\"\"\"\n    This class implements the ***Features Maximization Metric***.\n    It's a dataset modelization based on vectors features and data labels:\n    for each couple `(feature, classe)`, it gives a score (called **F-Measure**) that describe the power of identification and distinction of the feature for this classe.\n\n    This metric is computed by applying the following steps:\n\n        1. Compute the ***Features F-Measure*** metric (based on ***Features Recall*** and ***Features Predominance*** metrics).\n\n            > (a) The ***Features Recall*** `FR[f][c]` for a given class `c` and a given feature `f` is the ratio between\n            > the sum of the vectors weights of the feature `f` for data in class `c`\n            > and the sum of all vectors weights of feature `f` for all data.\n            > It answers the question: \"_Can the feature `f` distinguish the class `c` from other classes `c'` ?_\"\n\n            > (b) The ***Features Predominance*** `FP[f][c]` for a given class `c` and a given feature `f` is the ratio between\n            > the sum of the vectors weights of the feature `f` for data in class `c`\n            > and the sum of all vectors weights of all feature `f'` for data in class `c`.\n            > It answers the question: \"_Can the feature `f` better identify the class `c` than the other features `f'` ?_\"\n\n            > (c) The ***Features F-Measure*** `FM[f][c]` for a given class `c` and a given feature `f` is\n            > the harmonic mean of the ***Features Recall*** (a) and the ***Features Predominance*** (c).\n            > It answers the question: \"_How much information does the feature `f` contain about the class `c` ?_\"\n\n        2. 
Compute the ***Features Selection*** (based on ***F-Measure Overall Average*** comparison).\n\n            > (d) The ***F-Measure Overall Average*** is the average of ***Features F-Measure*** (c) for all classes `c` and for all features `f`.\n            > It answers the question: \"_What are the mean of information contained by features in all classes ?_\"\n\n            > (e) A feature `f` is ***Selected*** if and only if it exist at least one class `c` for which the ***Features F-Measure*** (c) `FM[f][c]` is bigger than the ***F-Measure Overall Average*** (d).\n            > It answers the question: \"_What are the features which contain more information than the mean of information in the dataset ?_\"\n\n            > (f) A Feature `f` is ***Deleted*** if and only if the ***Features F-Measure*** (c) `FM[f][c]` is always lower than the ***F-Measure Overall Average*** (d) for each class `c`.\n            > It answers the question: \"_What are the features which do not contain more information than the mean of information in the dataset ?_\"\n\n        3. 
Compute the ***Features Contrast*** and ***Features Activation*** (based on ***F-Measure Marginal Averages*** comparison).\n\n            > (g) The ***F-Measure Marginal Averages*** for a given feature `f` is the average of ***Features F-Measure*** (c) for all classes `c` and for the given feature `f`.\n            > It answers the question: \"_What are the mean of information contained by the feature `f` in all classes ?_\"\n\n            > (h) The ***Features Contrast*** `FC[f][c]` for a given class `c` and a given selected feature `f` is the ratio between\n            > the ***Features F-Measure*** (c) `FM[f][c]`\n            > and the ***F-Measure Marginal Averages*** (g) for selected feature f\n            > put to the power of an ***Amplification Factor***.\n            > It answers the question: \"_How relevant is the feature `f` to distinguish the class `c` ?_\"\n\n            > (i) A selected Feature `f` is ***Active*** for a given class `c` if and only if the ***Features Contrast*** (h) `FC[f][c]` is bigger than `1.0`.\n            > It answers the question : \"_For which classes a selected feature `f` is relevant ?_\"\n\n    In order to ***evaluate it according to a reference***, a FMC modelization is represented by the Features Activation of its vector features,\n    and a similarity score to the reference is computed, based on common metrics on clustering (homogeneity, completeness, v_measure).\n\n    Attributes:\n        data_vectors (csr_matrix): The sparse matrix representing the vector of each data (i.e. `data_vectors[d,f]` is the weight of data `d` for feature `f`).\n        data_classes (List[str]): The list representing the class of each data (i.e. 
`data_classes[d]` is the class of data `d`).\n        list_of_possible_features (List[str]): The list of existing vectors features.\n        list_of_possible_classes (List[str]):  The list of existing data classes.\n        amplification_factor (int): The positive integer called \"amplification factor\" aimed at emphasize the feature contrast. Usually at `1`.\n        features_frecall (Dict[str, Dict[str, float]]): The computation of *Features Recall* (_Can the feature `f` distinguish the class `c` from other classes `l'` ?_).\n        features_fpredominance (Dict[str, Dict[str, float]]): The computation of *Features Predominance* (_Can the feature `f` better identify the class `c` than the other features `f'` ?_).\n        features_fmeasure (Dict[str, Dict[str, float]]): The computation of *Features F-Measure* (_How much information does the feature `f` contain about the class `c` ?_).\n        features_overall_average (float): The computation of *Overall Average of Features F-Measure* (_What are the mean of information contained by features in all classes ?_).\n        features_selection (Dict[str, bool]): The computation of *Features Selected* (_What are the features which contain more information than the mean of information in the dataset ?_).\n        features_marginal_averages (Dict[str, float]):  The computation of *Marginal Averages of Features F-Measure* (_What are the mean of information contained by the feature `f` in all classes ?_).\n        features_contrast (Dict[str, Dict[str, float]]): The computation of *Features Contrast* (_How important is the feature `f` to distinguish the class `c` ?_).\n        features_activation (Dict[str, Dict[str, bool]]): The computation of *Features Activation* (_For which classes a selected feature `f` is relevant ?_).\n\n    Example:\n        - Basic usecase: \"_What are the physical characteristics that most distinguish men from women ?_\"\n        ```python\n\n        # Problem to solve.\n        print(\">> What 
are the physical characteristics that most distinguish men from women ?\")\n\n        ###\n        ### Python dependencies.\n        ###\n\n        from cognitivefactory.features_maximization_metric.fmc import FeaturesMaximizationMetric\n        from scipy.sparse import csr_matrix\n        from typing import List\n\n        ###\n        ### Data.\n        ###\n\n        # Define people characteristics that will be studied.\n        characteristics_studied: List[str] = [\n            \"Shoes size\",\n            \"Hair size\",\n            \"Nose size\",\n        ]\n\n        # Get people characteristics.\n        people_characteristics: csr_matrix = csr_matrix(\n            [\n                [9, 5, 5],\n                [9, 10, 5],\n                [9, 20, 6],\n                [5, 15, 5],\n                [6, 25, 6],\n                [5, 25, 5],\n            ]\n        )\n\n        # Get people genders.\n        people_genders: List[str] = [\n            \"Man\",\n            \"Man\",\n            \"Man\",\n            \"Woman\",\n            \"Woman\",\n            \"Woman\",\n        ]\n\n        ###\n        ### Feature Maximization Metrics.\n        ###\n\n        # Main computation.\n        fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric(\n            data_vectors=people_characteristics,\n            data_classes=people_genders,\n            list_of_possible_features=characteristics_studied,\n            amplification_factor=1,\n        )\n\n        ###\n        ### Analysis 1: Delete characteristics that aren't relevant.\n        ###\n\n        print(\n            \"\\n\",\n            \"1. 
Which characteristic seems not relevant to distinguish men from women ?\",\n        )\n        for characteristic in characteristics_studied:\n            if not fmc_computer.features_selection[characteristic]:\n                print(\n                    \"    - '{0}' seems not relevant.\".format(characteristic)\n                )\n\n        ###\n        ### Analysis 2: Describe gender by relevant characteristics.\n        ###\n\n        print(\n            \"\\n\",\n            \"2. According to remaining characteristics:\",\n        )\n        for gender in sorted(set(people_genders)):\n            print(\n                \"    - Which characteristic seems important to recognize a '{0}' ?\".format(gender)\n            )\n\n            for characteristic in fmc_computer.get_most_active_features_by_a_classe(\n                classe=gender,\n            ):\n                print(\n                    \"        - '{0}' seems important (fmeasure of '{1:.2f}', contrast of '{2:.2f}').\".format(\n                        characteristic,\n                        fmc_computer.features_fmeasure[characteristic][gender],\n                        fmc_computer.features_contrast[characteristic][gender],\n                    )\n                )\n        ```\n\n    References:\n        - Features Maximization Metric: `Lamirel, J.-C., Cuxac, P., & Hajlaoui, K. (2016). A Novel Approach to Feature Selection Based on Quality Estimation Metrics. In Advances in Knowledge Discovery and Management (pp. 121\u2013140). Springer International Publishing. 
https://doi.org/10.1007/978-3-319-45763-5_7`\n    \"\"\"\n\n    # =========================================================================================\n    # INITIALIZATION\n    # =========================================================================================\n\n    def __init__(\n        self,\n        data_vectors: csr_matrix,\n        data_classes: List[str],\n        list_of_possible_features: List[str],\n        amplification_factor: int = 1,\n        verbose: bool = False,\n    ):\n        \"\"\"\n        The constructor for `FeaturesMaximizationMetric` class.\n        It applies the several steps of ***Feature Maximization***:\n            1. Compute the ***Features F-Measure*** metric (based on ***Features Recall*** and ***Features Predominance*** metrics).\n            2. Compute the ***Features Selection*** (based on ***F-Measure Overall Average*** comparison).\n            3. Compute the ***Features Contrast*** and ***Features Activation*** (based on ***F-Measure Marginal Averages*** comparison).\n\n        Args:\n            data_vectors (scipy.sparse.csr_matrix): A sparse matrix representing the vector of each data (i.e. `data_vectors[d,f]` is the weight of data `d` for feature `f`).\n            data_classes (List[str]): A list representing the class of each data (i.e. `data_classes[d]` is the class of data `d`).\n            list_of_possible_features (List[str]): A list of existing vectors features.\n            amplification_factor (int, optional): A positive integer called \"amplification factor\" aimed at emphasize the feature contrast. Defaults to `1`.\n            verbose (bool): An option to display progress status of computations. 
Defaults to `False`.\n\n        Raises:\n            ValueError: if `data_vectors` and `data_classes` have inconsistent shapes.\n            ValueError: if `data_vectors` and `list_of_possible_features` have inconsistent shapes.\n            ValueError: if `amplification_factor` is not a positive integer.\n        \"\"\"\n\n        ###\n        ### Check parameters.\n        ###\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Check parameters.\")\n\n        # Check data size.\n        if data_vectors.shape[0] != len(data_classes):\n            raise ValueError(\n                \"The vectors `data_vectors` and the list of classes `data_classes` have inconsistent shapes (currently: '{0}' vs '{1}').\".format(\n                    data_vectors.shape[0],\n                    len(data_classes),\n                )\n            )\n\n        # Check features size.\n        if data_vectors.shape[1] != len(list_of_possible_features):\n            raise ValueError(\n                \"The vectors `data_vectors` and the list of features `list_of_possible_features` have inconsistent shapes (currently: '{0}' vs '{1}').\".format(\n                    data_vectors.shape[1],\n                    len(list_of_possible_features),\n                )\n            )\n\n        # Check amplification factor.\n        if (not isinstance(amplification_factor, int)) or amplification_factor < 1:\n            raise ValueError(\n                \"The amplification factor `amplification_factor` has to be a positive integer (currently: '{0}').\".format(\n                    amplification_factor,\n                )\n            )\n\n        ###\n        ### Store parameters.\n        ###\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Store parameters.\")\n\n        # Store data information.\n        
self.data_vectors: csr_matrix = data_vectors\n        self.data_classes: List[str] = data_classes\n        # Store features and classes lists.\n        self.list_of_possible_features: List[str] = list_of_possible_features\n        self.list_of_possible_classes: List[str] = sorted(set(data_classes))\n        # Store amplification factor.\n        self.amplification_factor: int = amplification_factor\n\n        ###\n        ### Compute Features Maximization Metric.\n        ###\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Start computations.\")\n\n        # 1. Compute the *Features F-Measure* metric (based on *Features Recall* and *Features Predominance* metrics).\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features F-Measure.\")\n\n        # Initialize variables.\n        self.features_frecall: Dict[str, Dict[str, float]]\n        self.features_fpredominance: Dict[str, Dict[str, float]]\n        self.features_fmeasure: Dict[str, Dict[str, float]]\n        # Compute variables.\n        self._compute_features_frecall_fpredominance_fmeasure()\n\n        # 2. Perform a *Features Selection* (based on *F-Measure Overall Average* comparison).\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features Selection.\")\n\n        # Initialize variables.\n        self.features_overall_average: float\n        self.features_selection: Dict[str, bool]\n        # Compute variables.\n        self._compute_features_selection()\n\n        # 3. 
Compute the *Features Contrast* and *Features Activation* (based on *F-Measure Marginal Averages* comparison).\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features Contrast.\")\n\n        # Initialize variables.\n        self.features_marginal_averages: Dict[str, float]\n        self.features_contrast: Dict[str, Dict[str, float]]\n        self.features_activation: Dict[str, Dict[str, bool]]\n        # Compute variables.\n        self._compute_features_contrast_and_activation()\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Computations done.\")\n\n    # ==============================================================================\n    # COMPUTE FEATURES F-MEASURE\n    # ==============================================================================\n\n    def _compute_features_frecall_fpredominance_fmeasure(\n        self,\n    ) -> None:\n        \"\"\"\n        Compute:\n            (a) the ***Features Recall*** (cf. `self.features_frecall`),\n            (b) the ***Features Predominance*** (cf. `self.features_fpredominance`), and\n            (c) the ***Features F-Measure*** (cf. 
`self.features_fmeasure`).\n        \"\"\"\n\n        ###\n        ### Temporary computations.\n        ###\n\n        # Temporary variable used to store sums of all vectors weights for a given feature `f` and a given class `c`.\n        # Needed for both Features Recall and Features Predominance computations.\n        sum_by_feature_and_classe: Dict[str, Dict[str, float]] = {\n            feature: {classe: 0.0 for classe in self.list_of_possible_classes}\n            for feature in self.list_of_possible_features\n        }\n\n        # Temporary variable used to store sums of all vectors weights for a given feature `f` and all classes.\n        # Needed for Features Recall computation.\n        sum_by_features: Dict[str, float] = {feature: 0.0 for feature in self.list_of_possible_features}\n\n        # Temporary variable used to store sums of all vectors weights for all features and a given class `c`.\n        # Needed for Features Predominance computation.\n        sum_by_classe: Dict[str, float] = {classe: 0.0 for classe in self.list_of_possible_classes}\n\n        # Index used to get non zero elements in the sparse matrix weights.\n        indices_x, indices_y = self.data_vectors.nonzero()\n\n        # Browse non zero weights in vectors to compute all the needed sums.\n        for index in range(self.data_vectors.nnz):\n            # Get needed information (data, class/ classe, feature, vectors weight)\n            data_index: int = indices_x[index]\n            data_classe: str = self.data_classes[data_index]\n            feature_index: int = indices_y[index]\n            data_feature: str = self.list_of_possible_features[feature_index]\n            weight: float = self.data_vectors[data_index, feature_index]  # TODO: check if np.nan ?\n\n            # Update the several sums.\n            sum_by_feature_and_classe[data_feature][data_classe] += weight\n            sum_by_features[data_feature] += weight\n            sum_by_classe[data_classe] += weight\n\n     
   ###\n        ### Features F-Measure computation.\n        ###\n\n        # Compute Features Recall.\n        self.features_frecall = {\n            feature: {\n                classe: (\n                    0.0  # TODO: set to np.nan ?\n                    if sum_by_features[feature] == 0\n                    else sum_by_feature_and_classe[feature][classe] / sum_by_features[feature]\n                )\n                for classe in self.list_of_possible_classes\n            }\n            for feature in self.list_of_possible_features\n        }\n\n        # Compute Features Predominance.\n        self.features_fpredominance = {\n            feature: {\n                classe: (\n                    0.0  # TODO: set to np.nan ?\n                    if sum_by_classe[classe] == 0\n                    else sum_by_feature_and_classe[feature][classe] / sum_by_classe[classe]\n                )\n                for classe in self.list_of_possible_classes\n            }\n            for feature in self.list_of_possible_features\n        }\n\n        # Compute Features F-Measure.\n        self.features_fmeasure = {\n            feature: {\n                classe: (\n                    0.0  # TODO: set to np.nan ?\n                    if (self.features_frecall[feature][classe] + self.features_fpredominance[feature][classe] == 0)\n                    else (\n                        2\n                        * (self.features_frecall[feature][classe] * self.features_fpredominance[feature][classe])\n                        / (self.features_frecall[feature][classe] + self.features_fpredominance[feature][classe])\n                    )\n                )\n                for classe in self.list_of_possible_classes\n            }\n            for feature in self.list_of_possible_features\n        }\n\n    # =============================================================================================\n    # COMPUTE FEATURES SELECTION\n    # 
=============================================================================================\n\n    def _compute_features_selection(\n        self,\n    ) -> None:\n        \"\"\"\n        Compute:\n            (d) the ***F-Measure Overall Average*** (cf. `self.features_overall_average`), and\n            (e) the ***Features Selected*** (cf. `self.features_selection`).\n        \"\"\"\n\n        ###\n        ### Features F-Measure Overall Average computation.\n        ###\n\n        # Temporary variable used to store the overall sum in order to compute the overall average of Features F-Measure.\n        overall_sum: float = 0.0\n        nb_overall: int = 0\n\n        # For each feature...\n        for feature1 in self.list_of_possible_features:\n            # For each classe...\n            for classe1 in self.list_of_possible_classes:\n                # Update the overall sum and count.\n                overall_sum += self.features_fmeasure[feature1][classe1]\n                nb_overall += 1\n\n        # Compute the overall average of Features F-Measure.\n        self.features_overall_average = 0.0 if nb_overall == 0 else overall_sum / nb_overall  # TODO: set to np.nan ?\n\n        ###\n        ### Features Selection computation.\n        ###\n\n        # Temporary variable used store the selected features.\n        self.features_selection = {}\n\n        # Browse features to determine the selected ones.\n        for feature2 in self.list_of_possible_features:\n            # Set default state of selection.\n            self.features_selection[feature2] = False\n\n            # For each feature, browse class to find one for which the Features F-Measure is bigger than the overall average.\n            for classe2 in self.list_of_possible_classes:\n                # Check that the Feature F-Measure is bigger than the overall average.\n                if self.features_fmeasure[feature2][classe2] > self.features_overall_average:\n                    # Approve the 
selection and then break the loop.\n                    self.features_selection[feature2] = True\n                    break\n\n    # =============================================================================================\n    # COMPUTE FEATURES CONSTRAST AND ACTIVATION\n    # =============================================================================================\n\n    def _compute_features_contrast_and_activation(\n        self,\n    ) -> None:\n        \"\"\"\n        Compute:\n            (g) The ***F-Measure Marginal Averages*** (cf. `self.features_marginal_averages`), and\n            (h) The ***Features Contrast*** (cf. `self.features_contrast`).\n            (i) the ***Features Activation*** (cf. `self.features_activation`).\n        \"\"\"\n\n        ###\n        ### Features F-Measure Marginal computation.\n        ###\n\n        # Initialize the marginal average of Features F-Measure.\n        self.features_marginal_averages = {}\n\n        # Browse features to compute the averages.\n        for feature1 in self.list_of_possible_features:\n            # Temporary variable used to store the marginal sum in order to compute the marginal average of Features F-Measure over the current feature.\n            sum_marginal: float = 0.0\n            nb_marginal: int = 0\n\n            # Update the marginal sum of Features F-Measure over the current feature.\n            for classe1 in self.list_of_possible_classes:\n                sum_marginal += self.features_fmeasure[feature1][classe1]\n                nb_marginal += 1\n\n            # Compute the marginal averages of Features F-Measure over the current feature.\n            self.features_marginal_averages[feature1] = (\n                0.0 if nb_marginal == 0 else sum_marginal / nb_marginal\n            )  # TODO: set to np.nan ?\n\n        ###\n        ### Features Contrast computation.\n        ###\n\n        # Temporary variable used to store the contrast of a feature for a class.\n        
self.features_contrast = {\n            feature2: {\n                classe2: (\n                    0.0  # TODO: set to np.nan ?\n                    if (self.features_selection[feature2] is False or self.features_marginal_averages[feature2] == 0)\n                    else (self.features_fmeasure[feature2][classe2] / self.features_marginal_averages[feature2])\n                    ** self.amplification_factor\n                )\n                for classe2 in self.list_of_possible_classes\n            }\n            for feature2 in self.list_of_possible_features\n        }\n\n        ###\n        ### Features Activation computation.\n        ###\n\n        # Temporary variable used store the features activation.\n        self.features_activation = {\n            feature3: {\n                classe3: bool(\n                    self.features_selection[feature3] is True and self.features_contrast[feature3][classe3] > 1\n                )\n                for classe3 in self.list_of_possible_classes\n            }\n            for feature3 in self.list_of_possible_features\n        }\n\n    # =============================================================================================\n    # GET: MOST ACTIVATED CLASSES FOR A FEATURE\n    # =============================================================================================\n\n    def get_most_activated_classes_by_a_feature(\n        self,\n        feature: str,\n        activation_only: bool = True,\n        sort_by: Literal[\"contrast\", \"fmeasure\"] = \"contrast\",\n        max_number: Optional[int] = None,\n    ) -> List[str]:\n        \"\"\"\n        Get the list of classes for which the requested feature is the most relevant.\n\n        Args:\n            feature (str): The feature to analyze.\n            sort_by (Literal[\"contrast\", \"fmeasure\"]): The sort criterion for the list of classes. Defaults to `\"contrast\"`.\n            activation_only (bool): The option to get only activated classes. 
Defaults to `True`.\n            max_number (Optional[int]): The maximum number of classes to return. Defaults to `None`.\n\n        Raises:\n            ValueError: if `feature` is not in `self.list_of_possible_features`.\n            ValueError: if `sort_by` is not in `{\"contrast\", \"fmeasure\"}`.\n\n        Returns:\n            List[str]: The list of classes for which the requested feature is the most relevant.\n        \"\"\"\n\n        ###\n        ### Check parameters.\n        ###\n\n        # Check parameter `feature`.\n        if feature not in self.list_of_possible_features:\n            raise ValueError(\n                \"The requested feature `'{0}'` is unknown.\".format(\n                    feature,\n                )\n            )\n\n        # Check parameter `sort_by`.\n        if sort_by not in {\"contrast\", \"fmeasure\"}:\n            raise ValueError(\n                \"The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').\".format(\n                    sort_by,\n                )\n            )\n\n        ###\n        ### Compute the requested list.\n        ###\n\n        # Define list of possible results (classe + contrast/fmeasure).\n        list_of_possible_results: List[Tuple[float, str]] = [\n            (\n                # 0: the metric: contrast or fmeasure.\n                (\n                    self.features_contrast[feature][classe]\n                    if sort_by == \"contrast\"\n                    else self.features_fmeasure[feature][classe]\n                ),\n                # 1: the classe.\n                classe,\n            )\n            for classe in self.list_of_possible_classes\n            if (activation_only is False or self.features_activation[feature][classe] is True)\n        ]\n\n        # Return top classes sorted by requested metric.\n        return [\n            activated_classe\n            for _, activated_classe in sorted(\n              
  list_of_possible_results,\n                reverse=True,\n            )\n        ][:max_number]\n\n    # =============================================================================================\n    # GET: MOST ACTIVATED FEATURES FOR A CLASSE\n    # =============================================================================================\n\n    def get_most_active_features_by_a_classe(\n        self,\n        classe: str,\n        activation_only: bool = True,\n        sort_by: Literal[\"contrast\", \"fmeasure\"] = \"contrast\",\n        max_number: Optional[int] = None,\n    ) -> List[str]:\n        \"\"\"\n        Get the list of features which are the most relevant for the requested classe.\n\n        Args:\n            classe (str): The classe to analyze.\n            sort_by (Literal[\"contrast\", \"fmeasure\"]): The sort criterion for the list of features. Defaults to `\"contrast\"`.\n            activation_only (bool): The option to get only active features. Defaults to `True`.\n            max_number (Optional[int]): The maximum number of features to return. 
Defaults to `None`.\n\n        Raises:\n            ValueError: if `classe` is not in `self.list_of_possible_classes`.\n            ValueError: if `sort_by` is not in `{\"contrast\", \"fmeasure\"}`.\n\n        Returns:\n            List[str]: The list of features which are the most relevant for the requested classe.\n        \"\"\"\n\n        ###\n        ### Check parameters.\n        ###\n\n        # Check parameter `feature`.\n        if classe not in self.list_of_possible_classes:\n            raise ValueError(\n                \"The requested classe `'{0}'` is unknown.\".format(\n                    classe,\n                )\n            )\n\n        # Check parameter `sort_by`.\n        if sort_by not in {\"contrast\", \"fmeasure\"}:\n            raise ValueError(\n                \"The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').\".format(\n                    sort_by,\n                )\n            )\n\n        ###\n        ### Compute the requested list.\n        ###\n\n        # Define list of possible results (feature + contrast/fmeasure).\n        list_of_possible_results: List[Tuple[float, str]] = [\n            (\n                # 0: the metric: contrast or fmeasure.\n                (\n                    self.features_contrast[feature][classe]\n                    if sort_by == \"contrast\"\n                    else self.features_fmeasure[feature][classe]\n                ),\n                # 1: the feature.\n                feature,\n            )\n            for feature in self.list_of_possible_features\n            if (activation_only is False or self.features_activation[feature][classe] is True)\n        ]\n\n        # Return top features sorted by requested metric.\n        return [\n            active_feature\n            for _, active_feature in sorted(\n                list_of_possible_results,\n                reverse=True,\n            )\n        ][:max_number]\n\n    
# =============================================================================================\n    # COMPARE: WITH AN OTHER FMC\n    # =============================================================================================\n\n    def compare(\n        self,\n        fmc_reference: \"FeaturesMaximizationMetric\",\n        rounded: Optional[int] = None,\n    ) -> Tuple[float, float, float]:\n        \"\"\"\n        Gives a similarity score in agreement with a reference FMC modelization.\n        The similarity score computation is based on common metrics on clustering (homogeneity, completeness, v_measure),\n        where each FMC modelization is represented by the Features Activation of their vector features.\n        In order to be able to compute these similarity, data classes can be different, but vector features must be the same in both FMC modelization.\n\n\n        Args:\n            fmc_reference (FeaturesMaximizationMetric): Another Features Maximization modelization used as reference for the comparison.\n            rounded (Optional[int]): The option to round the result to counter log approximation. 
Defaults to `None`.\n\n        Raises:\n            ValueError: if `list_of_possible_features` are different.\n\n        Returns:\n            Tuple[float, float, float]: Computation of homogeneity, completeness and v_measure.\n        \"\"\"\n\n        ###\n        ### Check parameters.\n        ###\n\n        # Check list_of_possible_features equality.\n        if self.list_of_possible_features != fmc_reference.list_of_possible_features:\n            list_of_in_excess_features: List[str] = [\n                feature\n                for feature in self.list_of_possible_features\n                if feature not in fmc_reference.list_of_possible_features\n            ]\n            list_of_missing_features: List[str] = [\n                feature\n                for feature in fmc_reference.list_of_possible_features\n                if feature not in self.list_of_possible_features\n            ]\n            raise ValueError(\n                \"The list of features `list_of_possible_features` must be the same for both FMC modelization. 
+: {0}, -: {1}\".format(\n                    str(list_of_in_excess_features), str(list_of_missing_features)\n                )\n            )\n\n        ###\n        ### Format Features Activation as classification label of features.\n        ###\n\n        # Initialize\n        list_of_self_features_activations: List[str] = []\n        list_of_reference_features_activations: List[str] = []\n\n        # Define default value if feature not activated.\n        # NB: we can't set a fixed value in case this value is in the list of possible classes...\n        # Example: can't set `\"\"` or `\"None\"` in case self.list_of_possible_classes==[\"A\", \"\"] and fmc_reference.list_of_possible_classes==[\"B\", \"None\"].\n        default_label_if_feature_not_activated: str = \"NOT_ACTIVATED:{possible_classe}\".format(\n            possible_classe=self.list_of_possible_classes + fmc_reference.list_of_possible_classes\n        )\n\n        # Browse activated features to\u00e0 compare Features Activation.\n        for feature in fmc_reference.list_of_possible_features:\n            # Get Features Activation.\n            list_of_most_activated_classes_for_feature_in_self: List[\n                str\n            ] = self.get_most_activated_classes_by_a_feature(\n                feature=feature,\n            )\n            list_of_most_activated_classes_for_feature_in_reference: List[\n                str\n            ] = fmc_reference.get_most_activated_classes_by_a_feature(\n                feature=feature,\n            )\n\n            # TODO: Skip if feature is not activated in both modelization.\n            if (\n                len(list_of_most_activated_classes_for_feature_in_self) != 1\n                and len(list_of_most_activated_classes_for_feature_in_reference) != 1\n            ):\n                continue\n\n            # Format Feature Activation as classification label. 
Set to `-1` if not activated.\n            list_of_self_features_activations.append(\n                list_of_most_activated_classes_for_feature_in_self[0]\n                if len(list_of_most_activated_classes_for_feature_in_self) == 1\n                else default_label_if_feature_not_activated\n            )\n            list_of_reference_features_activations.append(\n                list_of_most_activated_classes_for_feature_in_reference[0]\n                if len(list_of_most_activated_classes_for_feature_in_reference) == 1\n                else default_label_if_feature_not_activated\n            )\n\n        ###\n        ### Compute FMC modelizations similarity.\n        ###\n\n        # Compute standard metrics for clustering.\n        homogeneity: float\n        completeness: float\n        v_measure: float\n        homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(\n            labels_pred=list_of_self_features_activations,\n            labels_true=list_of_reference_features_activations,\n        )\n\n        # Round the results.\n        if rounded is not None:\n            homogeneity = round(homogeneity, rounded)\n            completeness = round(completeness, rounded)\n            v_measure = round(v_measure, rounded)\n\n        # Return values.\n        return homogeneity, completeness, v_measure\n
"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric.__init__","title":"__init__(data_vectors, data_classes, list_of_possible_features, amplification_factor=1, verbose=False)","text":"

The constructor for FeaturesMaximizationMetric class. It applies the several steps of Feature Maximization: 1. Compute the Features F-Measure metric (based on Features Recall and Features Predominance metrics). 2. Compute the Features Selection (based on F-Measure Overall Average comparison). 3. Compute the Features Contrast and Features Activation (based on F-Measure Marginal Averages comparison).

Parameters:

Name Type Description Default data_vectors scipy.sparse.csr_matrix

A sparse matrix representing the vector of each data (i.e. data_vectors[d,f] is the weight of data d for feature f).

required data_classes List[str]

A list representing the class of each data (i.e. data_classes[d] is the class of data d).

required list_of_possible_features List[str]

A list of existing vectors features.

required amplification_factor int

A positive integer called \"amplification factor\" aimed at emphasize the feature contrast. Defaults to 1.

1 verbose bool

An option to display progress status of computations. Defaults to False.

False

Raises:

Type Description ValueError

if data_vectors and data_classes have inconsistent shapes.

ValueError

if data_vectors and list_of_possible_features have inconsistent shapes.

ValueError

if amplification_factor is not a positive integer.

Source code in cognitivefactory\\features_maximization_metric\\fmc.py
def __init__(\n    self,\n    data_vectors: csr_matrix,\n    data_classes: List[str],\n    list_of_possible_features: List[str],\n    amplification_factor: int = 1,\n    verbose: bool = False,\n):\n    \"\"\"\n    The constructor for `FeaturesMaximizationMetric` class.\n    It applies the several steps of ***Feature Maximization***:\n        1. Compute the ***Features F-Measure*** metric (based on ***Features Recall*** and ***Features Predominance*** metrics).\n        2. Compute the ***Features Selection*** (based on ***F-Measure Overall Average*** comparison).\n        3. Compute the ***Features Contrast*** and ***Features Activation*** (based on ***F-Measure Marginal Averages*** comparison).\n\n    Args:\n        data_vectors (scipy.sparse.csr_matrix): A sparse matrix representing the vector of each data (i.e. `data_vectors[d,f]` is the weight of data `d` for feature `f`).\n        data_classes (List[str]): A list representing the class of each data (i.e. `data_classes[d]` is the class of data `d`).\n        list_of_possible_features (List[str]): A list of existing vectors features.\n        amplification_factor (int, optional): A positive integer called \"amplification factor\" aimed at emphasize the feature contrast. Defaults to `1`.\n        verbose (bool): An option to display progress status of computations. 
Defaults to `False`.\n\n    Raises:\n        ValueError: if `data_vectors` and `data_classes` have inconsistent shapes.\n        ValueError: if `data_vectors` and `list_of_possible_features` have inconsistent shapes.\n        ValueError: if `amplification_factor` is not a positive integer.\n    \"\"\"\n\n    ###\n    ### Check parameters.\n    ###\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Check parameters.\")\n\n    # Check data size.\n    if data_vectors.shape[0] != len(data_classes):\n        raise ValueError(\n            \"The vectors `data_vectors` and the list of classes `data_classes` have inconsistent shapes (currently: '{0}' vs '{1}').\".format(\n                data_vectors.shape[0],\n                len(data_classes),\n            )\n        )\n\n    # Check features size.\n    if data_vectors.shape[1] != len(list_of_possible_features):\n        raise ValueError(\n            \"The vectors `data_vectors` and the list of features `list_of_possible_features` have inconsistent shapes (currently: '{0}' vs '{1}').\".format(\n                data_vectors.shape[1],\n                len(list_of_possible_features),\n            )\n        )\n\n    # Check amplification factor.\n    if (not isinstance(amplification_factor, int)) or amplification_factor < 1:\n        raise ValueError(\n            \"The amplification factor `amplification_factor` has to be a positive integer (currently: '{0}').\".format(\n                amplification_factor,\n            )\n        )\n\n    ###\n    ### Store parameters.\n    ###\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Store parameters.\")\n\n    # Store data information.\n    self.data_vectors: csr_matrix = data_vectors\n    self.data_classes: List[str] = data_classes\n    # Store features and classes lists.\n    self.list_of_possible_features: 
List[str] = list_of_possible_features\n    self.list_of_possible_classes: List[str] = sorted(set(data_classes))\n    # Store amplification factor.\n    self.amplification_factor: int = amplification_factor\n\n    ###\n    ### Compute Features Maximization Metric.\n    ###\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Start computations.\")\n\n    # 1. Compute the *Features F-Measure* metric (based on *Features Recall* and *Features Predominance* metrics).\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features F-Measure.\")\n\n    # Initialize variables.\n    self.features_frecall: Dict[str, Dict[str, float]]\n    self.features_fpredominance: Dict[str, Dict[str, float]]\n    self.features_fmeasure: Dict[str, Dict[str, float]]\n    # Compute variables.\n    self._compute_features_frecall_fpredominance_fmeasure()\n\n    # 2. Perform a *Features Selection* (based on *F-Measure Overall Average* comparison).\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features Selection.\")\n\n    # Initialize variables.\n    self.features_overall_average: float\n    self.features_selection: Dict[str, bool]\n    # Compute variables.\n    self._compute_features_selection()\n\n    # 3. 
Compute the *Features Contrast* and *Features Activation* (based on *F-Measure Marginal Averages* comparison).\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features Contrast.\")\n\n    # Initialize variables.\n    self.features_marginal_averages: Dict[str, float]\n    self.features_contrast: Dict[str, Dict[str, float]]\n    self.features_activation: Dict[str, Dict[str, bool]]\n    # Compute variables.\n    self._compute_features_contrast_and_activation()\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Computations done.\")\n
"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric.compare","title":"compare(fmc_reference, rounded=None)","text":"

Gives a similarity score in agreement with a reference FMC modelization. The similarity score computation is based on common metrics on clustering (homogeneity, completeness, v_measure), where each FMC modelization is represented by the Features Activation of their vector features. In order to be able to compute these similarity, data classes can be different, but vector features must be the same in both FMC modelization.

Parameters:

Name Type Description Default fmc_reference FeaturesMaximizationMetric

Another Features Maximization modelization used as reference for the comparison.

required rounded Optional[int]

The option to round the result to counter log approximation. Defaults to None.

None

Raises:

Type Description ValueError

if list_of_possible_features are different.

Returns:

Type Description Tuple[float, float, float]

Tuple[float, float, float]: Computation of homogeneity, completeness and v_measure.

Source code in cognitivefactory\\features_maximization_metric\\fmc.py
def compare(\n    self,\n    fmc_reference: \"FeaturesMaximizationMetric\",\n    rounded: Optional[int] = None,\n) -> Tuple[float, float, float]:\n    \"\"\"\n    Gives a similarity score in agreement with a reference FMC modelization.\n    The similarity score computation is based on common metrics on clustering (homogeneity, completeness, v_measure),\n    where each FMC modelization is represented by the Features Activation of their vector features.\n    In order to be able to compute these similarity, data classes can be different, but vector features must be the same in both FMC modelization.\n\n\n    Args:\n        fmc_reference (FeaturesMaximizationMetric): Another Features Maximization modelization used as reference for the comparison.\n        rounded (Optional[int]): The option to round the result to counter log approximation. Defaults to `None`.\n\n    Raises:\n        ValueError: if `list_of_possible_features` are different.\n\n    Returns:\n        Tuple[float, float, float]: Computation of homogeneity, completeness and v_measure.\n    \"\"\"\n\n    ###\n    ### Check parameters.\n    ###\n\n    # Check list_of_possible_features equality.\n    if self.list_of_possible_features != fmc_reference.list_of_possible_features:\n        list_of_in_excess_features: List[str] = [\n            feature\n            for feature in self.list_of_possible_features\n            if feature not in fmc_reference.list_of_possible_features\n        ]\n        list_of_missing_features: List[str] = [\n            feature\n            for feature in fmc_reference.list_of_possible_features\n            if feature not in self.list_of_possible_features\n        ]\n        raise ValueError(\n            \"The list of features `list_of_possible_features` must be the same for both FMC modelization. 
+: {0}, -: {1}\".format(\n                str(list_of_in_excess_features), str(list_of_missing_features)\n            )\n        )\n\n    ###\n    ### Format Features Activation as classification label of features.\n    ###\n\n    # Initialize\n    list_of_self_features_activations: List[str] = []\n    list_of_reference_features_activations: List[str] = []\n\n    # Define default value if feature not activated.\n    # NB: we can't set a fixed value in case this value is in the list of possible classes...\n    # Example: can't set `\"\"` or `\"None\"` in case self.list_of_possible_classes==[\"A\", \"\"] and fmc_reference.list_of_possible_classes==[\"B\", \"None\"].\n    default_label_if_feature_not_activated: str = \"NOT_ACTIVATED:{possible_classe}\".format(\n        possible_classe=self.list_of_possible_classes + fmc_reference.list_of_possible_classes\n    )\n\n    # Browse activated features to\u00e0 compare Features Activation.\n    for feature in fmc_reference.list_of_possible_features:\n        # Get Features Activation.\n        list_of_most_activated_classes_for_feature_in_self: List[\n            str\n        ] = self.get_most_activated_classes_by_a_feature(\n            feature=feature,\n        )\n        list_of_most_activated_classes_for_feature_in_reference: List[\n            str\n        ] = fmc_reference.get_most_activated_classes_by_a_feature(\n            feature=feature,\n        )\n\n        # TODO: Skip if feature is not activated in both modelization.\n        if (\n            len(list_of_most_activated_classes_for_feature_in_self) != 1\n            and len(list_of_most_activated_classes_for_feature_in_reference) != 1\n        ):\n            continue\n\n        # Format Feature Activation as classification label. 
Set to `-1` if not activated.\n        list_of_self_features_activations.append(\n            list_of_most_activated_classes_for_feature_in_self[0]\n            if len(list_of_most_activated_classes_for_feature_in_self) == 1\n            else default_label_if_feature_not_activated\n        )\n        list_of_reference_features_activations.append(\n            list_of_most_activated_classes_for_feature_in_reference[0]\n            if len(list_of_most_activated_classes_for_feature_in_reference) == 1\n            else default_label_if_feature_not_activated\n        )\n\n    ###\n    ### Compute FMC modelizations similarity.\n    ###\n\n    # Compute standard metrics for clustering.\n    homogeneity: float\n    completeness: float\n    v_measure: float\n    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(\n        labels_pred=list_of_self_features_activations,\n        labels_true=list_of_reference_features_activations,\n    )\n\n    # Round the results.\n    if rounded is not None:\n        homogeneity = round(homogeneity, rounded)\n        completeness = round(completeness, rounded)\n        v_measure = round(v_measure, rounded)\n\n    # Return values.\n    return homogeneity, completeness, v_measure\n
"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric.get_most_activated_classes_by_a_feature","title":"get_most_activated_classes_by_a_feature(feature, activation_only=True, sort_by='contrast', max_number=None)","text":"

Get the list of classes for which the requested feature is the most relevant.

Parameters:

Name Type Description Default feature str

The feature to analyze.

required sort_by Literal['contrast', 'fmeasure']

The sort criterion for the list of classes. Defaults to \"contrast\".

'contrast' activation_only bool

The option to get only activated classes. Defaults to True.

True max_number Optional[int]

The maximum number of classes to return. Defaults to None.

None

Raises:

Type Description ValueError

if feature is not in self.list_of_possible_features.

ValueError

if sort_by is not in {\"contrast\", \"fmeasure\"}.

Returns:

Type Description List[str]

List[str]: The list of classes for which the requested feature is the most relevant.

Source code in cognitivefactory\\features_maximization_metric\\fmc.py
def get_most_activated_classes_by_a_feature(\n    self,\n    feature: str,\n    activation_only: bool = True,\n    sort_by: Literal[\"contrast\", \"fmeasure\"] = \"contrast\",\n    max_number: Optional[int] = None,\n) -> List[str]:\n    \"\"\"\n    Get the list of classes for which the requested feature is the most relevant.\n\n    Args:\n        feature (str): The feature to analyze.\n        sort_by (Literal[\"contrast\", \"fmeasure\"]): The sort criterion for the list of classes. Defaults to `\"contrast\"`.\n        activation_only (bool): The option to get only activated classes. Defaults to `True`.\n        max_number (Optional[int]): The maximum number of classes to return. Defaults to `None`.\n\n    Raises:\n        ValueError: if `feature` is not in `self.list_of_possible_features`.\n        ValueError: if `sort_by` is not in `{\"contrast\", \"fmeasure\"}`.\n\n    Returns:\n        List[str]: The list of classes for which the requested feature is the most relevant.\n    \"\"\"\n\n    ###\n    ### Check parameters.\n    ###\n\n    # Check parameter `feature`.\n    if feature not in self.list_of_possible_features:\n        raise ValueError(\n            \"The requested feature `'{0}'` is unknown.\".format(\n                feature,\n            )\n        )\n\n    # Check parameter `sort_by`.\n    if sort_by not in {\"contrast\", \"fmeasure\"}:\n        raise ValueError(\n            \"The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').\".format(\n                sort_by,\n            )\n        )\n\n    ###\n    ### Compute the requested list.\n    ###\n\n    # Define list of possible results (classe + contrast/fmeasure).\n    list_of_possible_results: List[Tuple[float, str]] = [\n        (\n            # 0: the metric: contrast or fmeasure.\n            (\n                self.features_contrast[feature][classe]\n                if sort_by == \"contrast\"\n                else 
self.features_fmeasure[feature][classe]\n            ),\n            # 1: the classe.\n            classe,\n        )\n        for classe in self.list_of_possible_classes\n        if (activation_only is False or self.features_activation[feature][classe] is True)\n    ]\n\n    # Return top classes sorted by requested metric.\n    return [\n        activated_classe\n        for _, activated_classe in sorted(\n            list_of_possible_results,\n            reverse=True,\n        )\n    ][:max_number]\n
"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric.get_most_active_features_by_a_classe","title":"get_most_active_features_by_a_classe(classe, activation_only=True, sort_by='contrast', max_number=None)","text":"

Get the list of features which are the most relevant for the requested classe.

Parameters:

Name Type Description Default classe str

The classe to analyze.

required sort_by Literal['contrast', 'fmeasure']

The sort criterion for the list of features. Defaults to \"contrast\".

'contrast' activation_only bool

The option to get only active features. Defaults to True.

True max_number Optional[int]

The maximum number of features to return. Defaults to None.

None

Raises:

Type Description ValueError

if classe is not in self.list_of_possible_classes.

ValueError

if sort_by is not in {\"contrast\", \"fmeasure\"}.

Returns:

Type Description List[str]

List[str]: The list of features which are the most relevant for the requested classe.

Source code in cognitivefactory\\features_maximization_metric\\fmc.py
def get_most_active_features_by_a_classe(\n    self,\n    classe: str,\n    activation_only: bool = True,\n    sort_by: Literal[\"contrast\", \"fmeasure\"] = \"contrast\",\n    max_number: Optional[int] = None,\n) -> List[str]:\n    \"\"\"\n    Get the list of features which are the most relevant for the requested classe.\n\n    Args:\n        classe (str): The classe to analyze.\n        sort_by (Literal[\"contrast\", \"fmeasure\"]): The sort criterion for the list of features. Defaults to `\"contrast\"`.\n        activation_only (bool): The option to get only active features. Defaults to `True`.\n        max_number (Optional[int]): The maximum number of features to return. Defaults to `None`.\n\n    Raises:\n        ValueError: if `classe` is not in `self.list_of_possible_classes`.\n        ValueError: if `sort_by` is not in `{\"contrast\", \"fmeasure\"}`.\n\n    Returns:\n        List[str]: The list of features which are the most relevant for the requested classe.\n    \"\"\"\n\n    ###\n    ### Check parameters.\n    ###\n\n    # Check parameter `feature`.\n    if classe not in self.list_of_possible_classes:\n        raise ValueError(\n            \"The requested classe `'{0}'` is unknown.\".format(\n                classe,\n            )\n        )\n\n    # Check parameter `sort_by`.\n    if sort_by not in {\"contrast\", \"fmeasure\"}:\n        raise ValueError(\n            \"The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').\".format(\n                sort_by,\n            )\n        )\n\n    ###\n    ### Compute the requested list.\n    ###\n\n    # Define list of possible results (feature + contrast/fmeasure).\n    list_of_possible_results: List[Tuple[float, str]] = [\n        (\n            # 0: the metric: contrast or fmeasure.\n            (\n                self.features_contrast[feature][classe]\n                if sort_by == \"contrast\"\n                else 
self.features_fmeasure[feature][classe]\n            ),\n            # 1: the feature.\n            feature,\n        )\n        for feature in self.list_of_possible_features\n        if (activation_only is False or self.features_activation[feature][classe] is True)\n    ]\n\n    # Return top features sorted by requested metric.\n    return [\n        active_feature\n        for _, active_feature in sorted(\n            list_of_possible_results,\n            reverse=True,\n        )\n    ][:max_number]\n
"},{"location":"coverage/","title":"Coverage report","text":""}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Features Maximization Metric","text":"

Implementation of Features Maximization Metric, an unbiased metric aimed at estimating the quality of an unsupervised classification.

"},{"location":"#quick-description","title":"Quick description","text":"

Features Maximization (FMC) is a features selection method described in Lamirel, J.-C., Cuxac, P., & Hajlaoui, K. (2016). A Novel Approach to Feature Selection Based on Quality Estimation Metrics. In Advances in Knowledge Discovery and Management (pp. 121\u2013140). Springer International Publishing. https://doi.org/10.1007/978-3-319-45763-5_7.

This metric is computed by applying the following steps:

  1. Compute the Features F-Measure metric (based on Features Recall and Features Predominance metrics).

    (a) The Features Recall FR[f][c] for a given class c and a given feature f is the ratio between the sum of the vectors weights of the feature f for data in class c and the sum of all vectors weights of feature f for all data. It answers the question: \"Can the feature f distinguish the class c from other classes c' ?\"

    (b) The Features Predominance FP[f][c] for a given class c and a given feature f is the ratio between the sum of the vectors weights of the feature f for data in class c and the sum of all vectors weights of all feature f' for data in class c. It answers the question: \"Can the feature f better identify the class c than the other features f' ?\"

    (c) The Features F-Measure FM[f][c] for a given class c and a given feature f is the harmonic mean of the Features Recall (a) and the Features Predominance (b). It answers the question: \"How much information does the feature f contain about the class c ?\"

  2. Compute the Features Selection (based on F-Measure Overall Average comparison).

    (d) The F-Measure Overall Average is the average of Features F-Measure (c) for all classes c and for all features f. It answers the question: \"What are the mean of information contained by features in all classes ?\"

    (e) A feature f is Selected if and only if it exist at least one class c for which the Features F-Measure (c) FM[f][c] is bigger than the F-Measure Overall Average (d). It answers the question: \"What are the features which contain more information than the mean of information in the dataset ?\"

    (f) A Feature f is Deleted if and only if the Features F-Measure (c) FM[f][c] is always lower than the F-Measure Overall Average (d) for each class c. It answers the question: \"What are the features which do not contain more information than the mean of information in the dataset ?\"

  3. Compute the Features Contrast and Features Activation (based on F-Measure Marginal Averages comparison).

    (g) The F-Measure Marginal Averages for a given feature f is the average of Features F-Measure (c) for all classes c and for the given feature f. It answers the question: \"What are the mean of information contained by the feature f in all classes ?\"

    (h) The Features Contrast FC[f][c] for a given class c and a given selected feature f is the ratio between the Features F-Measure (c) FM[f][c] and the F-Measure Marginal Averages (g) for selected feature f put to the power of an Amplification Factor. It answers the question: \"How relevant is the feature f to distinguish the class c ?\"

    (i) A selected Feature f is Active for a given class c if and only if the Features Contrast (h) FC[f][c] is bigger than 1.0. It answers the question : \"For which classes a selected feature f is relevant ?\"

This metric is an efficient method to:

"},{"location":"#documentation","title":"Documentation","text":""},{"location":"#installation","title":"Installation","text":"

Features Maximization Metric requires Python 3.8 or above.

To install with pip:

# install package\npython3 -m pip install cognitivefactory-features-maximization-metric\n

To install with pipx:

# install pipx\npython3 -m pip install --user pipx\n\n# install package\npipx install --python python3 cognitivefactory-features-maximization-metric\n
"},{"location":"#development","title":"Development","text":"

To work on this project or contribute to it, please read:

"},{"location":"#references","title":"References","text":""},{"location":"#how-to-cite","title":"How to cite","text":"

Schild, E. (2023). cognitivefactory/features-maximization-metric. Zenodo. https://doi.org/10.5281/zenodo.7646382.

"},{"location":"changelog/","title":"Changelog","text":"

All notable changes to this project will be documented in this file.

The format is based on Keep a Changelog and this project adheres to Semantic Versioning.

"},{"location":"changelog/#100-2023-11-14","title":"1.0.0 - 2023-11-14","text":"

Compare with 0.1.1

"},{"location":"changelog/#build","title":"Build","text":""},{"location":"changelog/#code-refactoring","title":"Code Refactoring","text":""},{"location":"changelog/#011-2023-02-16","title":"0.1.1 - 2023-02-16","text":"

Compare with 0.1.0

"},{"location":"changelog/#build_1","title":"Build","text":""},{"location":"changelog/#010-2023-02-16","title":"0.1.0 - 2023-02-16","text":"

Compare with first commit

"},{"location":"changelog/#build_2","title":"Build","text":""},{"location":"changelog/#features","title":"Features","text":""},{"location":"code_of_conduct/","title":"Contributor Covenant Code of Conduct","text":""},{"location":"code_of_conduct/#our-pledge","title":"Our Pledge","text":"

In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.

"},{"location":"code_of_conduct/#our-standards","title":"Our Standards","text":"

Examples of behavior that contributes to creating a positive environment include:

Examples of unacceptable behavior by participants include:

"},{"location":"code_of_conduct/#our-responsibilities","title":"Our Responsibilities","text":"

Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.

"},{"location":"code_of_conduct/#scope","title":"Scope","text":"

This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.

"},{"location":"code_of_conduct/#enforcement","title":"Enforcement","text":"

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at erwan.schild@e-i.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.

"},{"location":"code_of_conduct/#attribution","title":"Attribution","text":"

This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at http://contributor-covenant.org/version/1/4

"},{"location":"contributing/","title":"Contributing","text":"

Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given.

"},{"location":"contributing/#environment-setup","title":"Environment setup","text":"

Nothing easier! Follow the instructions below.

Note

We STRONGLY recommend using a Linux distribution for Python development (Windows sometimes leads to obscure compatibility errors...)

  1. Install Git to version and track our software changes.

    • On Windows, use the official installer: Git-for-Windows.

    • On Linux, simply use your package manager.

    Note

    Git-for-Windows doesn't provide the command make. In following step, use pdm instead.

  2. Install Python as the programming language for this project.

    • On Windows, use the official installer: Python Releases for Windows.

    • On Linux, simply use your package manager.

    Note

    You can also use pyenv.

    # install pyenv\ngit clone https://github.com/pyenv/pyenv ~/.pyenv\n\n# setup pyenv (you should also put these three lines in .bashrc or similar)\nexport PATH=\"${HOME}/.pyenv/bin:${PATH}\"\nexport PYENV_ROOT=\"${HOME}/.pyenv\"\neval \"$(pyenv init -)\"\n\n# install Python 3.8\npyenv install 3.8\n\n# make it available globally\npyenv global system 3.8\n
  3. Fork and clone the repository:

    git clone https://github.com/cognitivefactory/features-maximization-metric/\ncd features-maximization-metric\n
  4. Install the dependencies of the project with:

    cd features-maximization-metric\nmake setup # on Linux\npdm install # on Windows\n

    Note

    If it fails for some reason (especially on Windows), you'll need to install pipx and pdm manually.

    You can install them with:

    python3 -m pip install --user pipx\npipx install pdm\n

    Now you can try running make setup again, or simply pdm install.

Your project is now ready and dependencies are installed.

"},{"location":"contributing/#available-template-tasks","title":"Available template tasks","text":"

This project uses duty to run tasks. A Makefile is also provided. To run a task, use make TASK on Linux and pdm run duty TASK on Windows.

To show the available template task:

make help # on Linux\npdm run duty --list # on Windows\n

The Makefile will try to run certain tasks on multiple Python versions. If for some reason you don't want to run the task on multiple Python versions, you can do one of the following:

  1. export PYTHON_VERSIONS=: this will run the task with only the current Python version
  2. run the task directly with pdm run duty TASK

The Makefile detects if a virtual environment is activated, so make/pdm will work the same with the virtualenv activated or not.

"},{"location":"contributing/#development-journey","title":"Development journey","text":"

As usual:

  1. create a new branch: git checkout -b feature-or-bugfix-name
  2. edit the code and/or the documentation

If you updated the documentation or the project dependencies:

  1. run make docs-regen
  2. run make docs-serve, go to http://localhost:8000 and check that everything looks good

Before committing:

  1. run make format to auto-format the code
  2. run make check to check everything (fix any warning)
  3. run make test to run the tests (fix any issue)
  4. follow our commit message convention

If you are unsure about how to fix or ignore a warning, just let the continuous integration fail, and we will help you during review.

Don't bother updating the changelog, we will take care of this.

"},{"location":"contributing/#commit-message-convention","title":"Commit message convention","text":"

Commits messages must follow the Angular style:

<type>[(scope)]: Subject\n\n[Body]\n

Scope and body are optional. Type can be:

Subject (and body) must be valid Markdown. If you write a body, please add issues references at the end:

Body.\n\nReferences: #10, #11.\nFixes #15.\n
"},{"location":"contributing/#pull-requests-guidelines","title":"Pull requests guidelines","text":"

Link to any related issue in the Pull Request message.

During review, we recommend using fixups:

# SHA is the SHA of the commit you want to fix\ngit commit --fixup=SHA\n

Once all the changes are approved, you can squash your commits:

git rebase -i --autosquash master\n

And force-push:

git push -f\n

If this seems all too complicated, you can push or force-push each new commit, and we will squash them ourselves if needed, before merging.

"},{"location":"credits/","title":"Credits","text":"

These projects were used to build cognitivefactory-features-maximization-metric. Thank you!

python | pdm | copier-pdm

"},{"location":"credits/#exec-1--runtime-dependencies","title":"Runtime dependencies","text":"Project Summary Version (accepted) Version (last resolved) License joblib Lightweight pipelining with Python functions >=1.1.1 1.2.0 BSD numpy Fundamental package for array computing in Python >=1.22.2 1.24.4 BSD-3-Clause scikit-learn A set of python modules for machine learning and data mining >=0.24.1 1.3.2 new BSD scipy Fundamental algorithms for scientific computing in Python >=1.7.3 1.9.3 BSD License threadpoolctl threadpoolctl >=2.0.0 3.1.0 BSD-3-Clause"},{"location":"credits/#exec-1--development-dependencies","title":"Development dependencies","text":"Project Summary Version (accepted) Version (last resolved) License ansimarkup Produce colored terminal text with an xml-like markup ~=1.4 1.5.0 Revised BSD License astor Read/rewrite/write Python ASTs >=0.8 0.8.1 BSD-3-Clause attrs Classes Without Boilerplate >=19.2.0 22.1.0 MIT autoflake Removes unused imports and unused variables >=1.4 1.7.8 MIT babel Internationalization utilities ~=2.10 2.13.1 BSD-3-Clause bandit Security oriented static analyser for python code. >=1.7.3 1.7.4 Apache-2.0 license black The uncompromising code formatter. >=21.10b0 23.11.0 MIT certifi Python package for providing Mozilla's CA Bundle. >=2022.12.7 2023.7.22 MPL-2.0 charset-normalizer The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. <3,>=2 2.1.1 MIT click Composable command line interface toolkit >=8.0.0 8.1.3 BSD-3-Clause colorama Cross-platform colored terminal text. ; platform_system == \"Windows\" 0.4.6 BSD License coverage Code coverage measurement for Python [toml]>=5.2.1 6.5.0 Apache 2.0 darglint A utility for ensuring Google-style docstrings stay up to date with the source code. >=1.8 1.8.1 MIT dparse A parser for Python dependency files >=0.6.2 0.6.2 MIT license duty A simple task runner. 
>=0.7 1.1.0 ISC exceptiongroup Backport of PEP 654 (exception groups) >=1.0.0rc8; python_version < \"3.11\" 1.0.4 MIT License execnet execnet: rapid multi-Python deployment >=1.1 1.9.0 MIT failprint Run a command, print its output only if it fails. !=1.0.0,>=0.11 1.0.2 ISC flake8 the modular source code checker: pep8 pyflakes and co >=4.0 5.0.4 MIT flake8-bandit Automated security testing with bandit and flake8. >=2.1 4.1.1 MIT flake8-black flake8 plugin to call black as a code style validator >=0.2 0.3.6 MIT flake8-bugbear A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle. >=21.9 23.3.12 MIT flake8-builtins Check for python builtins being used as variables or parameters >=1.5 2.2.0 GNU General Public License v2 (GPLv2) flake8-comprehensions A flake8 plugin to help you write better list/set/dict comprehensions. >=3.7 3.14.0 MIT flake8-docstrings Extension for flake8 which uses pydocstyle to check docstrings >=1.6 1.7.0 MIT flake8-plugin-utils The package provides base classes and utils for flake8 plugin writing <2.0.0,>=1.3.2 1.3.2 MIT flake8-polyfill Polyfill package for Flake8 plugins >=1.0.2 1.0.2 MIT flake8-pytest-style A flake8 plugin checking common style issues or inconsistencies with pytest-based tests. >=1.5 1.7.2 MIT flake8-string-format string format checker, plugin for flake8 >=0.3 0.3.0 MIT License flake8-tidy-imports A flake8 plugin that helps you write tidier imports. >=4.5 4.10.0 MIT flake8-variables-names A flake8 extension that helps to make more readable variables names >=0.0 0.0.6 MIT ghp-import Copy your docs directly to the gh-pages branch. >=1.0 2.1.0 Apache Software License git-changelog Automatic Changelog generator using Jinja2 templates. 
>=0.4,<1.0 0.6.0 ISC gitdb Git Object Database <5,>=4.0.1 4.0.10 BSD License gitpython GitPython is a Python library used to interact with Git repositories >=3.1.30 3.1.40 BSD griffe Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API. >=0.24 0.24.1 ISC idna Internationalized Domain Names in Applications (IDNA) <4,>=2.5 3.4 BSD License importlib-metadata Read metadata from Python packages >=4.3; python_version < \"3.10\" 4.13.0 Apache Software License iniconfig iniconfig: brain-dead simple config-ini parsing 1.1.1 MIT License isort A Python utility / library to sort Python imports. >=5.10 5.12.0 MIT jinja2 A very fast and expressive template engine. <4,>=2.11 3.1.2 BSD-3-Clause markdown Python implementation of Markdown. <4.0.0,>=3.3.3 3.3.7 BSD License markdown-callouts Markdown extension: a classier syntax for admonitions >=0.2 0.3.0 MIT markdown-exec Utilities to execute code blocks in Markdown files. >=0.5 1.7.0 ISC markupsafe Safely add untrusted strings to HTML/XML markup. >=2.0 2.1.1 BSD-3-Clause mccabe McCabe checker, plugin for flake8 <0.8.0,>=0.7.0 0.7.0 Expat license mergedeep A deep merge function for \ud83d\udc0d. >=1.3.4 1.3.4 MIT License mkdocs Project documentation with Markdown. >=1.3 1.5.3 BSD License mkdocs-coverage MkDocs plugin to integrate your coverage HTML report into your site. >=0.2 1.0.0 ISC mkdocs-gen-files MkDocs plugin to programmatically generate documentation pages during the build >=0.3 0.5.0 MIT License mkdocs-literate-nav MkDocs plugin to specify the navigation in Markdown instead of YAML >=0.4 0.6.1 MIT License mkdocs-material Documentation that simply works >=7.3 9.4.8 MIT License mkdocs-material-extensions Extension pack for Python Markdown and MkDocs Material. 
~=1.3 1.3 MIT License mkdocs-section-index MkDocs plugin to allow clickable sections that lead to an index page >=0.3 0.3.8 MIT License mkdocstrings Automatic documentation from sources, for MkDocs. [python]>=0.18 0.24.0 ISC mkdocstrings-python A Python handler for mkdocstrings. >=0.5.2 0.8.2 ISC mypy Optional static typing for Python >=0.910 1.7.0 MIT mypy-extensions Type system extensions for programs checked with the mypy type checker. >=0.4.3 1.0.0 MIT License packaging Core utilities for Python packages >=22.0 23.0 BSD License paginate Divides large result sets into pages for easier browsing ~=0.5 0.5.6 MIT pathspec Utility library for gitignore style pattern matching of file paths. >=0.9.0 0.11.2 Mozilla Public License 2.0 (MPL 2.0) pbr Python Build Reasonableness !=2.1.0,>=2.0.0 5.11.0 Apache Software License pep8-naming Check PEP-8 naming conventions, plugin for flake8 >=0.12 0.13.3 Expat license platformdirs A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\". >=2 2.5.4 MIT License pluggy plugin and hook calling mechanisms for python <2.0,>=0.12 1.0.0 MIT ptyprocess Run a subprocess in a pseudo terminal ~=0.6; sys_platform != \"win32\" 0.7.0 ? pycodestyle Python style guide checker <2.10.0,>=2.9.0 2.9.1 Expat license pydocstyle Python docstring style checker >=2.1 6.1.1 MIT pyflakes passive checker of Python programs <3,>=1.1.0 2.5.0 MIT pygments Pygments is a syntax highlighting package written in Python. ~=2.16 2.16.1 BSD-2-Clause pymdown-extensions Extension pack for Python Markdown. >=9 10.4 MIT License pytest pytest: simple powerful testing with Python >=6.2 7.4.3 MIT pytest-cov Pytest plugin for measuring coverage. >=3.0 4.1.0 MIT pytest-randomly Pytest plugin to randomly order tests and control random.seed. 
>=3.10 3.15.0 MIT pytest-xdist pytest xdist plugin for distributed testing, most importantly across multiple CPUs >=2.4 3.4.0 MIT python-dateutil Extensions to the standard Python datetime module >=2.8.1 2.8.2 Dual License pytz World timezone definitions, modern and historical >=2015.7; python_version < \"3.9\" 2023.3.post1 MIT pyyaml YAML parser and emitter for Python >=5.1 6.0 MIT pyyaml-env-tag A custom YAML tag for referencing environment variables in YAML files. >=0.1 0.1 MIT License regex Alternative regular expression module, to replace re. >=2022.4 2022.10.31 Apache Software License requests Python HTTP for Humans. ~=2.26 2.28.1 Apache 2.0 ruamel-yaml ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order >=0.17.21 0.17.21 MIT license ruamel-yaml-clib C version of reader, parser and emitter for ruamel.yaml derived from libyaml >=0.2.6; platform_python_implementation == \"CPython\" and python_version < \"3.11\" 0.2.7 MIT safety Checks installed dependencies for known vulnerabilities and licenses. >=2 2.3.4 MIT license semver Python helper for Semantic Versioning (http://semver.org/) ~=2.13 2.13.0 BSD setuptools Easily download, build, install, upgrade, and uninstall Python packages >=19.3 65.6.3 MIT License six Python 2 and 3 compatibility utilities >=1.5 1.16.0 MIT smmap A pure Python implementation of a sliding window memory map manager <6,>=3.0.1 5.0.0 BSD snowballstemmer This package provides 29 stemmers for 28 languages generated from Snowball algorithms. 
2.2.0 BSD-3-Clause stevedore Manage dynamic plugins for Python applications >=1.20.0 3.5.2 Apache Software License toml Python Library for Tom's Obvious, Minimal Language >=0.10 0.10.2 MIT tomli A lil' TOML parser >=2.0.1; python_version < \"3.11\" 2.0.1 MIT License types-markdown Typing stubs for Markdown >=3.3 3.5.0.3 Apache-2.0 license types-toml Typing stubs for toml >=0.10 0.10.8.7 Apache-2.0 license typing-extensions Backported and Experimental Type Hints for Python 3.7+ >=4.0.1; python_version < \"3.11\" 4.4.0 Python Software Foundation License urllib3 HTTP library with thread-safe connection pooling, file post, and more. <1.27,>=1.21.1 1.26.13 MIT watchdog Filesystem events monitoring >=2.0 2.1.9 Apache License 2.0 wps-light The strictest and most opinionated python linter ever (lighter fork). >=0.15 0.16.1 MIT zipp Backport of pathlib-compatible object wrapper for zip files >=0.5 3.10.0 MIT License"},{"location":"license/","title":"CeCILL-C FREE SOFTWARE LICENSE AGREEMENT","text":""},{"location":"license/#notice","title":"Notice","text":"

This Agreement is a Free Software license agreement that is the result of discussions between its authors in order to ensure compliance with the two main principles guiding its drafting:

The authors of the CeCILL-C license are:

CeCILL stands for Ce(a) C(nrs) I(nria) L(ogiciel) L(ibre)

"},{"location":"license/#preamble","title":"Preamble","text":"

The purpose of this Free Software license agreement is to grant users the right to modify and re-use the software governed by this license.

The exercising of this right is conditional upon the obligation to make available to the community the modifications made to the source code of the software so as to contribute to its evolution.

In consideration of access to the source code and the rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors only have limited liability.

In this respect, the risks associated with loading, using, modifying and/or developing or reproducing the software by the user are brought to the user's attention, given its Free Software status, which may make it complicated to use, with the result that its use is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the suitability of the software as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions of security. This Agreement may be freely reproduced and published, provided it is not altered, and that no provisions are either added or removed herefrom.

This Agreement may apply to any or all software for which the holder of the economic rights decides to submit the use thereof to its provisions.

"},{"location":"license/#article-1-definitions","title":"Article 1 - DEFINITIONS","text":"

For the purpose of this Agreement, when the following expressions commence with a capital letter, they shall have the following meaning:

Agreement: means this license agreement, and its possible subsequent versions and annexes.

Software: means the software in its Object Code and/or Source Code form and, where applicable, its documentation, \"as is\" when the Licensee accepts the Agreement.

Initial Software: means the Software in its Source Code and possibly its Object Code form and, where applicable, its documentation, \"as is\" when it is first distributed under the terms and conditions of the Agreement.

Modified Software: means the Software modified by at least one Integrated Contribution.

Source Code: means all the Software's instructions and program lines to which access is required so as to modify the Software.

Object Code: means the binary files originating from the compilation of the Source Code.

Holder: means the holder(s) of the economic rights over the Initial Software.

Licensee: means the Software user(s) having accepted the Agreement.

Contributor: means a Licensee having made at least one Integrated Contribution.

Licensor: means the Holder, or any other individual or legal entity, who distributes the Software under the Agreement.

Integrated Contribution: means any or all modifications, corrections, translations, adaptations and/or new functions integrated into the Source Code by any or all Contributors.

Related Module: means a set of sources files including their documentation that, without modification to the Source Code, enables supplementary functions or services in addition to those offered by the Software.

Derivative Software: means any combination of the Software, modified or not, and of a Related Module.

Parties: mean both the Licensee and the Licensor.

These expressions may be used both in singular and plural form.

"},{"location":"license/#article-2-purpose","title":"Article 2 - PURPOSE","text":"

The purpose of the Agreement is the grant by the Licensor to the Licensee of a non-exclusive, transferable and worldwide license for the Software as set forth in Article 5 hereinafter for the whole term of the protection granted by the rights over said Software.

"},{"location":"license/#article-3-acceptance","title":"Article 3 - ACCEPTANCE","text":"

3.1 The Licensee shall be deemed as having accepted the terms and conditions of this Agreement upon the occurrence of the first of the following events:

3.2 One copy of the Agreement, containing a notice relating to the characteristics of the Software, to the limited warranty, and to the fact that its use is restricted to experienced users has been provided to the Licensee prior to its acceptance as set forth in Article 3.1 hereinabove, and the Licensee hereby acknowledges that it has read and understood it.

"},{"location":"license/#article-4-effective-date-and-term","title":"Article 4 - EFFECTIVE DATE AND TERM","text":""},{"location":"license/#41-effective-date","title":"4.1 EFFECTIVE DATE","text":"

The Agreement shall become effective on the date when it is accepted by the Licensee as set forth in Article 3.1.

"},{"location":"license/#42-term","title":"4.2 TERM","text":"

The Agreement shall remain in force for the entire legal term of protection of the economic rights over the Software.

"},{"location":"license/#article-5-scope-of-rights-granted","title":"Article 5 - SCOPE OF RIGHTS GRANTED","text":"

The Licensor hereby grants to the Licensee, who accepts, the following rights over the Software for any or all use, and for the term of the Agreement, on the basis of the terms and conditions set forth hereinafter.

Besides, if the Licensor owns or comes to own one or more patents protecting all or part of the functions of the Software or of its components, the Licensor undertakes not to enforce the rights granted by these patents against successive Licensees using, exploiting or modifying the Software. If these patents are transferred, the Licensor undertakes to have the transferees subscribe to the obligations set forth in this paragraph.

"},{"location":"license/#51-right-of-use","title":"5.1 RIGHT OF USE","text":"

The Licensee is authorized to use the Software, without any limitation as to its fields of application, with it being hereinafter specified that this comprises:

  1. permanent or temporary reproduction of all or part of the Software by any or all means and in any or all form.

  2. loading, displaying, running, or storing the Software on any or all medium.

  3. entitlement to observe, study or test its operation so as to determine the ideas and principles behind any or all constituent elements of said Software. This shall apply when the Licensee carries out any or all loading, displaying, running, transmission or storage operation as regards the Software, that it is entitled to carry out hereunder.

"},{"location":"license/#52-right-of-modification","title":"5.2 RIGHT OF MODIFICATION","text":"

The right of modification includes the right to translate, adapt, arrange, or make any or all modifications to the Software, and the right to reproduce the resulting software. It includes, in particular, the right to create a Derivative Software.

The Licensee is authorized to make any or all modification to the Software provided that it includes an explicit notice that it is the author of said modification and indicates the date of the creation thereof.

"},{"location":"license/#53-right-of-distribution","title":"5.3 RIGHT OF DISTRIBUTION","text":"

In particular, the right of distribution includes the right to publish, transmit and communicate the Software to the general public on any or all medium, and by any or all means, and the right to market, either in consideration of a fee, or free of charge, one or more copies of the Software by any means.

The Licensee is further authorized to distribute copies of the modified or unmodified Software to third parties according to the terms and conditions set forth hereinafter.

"},{"location":"license/#531-distribution-of-software-without-modification","title":"5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION","text":"

The Licensee is authorized to distribute true copies of the Software in Source Code or Object Code form, provided that said distribution complies with all the provisions of the Agreement and is accompanied by:

  1. a copy of the Agreement,

  2. a notice relating to the limitation of both the Licensor's warranty and liability as set forth in Article 8 and Article 9,

and that, in the event that only the Object Code of the Software is redistributed, the Licensee allows effective access to the full Source Code of the Software at a minimum during the entire period of its distribution of the Software, it being understood that the additional cost of acquiring the Source Code shall not exceed the cost of transferring the data.

"},{"location":"license/#532-distribution-of-modified-software","title":"5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE","text":"

When the Licensee makes an Integrated Contribution to the Software, the terms and conditions for the distribution of the resulting Modified Software become subject to all the provisions of this Agreement.

The Licensee is authorized to distribute the Modified Software, in source code or object code form, provided that said distribution complies with all the provisions of the Agreement and is accompanied by:

  1. a copy of the Agreement,

  2. a notice relating to the limitation of both the Licensor's warranty and liability as set forth in Article 8 and Article 9,

and that, in the event that only the object code of the Modified Software is redistributed, the Licensee allows effective access to the full source code of the Modified Software at a minimum during the entire period of its distribution of the Modified Software, it being understood that the additional cost of acquiring the source code shall not exceed the cost of transferring the data.

"},{"location":"license/#533-distribution-of-derivative-software","title":"5.3.3 DISTRIBUTION OF DERIVATIVE SOFTWARE","text":"

When the Licensee creates Derivative Software, this Derivative Software may be distributed under a license agreement other than this Agreement, subject to compliance with the requirement to include a notice concerning the rights over the Software as defined in Article 6.4. In the event the creation of the Derivative Software required modification of the Source Code, the Licensee undertakes that:

  1. the resulting Modified Software will be governed by this Agreement,

  2. the Integrated Contributions in the resulting Modified Software will be clearly identified and documented,

  3. the Licensee will allow effective access to the source code of the Modified Software, at a minimum during the entire period of distribution of the Derivative Software, such that such modifications may be carried over in a subsequent version of the Software; it being understood that the additional cost of purchasing the source code of the Modified Software shall not exceed the cost of transferring the data.

"},{"location":"license/#534-compatibility-with-the-cecill-license","title":"5.3.4 COMPATIBILITY WITH THE CeCILL LICENSE","text":"

When a Modified Software contains an Integrated Contribution subject to the CeCILL license agreement, or when a Derivative Software contains a Related Module subject to the CeCILL license agreement, the provisions set forth in the third item of Article 6.4 are optional.

"},{"location":"license/#article-6-intellectual-property","title":"Article 6 - INTELLECTUAL PROPERTY","text":""},{"location":"license/#61-over-the-initial-software","title":"6.1 OVER THE INITIAL SOFTWARE","text":"

The Holder owns the economic rights over the Initial Software. Any or all use of the Initial Software is subject to compliance with the terms and conditions under which the Holder has elected to distribute its work and no one shall be entitled to modify the terms and conditions for the distribution of said Initial Software.

The Holder undertakes that the Initial Software will remain ruled at least by this Agreement, for the duration set forth in Article 4.2.

"},{"location":"license/#62-over-the-integrated-contributions","title":"6.2 OVER THE INTEGRATED CONTRIBUTIONS","text":"

The Licensee who develops an Integrated Contribution is the owner of the intellectual property rights over this Contribution as defined by applicable law.

"},{"location":"license/#63-over-the-related-modules","title":"6.3 OVER THE RELATED MODULES","text":"

The Licensee who develops a Related Module is the owner of the intellectual property rights over this Related Module as defined by applicable law and is free to choose the type of agreement that shall govern its distribution under the conditions defined in Article 5.3.3.

"},{"location":"license/#64-notice-of-rights","title":"6.4 NOTICE OF RIGHTS","text":"

The Licensee expressly undertakes:

  1. not to remove, or modify, in any manner, the intellectual property notices attached to the Software;

  2. to reproduce said notices, in an identical manner, in the copies of the Software modified or not;

  3. to ensure that use of the Software, its intellectual property notices and the fact that it is governed by the Agreement is indicated in a text that is easily accessible, specifically from the interface of any Derivative Software.

The Licensee undertakes not to directly or indirectly infringe the intellectual property rights of the Holder and/or Contributors on the Software and to take, where applicable, vis-\u00e0-vis its staff, any and all measures required to ensure respect of said intellectual property rights of the Holder and/or Contributors.

"},{"location":"license/#article-7-related-services","title":"Article 7 - RELATED SERVICES","text":"

7.1 Under no circumstances shall the Agreement oblige the Licensor to provide technical assistance or maintenance services for the Software.

However, the Licensor is entitled to offer this type of services. The terms and conditions of such technical assistance, and/or such maintenance, shall be set forth in a separate instrument. Only the Licensor offering said maintenance and/or technical assistance services shall incur liability therefor.

7.2 Similarly, any Licensor is entitled to offer to its licensees, under its sole responsibility, a warranty, that shall only be binding upon itself, for the redistribution of the Software and/or the Modified Software, under terms and conditions that it is free to decide. Said warranty, and the financial terms and conditions of its application, shall be subject of a separate instrument executed between the Licensor and the Licensee.

"},{"location":"license/#article-8-liability","title":"Article 8 - LIABILITY","text":"

8.1 Subject to the provisions of Article 8.2, the Licensee shall be entitled to claim compensation for any direct loss it may have suffered from the Software as a result of a fault on the part of the relevant Licensor, subject to providing evidence thereof.

8.2 The Licensor's liability is limited to the commitments made under this Agreement and shall not be incurred as a result of in particular:

In particular the Parties expressly agree that any or all pecuniary or business loss (i.e. loss of data, loss of profits, operating loss, loss of customers or orders, opportunity cost, any disturbance to business activities) or any or all legal proceedings instituted against the Licensee by a third party, shall constitute consequential loss and shall not provide entitlement to any or all compensation from the Licensor.

"},{"location":"license/#article-9-warranty","title":"Article 9 - WARRANTY","text":"

9.1 The Licensee acknowledges that the scientific and technical state-of-the-art when the Software was distributed did not enable all possible uses to be tested and verified, nor for the presence of possible defects to be detected. In this respect, the Licensee's attention has been drawn to the risks associated with loading, using, modifying and/or developing and reproducing the Software which are reserved for experienced users.

The Licensee shall be responsible for verifying, by any or all means, the suitability of the product for its requirements, its good working order, and for ensuring that it shall not cause damage to either persons or properties.

9.2 The Licensor hereby represents, in good faith, that it is entitled to grant all the rights over the Software (including in particular the rights set forth in Article 5).

9.3 The Licensee acknowledges that the Software is supplied \"as is\" by the Licensor without any other express or tacit warranty, other than that provided for in Article 9.2 and, in particular, without any warranty as to its commercial value, its secured, safe, innovative or relevant nature.

Specifically, the Licensor does not warrant that the Software is free from any error, that it will operate without interruption, that it will be compatible with the Licensee's own equipment and software configuration, nor that it will meet the Licensee's requirements.

9.4 The Licensor does not either expressly or tacitly warrant that the Software does not infringe any third party intellectual property right relating to a patent, software or any other property right. Therefore, the Licensor disclaims any and all liability towards the Licensee arising out of any or all proceedings for infringement that may be instituted in respect of the use, modification and redistribution of the Software. Nevertheless, should such proceedings be instituted against the Licensee, the Licensor shall provide it with technical and legal assistance for its defense. Such technical and legal assistance shall be decided on a case-by-case basis between the relevant Licensor and the Licensee pursuant to a memorandum of understanding. The Licensor disclaims any and all liability as regards the Licensee's use of the name of the Software. No warranty is given as regards the existence of prior rights over the name of the Software or as regards the existence of a trademark.

"},{"location":"license/#article-10-termination","title":"Article 10 - TERMINATION","text":"

10.1 In the event of a breach by the Licensee of its obligations hereunder, the Licensor may automatically terminate this Agreement thirty (30) days after notice has been sent to the Licensee and has remained ineffective.

10.2 A Licensee whose Agreement is terminated shall no longer be authorized to use, modify or distribute the Software. However, any licenses that it may have granted prior to termination of the Agreement shall remain valid subject to their having been granted in compliance with the terms and conditions hereof.

"},{"location":"license/#article-11-miscellaneous","title":"Article 11 - MISCELLANEOUS","text":"

11.1 EXCUSABLE EVENTS Neither Party shall be liable for any or all delay, or failure to perform the Agreement, that may be attributable to an event of force majeure, an act of God or an outside cause, such as defective functioning or interruptions of the electricity or telecommunications networks, network paralysis following a virus attack, intervention by government authorities, natural disasters, water damage, earthquakes, fire, explosions, strikes and labor unrest, war, etc.

11.2 Any failure by either Party, on one or more occasions, to invoke one or more of the provisions hereof, shall under no circumstances be interpreted as being a waiver by the interested Party of its right to invoke said provision(s) subsequently.

11.3 The Agreement cancels and replaces any or all previous agreements, whether written or oral, between the Parties and having the same purpose, and constitutes the entirety of the agreement between said Parties concerning said purpose. No supplement or modification to the terms and conditions hereof shall be effective as between the Parties unless it is made in writing and signed by their duly authorized representatives.

11.4 In the event that one or more of the provisions hereof were to conflict with a current or future applicable act or legislative text, said act or legislative text shall prevail, and the Parties shall make the necessary amendments so as to comply with said act or legislative text. All other provisions shall remain effective. Similarly, invalidity of a provision of the Agreement, for any reason whatsoever, shall not cause the Agreement as a whole to be invalid.

11.5 LANGUAGE The Agreement is drafted in both French and English and both versions are deemed authentic.

"},{"location":"license/#article-12-new-versions-of-the-agreement","title":"Article 12 - NEW VERSIONS OF THE AGREEMENT","text":"

12.1 Any person is authorized to duplicate and distribute copies of this Agreement.

12.2 So as to ensure coherence, the wording of this Agreement is protected and may only be modified by the authors of the License, who reserve the right to periodically publish updates or new versions of the Agreement, each with a separate number. These subsequent versions may address new issues encountered by Free Software.

12.3 Any Software distributed under a given version of the Agreement may only be subsequently distributed under the same version of the Agreement or a subsequent version.

"},{"location":"license/#article-13-governing-law-and-jurisdiction","title":"Article 13 - GOVERNING LAW AND JURISDICTION","text":"

13.1 The Agreement is governed by French law. The Parties agree to endeavor to seek an amicable solution to any disagreements or disputes that may arise during the performance of the Agreement.

13.2 Failing an amicable solution within two (2) months as from their occurrence, and unless emergency proceedings are necessary, the disagreements or disputes shall be referred to the Paris Courts having jurisdiction, by the more diligent Party.

Version 1.0 dated 2006-09-05.

"},{"location":"usage/","title":"Usage","text":""},{"location":"usage/#basic-usecase-what-are-the-physical-characteristics-that-most-distinguish-men-from-women","title":"Basic usecase: \"What are the physical characteristics that most distinguish men from women?\"","text":"
  1. Load Python dependencies:

    ###\n### Python dependencies.\n###\n\nfrom cognitivefactory.features_maximization_metric.fmc import FeaturesMaximizationMetric\nfrom scipy.sparse import csr_matrix\nfrom typing import List\n

  2. Define problem data:

    ###\n### Data.\n###\n\n# Define people characteristics that will be studied.\ncharacteristics_studied: List[str] = [\n    \"Shoes size\",\n    \"Hair size\",\n    \"Nose size\",\n]\n\n# Get people characteristics.\npeople_characteristics: csr_matrix = csr_matrix(\n    [\n        [9, 5, 5],\n        [9, 10, 5],\n        [9, 20, 6],\n        [5, 15, 5],\n        [6, 25, 6],\n        [5, 25, 5],\n    ]\n)\n\n# Get people genders.\npeople_genders: List[str] = [\n    \"Man\",\n    \"Man\",\n    \"Man\",\n    \"Woman\",\n    \"Woman\",\n    \"Woman\",\n]\n

  3. Modelize the problem:

    ###\n### Feature Maximization Metrics.\n###\n\n# Main computation.\nfmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric(\n    data_vectors=people_characteristics,\n    data_classes=people_genders,\n    list_of_possible_features=characteristics_studied,\n    amplification_factor=1,\n)\n

  4. Determine relevant characteristics:

    ###\n### Analysis 1: Delete characteristics that aren't relevant.\n###\n\nprint(\n    \"\\n\",\n    \"1. Which characteristic seems not relevant to distinguish men from women ?\",\n)\nfor characteristic in characteristics_studied:\n    if not fmc_computer.features_selection[characteristic]:\n        print(\n            \"    - '{0}' seems not relevant.\".format(characteristic)\n        )\n
    1. Which characteristic seems not relevant to distinguish men from women ?\n    - 'Nose size' seems not relevant.\n

  5. Describe gender by relevant characteristics.:

    ###\n### Analysis 2: Describe gender by relevant characteristics.\n###\n\nprint(\n    \"\\n\",\n    \"2. According to remaining characteristics:\",\n)\nfor gender in sorted(set(people_genders)):\n    print(\n        \"    - Which characteristic seems important to recognize a '{0}' ?\".format(gender)\n    )\n\n    for characteristic in fmc_computer.get_most_active_features_by_a_classe(\n        classe=gender,\n    ):\n        print(\n            \"        - '{0}' seems important (fmeasure of '{1:.2f}', contrast of '{2:.2f}').\".format(\n                characteristic,\n                fmc_computer.features_fmeasure[characteristic][gender],\n                fmc_computer.features_contrast[characteristic][gender],\n            )\n        )\n
    2. According to remaining characteristics:\n    - Which characteristic seems important to recognize a 'Man' ?\n        - 'Shoes size' seems important (fmeasure of '0.45', contrast of '1.32').\n    - Which characteristic seems important to recognize a 'Woman' ?\n        - 'Hair size' seems important (fmeasure of '0.66', contrast of '1.25').\n

"},{"location":"reference/SUMMARY/","title":"SUMMARY","text":""},{"location":"reference/cognitivefactory/features_maximization_metric/","title":"features_maximization_metric","text":"

One module is available:

"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/","title":"fmc","text":""},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric","title":"FeaturesMaximizationMetric","text":"

This class implements the Features Maximization Metric. It's a dataset modelization based on vectors features and data labels: for each couple (feature, classe), it gives a score (called F-Measure) that describe the power of identification and distinction of the feature for this classe.

This metric is computed by applying the following steps:
  1. Compute the Features F-Measure metric (based on Features Recall and Features Predominance metrics).

    (a) The Features Recall FR[f][c] for a given class c and a given feature f is the ratio between the sum of the vectors weights of the feature f for data in class c and the sum of all vectors weights of feature f for all data. It answers the question: \"Can the feature f distinguish the class c from other classes c' ?\"

    (b) The Features Predominance FP[f][c] for a given class c and a given feature f is the ratio between the sum of the vectors weights of the feature f for data in class c and the sum of all vectors weights of all feature f' for data in class c. It answers the question: \"Can the feature f better identify the class c than the other features f' ?\"

    (c) The Features F-Measure FM[f][c] for a given class c and a given feature f is the harmonic mean of the Features Recall (a) and the Features Predominance (b). It answers the question: \"How much information does the feature f contain about the class c ?\"

  2. Compute the Features Selection (based on F-Measure Overall Average comparison).

    (d) The F-Measure Overall Average is the average of Features F-Measure (c) for all classes c and for all features f. It answers the question: \"What is the mean of information contained by features in all classes ?\"

    (e) A feature f is Selected if and only if there exists at least one class c for which the Features F-Measure (c) FM[f][c] is bigger than the F-Measure Overall Average (d). It answers the question: \"What are the features which contain more information than the mean of information in the dataset ?\"

    (f) A Feature f is Deleted if and only if the Features F-Measure (c) FM[f][c] is always lower than the F-Measure Overall Average (d) for each class c. It answers the question: \"What are the features which do not contain more information than the mean of information in the dataset ?\"

  3. Compute the Features Contrast and Features Activation (based on F-Measure Marginal Averages comparison).

    (g) The F-Measure Marginal Averages for a given feature f is the average of Features F-Measure (c) for all classes c and for the given feature f. It answers the question: \"What is the mean of information contained by the feature f in all classes ?\"

    (h) The Features Contrast FC[f][c] for a given class c and a given selected feature f is the ratio between the Features F-Measure (c) FM[f][c] and the F-Measure Marginal Averages (g) for selected feature f raised to the power of an Amplification Factor. It answers the question: \"How relevant is the feature f to distinguish the class c ?\"

    (i) A selected Feature f is Active for a given class c if and only if the Features Contrast (h) FC[f][c] is bigger than 1.0. It answers the question: \"For which classes a selected feature f is relevant ?\"

In order to evaluate it according to a reference, a FMC modelization is represented by the Features Activation of its vector features, and a similarity score to the reference is computed, based on common metrics on clustering (homogeneity, completeness, v_measure).

Attributes:

Name Type Description data_vectors csr_matrix

The sparse matrix representing the vector of each data (i.e. data_vectors[d,f] is the weight of data d for feature f).

data_classes List[str]

The list representing the class of each data (i.e. data_classes[d] is the class of data d).

list_of_possible_features List[str]

The list of existing vectors features.

list_of_possible_classes List[str]

The list of existing data classes.

amplification_factor int

The positive integer called \"amplification factor\" aimed at emphasizing the feature contrast. Usually at 1.

features_frecall Dict[str, Dict[str, float]]

The computation of Features Recall (Can the feature f distinguish the class c from other classes c' ?).

features_fpredominance Dict[str, Dict[str, float]]

The computation of Features Predominance (Can the feature f better identify the class c than the other features f' ?).

features_fmeasure Dict[str, Dict[str, float]]

The computation of Features F-Measure (How much information does the feature f contain about the class c ?).

features_overall_average float

The computation of Overall Average of Features F-Measure (What is the mean of information contained by features in all classes ?).

features_selection Dict[str, bool]

The computation of Features Selected (What are the features which contain more information than the mean of information in the dataset ?).

features_marginal_averages Dict[str, float]

The computation of Marginal Averages of Features F-Measure (What is the mean of information contained by the feature f in all classes ?).

features_contrast Dict[str, Dict[str, float]]

The computation of Features Contrast (How important is the feature f to distinguish the class c ?).

features_activation Dict[str, Dict[str, bool]]

The computation of Features Activation (For which classes a selected feature f is relevant ?).

Example References Source code in cognitivefactory\\features_maximization_metric\\fmc.py
class FeaturesMaximizationMetric:\n    r\"\"\"\n    This class implements the ***Features Maximization Metric***.\n    It's a dataset modelization based on vectors features and data labels:\n    for each couple `(feature, classe)`, it gives a score (called **F-Measure**) that describe the power of identification and distinction of the feature for this classe.\n\n    This metric is computed by applying the following steps:\n\n        1. Compute the ***Features F-Measure*** metric (based on ***Features Recall*** and ***Features Predominance*** metrics).\n\n            > (a) The ***Features Recall*** `FR[f][c]` for a given class `c` and a given feature `f` is the ratio between\n            > the sum of the vectors weights of the feature `f` for data in class `c`\n            > and the sum of all vectors weights of feature `f` for all data.\n            > It answers the question: \"_Can the feature `f` distinguish the class `c` from other classes `c'` ?_\"\n\n            > (b) The ***Features Predominance*** `FP[f][c]` for a given class `c` and a given feature `f` is the ratio between\n            > the sum of the vectors weights of the feature `f` for data in class `c`\n            > and the sum of all vectors weights of all feature `f'` for data in class `c`.\n            > It answers the question: \"_Can the feature `f` better identify the class `c` than the other features `f'` ?_\"\n\n            > (c) The ***Features F-Measure*** `FM[f][c]` for a given class `c` and a given feature `f` is\n            > the harmonic mean of the ***Features Recall*** (a) and the ***Features Predominance*** (c).\n            > It answers the question: \"_How much information does the feature `f` contain about the class `c` ?_\"\n\n        2. 
Compute the ***Features Selection*** (based on ***F-Measure Overall Average*** comparison).\n\n            > (d) The ***F-Measure Overall Average*** is the average of ***Features F-Measure*** (c) for all classes `c` and for all features `f`.\n            > It answers the question: \"_What are the mean of information contained by features in all classes ?_\"\n\n            > (e) A feature `f` is ***Selected*** if and only if it exist at least one class `c` for which the ***Features F-Measure*** (c) `FM[f][c]` is bigger than the ***F-Measure Overall Average*** (d).\n            > It answers the question: \"_What are the features which contain more information than the mean of information in the dataset ?_\"\n\n            > (f) A Feature `f` is ***Deleted*** if and only if the ***Features F-Measure*** (c) `FM[f][c]` is always lower than the ***F-Measure Overall Average*** (d) for each class `c`.\n            > It answers the question: \"_What are the features which do not contain more information than the mean of information in the dataset ?_\"\n\n        3. 
Compute the ***Features Contrast*** and ***Features Activation*** (based on ***F-Measure Marginal Averages*** comparison).\n\n            > (g) The ***F-Measure Marginal Averages*** for a given feature `f` is the average of ***Features F-Measure*** (c) for all classes `c` and for the given feature `f`.\n            > It answers the question: \"_What are the mean of information contained by the feature `f` in all classes ?_\"\n\n            > (h) The ***Features Contrast*** `FC[f][c]` for a given class `c` and a given selected feature `f` is the ratio between\n            > the ***Features F-Measure*** (c) `FM[f][c]`\n            > and the ***F-Measure Marginal Averages*** (g) for selected feature f\n            > put to the power of an ***Amplification Factor***.\n            > It answers the question: \"_How relevant is the feature `f` to distinguish the class `c` ?_\"\n\n            > (i) A selected Feature `f` is ***Active*** for a given class `c` if and only if the ***Features Contrast*** (h) `FC[f][c]` is bigger than `1.0`.\n            > It answers the question : \"_For which classes a selected feature `f` is relevant ?_\"\n\n    In order to ***evaluate it according to a reference***, a FMC modelization is represented by the Features Activation of its vector features,\n    and a similarity score to the reference is computed, based on common metrics on clustering (homogeneity, completeness, v_measure).\n\n    Attributes:\n        data_vectors (csr_matrix): The sparse matrix representing the vector of each data (i.e. `data_vectors[d,f]` is the weight of data `d` for feature `f`).\n        data_classes (List[str]): The list representing the class of each data (i.e. 
`data_classes[d]` is the class of data `d`).\n        list_of_possible_features (List[str]): The list of existing vectors features.\n        list_of_possible_classes (List[str]):  The list of existing data classes.\n        amplification_factor (int): The positive integer called \"amplification factor\" aimed at emphasize the feature contrast. Usually at `1`.\n        features_frecall (Dict[str, Dict[str, float]]): The computation of *Features Recall* (_Can the feature `f` distinguish the class `c` from other classes `l'` ?_).\n        features_fpredominance (Dict[str, Dict[str, float]]): The computation of *Features Predominance* (_Can the feature `f` better identify the class `c` than the other features `f'` ?_).\n        features_fmeasure (Dict[str, Dict[str, float]]): The computation of *Features F-Measure* (_How much information does the feature `f` contain about the class `c` ?_).\n        features_overall_average (float): The computation of *Overall Average of Features F-Measure* (_What are the mean of information contained by features in all classes ?_).\n        features_selection (Dict[str, bool]): The computation of *Features Selected* (_What are the features which contain more information than the mean of information in the dataset ?_).\n        features_marginal_averages (Dict[str, float]):  The computation of *Marginal Averages of Features F-Measure* (_What are the mean of information contained by the feature `f` in all classes ?_).\n        features_contrast (Dict[str, Dict[str, float]]): The computation of *Features Contrast* (_How important is the feature `f` to distinguish the class `c` ?_).\n        features_activation (Dict[str, Dict[str, bool]]): The computation of *Features Activation* (_For which classes a selected feature `f` is relevant ?_).\n\n    Example:\n        - Basic usecase: \"_What are the physical characteristics that most distinguish men from women ?_\"\n        ```python\n\n        # Problem to solve.\n        print(\">> What 
are the physical characteristics that most distinguish men from women ?\")\n\n        ###\n        ### Python dependencies.\n        ###\n\n        from cognitivefactory.features_maximization_metric.fmc import FeaturesMaximizationMetric\n        from scipy.sparse import csr_matrix\n        from typing import List\n\n        ###\n        ### Data.\n        ###\n\n        # Define people characteristics that will be studied.\n        characteristics_studied: List[str] = [\n            \"Shoes size\",\n            \"Hair size\",\n            \"Nose size\",\n        ]\n\n        # Get people characteristics.\n        people_characteristics: csr_matrix = csr_matrix(\n            [\n                [9, 5, 5],\n                [9, 10, 5],\n                [9, 20, 6],\n                [5, 15, 5],\n                [6, 25, 6],\n                [5, 25, 5],\n            ]\n        )\n\n        # Get people genders.\n        people_genders: List[str] = [\n            \"Man\",\n            \"Man\",\n            \"Man\",\n            \"Woman\",\n            \"Woman\",\n            \"Woman\",\n        ]\n\n        ###\n        ### Feature Maximization Metrics.\n        ###\n\n        # Main computation.\n        fmc_computer: FeaturesMaximizationMetric = FeaturesMaximizationMetric(\n            data_vectors=people_characteristics,\n            data_classes=people_genders,\n            list_of_possible_features=characteristics_studied,\n            amplification_factor=1,\n        )\n\n        ###\n        ### Analysis 1: Delete characteristics that aren't relevant.\n        ###\n\n        print(\n            \"\\n\",\n            \"1. 
Which characteristic seems not relevant to distinguish men from women ?\",\n        )\n        for characteristic in characteristics_studied:\n            if not fmc_computer.features_selection[characteristic]:\n                print(\n                    \"    - '{0}' seems not relevant.\".format(characteristic)\n                )\n\n        ###\n        ### Analysis 2: Describe gender by relevant characteristics.\n        ###\n\n        print(\n            \"\\n\",\n            \"2. According to remaining characteristics:\",\n        )\n        for gender in sorted(set(people_genders)):\n            print(\n                \"    - Which characteristic seems important to recognize a '{0}' ?\".format(gender)\n            )\n\n            for characteristic in fmc_computer.get_most_active_features_by_a_classe(\n                classe=gender,\n            ):\n                print(\n                    \"        - '{0}' seems important (fmeasure of '{1:.2f}', contrast of '{2:.2f}').\".format(\n                        characteristic,\n                        fmc_computer.features_fmeasure[characteristic][gender],\n                        fmc_computer.features_contrast[characteristic][gender],\n                    )\n                )\n        ```\n\n    References:\n        - Features Maximization Metric: `Lamirel, J.-C., Cuxac, P., & Hajlaoui, K. (2016). A Novel Approach to Feature Selection Based on Quality Estimation Metrics. In Advances in Knowledge Discovery and Management (pp. 121\u2013140). Springer International Publishing. 
https://doi.org/10.1007/978-3-319-45763-5_7`\n    \"\"\"\n\n    # =========================================================================================\n    # INITIALIZATION\n    # =========================================================================================\n\n    def __init__(\n        self,\n        data_vectors: csr_matrix,\n        data_classes: List[str],\n        list_of_possible_features: List[str],\n        amplification_factor: int = 1,\n        verbose: bool = False,\n    ):\n        \"\"\"\n        The constructor for `FeaturesMaximizationMetric` class.\n        It applies the several steps of ***Feature Maximization***:\n            1. Compute the ***Features F-Measure*** metric (based on ***Features Recall*** and ***Features Predominance*** metrics).\n            2. Compute the ***Features Selection*** (based on ***F-Measure Overall Average*** comparison).\n            3. Compute the ***Features Contrast*** and ***Features Activation*** (based on ***F-Measure Marginal Averages*** comparison).\n\n        Args:\n            data_vectors (scipy.sparse.csr_matrix): A sparse matrix representing the vector of each data (i.e. `data_vectors[d,f]` is the weight of data `d` for feature `f`).\n            data_classes (List[str]): A list representing the class of each data (i.e. `data_classes[d]` is the class of data `d`).\n            list_of_possible_features (List[str]): A list of existing vectors features.\n            amplification_factor (int, optional): A positive integer called \"amplification factor\" aimed at emphasize the feature contrast. Defaults to `1`.\n            verbose (bool): An option to display progress status of computations. 
Defaults to `False`.\n\n        Raises:\n            ValueError: if `data_vectors` and `data_classes` have inconsistent shapes.\n            ValueError: if `data_vectors` and `list_of_possible_features` have inconsistent shapes.\n            ValueError: if `amplification_factor` is not a positive integer.\n        \"\"\"\n\n        ###\n        ### Check parameters.\n        ###\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Check parameters.\")\n\n        # Check data size.\n        if data_vectors.shape[0] != len(data_classes):\n            raise ValueError(\n                \"The vectors `data_vectors` and the list of classes `data_classes` have inconsistent shapes (currently: '{0}' vs '{1}').\".format(\n                    data_vectors.shape[0],\n                    len(data_classes),\n                )\n            )\n\n        # Check features size.\n        if data_vectors.shape[1] != len(list_of_possible_features):\n            raise ValueError(\n                \"The vectors `data_vectors` and the list of features `list_of_possible_features` have inconsistent shapes (currently: '{0}' vs '{1}').\".format(\n                    data_vectors.shape[1],\n                    len(list_of_possible_features),\n                )\n            )\n\n        # Check amplification factor.\n        if (not isinstance(amplification_factor, int)) or amplification_factor < 1:\n            raise ValueError(\n                \"The amplification factor `amplification_factor` has to be a positive integer (currently: '{0}').\".format(\n                    amplification_factor,\n                )\n            )\n\n        ###\n        ### Store parameters.\n        ###\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Store parameters.\")\n\n        # Store data information.\n        
self.data_vectors: csr_matrix = data_vectors\n        self.data_classes: List[str] = data_classes\n        # Store features and classes lists.\n        self.list_of_possible_features: List[str] = list_of_possible_features\n        self.list_of_possible_classes: List[str] = sorted(set(data_classes))\n        # Store amplification factor.\n        self.amplification_factor: int = amplification_factor\n\n        ###\n        ### Compute Features Maximization Metric.\n        ###\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Start computations.\")\n\n        # 1. Compute the *Features F-Measure* metric (based on *Features Recall* and *Features Predominance* metrics).\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features F-Measure.\")\n\n        # Initialize variables.\n        self.features_frecall: Dict[str, Dict[str, float]]\n        self.features_fpredominance: Dict[str, Dict[str, float]]\n        self.features_fmeasure: Dict[str, Dict[str, float]]\n        # Compute variables.\n        self._compute_features_frecall_fpredominance_fmeasure()\n\n        # 2. Perform a *Features Selection* (based on *F-Measure Overall Average* comparison).\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features Selection.\")\n\n        # Initialize variables.\n        self.features_overall_average: float\n        self.features_selection: Dict[str, bool]\n        # Compute variables.\n        self._compute_features_selection()\n\n        # 3. 
Compute the *Features Contrast* and *Features Activation* (based on *F-Measure Marginal Averages* comparison).\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features Contrast.\")\n\n        # Initialize variables.\n        self.features_marginal_averages: Dict[str, float]\n        self.features_contrast: Dict[str, Dict[str, float]]\n        self.features_activation: Dict[str, Dict[str, bool]]\n        # Compute variables.\n        self._compute_features_contrast_and_activation()\n\n        # Display progress status if requested.\n        if verbose:\n            print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Computations done.\")\n\n    # ==============================================================================\n    # COMPUTE FEATURES F-MEASURE\n    # ==============================================================================\n\n    def _compute_features_frecall_fpredominance_fmeasure(\n        self,\n    ) -> None:\n        \"\"\"\n        Compute:\n            (a) the ***Features Recall*** (cf. `self.features_frecall`),\n            (b) the ***Features Predominance*** (cf. `self.features_fpredominance`), and\n            (c) the ***Features F-Measure*** (cf. 
`self.features_fmeasure`).\n        \"\"\"\n\n        ###\n        ### Temporary computations.\n        ###\n\n        # Temporary variable used to store sums of all vectors weights for a given feature `f` and a given class `c`.\n        # Needed for both Features Recall and Features Predominance computations.\n        sum_by_feature_and_classe: Dict[str, Dict[str, float]] = {\n            feature: {classe: 0.0 for classe in self.list_of_possible_classes}\n            for feature in self.list_of_possible_features\n        }\n\n        # Temporary variable used to store sums of all vectors weights for a given feature `f` and all classes.\n        # Needed for Features Recall computation.\n        sum_by_features: Dict[str, float] = {feature: 0.0 for feature in self.list_of_possible_features}\n\n        # Temporary variable used to store sums of all vectors weights for all features and a given class `c`.\n        # Needed for Features Predominance computation.\n        sum_by_classe: Dict[str, float] = {classe: 0.0 for classe in self.list_of_possible_classes}\n\n        # Index used to get non zero elements in the sparse matrix weights.\n        indices_x, indices_y = self.data_vectors.nonzero()\n\n        # Browse non zero weights in vectors to compute all the needed sums.\n        for index in range(self.data_vectors.nnz):\n            # Get needed information (data, class/ classe, feature, vectors weight)\n            data_index: int = indices_x[index]\n            data_classe: str = self.data_classes[data_index]\n            feature_index: int = indices_y[index]\n            data_feature: str = self.list_of_possible_features[feature_index]\n            weight: float = self.data_vectors[data_index, feature_index]  # TODO: check if np.nan ?\n\n            # Update the several sums.\n            sum_by_feature_and_classe[data_feature][data_classe] += weight\n            sum_by_features[data_feature] += weight\n            sum_by_classe[data_classe] += weight\n\n     
   ###\n        ### Features F-Measure computation.\n        ###\n\n        # Compute Features Recall.\n        self.features_frecall = {\n            feature: {\n                classe: (\n                    0.0  # TODO: set to np.nan ?\n                    if sum_by_features[feature] == 0\n                    else sum_by_feature_and_classe[feature][classe] / sum_by_features[feature]\n                )\n                for classe in self.list_of_possible_classes\n            }\n            for feature in self.list_of_possible_features\n        }\n\n        # Compute Features Predominance.\n        self.features_fpredominance = {\n            feature: {\n                classe: (\n                    0.0  # TODO: set to np.nan ?\n                    if sum_by_classe[classe] == 0\n                    else sum_by_feature_and_classe[feature][classe] / sum_by_classe[classe]\n                )\n                for classe in self.list_of_possible_classes\n            }\n            for feature in self.list_of_possible_features\n        }\n\n        # Compute Features F-Measure.\n        self.features_fmeasure = {\n            feature: {\n                classe: (\n                    0.0  # TODO: set to np.nan ?\n                    if (self.features_frecall[feature][classe] + self.features_fpredominance[feature][classe] == 0)\n                    else (\n                        2\n                        * (self.features_frecall[feature][classe] * self.features_fpredominance[feature][classe])\n                        / (self.features_frecall[feature][classe] + self.features_fpredominance[feature][classe])\n                    )\n                )\n                for classe in self.list_of_possible_classes\n            }\n            for feature in self.list_of_possible_features\n        }\n\n    # =============================================================================================\n    # COMPUTE FEATURES SELECTION\n    # 
=============================================================================================\n\n    def _compute_features_selection(\n        self,\n    ) -> None:\n        \"\"\"\n        Compute:\n            (d) the ***F-Measure Overall Average*** (cf. `self.features_overall_average`), and\n            (e) the ***Features Selected*** (cf. `self.features_selection`).\n        \"\"\"\n\n        ###\n        ### Features F-Measure Overall Average computation.\n        ###\n\n        # Temporary variable used to store the overall sum in order to compute the overall average of Features F-Measure.\n        overall_sum: float = 0.0\n        nb_overall: int = 0\n\n        # For each feature...\n        for feature1 in self.list_of_possible_features:\n            # For each classe...\n            for classe1 in self.list_of_possible_classes:\n                # Update the overall sum and count.\n                overall_sum += self.features_fmeasure[feature1][classe1]\n                nb_overall += 1\n\n        # Compute the overall average of Features F-Measure.\n        self.features_overall_average = 0.0 if nb_overall == 0 else overall_sum / nb_overall  # TODO: set to np.nan ?\n\n        ###\n        ### Features Selection computation.\n        ###\n\n        # Temporary variable used store the selected features.\n        self.features_selection = {}\n\n        # Browse features to determine the selected ones.\n        for feature2 in self.list_of_possible_features:\n            # Set default state of selection.\n            self.features_selection[feature2] = False\n\n            # For each feature, browse class to find one for which the Features F-Measure is bigger than the overall average.\n            for classe2 in self.list_of_possible_classes:\n                # Check that the Feature F-Measure is bigger than the overall average.\n                if self.features_fmeasure[feature2][classe2] > self.features_overall_average:\n                    # Approve the 
selection and then break the loop.\n                    self.features_selection[feature2] = True\n                    break\n\n    # =============================================================================================\n    # COMPUTE FEATURES CONSTRAST AND ACTIVATION\n    # =============================================================================================\n\n    def _compute_features_contrast_and_activation(\n        self,\n    ) -> None:\n        \"\"\"\n        Compute:\n            (g) The ***F-Measure Marginal Averages*** (cf. `self.features_marginal_averages`), and\n            (h) The ***Features Contrast*** (cf. `self.features_contrast`).\n            (i) the ***Features Activation*** (cf. `self.features_activation`).\n        \"\"\"\n\n        ###\n        ### Features F-Measure Marginal computation.\n        ###\n\n        # Initialize the marginal average of Features F-Measure.\n        self.features_marginal_averages = {}\n\n        # Browse features to compute the averages.\n        for feature1 in self.list_of_possible_features:\n            # Temporary variable used to store the marginal sum in order to compute the marginal average of Features F-Measure over the current feature.\n            sum_marginal: float = 0.0\n            nb_marginal: int = 0\n\n            # Update the marginal sum of Features F-Measure over the current feature.\n            for classe1 in self.list_of_possible_classes:\n                sum_marginal += self.features_fmeasure[feature1][classe1]\n                nb_marginal += 1\n\n            # Compute the marginal averages of Features F-Measure over the current feature.\n            self.features_marginal_averages[feature1] = (\n                0.0 if nb_marginal == 0 else sum_marginal / nb_marginal\n            )  # TODO: set to np.nan ?\n\n        ###\n        ### Features Contrast computation.\n        ###\n\n        # Temporary variable used to store the contrast of a feature for a class.\n        
self.features_contrast = {\n            feature2: {\n                classe2: (\n                    0.0  # TODO: set to np.nan ?\n                    if (self.features_selection[feature2] is False or self.features_marginal_averages[feature2] == 0)\n                    else (self.features_fmeasure[feature2][classe2] / self.features_marginal_averages[feature2])\n                    ** self.amplification_factor\n                )\n                for classe2 in self.list_of_possible_classes\n            }\n            for feature2 in self.list_of_possible_features\n        }\n\n        ###\n        ### Features Activation computation.\n        ###\n\n        # Temporary variable used store the features activation.\n        self.features_activation = {\n            feature3: {\n                classe3: bool(\n                    self.features_selection[feature3] is True and self.features_contrast[feature3][classe3] > 1\n                )\n                for classe3 in self.list_of_possible_classes\n            }\n            for feature3 in self.list_of_possible_features\n        }\n\n    # =============================================================================================\n    # GET: MOST ACTIVATED CLASSES FOR A FEATURE\n    # =============================================================================================\n\n    def get_most_activated_classes_by_a_feature(\n        self,\n        feature: str,\n        activation_only: bool = True,\n        sort_by: Literal[\"contrast\", \"fmeasure\"] = \"contrast\",\n        max_number: Optional[int] = None,\n    ) -> List[str]:\n        \"\"\"\n        Get the list of classes for which the requested feature is the most relevant.\n\n        Args:\n            feature (str): The feature to analyze.\n            sort_by (Literal[\"contrast\", \"fmeasure\"]): The sort criterion for the list of classes. Defaults to `\"contrast\"`.\n            activation_only (bool): The option to get only activated classes. 
Defaults to `True`.\n            max_number (Optional[int]): The maximum number of classes to return. Defaults to `None`.\n\n        Raises:\n            ValueError: if `feature` is not in `self.list_of_possible_features`.\n            ValueError: if `sort_by` is not in `{\"contrast\", \"fmeasure\"}`.\n\n        Returns:\n            List[str]: The list of classes for which the requested feature is the most relevant.\n        \"\"\"\n\n        ###\n        ### Check parameters.\n        ###\n\n        # Check parameter `feature`.\n        if feature not in self.list_of_possible_features:\n            raise ValueError(\n                \"The requested feature `'{0}'` is unknown.\".format(\n                    feature,\n                )\n            )\n\n        # Check parameter `sort_by`.\n        if sort_by not in {\"contrast\", \"fmeasure\"}:\n            raise ValueError(\n                \"The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').\".format(\n                    sort_by,\n                )\n            )\n\n        ###\n        ### Compute the requested list.\n        ###\n\n        # Define list of possible results (classe + contrast/fmeasure).\n        list_of_possible_results: List[Tuple[float, str]] = [\n            (\n                # 0: the metric: contrast or fmeasure.\n                (\n                    self.features_contrast[feature][classe]\n                    if sort_by == \"contrast\"\n                    else self.features_fmeasure[feature][classe]\n                ),\n                # 1: the classe.\n                classe,\n            )\n            for classe in self.list_of_possible_classes\n            if (activation_only is False or self.features_activation[feature][classe] is True)\n        ]\n\n        # Return top classes sorted by requested metric.\n        return [\n            activated_classe\n            for _, activated_classe in sorted(\n              
  list_of_possible_results,\n                reverse=True,\n            )\n        ][:max_number]\n\n    # =============================================================================================\n    # GET: MOST ACTIVATED FEATURES FOR A CLASSE\n    # =============================================================================================\n\n    def get_most_active_features_by_a_classe(\n        self,\n        classe: str,\n        activation_only: bool = True,\n        sort_by: Literal[\"contrast\", \"fmeasure\"] = \"contrast\",\n        max_number: Optional[int] = None,\n    ) -> List[str]:\n        \"\"\"\n        Get the list of features which are the most relevant for the requested classe.\n\n        Args:\n            classe (str): The classe to analyze.\n            sort_by (Literal[\"contrast\", \"fmeasure\"]): The sort criterion for the list of features. Defaults to `\"contrast\"`.\n            activation_only (bool): The option to get only active features. Defaults to `True`.\n            max_number (Optional[int]): The maximum number of features to return. 
Defaults to `None`.\n\n        Raises:\n            ValueError: if `classe` is not in `self.list_of_possible_classes`.\n            ValueError: if `sort_by` is not in `{\"contrast\", \"fmeasure\"}`.\n\n        Returns:\n            List[str]: The list of features which are the most relevant for the requested classe.\n        \"\"\"\n\n        ###\n        ### Check parameters.\n        ###\n\n        # Check parameter `feature`.\n        if classe not in self.list_of_possible_classes:\n            raise ValueError(\n                \"The requested classe `'{0}'` is unknown.\".format(\n                    classe,\n                )\n            )\n\n        # Check parameter `sort_by`.\n        if sort_by not in {\"contrast\", \"fmeasure\"}:\n            raise ValueError(\n                \"The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').\".format(\n                    sort_by,\n                )\n            )\n\n        ###\n        ### Compute the requested list.\n        ###\n\n        # Define list of possible results (feature + contrast/fmeasure).\n        list_of_possible_results: List[Tuple[float, str]] = [\n            (\n                # 0: the metric: contrast or fmeasure.\n                (\n                    self.features_contrast[feature][classe]\n                    if sort_by == \"contrast\"\n                    else self.features_fmeasure[feature][classe]\n                ),\n                # 1: the feature.\n                feature,\n            )\n            for feature in self.list_of_possible_features\n            if (activation_only is False or self.features_activation[feature][classe] is True)\n        ]\n\n        # Return top features sorted by requested metric.\n        return [\n            active_feature\n            for _, active_feature in sorted(\n                list_of_possible_results,\n                reverse=True,\n            )\n        ][:max_number]\n\n    
# =============================================================================================\n    # COMPARE: WITH AN OTHER FMC\n    # =============================================================================================\n\n    def compare(\n        self,\n        fmc_reference: \"FeaturesMaximizationMetric\",\n        rounded: Optional[int] = None,\n    ) -> Tuple[float, float, float]:\n        \"\"\"\n        Gives a similarity score in agreement with a reference FMC modelization.\n        The similarity score computation is based on common metrics on clustering (homogeneity, completeness, v_measure),\n        where each FMC modelization is represented by the Features Activation of their vector features.\n        In order to be able to compute these similarity, data classes can be different, but vector features must be the same in both FMC modelization.\n\n\n        Args:\n            fmc_reference (FeaturesMaximizationMetric): Another Features Maximization modelization used as reference for the comparison.\n            rounded (Optional[int]): The option to round the result to counter log approximation. 
Defaults to `None`.\n\n        Raises:\n            ValueError: if `list_of_possible_features` are different.\n\n        Returns:\n            Tuple[float, float, float]: Computation of homogeneity, completeness and v_measure.\n        \"\"\"\n\n        ###\n        ### Check parameters.\n        ###\n\n        # Check list_of_possible_features equality.\n        if self.list_of_possible_features != fmc_reference.list_of_possible_features:\n            list_of_in_excess_features: List[str] = [\n                feature\n                for feature in self.list_of_possible_features\n                if feature not in fmc_reference.list_of_possible_features\n            ]\n            list_of_missing_features: List[str] = [\n                feature\n                for feature in fmc_reference.list_of_possible_features\n                if feature not in self.list_of_possible_features\n            ]\n            raise ValueError(\n                \"The list of features `list_of_possible_features` must be the same for both FMC modelization. 
+: {0}, -: {1}\".format(\n                    str(list_of_in_excess_features), str(list_of_missing_features)\n                )\n            )\n\n        ###\n        ### Format Features Activation as classification label of features.\n        ###\n\n        # Initialize\n        list_of_self_features_activations: List[str] = []\n        list_of_reference_features_activations: List[str] = []\n\n        # Define default value if feature not activated.\n        # NB: we can't set a fixed value in case this value is in the list of possible classes...\n        # Example: can't set `\"\"` or `\"None\"` in case self.list_of_possible_classes==[\"A\", \"\"] and fmc_reference.list_of_possible_classes==[\"B\", \"None\"].\n        default_label_if_feature_not_activated: str = \"NOT_ACTIVATED:{possible_classe}\".format(\n            possible_classe=self.list_of_possible_classes + fmc_reference.list_of_possible_classes\n        )\n\n        # Browse activated features to\u00e0 compare Features Activation.\n        for feature in fmc_reference.list_of_possible_features:\n            # Get Features Activation.\n            list_of_most_activated_classes_for_feature_in_self: List[\n                str\n            ] = self.get_most_activated_classes_by_a_feature(\n                feature=feature,\n            )\n            list_of_most_activated_classes_for_feature_in_reference: List[\n                str\n            ] = fmc_reference.get_most_activated_classes_by_a_feature(\n                feature=feature,\n            )\n\n            # TODO: Skip if feature is not activated in both modelization.\n            if (\n                len(list_of_most_activated_classes_for_feature_in_self) != 1\n                and len(list_of_most_activated_classes_for_feature_in_reference) != 1\n            ):\n                continue\n\n            # Format Feature Activation as classification label. 
Set to `-1` if not activated.\n            list_of_self_features_activations.append(\n                list_of_most_activated_classes_for_feature_in_self[0]\n                if len(list_of_most_activated_classes_for_feature_in_self) == 1\n                else default_label_if_feature_not_activated\n            )\n            list_of_reference_features_activations.append(\n                list_of_most_activated_classes_for_feature_in_reference[0]\n                if len(list_of_most_activated_classes_for_feature_in_reference) == 1\n                else default_label_if_feature_not_activated\n            )\n\n        ###\n        ### Compute FMC modelizations similarity.\n        ###\n\n        # Compute standard metrics for clustering.\n        homogeneity: float\n        completeness: float\n        v_measure: float\n        homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(\n            labels_pred=list_of_self_features_activations,\n            labels_true=list_of_reference_features_activations,\n        )\n\n        # Round the results.\n        if rounded is not None:\n            homogeneity = round(homogeneity, rounded)\n            completeness = round(completeness, rounded)\n            v_measure = round(v_measure, rounded)\n\n        # Return values.\n        return homogeneity, completeness, v_measure\n
"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric.__init__","title":"__init__(data_vectors, data_classes, list_of_possible_features, amplification_factor=1, verbose=False)","text":"

The constructor for FeaturesMaximizationMetric class. It applies the several steps of Feature Maximization: 1. Compute the Features F-Measure metric (based on Features Recall and Features Predominance metrics). 2. Compute the Features Selection (based on F-Measure Overall Average comparison). 3. Compute the Features Contrast and Features Activation (based on F-Measure Marginal Averages comparison).

Parameters:

Name Type Description Default data_vectors scipy.sparse.csr_matrix

A sparse matrix representing the vector of each data (i.e. data_vectors[d,f] is the weight of data d for feature f).

required data_classes List[str]

A list representing the class of each data (i.e. data_classes[d] is the class of data d).

required list_of_possible_features List[str]

A list of existing vectors features.

required amplification_factor int

A positive integer called \"amplification factor\" aimed at emphasize the feature contrast. Defaults to 1.

1 verbose bool

An option to display progress status of computations. Defaults to False.

False

Raises:

Type Description ValueError

if data_vectors and data_classes have inconsistent shapes.

ValueError

if data_vectors and list_of_possible_features have inconsistent shapes.

ValueError

if amplification_factor is not a positive integer.

Source code in cognitivefactory\\features_maximization_metric\\fmc.py
def __init__(\n    self,\n    data_vectors: csr_matrix,\n    data_classes: List[str],\n    list_of_possible_features: List[str],\n    amplification_factor: int = 1,\n    verbose: bool = False,\n):\n    \"\"\"\n    The constructor for `FeaturesMaximizationMetric` class.\n    It applies the several steps of ***Feature Maximization***:\n        1. Compute the ***Features F-Measure*** metric (based on ***Features Recall*** and ***Features Predominance*** metrics).\n        2. Compute the ***Features Selection*** (based on ***F-Measure Overall Average*** comparison).\n        3. Compute the ***Features Contrast*** and ***Features Activation*** (based on ***F-Measure Marginal Averages*** comparison).\n\n    Args:\n        data_vectors (scipy.sparse.csr_matrix): A sparse matrix representing the vector of each data (i.e. `data_vectors[d,f]` is the weight of data `d` for feature `f`).\n        data_classes (List[str]): A list representing the class of each data (i.e. `data_classes[d]` is the class of data `d`).\n        list_of_possible_features (List[str]): A list of existing vectors features.\n        amplification_factor (int, optional): A positive integer called \"amplification factor\" aimed at emphasize the feature contrast. Defaults to `1`.\n        verbose (bool): An option to display progress status of computations. 
Defaults to `False`.\n\n    Raises:\n        ValueError: if `data_vectors` and `data_classes` have inconsistent shapes.\n        ValueError: if `data_vectors` and `list_of_possible_features` have inconsistent shapes.\n        ValueError: if `amplification_factor` is not a positive integer.\n    \"\"\"\n\n    ###\n    ### Check parameters.\n    ###\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Check parameters.\")\n\n    # Check data size.\n    if data_vectors.shape[0] != len(data_classes):\n        raise ValueError(\n            \"The vectors `data_vectors` and the list of classes `data_classes` have inconsistent shapes (currently: '{0}' vs '{1}').\".format(\n                data_vectors.shape[0],\n                len(data_classes),\n            )\n        )\n\n    # Check features size.\n    if data_vectors.shape[1] != len(list_of_possible_features):\n        raise ValueError(\n            \"The vectors `data_vectors` and the list of features `list_of_possible_features` have inconsistent shapes (currently: '{0}' vs '{1}').\".format(\n                data_vectors.shape[1],\n                len(list_of_possible_features),\n            )\n        )\n\n    # Check amplification factor.\n    if (not isinstance(amplification_factor, int)) or amplification_factor < 1:\n        raise ValueError(\n            \"The amplification factor `amplification_factor` has to be a positive integer (currently: '{0}').\".format(\n                amplification_factor,\n            )\n        )\n\n    ###\n    ### Store parameters.\n    ###\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Store parameters.\")\n\n    # Store data information.\n    self.data_vectors: csr_matrix = data_vectors\n    self.data_classes: List[str] = data_classes\n    # Store features and classes lists.\n    self.list_of_possible_features: 
List[str] = list_of_possible_features\n    self.list_of_possible_classes: List[str] = sorted(set(data_classes))\n    # Store amplification factor.\n    self.amplification_factor: int = amplification_factor\n\n    ###\n    ### Compute Features Maximization Metric.\n    ###\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Start computations.\")\n\n    # 1. Compute the *Features F-Measure* metric (based on *Features Recall* and *Features Predominance* metrics).\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features F-Measure.\")\n\n    # Initialize variables.\n    self.features_frecall: Dict[str, Dict[str, float]]\n    self.features_fpredominance: Dict[str, Dict[str, float]]\n    self.features_fmeasure: Dict[str, Dict[str, float]]\n    # Compute variables.\n    self._compute_features_frecall_fpredominance_fmeasure()\n\n    # 2. Perform a *Features Selection* (based on *F-Measure Overall Average* comparison).\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features Selection.\")\n\n    # Initialize variables.\n    self.features_overall_average: float\n    self.features_selection: Dict[str, bool]\n    # Compute variables.\n    self._compute_features_selection()\n\n    # 3. 
Compute the *Features Contrast* and *Features Activation* (based on *F-Measure Marginal Averages* comparison).\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Compute Features Contrast.\")\n\n    # Initialize variables.\n    self.features_marginal_averages: Dict[str, float]\n    self.features_contrast: Dict[str, Dict[str, float]]\n    self.features_activation: Dict[str, Dict[str, bool]]\n    # Compute variables.\n    self._compute_features_contrast_and_activation()\n\n    # Display progress status if requested.\n    if verbose:\n        print(\"`FeaturesMaximizationMetric.__init__`\", \":\", \"Computations done.\")\n
"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric.compare","title":"compare(fmc_reference, rounded=None)","text":"

Gives a similarity score in agreement with a reference FMC modelization. The similarity score computation is based on common metrics on clustering (homogeneity, completeness, v_measure), where each FMC modelization is represented by the Features Activation of their vector features. In order to be able to compute these similarity, data classes can be different, but vector features must be the same in both FMC modelization.

Parameters:

Name Type Description Default fmc_reference FeaturesMaximizationMetric

Another Features Maximization modelization used as reference for the comparison.

required rounded Optional[int]

The option to round the result to counter log approximation. Defaults to None.

None

Raises:

Type Description ValueError

if list_of_possible_features are different.

Returns:

Type Description Tuple[float, float, float]

Tuple[float, float, float]: Computation of homogeneity, completeness and v_measure.

Source code in cognitivefactory\\features_maximization_metric\\fmc.py
def compare(\n    self,\n    fmc_reference: \"FeaturesMaximizationMetric\",\n    rounded: Optional[int] = None,\n) -> Tuple[float, float, float]:\n    \"\"\"\n    Gives a similarity score in agreement with a reference FMC modelization.\n    The similarity score computation is based on common metrics on clustering (homogeneity, completeness, v_measure),\n    where each FMC modelization is represented by the Features Activation of their vector features.\n    In order to be able to compute these similarity, data classes can be different, but vector features must be the same in both FMC modelization.\n\n\n    Args:\n        fmc_reference (FeaturesMaximizationMetric): Another Features Maximization modelization used as reference for the comparison.\n        rounded (Optional[int]): The option to round the result to counter log approximation. Defaults to `None`.\n\n    Raises:\n        ValueError: if `list_of_possible_features` are different.\n\n    Returns:\n        Tuple[float, float, float]: Computation of homogeneity, completeness and v_measure.\n    \"\"\"\n\n    ###\n    ### Check parameters.\n    ###\n\n    # Check list_of_possible_features equality.\n    if self.list_of_possible_features != fmc_reference.list_of_possible_features:\n        list_of_in_excess_features: List[str] = [\n            feature\n            for feature in self.list_of_possible_features\n            if feature not in fmc_reference.list_of_possible_features\n        ]\n        list_of_missing_features: List[str] = [\n            feature\n            for feature in fmc_reference.list_of_possible_features\n            if feature not in self.list_of_possible_features\n        ]\n        raise ValueError(\n            \"The list of features `list_of_possible_features` must be the same for both FMC modelization. 
+: {0}, -: {1}\".format(\n                str(list_of_in_excess_features), str(list_of_missing_features)\n            )\n        )\n\n    ###\n    ### Format Features Activation as classification label of features.\n    ###\n\n    # Initialize\n    list_of_self_features_activations: List[str] = []\n    list_of_reference_features_activations: List[str] = []\n\n    # Define default value if feature not activated.\n    # NB: we can't set a fixed value in case this value is in the list of possible classes...\n    # Example: can't set `\"\"` or `\"None\"` in case self.list_of_possible_classes==[\"A\", \"\"] and fmc_reference.list_of_possible_classes==[\"B\", \"None\"].\n    default_label_if_feature_not_activated: str = \"NOT_ACTIVATED:{possible_classe}\".format(\n        possible_classe=self.list_of_possible_classes + fmc_reference.list_of_possible_classes\n    )\n\n    # Browse activated features to\u00e0 compare Features Activation.\n    for feature in fmc_reference.list_of_possible_features:\n        # Get Features Activation.\n        list_of_most_activated_classes_for_feature_in_self: List[\n            str\n        ] = self.get_most_activated_classes_by_a_feature(\n            feature=feature,\n        )\n        list_of_most_activated_classes_for_feature_in_reference: List[\n            str\n        ] = fmc_reference.get_most_activated_classes_by_a_feature(\n            feature=feature,\n        )\n\n        # TODO: Skip if feature is not activated in both modelization.\n        if (\n            len(list_of_most_activated_classes_for_feature_in_self) != 1\n            and len(list_of_most_activated_classes_for_feature_in_reference) != 1\n        ):\n            continue\n\n        # Format Feature Activation as classification label. 
Set to `-1` if not activated.\n        list_of_self_features_activations.append(\n            list_of_most_activated_classes_for_feature_in_self[0]\n            if len(list_of_most_activated_classes_for_feature_in_self) == 1\n            else default_label_if_feature_not_activated\n        )\n        list_of_reference_features_activations.append(\n            list_of_most_activated_classes_for_feature_in_reference[0]\n            if len(list_of_most_activated_classes_for_feature_in_reference) == 1\n            else default_label_if_feature_not_activated\n        )\n\n    ###\n    ### Compute FMC modelizations similarity.\n    ###\n\n    # Compute standard metrics for clustering.\n    homogeneity: float\n    completeness: float\n    v_measure: float\n    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(\n        labels_pred=list_of_self_features_activations,\n        labels_true=list_of_reference_features_activations,\n    )\n\n    # Round the results.\n    if rounded is not None:\n        homogeneity = round(homogeneity, rounded)\n        completeness = round(completeness, rounded)\n        v_measure = round(v_measure, rounded)\n\n    # Return values.\n    return homogeneity, completeness, v_measure\n
"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric.get_most_activated_classes_by_a_feature","title":"get_most_activated_classes_by_a_feature(feature, activation_only=True, sort_by='contrast', max_number=None)","text":"

Get the list of classes for which the requested feature is the most relevant.

Parameters:

Name Type Description Default feature str

The feature to analyze.

required sort_by Literal['contrast', 'fmeasure']

The sort criterion for the list of classes. Defaults to \"contrast\".

'contrast' activation_only bool

The option to get only activated classes. Defaults to True.

True max_number Optional[int]

The maximum number of classes to return. Defaults to None.

None

Raises:

Type Description ValueError

if feature is not in self.list_of_possible_features.

ValueError

if sort_by is not in {\"contrast\", \"fmeasure\"}.

Returns:

Type Description List[str]

List[str]: The list of classes for which the requested feature is the most relevant.

Source code in cognitivefactory\\features_maximization_metric\\fmc.py
def get_most_activated_classes_by_a_feature(\n    self,\n    feature: str,\n    activation_only: bool = True,\n    sort_by: Literal[\"contrast\", \"fmeasure\"] = \"contrast\",\n    max_number: Optional[int] = None,\n) -> List[str]:\n    \"\"\"\n    Get the list of classes for which the requested feature is the most relevant.\n\n    Args:\n        feature (str): The feature to analyze.\n        sort_by (Literal[\"contrast\", \"fmeasure\"]): The sort criterion for the list of classes. Defaults to `\"contrast\"`.\n        activation_only (bool): The option to get only activated classes. Defaults to `True`.\n        max_number (Optional[int]): The maximum number of classes to return. Defaults to `None`.\n\n    Raises:\n        ValueError: if `feature` is not in `self.list_of_possible_features`.\n        ValueError: if `sort_by` is not in `{\"contrast\", \"fmeasure\"}`.\n\n    Returns:\n        List[str]: The list of classes for which the requested feature is the most relevant.\n    \"\"\"\n\n    ###\n    ### Check parameters.\n    ###\n\n    # Check parameter `feature`.\n    if feature not in self.list_of_possible_features:\n        raise ValueError(\n            \"The requested feature `'{0}'` is unknown.\".format(\n                feature,\n            )\n        )\n\n    # Check parameter `sort_by`.\n    if sort_by not in {\"contrast\", \"fmeasure\"}:\n        raise ValueError(\n            \"The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').\".format(\n                sort_by,\n            )\n        )\n\n    ###\n    ### Compute the requested list.\n    ###\n\n    # Define list of possible results (classe + contrast/fmeasure).\n    list_of_possible_results: List[Tuple[float, str]] = [\n        (\n            # 0: the metric: contrast or fmeasure.\n            (\n                self.features_contrast[feature][classe]\n                if sort_by == \"contrast\"\n                else 
self.features_fmeasure[feature][classe]\n            ),\n            # 1: the classe.\n            classe,\n        )\n        for classe in self.list_of_possible_classes\n        if (activation_only is False or self.features_activation[feature][classe] is True)\n    ]\n\n    # Return top classes sorted by requested metric.\n    return [\n        activated_classe\n        for _, activated_classe in sorted(\n            list_of_possible_results,\n            reverse=True,\n        )\n    ][:max_number]\n
"},{"location":"reference/cognitivefactory/features_maximization_metric/fmc/#cognitivefactory.features_maximization_metric.fmc.FeaturesMaximizationMetric.get_most_active_features_by_a_classe","title":"get_most_active_features_by_a_classe(classe, activation_only=True, sort_by='contrast', max_number=None)","text":"

Get the list of features which are the most relevant for the requested classe.

Parameters:

Name Type Description Default classe str

The classe to analyze.

required sort_by Literal['contrast', 'fmeasure']

The sort criterion for the list of features. Defaults to \"contrast\".

'contrast' activation_only bool

The option to get only active features. Defaults to True.

True max_number Optional[int]

The maximum number of features to return. Defaults to None.

None

Raises:

Type Description ValueError

if classe is not in self.list_of_possible_classes.

ValueError

if sort_by is not in {\"contrast\", \"fmeasure\"}.

Returns:

Type Description List[str]

List[str]: The list of features which are the most relevant for the requested classe.

Source code in cognitivefactory\\features_maximization_metric\\fmc.py
def get_most_active_features_by_a_classe(\n    self,\n    classe: str,\n    activation_only: bool = True,\n    sort_by: Literal[\"contrast\", \"fmeasure\"] = \"contrast\",\n    max_number: Optional[int] = None,\n) -> List[str]:\n    \"\"\"\n    Get the list of features which are the most relevant for the requested classe.\n\n    Args:\n        classe (str): The classe to analyze.\n        sort_by (Literal[\"contrast\", \"fmeasure\"]): The sort criterion for the list of features. Defaults to `\"contrast\"`.\n        activation_only (bool): The option to get only active features. Defaults to `True`.\n        max_number (Optional[int]): The maximum number of features to return. Defaults to `None`.\n\n    Raises:\n        ValueError: if `classe` is not in `self.list_of_possible_classes`.\n        ValueError: if `sort_by` is not in `{\"contrast\", \"fmeasure\"}`.\n\n    Returns:\n        List[str]: The list of features which are the most relevant for the requested classe.\n    \"\"\"\n\n    ###\n    ### Check parameters.\n    ###\n\n    # Check parameter `feature`.\n    if classe not in self.list_of_possible_classes:\n        raise ValueError(\n            \"The requested classe `'{0}'` is unknown.\".format(\n                classe,\n            )\n        )\n\n    # Check parameter `sort_by`.\n    if sort_by not in {\"contrast\", \"fmeasure\"}:\n        raise ValueError(\n            \"The sort option factor `sort_by` has to be in the following values: `{{'contrast', 'fmeasure'}}` (currently: '{0}').\".format(\n                sort_by,\n            )\n        )\n\n    ###\n    ### Compute the requested list.\n    ###\n\n    # Define list of possible results (feature + contrast/fmeasure).\n    list_of_possible_results: List[Tuple[float, str]] = [\n        (\n            # 0: the metric: contrast or fmeasure.\n            (\n                self.features_contrast[feature][classe]\n                if sort_by == \"contrast\"\n                else 
self.features_fmeasure[feature][classe]\n            ),\n            # 1: the feature.\n            feature,\n        )\n        for feature in self.list_of_possible_features\n        if (activation_only is False or self.features_activation[feature][classe] is True)\n    ]\n\n    # Return top features sorted by requested metric.\n    return [\n        active_feature\n        for _, active_feature in sorted(\n            list_of_possible_results,\n            reverse=True,\n        )\n    ][:max_number]\n
"},{"location":"coverage/","title":"Coverage report","text":""}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index 8b72236..bb35d9e 100755 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,57 +2,57 @@ https://cognitivefactory.github.io/features-maximization-metric/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/changelog/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/code_of_conduct/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/contributing/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/credits/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/license/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/usage/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/reference/SUMMARY/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/reference/cognitivefactory/features_maximization_metric/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/reference/cognitivefactory/features_maximization_metric/fmc/ - 2023-11-14 + 2023-11-16 daily https://cognitivefactory.github.io/features-maximization-metric/coverage/ - 2023-11-14 + 2023-11-16 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index f643396..3032bae 100755 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ