Skip to content

Commit

Permalink
Filtered data entry Redux
Browse files Browse the repository at this point in the history
For  https://github.com/jasp-stats/jasp-test-release/issues/2515

Allow for filters to be created by JASPControls etc

This should help speed up JASP quite a bit for large datasets in analyses that use filtering in their GUI.
So, Audit

The filters didn't get a dataset id from me, and it seems they should.
for tomorrow

Ok DataSet is now provided to a component

probably works now, but Audit needs to be slightly rewritten to get the info from the filter directly
and also, how does it get the data from the filter?

dit dan?

make it compile and use test module again

postmerge fixes

change some lambdas

use testmodule with special case for filtered data entry

Don't allow translators to write R code for the filter

link computed column creation through to listmodelfiltereddataentry

commit jaspAudit with some mods in dataEntryRedux branch

add submodules
  • Loading branch information
JorisGoosen committed Sep 26, 2024
1 parent b6e2a6f commit 3a581cc
Show file tree
Hide file tree
Showing 54 changed files with 555 additions and 272 deletions.
2 changes: 1 addition & 1 deletion Common/enginedefinitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
/// Using enumutilities templates to make sure we can easily and quickly go from enum -> string -> enum for json communication
///

DECLARE_ENUM(engineState, initializing, idle, analysis, filter, rCode, computeColumn, moduleInstallRequest, moduleLoadRequest, pauseRequested, paused, resuming, stopRequested, stopped, logCfg, settings, killed, reloadData);
DECLARE_ENUM(engineState, initializing, idle, analysis, filter, filterByName, rCode, computeColumn, moduleInstallRequest, moduleLoadRequest, pauseRequested, paused, resuming, stopRequested, stopped, logCfg, settings, killed, reloadData);
DECLARE_ENUM(performType, run, abort, saveImg, editImg, rewriteImgs);
DECLARE_ENUM(analysisResultStatus, validationError, fatalError, imageSaved, imageEdited, imagesRewritten, complete, running, changed, waiting);
DECLARE_ENUM(moduleStatus, initializing, installNeeded, loading, installModPkgNeeded, readyForUse, error);
Expand Down
83 changes: 63 additions & 20 deletions CommonData/databaseinterface.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "databaseinterface.h"
#include "columntype.h"
#include "tempfiles.h"
#include "version.h"
#include "dataset.h"
#include "timers.h"
#include "utils.h"
Expand Down Expand Up @@ -40,6 +41,9 @@ void DatabaseInterface::upgradeDBFromVersion(Version originalVersion)
runStatements("ALTER TABLE DataSets ADD COLUMN dataFileTimestamp INT;");
}

if(originalVersion < "0.19.2" && !tableHasColumn("Filters", "name"))
runStatements("ALTER TABLE Filters ADD COLUMN name TEXT;");

transactionWriteEnd();
}

Expand Down Expand Up @@ -171,7 +175,7 @@ void DatabaseInterface::filterClear(int id)
JASPTIMER_SCOPE(DatabaseInterface::filterClear);
int dataSet = filterGetDataSetId(id);

runStatements("UPDATE " + dataSetName(dataSet) + " SET " + filterName(id) + " = 1;");
runStatements("UPDATE " + dataSetName(dataSet) + " SET " + filterTableName(id) + " = 1;");
}

void DatabaseInterface::filterDelete(int filterIndex)
Expand All @@ -182,56 +186,93 @@ void DatabaseInterface::filterDelete(int filterIndex)
int dataSetId = filterGetDataSetId(filterIndex);

if(dataSetId != -1)
runStatements("ALTER TABLE " + dataSetName(dataSetId) + " DROP COLUMN " + filterName(filterIndex) + ";");

runStatements("ALTER TABLE " + dataSetName(dataSetId) + " DROP COLUMN " + filterTableName(filterIndex) + ";");
runStatements("DELETE FROM Filters WHERE id = " + std::to_string(filterIndex) + ";");

transactionWriteEnd();
}


int DatabaseInterface::filterInsert(int dataSetId, const std::string & rFilter, const std::string & generatedFilter, const std::string & constructorJson, const std::string & constructorR)
int DatabaseInterface::filterInsert(int dataSetId, const std::string & rFilter, const std::string & generatedFilter, const std::string & constructorJson, const std::string & constructorR, const std::string & name)
{
JASPTIMER_SCOPE(DatabaseInterface::filterInsert);
JASPTIMER_SCOPE(DatabaseInterface::filterInsertDataSet);
std::function<void(sqlite3_stmt *stmt)> prepare = [&](sqlite3_stmt *stmt)
{
sqlite3_bind_int( stmt, 1, dataSetId);
sqlite3_bind_text(stmt, 2, rFilter.c_str(), rFilter.length(), SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 3, generatedFilter.c_str(), generatedFilter.length(), SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 4, constructorJson.c_str(), constructorJson.length(), SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 5, constructorR.c_str(), constructorR.length(), SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 5, name.c_str(), name.length(), SQLITE_TRANSIENT);
};

transactionWriteBegin();

int id = runStatementsId("INSERT INTO Filters (dataSet, rFilter, generatedFilter, constructorJson, constructorR) VALUES (?, ?, ?, ?, ?) RETURNING rowid;", prepare);
runStatements("ALTER TABLE " + dataSetName(dataSetId) + " ADD " + filterName(id) +" INT NOT NULL DEFAULT 1;");
int id = runStatementsId("INSERT INTO Filters (dataSet, rFilter, generatedFilter, constructorJson, constructorR, name) VALUES (?, ?, ?, ?, ?, ?) RETURNING rowid;", prepare);
runStatements("ALTER TABLE " + dataSetName(dataSetId) + " ADD " + filterTableName(id) +" INT NOT NULL DEFAULT 1;");

transactionWriteEnd();

return id;
}


/// Looks up the id of the unnamed filter of a dataset.
/// This only works when there is but one unnamed filter per dataset, this might change later.
///
/// Rows that were present before the `name` column was added by the DB upgrade hold NULL there,
/// and `NULL = ''` is never true in SQL, so NULL is treated as an empty name here.
///
/// \param dataSetId id of the dataset whose unnamed filter is requested
/// \return the filter id, or -1 when no matching row exists
int DatabaseInterface::filterGetId(	int dataSetId)
{
	JASPTIMER_SCOPE(DatabaseInterface::filterGetId);
	int filterId = -1;

	runStatements("SELECT id FROM Filters WHERE dataSet = ? AND IFNULL(name, '') = ''",
		[&](sqlite3_stmt *stmt)				{ sqlite3_bind_int(stmt, 1, dataSetId); },
		[&](size_t row, sqlite3_stmt *stmt)	{ filterId = sqlite3_column_int(stmt, 0); }
	);

	return filterId;
}

/// Looks up a filter id by its name in the Filters table.
/// The lookup is not restricted to a particular dataset.
///
/// \param name value to match against Filters.name
/// \return the id of the matching row, or -1 when there is none
int DatabaseInterface::filterGetId(const std::string &name)
{
	JASPTIMER_SCOPE(DatabaseInterface::filterGetId);

	int foundId = -1;

	// Binds the requested name to the single placeholder of the query.
	auto bindName = [&](sqlite3_stmt *stmt)
	{
		sqlite3_bind_text(stmt, 1, name.c_str(), name.length(), SQLITE_TRANSIENT);
	};

	// Called for every result row; the id of the last row seen is kept.
	auto readId = [&](size_t row, sqlite3_stmt *stmt)
	{
		foundId = sqlite3_column_int(stmt, 0);
	};

	runStatements("SELECT id FROM Filters WHERE name = ?", bindName, readId);

	return foundId;
}

/// Returns the `dataSet` value stored in the Filters row with the given id,
/// as reported by runStatementsId.
int DatabaseInterface::filterGetDataSetId(int filterIndex)
{
	JASPTIMER_SCOPE(DatabaseInterface::filterGetDataSetId);

	const std::string query = "SELECT dataSet from Filters WHERE id=" + std::to_string(filterIndex);

	return runStatementsId(query);
}

std::string DatabaseInterface::filterGetName(int filterIndex)
{
JASPTIMER_SCOPE(DatabaseInterface::filterGetName);
std::string errorMsg;

std::function<void(sqlite3_stmt *stmt)> prepare = [&](sqlite3_stmt *stmt)
{
sqlite3_bind_int(stmt, 1, filterIndex);
};

std::function<void(size_t row, sqlite3_stmt *stmt)> processRow = [&](size_t row, sqlite3_stmt *stmt)
{
int colCount = sqlite3_column_count(stmt);

assert(colCount == 1);
errorMsg = _wrap_sqlite3_column_text(stmt, 0);
};

runStatements("SELECT name FROM Filters WHERE id = ?;", prepare, processRow);

return errorMsg;
}

bool DatabaseInterface::filterSelect(int filterIndex, boolvec & bools)
{
JASPTIMER_SCOPE(DatabaseInterface::filterSelect);
Expand All @@ -250,7 +291,7 @@ bool DatabaseInterface::filterSelect(int filterIndex, boolvec & bools)

bools.resize(rows);

runStatements("SELECT " + filterName(filterIndex) + " FROM " + dataSetName(dataSet) + " ORDER BY rowNumber;",
runStatements("SELECT " + filterTableName(filterIndex) + " FROM " + dataSetName(dataSet) + " ORDER BY rowNumber;",
[&](sqlite3_stmt *){ }, [&](size_t row, sqlite3_stmt * stmt)
{
int val = sqlite3_column_int(stmt, 0);
Expand All @@ -264,7 +305,7 @@ bool DatabaseInterface::filterSelect(int filterIndex, boolvec & bools)
return changed;
}

void DatabaseInterface::filterUpdate(int filterIndex, const std::string & rFilter, const std::string & generatedFilter, const std::string & constructorJson, const std::string & constructorR)
void DatabaseInterface::filterUpdate(int filterIndex, const std::string & rFilter, const std::string & generatedFilter, const std::string & constructorJson, const std::string & constructorR, const std::string & name)
{
JASPTIMER_SCOPE(DatabaseInterface::filterUpdate);
std::function<void(sqlite3_stmt *stmt)> prepare = [&](sqlite3_stmt *stmt)
Expand All @@ -273,13 +314,14 @@ void DatabaseInterface::filterUpdate(int filterIndex, const std::string & rFilte
sqlite3_bind_text(stmt, 2, generatedFilter.c_str(), generatedFilter.length(), SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 3, constructorJson.c_str(), constructorJson.length(), SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 4, constructorR.c_str(), constructorR.length(), SQLITE_TRANSIENT);
sqlite3_bind_int (stmt, 5, filterIndex);
sqlite3_bind_text(stmt, 5, name.c_str(), name.length(), SQLITE_TRANSIENT);
sqlite3_bind_int (stmt, 6, filterIndex);
};

runStatements("UPDATE Filters SET rFilter=?, generatedFilter=?, constructorJson=?, constructorR=? WHERE id = ?;", prepare);
runStatements("UPDATE Filters SET rFilter=?, generatedFilter=?, constructorJson=?, constructorR=?, name=? WHERE id = ?;", prepare);
}

void DatabaseInterface::filterLoad(int filterIndex, std::string & rFilter, std::string & generatedFilter, std::string & constructorJson, std::string & constructorR, int & revision)
void DatabaseInterface::filterLoad(int filterIndex, std::string & rFilter, std::string & generatedFilter, std::string & constructorJson, std::string & constructorR, int & revision, std::string & name)
{
JASPTIMER_SCOPE(DatabaseInterface::filterLoad);
std::function<void(sqlite3_stmt *stmt)> prepare = [&](sqlite3_stmt *stmt)
Expand All @@ -291,15 +333,16 @@ void DatabaseInterface::filterLoad(int filterIndex, std::string & rFilter, std::
{
int colCount = sqlite3_column_count(stmt);

assert(colCount == 5);
assert(colCount == 6);
rFilter = _wrap_sqlite3_column_text(stmt, 0);
generatedFilter = _wrap_sqlite3_column_text(stmt, 1);
constructorJson = _wrap_sqlite3_column_text(stmt, 2);
constructorR = _wrap_sqlite3_column_text(stmt, 3);
revision = sqlite3_column_int( stmt, 4);
name = _wrap_sqlite3_column_text(stmt, 5);
};

runStatements("SELECT rFilter, generatedFilter, constructorJson, constructorR, revision FROM Filters WHERE id = ?;", prepare, processRow);
runStatements("SELECT rFilter, generatedFilter, constructorJson, constructorR, revision, name FROM Filters WHERE id = ?;", prepare, processRow);
}

std::string DatabaseInterface::filterLoadErrorMsg(int filterIndex)
Expand Down Expand Up @@ -369,7 +412,7 @@ void DatabaseInterface::filterWrite(int filterIndex, const std::vector<bool> & v

int dataSet = filterGetDataSetId(filterIndex);

const std::string updateFilterPrefix = "UPDATE " + dataSetName(dataSet) + " SET " + filterName(filterIndex) + "= ? WHERE rowNumber = ?;" ;
const std::string updateFilterPrefix = "UPDATE " + dataSetName(dataSet) + " SET " + filterTableName(filterIndex) + "= ? WHERE rowNumber = ?;" ;

size_t rowOutside;

Expand Down Expand Up @@ -442,7 +485,7 @@ void DatabaseInterface::dataSetCreateTable(DataSet * dataSet)
runStatements("DROP TABLE " + dataSetName(dataSet->id()) + ";");

std::stringstream statements;
statements << "CREATE TABLE " + dataSetName(dataSet->id()) + " (rowNumber INTEGER PRIMARY KEY, "+ filterName(dataSet->filter()->id()) + " INT NOT NULL DEFAULT 1";
statements << "CREATE TABLE " + dataSetName(dataSet->id()) + " (rowNumber INTEGER PRIMARY KEY, "+ filterTableName(dataSet->filter()->id()) + " INT NOT NULL DEFAULT 1";

for(Column * column : dataSet->columns())
statements << ", " << columnBaseName(column->id()) << "_DBL REAL NULL, " << columnBaseName(column->id()) << "_INT INT NULL";
Expand Down Expand Up @@ -523,7 +566,7 @@ void DatabaseInterface::dataSetBatchedValuesUpdate(DataSet * data, Columns colum
}

//And the filtername and rowNumber
statement << filterName(data->filter()->id()) << ", " << "rowNumber) VALUES (";
statement << filterTableName(data->filter()->id()) << ", " << "rowNumber) VALUES (";

for(size_t i=0; i<columns.size(); i++)
statement << "?, ?, ";
Expand Down Expand Up @@ -594,7 +637,7 @@ void DatabaseInterface::dataSetBatchedValuesLoad(DataSet *data, std::function<vo
for(Column * col : data->columns())
statement << "Column_" << col->id() << "_INT" << ", Column_" << col->id() << "_DBL, ";

statement << filterName(data->filter()->id()) << " FROM " << dataSetName(data->id()) << " ORDER BY rowNumber";
statement << filterTableName(data->filter()->id()) << " FROM " << dataSetName(data->id()) << " ORDER BY rowNumber";

std::function<void(sqlite3_stmt *stmt)> prepare = [&](sqlite3_stmt *stmt) {};

Expand Down Expand Up @@ -844,7 +887,7 @@ int DatabaseInterface::dataSetGetFilter(int dataSetId)
return runStatementsId("SELECT id FROM Filters WHERE dataSet=? LIMIT 1;", [&](sqlite3_stmt *stmt) { sqlite3_bind_int(stmt, 1, dataSetId); });
}

std::string DatabaseInterface::filterName(int filterIndex) const
std::string DatabaseInterface::filterTableName(int filterIndex) const
{
JASPTIMER_SCOPE(DatabaseInterface::filterName);
return "Filter_" + std::to_string(filterIndex);
Expand Down
10 changes: 6 additions & 4 deletions CommonData/databaseinterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,18 @@ class DatabaseInterface
void dataSetBatchedValuesUpdate(DataSet * data, std::function<void(float)> progressCallback = [](float){});

//Filters
std::string filterName( int filterIndex) const;
std::string filterTableName( int filterIndex) const;
int filterGetId( int dataSetId);
int filterGetId( const std::string & name);
bool filterSelect( int filterIndex, boolvec & bools); ///< Loads result and errorMsg and returns whether there was a change in either of those.
void filterWrite( int filterIndex, const boolvec & values); ///< Overwrites the current filter values, no checks are done on the size. If too few the rest is TRUE nd superfluous bools are ignored.
int filterInsert( int dataSetId, const std::string & rFilter = "", const std::string & generatedFilter = "", const std::string & constructorJson = "", const std::string & constructorR = ""); ///< Inserts a new Filter row into Filters and creates an empty FilterValues_#id. It returns id
void filterUpdate( int filterIndex, const std::string & rFilter = "", const std::string & generatedFilter = "", const std::string & constructorJson = "", const std::string & constructorR = ""); ///< Updates an existing Filter row in Filters
void filterLoad( int filterIndex, std::string & rFilter, std::string & generatedFilter, std::string & constructorJson, std::string & constructorR, int & revision); ///< Loads an existing Filter row into arguments
int filterInsert( int dataSetId, const std::string & rFilter = "", const std::string & generatedFilter = "", const std::string & constructorJson = "", const std::string & constructorR = "", const std::string & name = ""); ///< Inserts a new Filter row into Filters and creates an empty FilterValues_#id. It returns id
void filterUpdate( int filterIndex, const std::string & rFilter = "", const std::string & generatedFilter = "", const std::string & constructorJson = "", const std::string & constructorR = "", const std::string & name = ""); ///< Updates an existing Filter row in Filters
void filterLoad( int filterIndex, std::string & rFilter, std::string & generatedFilter, std::string & constructorJson, std::string & constructorR, int & revision, std::string & name); ///< Loads an existing Filter row into arguments
void filterClear( int filterIndex); ///< Clears all values in Filter
void filterDelete( int filterIndex);
int filterGetDataSetId( int filterIndex);
std::string filterGetName( int filterIndex);
std::string filterLoadErrorMsg( int filterIndex);
void filterUpdateErrorMsg( int filterIndex, const std::string & errorMsg);
int filterIncRevision( int filterIndex);
Expand Down
2 changes: 1 addition & 1 deletion CommonData/datasetbasenode.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class DataSetBaseNode
public:
typedef std::set<DataSetBaseNode*> NodeSet;

DataSetBaseNode(dataSetBaseNodeType typeNode, DataSetBaseNode * parent);
DataSetBaseNode(dataSetBaseNodeType typeNode, DataSetBaseNode * parent = nullptr);
~DataSetBaseNode();

dataSetBaseNodeType nodeType() const { return _type; }
Expand Down
22 changes: 17 additions & 5 deletions CommonData/filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,24 @@
#include "dataset.h"
#include "databaseinterface.h"

/// Constructs an unnamed filter node that has `data` both as its dataset and as its parent node.
/// No database access happens here.
Filter::Filter(DataSet * data)
	: DataSetBaseNode(dataSetBaseNodeType::filter, data),
	  _data(data)
{
}

/// Constructs a named filter for `data`; the node is created without a parent.
///
/// If a filter with this name is already stored in the database it is loaded,
/// otherwise it is created when `createIfMissing` is true.
///
/// \param data             dataset this filter works on
/// \param name             name of the filter, must not be empty (asserted)
/// \param createIfMissing  whether to insert the filter when the name is unknown
/// \throws std::runtime_error when the filter does not exist and createIfMissing is false
Filter::Filter(DataSet * data, const std::string & name, bool createIfMissing)
	: DataSetBaseNode(dataSetBaseNodeType::filter),
	  _data(data),
	  _name(name)
{
	assert(!_name.empty());

	const bool knownInDatabase = db().filterGetId(_name) > -1;

	if(knownInDatabase)
	{
		dbLoad();
		return;
	}

	if(!createIfMissing)
		throw std::runtime_error("Filter by name '" + _name + "' but it doesnt exist and createIfMissing=false!\nAre you sure this filter should exist?");

	dbCreate();
}

/// Inserts this filter into the database and remembers the id it was given.
/// Must only be called while the filter has no id yet (asserted).
void Filter::dbCreate()
{
	assert(_id == -1);

	const int insertedId = db().filterInsert(_data->id(), _rFilter, _generatedFilter, _constructorJson, _constructorR, _name);

	_id = insertedId;
}

void Filter::dbUpdate()
Expand All @@ -21,7 +31,7 @@ void Filter::dbUpdate()

db().transactionWriteBegin();
if(!_data->writeBatchedToDB())
db().filterUpdate(_id, _rFilter, _generatedFilter, _constructorJson, _constructorR);
db().filterUpdate(_id, _rFilter, _generatedFilter, _constructorJson, _constructorR, _name);

incRevision();
db().transactionWriteEnd();
Expand All @@ -40,14 +50,16 @@ void Filter::dbUpdateErrorMsg()
void Filter::dbLoad()
{
if(_id == -1)
_id = db().filterGetId(_data->id());
_id = _name == "" ? db().filterGetId(_data->id()) : db().filterGetId(_name);

if(_id == -1)
return;

db().transactionReadBegin();

db().filterLoad(_id, _rFilter, _generatedFilter, _constructorJson, _constructorR, _revision);
std::string nameInDB = "";
db().filterLoad(_id, _rFilter, _generatedFilter, _constructorJson, _constructorR, _revision, nameInDB);
assert(nameInDB == _name);

_filteredRowCount = 0;

Expand Down
12 changes: 8 additions & 4 deletions CommonData/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ class Filter : public DataSetBaseNode
{
public:
Filter(DataSet * data);
Filter(DataSet * data, const std::string & name, bool createIfMissing = true);

DataSet * data() const { return _data; }
int id() const { return _id; }
const std::string & name() const { return _name; }
const std::string & rFilter() const { return _rFilter; }
const std::string & generatedFilter() const { return _generatedFilter; }
const std::string & constructorJson() const { return _constructorJson; }
Expand All @@ -38,6 +40,7 @@ class Filter : public DataSetBaseNode
void setConstructorJson( const std::string & constructorJson) { _constructorJson = constructorJson; dbUpdate(); }
void setConstructorR( const std::string & constructorR) { _constructorR = constructorR; dbUpdate(); }
void setErrorMsg( const std::string & errorMsg) { _errorMsg = errorMsg; dbUpdateErrorMsg(); }
void setName( const std::string & name) { _name = name; dbUpdate(); }
bool setFilterVector( const boolvec & filterResult);
void setFilterValueNoDB( size_t row, bool val);
void setRowCount( size_t rows);
Expand All @@ -61,11 +64,12 @@ class Filter : public DataSetBaseNode
DataSet * _data = nullptr;
int _id = -1,
_filteredRowCount = 0;
std::string _rFilter = "generatedFilter",
_generatedFilter = DEFAULT_FILTER_GEN,
_constructorJson = DEFAULT_FILTER_JSON,
std::string _rFilter = "",
_generatedFilter = "",
_constructorJson = "",
_constructorR = "",
_errorMsg = "";
_errorMsg = "",
_name = "";
std::vector<bool> _filtered;
};

Expand Down
3 changes: 2 additions & 1 deletion CommonData/internalDbDefinition.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ CREATE TABLE DataSets (
CREATE TABLE Filters (
id INTEGER PRIMARY KEY,
dataSet INT,
rFilter TEXT,
rFilter TEXT,
name TEXT,
generatedFilter TEXT,
constructorJson TEXT,
constructorR TEXT,
Expand Down
Loading

0 comments on commit 3a581cc

Please sign in to comment.