diff --git a/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-api.json b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-api.json new file mode 100644 index 000000000..33c65f53c --- /dev/null +++ b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-api.json @@ -0,0 +1,313 @@ +{ + "fields": [ + { + "fieldName": "aux_id", + "fieldType": "String", + "fieldLabel": "AUX ID", + "groups": [ + "identifiers", + "record_details" + ], + "scope": [ + "/patient-record/:uid", + "/golden-record/:uid", + "/record-details/:uid", + "/search/custom" + ], + "readOnly": true, + "accessLevel": [] + }, + { + "fieldName": "given_name", + "fieldType": "String", + "fieldLabel": "First Name", + "groups": [ + "name", + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/record-details/:uid", + "/browse-records" + ], + "validation": { + "required": true, + "onErrorMessage": "The given name cannot be empty" + }, + "accessLevel": [] + }, + { + "fieldName": "family_name", + "fieldType": "String", + "fieldLabel": "Last Name", + "groups": [ + "name", + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/browse-records" + ], + "validation": { + "required": true, + "onErrorMessage": "The family name cannot be empty" + }, + "accessLevel": [] + }, + { + "fieldName": "gender", + "fieldType": "String", + "fieldLabel": "Gender", + "groups": [ + "demographics", + "filter", + "sub_heading", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + 
"/search/custom", + "/browse-records" + ], + "validation": { + "required": true, + "regex": "^(male|female)$", + "onErrorMessage": "The Gender cannot be empty and should either be male, female or neutral" + }, + "accessLevel": [] + }, + { + "fieldName": "dob", + "fieldType": "Date", + "fieldLabel": "Date of Birth", + "groups": [ + "demographics", + "filter", + "sub_heading", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/browse-records" + ], + "accessLevel": [], + "validation": { + "required": true, + "onErrorMessage": "Date of birth cannot be empty" + } + }, + { + "fieldName": "city", + "fieldType": "String", + "fieldLabel": "City", + "groups": [ + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/custom", + "/browse-records" + ], + "accessLevel": [], + "validation": { + "required": true, + "onErrorMessage": "City cannot be empty" + } + }, + { + "fieldName": "phone_number", + "fieldType": "String", + "fieldLabel": "Phone No", + "groups": [ + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/custom", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "national_id", + "fieldType": "String", + "fieldLabel": "National ID", + "groups": [ + "identifiers", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/browse-records" + ], + "accessLevel": [], + "validation": { + 
"required": true, + "regex": "", + "onErrorMessage": "The national Id cannot be empty" + } + } + ], + "systemFields": [ + { + "fieldName": "recordType", + "fieldType": "String", + "fieldLabel": "Record Type", + "groups": [ + "none" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink" + ], + "accessLevel": [] + }, + { + "fieldName": "uid", + "fieldType": "String", + "fieldLabel": "UID", + "groups": [ + "identifiers", + "sub_heading", + "linked_records", + "record_details", + "filter" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink", + "/search-results/golden", + "/search-results/patient", + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "createdAt", + "fieldType": "String", + "fieldLabel": "Created At", + "groups": [ + "linked_records", + "record_details", + "audit_trail" + ], + "scope": [ + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "sourceId", + "fieldType": "SourceId", + "fieldLabel": "Source Id", + "groups": [ + "registering_facility", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "score", + "fieldType": "Number", + "fieldLabel": "Score", + "groups": [ + "none", + "record_details" + ], + "scope": [ + "/patient-record/:uid", + "/golden-record/:uid", + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink" + ], + "accessLevel": [] + } + ], + "rules": { + "deterministic": { + "QUERY_DETERMINISTIC_GOLDEN_RECORD_CANDIDATES": { + "vars": [ + "given_name", + "family_name", + "phone_number", + "national_id" + ], + "text": "eq(national_id) or (eq(given_name) and eq(family_name) and eq(phone_number))" + } + }, + "probabilistic": { + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_DISTANCE": { + "vars": [ + "given_name", + "family_name", + "city" + ], + "text": "match(given_name,3) 
and match(family_name,3) or match(given_name,3) and match(city,3) or match(family_name,3) and match(city,3)" + }, + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_PHONE_NUMBER": { + "vars": [ + "phone_number" + ], + "text": "match(phone_number,3)" + }, + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_NATIONAL_ID": { + "vars": [ + "national_id" + ], + "text": "match(national_id,3)" + } + } + } +} \ No newline at end of file diff --git a/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-api.json b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-api.json new file mode 100644 index 000000000..33c65f53c --- /dev/null +++ b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-api.json @@ -0,0 +1,313 @@ +{ + "fields": [ + { + "fieldName": "aux_id", + "fieldType": "String", + "fieldLabel": "AUX ID", + "groups": [ + "identifiers", + "record_details" + ], + "scope": [ + "/patient-record/:uid", + "/golden-record/:uid", + "/record-details/:uid", + "/search/custom" + ], + "readOnly": true, + "accessLevel": [] + }, + { + "fieldName": "given_name", + "fieldType": "String", + "fieldLabel": "First Name", + "groups": [ + "name", + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/record-details/:uid", + "/browse-records" + ], + "validation": { + "required": true, + "onErrorMessage": "The given name cannot be empty" + }, + "accessLevel": [] + }, + { + "fieldName": "family_name", + "fieldType": "String", + "fieldLabel": "Last Name", + "groups": [ + "name", + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + 
"/search-results/patient", + "/browse-records" + ], + "validation": { + "required": true, + "onErrorMessage": "The family name cannot be empty" + }, + "accessLevel": [] + }, + { + "fieldName": "gender", + "fieldType": "String", + "fieldLabel": "Gender", + "groups": [ + "demographics", + "filter", + "sub_heading", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/custom", + "/browse-records" + ], + "validation": { + "required": true, + "regex": "^(male|female)$", + "onErrorMessage": "The Gender cannot be empty and should either be male, female or neutral" + }, + "accessLevel": [] + }, + { + "fieldName": "dob", + "fieldType": "Date", + "fieldLabel": "Date of Birth", + "groups": [ + "demographics", + "filter", + "sub_heading", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/browse-records" + ], + "accessLevel": [], + "validation": { + "required": true, + "onErrorMessage": "Date of birth cannot be empty" + } + }, + { + "fieldName": "city", + "fieldType": "String", + "fieldLabel": "City", + "groups": [ + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/custom", + "/browse-records" + ], + "accessLevel": [], + "validation": { + "required": true, + "onErrorMessage": "City cannot be empty" + } + }, + { + "fieldName": "phone_number", + "fieldType": "String", + "fieldLabel": "Phone No", + "groups": [ + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/custom", + "/browse-records" + ], 
+ "accessLevel": [] + }, + { + "fieldName": "national_id", + "fieldType": "String", + "fieldLabel": "National ID", + "groups": [ + "identifiers", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/browse-records" + ], + "accessLevel": [], + "validation": { + "required": true, + "regex": "", + "onErrorMessage": "The national Id cannot be empty" + } + } + ], + "systemFields": [ + { + "fieldName": "recordType", + "fieldType": "String", + "fieldLabel": "Record Type", + "groups": [ + "none" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink" + ], + "accessLevel": [] + }, + { + "fieldName": "uid", + "fieldType": "String", + "fieldLabel": "UID", + "groups": [ + "identifiers", + "sub_heading", + "linked_records", + "record_details", + "filter" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink", + "/search-results/golden", + "/search-results/patient", + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "createdAt", + "fieldType": "String", + "fieldLabel": "Created At", + "groups": [ + "linked_records", + "record_details", + "audit_trail" + ], + "scope": [ + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "sourceId", + "fieldType": "SourceId", + "fieldLabel": "Source Id", + "groups": [ + "registering_facility", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "score", + "fieldType": "Number", + "fieldLabel": "Score", + "groups": [ + "none", + "record_details" + ], + "scope": [ + "/patient-record/:uid", + "/golden-record/:uid", + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink" + ], + 
"accessLevel": [] + } + ], + "rules": { + "deterministic": { + "QUERY_DETERMINISTIC_GOLDEN_RECORD_CANDIDATES": { + "vars": [ + "given_name", + "family_name", + "phone_number", + "national_id" + ], + "text": "eq(national_id) or (eq(given_name) and eq(family_name) and eq(phone_number))" + } + }, + "probabilistic": { + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_DISTANCE": { + "vars": [ + "given_name", + "family_name", + "city" + ], + "text": "match(given_name,3) and match(family_name,3) or match(given_name,3) and match(city,3) or match(family_name,3) and match(city,3)" + }, + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_PHONE_NUMBER": { + "vars": [ + "phone_number" + ], + "text": "match(phone_number,3)" + }, + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_NATIONAL_ID": { + "vars": [ + "national_id" + ], + "text": "match(national_id,3)" + } + } + } +} \ No newline at end of file diff --git a/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-match-dp-api.json b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-match-dp-api.json new file mode 100644 index 000000000..33c65f53c --- /dev/null +++ b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-match-dp-api.json @@ -0,0 +1,313 @@ +{ + "fields": [ + { + "fieldName": "aux_id", + "fieldType": "String", + "fieldLabel": "AUX ID", + "groups": [ + "identifiers", + "record_details" + ], + "scope": [ + "/patient-record/:uid", + "/golden-record/:uid", + "/record-details/:uid", + "/search/custom" + ], + "readOnly": true, + "accessLevel": [] + }, + { + "fieldName": "given_name", + "fieldType": "String", + "fieldLabel": "First Name", + "groups": [ + "name", + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/record-details/:uid", + "/browse-records" + ], + 
"validation": { + "required": true, + "onErrorMessage": "The given name cannot be empty" + }, + "accessLevel": [] + }, + { + "fieldName": "family_name", + "fieldType": "String", + "fieldLabel": "Last Name", + "groups": [ + "name", + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/browse-records" + ], + "validation": { + "required": true, + "onErrorMessage": "The family name cannot be empty" + }, + "accessLevel": [] + }, + { + "fieldName": "gender", + "fieldType": "String", + "fieldLabel": "Gender", + "groups": [ + "demographics", + "filter", + "sub_heading", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/custom", + "/browse-records" + ], + "validation": { + "required": true, + "regex": "^(male|female)$", + "onErrorMessage": "The Gender cannot be empty and should either be male, female or neutral" + }, + "accessLevel": [] + }, + { + "fieldName": "dob", + "fieldType": "Date", + "fieldLabel": "Date of Birth", + "groups": [ + "demographics", + "filter", + "sub_heading", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/browse-records" + ], + "accessLevel": [], + "validation": { + "required": true, + "onErrorMessage": "Date of birth cannot be empty" + } + }, + { + "fieldName": "city", + "fieldType": "String", + "fieldLabel": "City", + "groups": [ + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + 
"/search/custom", + "/browse-records" + ], + "accessLevel": [], + "validation": { + "required": true, + "onErrorMessage": "City cannot be empty" + } + }, + { + "fieldName": "phone_number", + "fieldType": "String", + "fieldLabel": "Phone No", + "groups": [ + "demographics", + "filter", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/custom", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "national_id", + "fieldType": "String", + "fieldLabel": "National ID", + "groups": [ + "identifiers", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink", + "/search/simple", + "/search/custom", + "/search-results/golden", + "/search-results/patient", + "/browse-records" + ], + "accessLevel": [], + "validation": { + "required": true, + "regex": "", + "onErrorMessage": "The national Id cannot be empty" + } + } + ], + "systemFields": [ + { + "fieldName": "recordType", + "fieldType": "String", + "fieldLabel": "Record Type", + "groups": [ + "none" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink" + ], + "accessLevel": [] + }, + { + "fieldName": "uid", + "fieldType": "String", + "fieldLabel": "UID", + "groups": [ + "identifiers", + "sub_heading", + "linked_records", + "record_details", + "filter" + ], + "scope": [ + "/notifications/match-details", + "/record-details/:uid/relink", + "/search-results/golden", + "/search-results/patient", + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "createdAt", + "fieldType": "String", + "fieldLabel": "Created At", + "groups": [ + "linked_records", + "record_details", + "audit_trail" + ], + "scope": [ + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "sourceId", + "fieldType": "SourceId", + 
"fieldLabel": "Source Id", + "groups": [ + "registering_facility", + "linked_records", + "record_details" + ], + "scope": [ + "/record-details/:uid", + "/browse-records" + ], + "accessLevel": [] + }, + { + "fieldName": "score", + "fieldType": "Number", + "fieldLabel": "Score", + "groups": [ + "none", + "record_details" + ], + "scope": [ + "/patient-record/:uid", + "/golden-record/:uid", + "/record-details/:uid", + "/notifications/match-details", + "/record-details/:uid/relink" + ], + "accessLevel": [] + } + ], + "rules": { + "deterministic": { + "QUERY_DETERMINISTIC_GOLDEN_RECORD_CANDIDATES": { + "vars": [ + "given_name", + "family_name", + "phone_number", + "national_id" + ], + "text": "eq(national_id) or (eq(given_name) and eq(family_name) and eq(phone_number))" + } + }, + "probabilistic": { + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_DISTANCE": { + "vars": [ + "given_name", + "family_name", + "city" + ], + "text": "match(given_name,3) and match(family_name,3) or match(given_name,3) and match(city,3) or match(family_name,3) and match(city,3)" + }, + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_PHONE_NUMBER": { + "vars": [ + "phone_number" + ], + "text": "match(phone_number,3)" + }, + "QUERY_MATCH_GOLDEN_RECORD_CANDIDATES_BY_NATIONAL_ID": { + "vars": [ + "national_id" + ], + "text": "match(national_id,3)" + } + } + } +} \ No newline at end of file diff --git a/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-match-dp.json b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-match-dp.json new file mode 100644 index 000000000..5200b69a8 --- /dev/null +++ b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp-match-dp.json @@ -0,0 +1,250 @@ +{ + "uniqueInteractionFields": [ + { + "fieldName": "aux_date_created", + "fieldType": "DateTime" + }, + { + "fieldName": "aux_id", + "fieldType": "String", + "csvCol": 0 + }, + { + "fieldName": "aux_clinical_data", + "fieldType": "String", + "csvCol": 10 + 
} + ], + "uniqueGoldenRecordFields": [ + { + "fieldName": "aux_date_created", + "fieldType": "DateTime" + }, + { + "fieldName": "aux_auto_update_enabled", + "fieldType": "Bool", + "default": "true" + }, + { + "fieldName": "aux_id", + "fieldType": "String", + "source": "aux_id" + } + ], + "additionalNodes": [ + { + "nodeName": "SourceId", + "fields": [ + { + "fieldName": "facility", + "fieldType": "String", + "csvCol": 8 + }, + { + "fieldName": "patient", + "fieldType": "String", + "csvCol": 9 + } + ] + } + ], + "demographicFields": [ + { + "fieldName": "given_name", + "fieldType": "String", + "source": { + "csvCol": 1 + }, + "indexGoldenRecord": "@index(exact,trigram)", + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8806329, + "u": 0.0026558 + }, + "matchMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8806329, + "u": 0.0026558 + } + }, + { + "fieldName": "family_name", + "fieldType": "String", + "source": { + "csvCol": 2 + }, + "indexGoldenRecord": "@index(exact,trigram)", + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.9140443, + "u": 0.0006275 + }, + "matchMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.9140443, + "u": 0.0006275 + } + }, + { + "fieldName": "gender", + "fieldType": "String", + "source": { + "csvCol": 3 + }, + "indexGoldenRecord": "@index(exact,trigram)", + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.9468393, + "u": 0.4436446 + }, + "matchMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.9468393, + "u": 0.4436446 + } + }, + { + "fieldName": "dob", + "fieldType": "String", + "source": { + "csvCol": 4 + }, + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + 
], + "m": 0.7856196, + "u": 0.0000465 + }, + "matchMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.7856196, + "u": 0.0000465 + } + }, + { + "fieldName": "city", + "fieldType": "String", + "source": { + "csvCol": 5 + }, + "indexGoldenRecord": "@index(exact,trigram)", + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8445694, + "u": 0.0355741 + }, + "matchMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8408500, + "u": 0.0000004 + } + }, + { + "fieldName": "phone_number", + "fieldType": "String", + "source": { + "csvCol": 6 + }, + "indexGoldenRecord": "@index(exact,trigram)", + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8408500, + "u": 0.0000004 + }, + "matchMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8408500, + "u": 0.0000004 + } + }, + { + "fieldName": "national_id", + "fieldType": "String", + "source": { + "csvCol": 7 + }, + "indexGoldenRecord": "@index(exact)" + } + ], + "rules": { + "link": { + "deterministic": { + "QUERY_LINK_DETERMINISTIC_A": { + "vars": [ + "national_id" + ], + "text": "eq(national_id)" + } + } + }, + "validate": { + "deterministic": { + "QUERY_VALIDATE_DETERMINISTIC_A": { + "vars": [ + "given_name", + "family_name", + "phone_number" + ], + "text": "eq(given_name) and eq(family_name) and eq(phone_number)" + } + } + }, + "matchNotification": { + "deterministic": { + "QUERY_MATCH_DETERMINISTIC_A": { + "vars": [ + "given_name", + "family_name", + "phone_number" + ], + "text": "eq(given_name) and eq(family_name) and eq(phone_number)" + } + }, + "probabilistic": { + "QUERY_MATCH_PROBABILISTIC_BLOCK": { + "vars": [ + "given_name", + "family_name", + "phone_number" + ], + "text": "match(given_name,3) and match(family_name,3) or match(given_name,3) and 
match(phone_number,3) or match(family_name,3) and match(phone_number,3)" + } + } + } + } +} diff --git a/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp.json b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp.json new file mode 100644 index 000000000..b487dafdd --- /dev/null +++ b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d-validate-dp.json @@ -0,0 +1,188 @@ +{ + "uniqueInteractionFields": [ + { + "fieldName": "aux_date_created", + "fieldType": "DateTime" + }, + { + "fieldName": "aux_id", + "fieldType": "String", + "csvCol": 0 + }, + { + "fieldName": "aux_clinical_data", + "fieldType": "String", + "csvCol": 10 + } + ], + "uniqueGoldenRecordFields": [ + { + "fieldName": "aux_date_created", + "fieldType": "DateTime" + }, + { + "fieldName": "aux_auto_update_enabled", + "fieldType": "Bool", + "default": "true" + }, + { + "fieldName": "aux_id", + "fieldType": "String", + "source": "aux_id" + } + ], + "additionalNodes": [ + { + "nodeName": "SourceId", + "fields": [ + { + "fieldName": "facility", + "fieldType": "String", + "csvCol": 8 + }, + { + "fieldName": "patient", + "fieldType": "String", + "csvCol": 9 + } + ] + } + ], + "demographicFields": [ + { + "fieldName": "given_name", + "fieldType": "String", + "source": { + "csvCol": 1 + }, + "indexGoldenRecord": "@index(exact)", + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8806329, + "u": 0.0026558 + } + }, + { + "fieldName": "family_name", + "fieldType": "String", + "source": { + "csvCol": 2 + }, + "indexGoldenRecord": "@index(exact)", + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.9140443, + "u": 0.0006275 + } + }, + { + "fieldName": "gender", + "fieldType": "String", + "source": { + "csvCol": 3 + }, + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 
0.9468393, + "u": 0.4436446 + } + }, + { + "fieldName": "dob", + "fieldType": "String", + "source": { + "csvCol": 4 + }, + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.7856196, + "u": 0.0000465 + } + }, + { + "fieldName": "city", + "fieldType": "String", + "source": { + "csvCol": 5 + }, + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8445694, + "u": 0.0355741 + } + }, + { + "fieldName": "phone_number", + "fieldType": "String", + "source": { + "csvCol": 6 + }, + "indexGoldenRecord": "@index(exact)", + "validateMetaData": { + "comparison": "JARO_WINKLER_SIMILARITY", + "comparisonLevels": [ + 0.92 + ], + "m": 0.8408500, + "u": 0.0000004 + } + }, + { + "fieldName": "national_id", + "fieldType": "String", + "source": { + "csvCol": 7 + }, + "indexGoldenRecord": "@index(exact)" + } + ], + "rules": { + "link": { + "deterministic": { + "QUERY_LINK_DETERMINISTIC_A": { + "vars": [ + "national_id" + ], + "text": "eq(national_id)" + }, + "QUERY_LINK_DETERMINISTIC_B": { + "vars": [ + "given_name", + "family_name", + "phone_number" + ], + "text": "eq(given_name) and eq(family_name) and eq(phone_number)" + } + } + }, + "validate": { + "deterministic": { + "QUERY_VALIDATE_DETERMINISTIC_A": { + "vars": [ + "given_name", + "family_name", + "phone_number" + ], + "text": "eq(given_name) and eq(family_name) and eq(phone_number)" + } + } + } + + } + +} diff --git a/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d.json b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d.json new file mode 100644 index 000000000..4a768d891 --- /dev/null +++ b/JeMPI_Apps/JeMPI_Configuration/reference/config-reference-link-d.json @@ -0,0 +1,126 @@ +{ + "uniqueInteractionFields": [ + { + "fieldName": "aux_date_created", + "fieldType": "DateTime" + }, + { + "fieldName": "aux_id", + "fieldType": "String", + "csvCol": 0 + }, + { + "fieldName": 
"aux_clinical_data", + "fieldType": "String", + "csvCol": 10 + } + ], + "uniqueGoldenRecordFields": [ + { + "fieldName": "aux_date_created", + "fieldType": "DateTime" + }, + { + "fieldName": "aux_auto_update_enabled", + "fieldType": "Bool", + "default": "true" + }, + { + "fieldName": "aux_id", + "fieldType": "String", + "source": "aux_id" + } + ], + "additionalNodes": [ + { + "nodeName": "SourceId", + "fields": [ + { + "fieldName": "facility", + "fieldType": "String", + "csvCol": 8 + }, + { + "fieldName": "patient", + "fieldType": "String", + "csvCol": 9 + } + ] + } + ], + "demographicFields": [ + { + "fieldName": "given_name", + "fieldType": "String", + "source": { + "csvCol": 1 + }, + "indexGoldenRecord": "@index(exact)" + }, + { + "fieldName": "family_name", + "fieldType": "String", + "source": { + "csvCol": 2 + }, + "indexGoldenRecord": "@index(exact)" + }, + { + "fieldName": "gender", + "fieldType": "String", + "source": { + "csvCol": 3 + } + }, + { + "fieldName": "dob", + "fieldType": "String", + "source": { + "csvCol": 4 + } + }, + { + "fieldName": "city", + "fieldType": "String", + "source": { + "csvCol": 5 + } + }, + { + "fieldName": "phone_number", + "fieldType": "String", + "source": { + "csvCol": 6 + }, + "indexGoldenRecord": "@index(exact)" + }, + { + "fieldName": "national_id", + "fieldType": "String", + "source": { + "csvCol": 7 + }, + "indexGoldenRecord": "@index(exact)" + } + ], + "rules": { + "link": { + "deterministic": { + "QUERY_LINK_DETERMINISTIC_A": { + "vars": [ + "national_id" + ], + "text": "eq(national_id)" + }, + "QUERY_LINK_DETERMINISTIC_B": { + "vars": [ + "given_name", + "family_name", + "phone_number" + ], + "text": "eq(given_name) and eq(family_name) and eq(phone_number)" + } + } + } + } +} diff --git a/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/CustomLinkerProbabilistic.scala b/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/CustomLinkerProbabilistic.scala index 56f391eb8..e33096a62 100644 --- 
a/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/CustomLinkerProbabilistic.scala +++ b/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/CustomLinkerProbabilistic.scala @@ -250,12 +250,13 @@ object CustomLinkerProbabilistic { def updateMU(): Unit = writer.println(" public static void updateMU(final CustomMU mu) {") - linkMuList.zipWithIndex.foreach((field, idx) => + if (!linkMuList.isEmpty) + linkMuList.zipWithIndex.foreach((field, idx) => val fieldName = Utils.snakeCaseToCamelCase(field.fieldName) if (idx == 0) - writer.print(" " * 6 + s"if (mu.$fieldName().m() > mu.$fieldName().u()") + writer.print(" " * 6 + s"if (mu.customLinkMU().$fieldName().m() > mu.customLinkMU().$fieldName().u()") else - writer.print(" " * 10 + s"&& mu.$fieldName().m() > mu.$fieldName().u()") + writer.print(" " * 10 + s"&& mu.customLinkMU().$fieldName().m() > mu.customLinkMU().$fieldName().u()") end if if (idx + 1 < linkMuList.length) writer.println() @@ -263,7 +264,6 @@ object CustomLinkerProbabilistic { writer.println(") {") end if ) - if (!linkMuList.isEmpty) writer.println(" " * 9 + "updatedLinkFields = new LinkFields(") linkMuList.zipWithIndex.foreach((field, idx) => val fieldName = Utils.snakeCaseToCamelCase(field.fieldName) @@ -276,7 +276,7 @@ object CustomLinkerProbabilistic { writer.print(" " * 12 + s"new LinkerProbabilistic.Field($comparison, ${ if (comparisonLevels.length == 1) "List.of(" else "Arrays.asList(" - }${extractComparisonList(comparisonLevels)}), mu.$fieldName().m(), mu.$fieldName().u())") + }${extractComparisonList(comparisonLevels)}), mu.customLinkMU().$fieldName().m(), mu.customLinkMU().$fieldName().u())") if (idx + 1 < linkMuList.length) writer.println(",") else @@ -285,6 +285,80 @@ object CustomLinkerProbabilistic { ) writer.println(" " * 6 + "}") end if + + if (!validateMuList.isEmpty) + validateMuList.zipWithIndex.foreach((field, idx) => + val fieldName = Utils.snakeCaseToCamelCase(field.fieldName) + if (idx == 0) + writer.print(" " * 6 + 
s"if (mu.customValidateMU().$fieldName().m() > mu.customValidateMU().$fieldName().u()") + else + writer.print(" " * 10 + s"&& mu.customValidateMU().$fieldName().m() > mu.customValidateMU().$fieldName().u()") + end if + if (idx + 1 < validateMuList.length) + writer.println() + else + writer.println(") {") + end if + ) + writer.println(" " * 9 + "updatedValidateFields = new ValidateFields(") + validateMuList.zipWithIndex.foreach((field, idx) => + val fieldName = Utils.snakeCaseToCamelCase(field.fieldName) + val comparison = field.validateMetaData.get.comparison + val comparisonLevels = field.validateMetaData.get.comparisonLevels + + def extractComparisonList(levels: List[Double]): String = { + levels.map(level => s""" ${level.toString}F""".stripMargin).mkString(",").trim + } + + writer.print(" " * 12 + s"new LinkerProbabilistic.Field($comparison, ${ + if (comparisonLevels.length == 1) "List.of(" else "Arrays.asList(" + }${extractComparisonList(comparisonLevels)}), mu.customValidateMU().$fieldName().m(), mu.customValidateMU().$fieldName().u())") + if (idx + 1 < validateMuList.length) + writer.println(",") + else + writer.println(");") + end if + ) + writer.println(" " * 6 + "}") + end if + + if (!matchNotificationMuList.isEmpty) + matchNotificationMuList.zipWithIndex.foreach((field, idx) => + val fieldName = Utils.snakeCaseToCamelCase(field.fieldName) + if (idx == 0) + writer.print(" " * 6 + s"if (mu.customMatchMU().$fieldName().m() > mu.customMatchMU().$fieldName().u()") + else + writer.print(" " * 10 + s"&& mu.customMatchMU().$fieldName().m() > mu.customMatchMU().$fieldName().u()") + end if + if (idx + 1 < matchNotificationMuList.length) + writer.println() + else + writer.println(") {") + end if + ) + writer.println(" " * 9 + "updatedMatchNotificationFields = new MatchNotificationFields(") + matchNotificationMuList.zipWithIndex.foreach((field, idx) => + val fieldName = Utils.snakeCaseToCamelCase(field.fieldName) + val comparison = field.matchMetaData.get.comparison 
+ val comparisonLevels = field.matchMetaData.get.comparisonLevels + + def extractComparisonList(levels: List[Double]): String = { + levels.map(level => s""" ${level.toString}F""".stripMargin).mkString(",").trim + } + + writer.print(" " * 12 + s"new LinkerProbabilistic.Field($comparison, ${ + if (comparisonLevels.length == 1) "List.of(" else "Arrays.asList(" + }${extractComparisonList(comparisonLevels)}), mu.customMatchMU().$fieldName().m(), mu.customMatchMU().$fieldName().u())") + if (idx + 1 < matchNotificationMuList.length) + writer.println(",") + else + writer.println(");") + end if + ) + writer.println(" " * 6 + "}") + end if + + writer.println(" }") writer.println() @@ -294,11 +368,30 @@ object CustomLinkerProbabilistic { def checkUpdatedMU(): Unit = def generateCode(): String = - return s""" if (updatedLinkFields != null) { - | LOGGER.info("Using updated Link MU values: {}", updatedLinkFields); - | CustomLinkerProbabilistic.currentLinkFields = updatedLinkFields; - | updatedLinkFields = null; - | }""".stripMargin + val s1 = (if (linkMuList.length > 0) + s""" if (updatedLinkFields != null) { + | LOGGER.info("Using updated Link MU values: {}", updatedLinkFields); + | CustomLinkerProbabilistic.currentLinkFields = updatedLinkFields; + | updatedLinkFields = null; + | } + |""".stripMargin else "") + + val s2 = (if (validateMuList.length > 0) + s""" if (updatedValidateFields != null) { + | LOGGER.info("Using updated Validate MU values: {}", updatedValidateFields); + | CustomLinkerProbabilistic.currentValidateFields = updatedValidateFields; + | updatedValidateFields = null; + | } + |""".stripMargin else "") + + val s3 = (if (matchNotificationMuList.length > 0) + s""" if (updatedMatchNotificationFields != null) { + | LOGGER.info("Using updated MatchNotification MU values: {}", updatedMatchNotificationFields); + | CustomLinkerProbabilistic.currentMatchNotificationFields = updatedMatchNotificationFields; + | updatedMatchNotificationFields = null; + | }""".stripMargin 
else "") + + s1 + s2 + s3 end generateCode writer.println( diff --git a/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/CustomMU.scala b/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/CustomMU.scala index 3b7368c26..2a0d3ba72 100644 --- a/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/CustomMU.scala +++ b/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/CustomMU.scala @@ -15,41 +15,90 @@ private object CustomMU { println("Creating " + classFile) val file: File = new File(classFile) val writer: PrintWriter = new PrintWriter(file) + + val linkFilteredFields = fields.filter(f => f.linkMetaData.isDefined) + val validateFilteredFields = + fields.filter(f => f.validateMetaData.isDefined) + val matchFilteredFields = fields.filter(f => f.matchMetaData.isDefined) + + val sendToEM = + (linkFilteredFields.length + validateFilteredFields.length + matchFilteredFields.length) > 0 + writer.print(s"""package $packageSharedModels; | |import com.fasterxml.jackson.annotation.JsonInclude; | |@JsonInclude(JsonInclude.Include.NON_NULL) |public record $customClassName(String tag, - |""".stripMargin) - val margin = 23 - val filteredFields = fields.filter(f => f.linkMetaData.isDefined) - if (filteredFields.length == 0) - writer.println(s""" Probability dummy) { - | - | public static final Boolean SEND_INTERACTIONS_TO_EM = false; - |""".stripMargin) + | CustomLinkMU customLinkMU, + | CustomValidateMU customValidateMU, + | CustomMatchMU customMatchMU) { + | + | public static final Boolean SEND_INTERACTIONS_TO_EM = ${ + if (sendToEM) "true" else "false" + }; + | + | public record Probability(float m, float u) { + | } + | + | public record CustomLinkMU(""".stripMargin) + val linkMargin = 30 + if (linkFilteredFields.length == 0) + writer.println(s"""Probability dummy) { + | }""".stripMargin) + else + linkFilteredFields.zipWithIndex.foreach { case (f, i) => + val parameterName = Utils.snakeCaseToCamelCase(f.fieldName) + writer.print(" " * (if (i 
== 0) 0 else linkMargin)) + writer.print(s"Probability $parameterName") + if (i + 1 < linkFilteredFields.length) writer.println(",") + else + writer.println(s""") { + | }""".stripMargin) + end if + } + end if + + writer.print(s""" + | public record CustomValidateMU(""".stripMargin) + val validateMargin = 34 + if (validateFilteredFields.length == 0) + writer.println(s"""Probability dummy) { + | }""".stripMargin) else - filteredFields.zipWithIndex.foreach { case (f, i) => + validateFilteredFields.zipWithIndex.foreach { case (f, i) => val parameterName = Utils.snakeCaseToCamelCase(f.fieldName) - writer.print(" " * margin) + writer.print(" " * (if (i == 0) 0 else validateMargin)) writer.print(s"Probability $parameterName") - if (i + 1 < filteredFields.length) writer.println(",") + if (i + 1 < validateFilteredFields.length) writer.println(",") else - writer.println(") {") - writer.print( - s""" - | public static final Boolean SEND_INTERACTIONS_TO_EM = true; - |""".stripMargin - ) + writer.println(s""") { + | }""".stripMargin) end if } end if + + writer.print(s""" + | public record CustomMatchMU(""".stripMargin) + val matchMargin = 31 + if (matchFilteredFields.length == 0) + writer.println(s"""Probability dummy) { + | }""".stripMargin) + else + matchFilteredFields.zipWithIndex.foreach { case (f, i) => + val parameterName = Utils.snakeCaseToCamelCase(f.fieldName) + writer.print(" " * (if (i == 0) 0 else matchMargin)) + writer.print(s"Probability $parameterName") + if (i + 1 < matchFilteredFields.length) writer.println(",") + else + writer.println(s""") { + | }""".stripMargin) + end if + } + end if + writer.println() - writer.println(s""" public record Probability(float m, float u) { - | } - | - |}""".stripMargin) + writer.println(s"""}""".stripMargin) writer.flush() writer.close() end generate diff --git a/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/ScalaCustomMU.scala b/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/ScalaCustomMU.scala index 
258c23eed..2b3fe19fd 100644 --- a/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/ScalaCustomMU.scala +++ b/JeMPI_Apps/JeMPI_Configuration/src/main/scala/configuration/ScalaCustomMU.scala @@ -12,29 +12,77 @@ object ScalaCustomMU { def generate(config: Config): Any = { - def fieldDefs(): String = + def linkFieldDefs(): String = config.demographicFields .filter(f => f.linkMetaData.isDefined) .map(f => { val fieldName = Utils.snakeCaseToCamelCase(f.fieldName) - s"""${" " * 4}${fieldName}: Probability,""" + s"""${" " * 2}${fieldName}: Probability,""" }) .mkString(sys.props("line.separator")) .trim .dropRight(1) - end fieldDefs + end linkFieldDefs - def probSeqDefs(): String = + def validateFieldDefs(): String = + config.demographicFields + .filter(f => f.validateMetaData.isDefined) + .map(f => { + val fieldName = Utils.snakeCaseToCamelCase(f.fieldName) + s"""${" " * 2}${fieldName}: Probability,""" + }) + .mkString(sys.props("line.separator")) + .trim + .dropRight(1) + end validateFieldDefs + + def matchFieldDefs(): String = + config.demographicFields + .filter(f => f.matchMetaData.isDefined) + .map(f => { + val fieldName = Utils.snakeCaseToCamelCase(f.fieldName) + s"""${" " * 2}${fieldName}: Probability,""" + }) + .mkString(sys.props("line.separator")) + .trim + .dropRight(1) + end matchFieldDefs + + def linkProbSeqDefs(): String = config.demographicFields .filter(f => f.linkMetaData.isDefined) .zipWithIndex .map((f, i) => { - s"""${" " * 12}Probability(muSeq.apply(${i}).m, muSeq.apply(${i}).u),""" + s"""${" " * 8}Probability(muSeqLink.apply(${i}).m, muSeqLink.apply(${i}).u),""" + }) + .mkString(sys.props("line.separator")) + .trim + .dropRight(1) + end linkProbSeqDefs + + def validateProbSeqDefs(): String = + config.demographicFields + .filter(f => f.validateMetaData.isDefined) + .zipWithIndex + .map((f, i) => { + s"""${" " * 8}Probability(muSeqValidate.apply(${i}).m, muSeqValidate.apply(${i}).u),""" }) .mkString(sys.props("line.separator")) .trim 
.dropRight(1) - end probSeqDefs + end validateProbSeqDefs + + def matchProbSeqDefs(): String = + config.demographicFields + .filter(f => f.matchMetaData.isDefined) + .zipWithIndex + .map((f, i) => { + s"""${" " * 8}Probability(muSeqMatch.apply(${i}).m, muSeqMatch.apply(${i}).u),""" + }) + .mkString(sys.props("line.separator")) + .trim + .dropRight(1) + end matchProbSeqDefs val classFile: String = classLocation + File.separator + custom_className + ".scala" @@ -54,17 +102,39 @@ object ScalaCustomMU { |import scala.collection.immutable.ArraySeq | |case class ${custom_className}( - | tag: String, - | ${fieldDefs()} + | tag: String, + | customLinkMU: CustomLinkMU, + | customValidateMU: CustomValidateMU, + | customMatchMU: CustomMatchMU + |) + | + |case class CustomLinkMU( + | ${linkFieldDefs()} + |) + | + |case class CustomValidateMU( + | ${validateFieldDefs()} + |) + | + |case class CustomMatchMU( + | ${matchFieldDefs()} |) | |object ${custom_className} { | - | def fromArraySeq(tag: String, muSeq: ArraySeq[MU]): CustomMU = - | CustomMU( - | tag, - | ${probSeqDefs()} - | ) + | def fromArraySeq(tag: String, muSeqLink: ArraySeq[MU], muSeqValidate: ArraySeq[MU], muSeqMatch: ArraySeq[MU]): CustomMU = + | CustomMU( + | tag, + | CustomLinkMU( + | ${linkProbSeqDefs()} + | ), + | CustomValidateMU( + | ${validateProbSeqDefs()} + | ), + | CustomMatchMU( + | ${matchProbSeqDefs()} + | ) + | ) | |} |""".stripMargin) diff --git a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/EM_Scala.scala b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/EM_Scala.scala index c2e8c7298..ac46ca79c 100644 --- a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/EM_Scala.scala +++ b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/EM_Scala.scala @@ -5,6 +5,7 @@ import com.fasterxml.jackson.module.scala.{ ClassTagExtensions, DefaultScalaModule } + import com.typesafe.scalalogging.LazyLogging import org.apache.kafka.common.serialization.{Serde, Serdes} import 
org.apache.kafka.streams.kstream.{Consumed, KStream} @@ -92,14 +93,33 @@ object EM_Scala extends LazyLogging { interactions.map((fields: Array[String]) => ArraySeq.unsafeWrapArray(fields) ) - val (mu, ms) = Profile.profile(EM_Task.run(interactions_)) + var linkResults: (ArraySeq[MU], Double) = (null, 0.0) + var validateResults: (ArraySeq[MU], Double) = (null, 0.0) + var matchResults: (ArraySeq[MU], Double) = (null, 0.0) + + if (CustomFields.LINK_COLS.length > 1) { + linkResults = + Profile.profile(EM_Task.run(CustomFields.LINK_COLS, interactions_)) + } + if (CustomFields.VALIDATE_COLS.length > 1) { + validateResults = Profile.profile( + EM_Task.run(CustomFields.VALIDATE_COLS, interactions_) + ) + } + if (CustomFields.MATCH_COLS.length > 1) { + matchResults = + Profile.profile(EM_Task.run(CustomFields.MATCH_COLS, interactions_)) + } for (i <- LINK_COLS.indices) { - Utils.printMU(CustomFields.FIELDS.apply(LINK_COLS.apply(i)).name, mu(i)) + Utils.printMU( + CustomFields.FIELDS.apply(LINK_COLS.apply(i)).name, + linkResults._1(i) + ) } - logger.info(s"$ms ms") - Producer.send(tag, mu); + logger.info(s"${linkResults._2} ms") + Producer.send(tag, linkResults._1, validateResults._1, matchResults._1); } } diff --git a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/EM_Task.scala b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/EM_Task.scala index 7cdcd51ed..d487e035e 100644 --- a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/EM_Task.scala +++ b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/EM_Task.scala @@ -1,7 +1,7 @@ package org.jembi.jempi.em import com.typesafe.scalalogging.LazyLogging -import org.jembi.jempi.em.CustomFields.{FIELDS, LINK_COLS} +import org.jembi.jempi.em.CustomFields.FIELDS import org.jembi.jempi.em.Utils._ import java.lang.Math.log @@ -12,11 +12,14 @@ import scala.util.Random object EM_Task extends LazyLogging { - def run(interactions: ParVector[ArraySeq[String]]): ArraySeq[MU] = { + def run( + 
xxxCols: ArraySeq[Int], + interactions: ParVector[ArraySeq[String]] + ): ArraySeq[MU] = { val (gamma, ms2) = Profile.profile( Gamma.getGamma( - CustomFields.LINK_COLS, + xxxCols, Map[String, Long](), interactions.head, interactions.tail @@ -51,7 +54,7 @@ object EM_Task extends LazyLogging { randIndexes.map(idx => interactions(idx)).toVector ) val (tallies2, ms1) = Profile.profile( - scan(isPairMatch2(0.92), randInteractions) + scan(xxxCols, isPairMatch2(0.92), randInteractions) ) val lockedU = computeMU(tallies2) FIELDS.zipWithIndex.foreach(x => @@ -62,14 +65,20 @@ object EM_Task extends LazyLogging { ) ) logger.info(s"$ms1 ms") - runEM(0, lockedU.map(x => MU(0.8, x.u)), gamma) + runEM(xxxCols, 0, lockedU.map(x => MU(0.8, x.u)), gamma) } else { - runEM(0, for { _ <- FIELDS } yield MU(m = 0.8, u = 0.0001), gamma) + runEM( + xxxCols, + 0, + for { _ <- FIELDS } yield MU(m = 0.8, u = 0.0001), + gamma + ) } } @tailrec private def runEM( + xxxCols: ArraySeq[Int], iterations: Int, currentMU: ArraySeq[MU], gamma: Map[String, Long] @@ -141,24 +150,25 @@ object EM_Task extends LazyLogging { gamma_.map(x => x._1 -> computeGammaMetrics(x._2._1, x._2._2)) val tallies = mapGammaMetrics.values .map(x => x.tallies) - .fold(Tallies())((x, y) => addTallies(x, y)) + .fold(new Tallies(xxxCols.length))((x, y) => addTallies(x, y)) val newMU = computeMU(tallies) - for (i <- LINK_COLS.indices) { + for (i <- xxxCols.indices) { printTalliesAndMU( - FIELDS.apply(LINK_COLS.apply(i)).name, + FIELDS.apply(xxxCols.apply(i)).name, tallies.colTally(i), newMU(i) ) } if (LOCK_U) { - runEM(iterations + 1, mergeMU(newMU, currentMU), gamma) + runEM(xxxCols, iterations + 1, mergeMU(newMU, currentMU), gamma) } else { - runEM(iterations + 1, newMU, gamma) + runEM(xxxCols, iterations + 1, newMU, gamma) } } } private def scan( + xxxCols: ArraySeq[Int], isMatch: (ArraySeq[String], ArraySeq[String]) => ContributionSplit, interactions: ParVector[ArraySeq[String]] ): Tallies = { @@ -199,7 +209,7 @@ object 
EM_Task extends LazyLogging { ): Tallies = { interactions .map(right => tallyFieldsContribution(left, right)) - .fold(Tallies()) { (x, y) => addTallies(x, y) } + .fold(new Tallies(xxxCols.length)) { (x, y) => addTallies(x, y) } } if (right.isEmpty) { @@ -214,7 +224,7 @@ object EM_Task extends LazyLogging { } - outerLoop(new Tallies, interactions.head, interactions.tail) + outerLoop(new Tallies(xxxCols.length), interactions.head, interactions.tail) } } diff --git a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/Tallies.scala b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/Tallies.scala index cd6893980..b5d43712e 100644 --- a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/Tallies.scala +++ b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/Tallies.scala @@ -2,4 +2,8 @@ package org.jembi.jempi.em import scala.collection.immutable.ArraySeq -case class Tallies(colTally: ArraySeq[Tally] = CustomFields.LINK_COLS.map(_ => Tally())) +case class Tallies(colTally: ArraySeq[Tally]) { + + def this(n: Int) = this(ArraySeq.range(0, n).map(_ => Tally())) + +} diff --git a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/Utils.scala b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/Utils.scala index 6cfccb746..df657b1f0 100644 --- a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/Utils.scala +++ b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/Utils.scala @@ -2,7 +2,6 @@ package org.jembi.jempi.em import com.typesafe.scalalogging.LazyLogging import Jaro.jaro -import org.jembi.jempi.em.CustomFields.{FIELDS, LINK_COLS} import scala.collection.immutable.ArraySeq diff --git a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/kafka/CustomMU.scala b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/kafka/CustomMU.scala index e08e6ae4b..d35ba6e8c 100644 --- a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/kafka/CustomMU.scala +++ 
b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/kafka/CustomMU.scala @@ -6,29 +6,51 @@ import org.jembi.jempi.em.MU import scala.collection.immutable.ArraySeq case class CustomMU( - tag: String, - givenName: Probability, - familyName: Probability, - gender: Probability, - dob: Probability, - city: Probability, - phoneNumber: Probability, - nationalId: Probability + tag: String, + customLinkMU: CustomLinkMU, + customValidateMU: CustomValidateMU, + customMatchMU: CustomMatchMU +) + +case class CustomLinkMU( + givenName: Probability, + familyName: Probability, + gender: Probability, + dob: Probability, + city: Probability, + phoneNumber: Probability, + nationalId: Probability +) + +case class CustomValidateMU( + +) + +case class CustomMatchMU( + ) object CustomMU { - def fromArraySeq(tag: String, muSeq: ArraySeq[MU]): CustomMU = - CustomMU( - tag, - Probability(muSeq.apply(0).m, muSeq.apply(0).u), - Probability(muSeq.apply(1).m, muSeq.apply(1).u), - Probability(muSeq.apply(2).m, muSeq.apply(2).u), - Probability(muSeq.apply(3).m, muSeq.apply(3).u), - Probability(muSeq.apply(4).m, muSeq.apply(4).u), - Probability(muSeq.apply(5).m, muSeq.apply(5).u), - Probability(muSeq.apply(6).m, muSeq.apply(6).u) - ) + def fromArraySeq(tag: String, muSeqLink: ArraySeq[MU], muSeqValidate: ArraySeq[MU], muSeqMatch: ArraySeq[MU]): CustomMU = + CustomMU( + tag, + CustomLinkMU( + Probability(muSeqLink.apply(0).m, muSeqLink.apply(0).u), + Probability(muSeqLink.apply(1).m, muSeqLink.apply(1).u), + Probability(muSeqLink.apply(2).m, muSeqLink.apply(2).u), + Probability(muSeqLink.apply(3).m, muSeqLink.apply(3).u), + Probability(muSeqLink.apply(4).m, muSeqLink.apply(4).u), + Probability(muSeqLink.apply(5).m, muSeqLink.apply(5).u), + Probability(muSeqLink.apply(6).m, muSeqLink.apply(6).u) + ), + CustomValidateMU( + + ), + CustomMatchMU( + + ) + ) } diff --git a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/kafka/Producer.scala 
b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/kafka/Producer.scala index c27a95679..0b9f32d41 100644 --- a/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/kafka/Producer.scala +++ b/JeMPI_Apps/JeMPI_EM_Scala/src/main/scala/org/jembi/jempi/em/kafka/Producer.scala @@ -18,7 +18,7 @@ import scala.collection.immutable.ArraySeq object Producer { - def send(tag: String, muSeq: ArraySeq[MU]): Unit = { + def send(tag: String, muSeqLink: ArraySeq[MU], muSeqValidate: ArraySeq[MU], muSeqMatch: ArraySeq[MU]): Unit = { val mapper = new ObjectMapper() with ClassTagExtensions mapper.registerModule(DefaultScalaModule) mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) @@ -36,7 +36,7 @@ object Producer { val producer = new KafkaProducer[String, String](props) - val customMU = CustomMU.fromArraySeq(tag, muSeq) + val customMU = CustomMU.fromArraySeq(tag, muSeqLink, muSeqValidate, muSeqMatch) val json = mapper.writeValueAsString(customMU) diff --git a/JeMPI_Apps/JeMPI_LibShared/src/main/java/org/jembi/jempi/shared/models/CustomMU.java b/JeMPI_Apps/JeMPI_LibShared/src/main/java/org/jembi/jempi/shared/models/CustomMU.java index 412bf46e9..e28603982 100644 --- a/JeMPI_Apps/JeMPI_LibShared/src/main/java/org/jembi/jempi/shared/models/CustomMU.java +++ b/JeMPI_Apps/JeMPI_LibShared/src/main/java/org/jembi/jempi/shared/models/CustomMU.java @@ -4,17 +4,28 @@ @JsonInclude(JsonInclude.Include.NON_NULL) public record CustomMU(String tag, - Probability givenName, - Probability familyName, - Probability gender, - Probability dob, - Probability city, - Probability phoneNumber, - Probability nationalId) { + CustomLinkMU customLinkMU, + CustomValidateMU customValidateMU, + CustomMatchMU customMatchMU) { public static final Boolean SEND_INTERACTIONS_TO_EM = true; public record Probability(float m, float u) { } + public record CustomLinkMU(Probability givenName, + Probability familyName, + Probability gender, + Probability dob, + Probability city, + 
Probability phoneNumber, + Probability nationalId) { + } + + public record CustomValidateMU(Probability dummy) { + } + + public record CustomMatchMU(Probability dummy) { + } + } diff --git a/JeMPI_Apps/JeMPI_Linker/src/main/java/org/jembi/jempi/linker/backend/CustomLinkerProbabilistic.java b/JeMPI_Apps/JeMPI_Linker/src/main/java/org/jembi/jempi/linker/backend/CustomLinkerProbabilistic.java index 34e97b75b..dbdc3a43f 100644 --- a/JeMPI_Apps/JeMPI_Linker/src/main/java/org/jembi/jempi/linker/backend/CustomLinkerProbabilistic.java +++ b/JeMPI_Apps/JeMPI_Linker/src/main/java/org/jembi/jempi/linker/backend/CustomLinkerProbabilistic.java @@ -86,21 +86,21 @@ static float matchNotificationProbabilisticScore( return 0.0F; } public static void updateMU(final CustomMU mu) { - if (mu.givenName().m() > mu.givenName().u() - && mu.familyName().m() > mu.familyName().u() - && mu.gender().m() > mu.gender().u() - && mu.dob().m() > mu.dob().u() - && mu.city().m() > mu.city().u() - && mu.phoneNumber().m() > mu.phoneNumber().u() - && mu.nationalId().m() > mu.nationalId().u()) { + if (mu.customLinkMU().givenName().m() > mu.customLinkMU().givenName().u() + && mu.customLinkMU().familyName().m() > mu.customLinkMU().familyName().u() + && mu.customLinkMU().gender().m() > mu.customLinkMU().gender().u() + && mu.customLinkMU().dob().m() > mu.customLinkMU().dob().u() + && mu.customLinkMU().city().m() > mu.customLinkMU().city().u() + && mu.customLinkMU().phoneNumber().m() > mu.customLinkMU().phoneNumber().u() + && mu.customLinkMU().nationalId().m() > mu.customLinkMU().nationalId().u()) { updatedLinkFields = new LinkFields( - new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.givenName().m(), mu.givenName().u()), - new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.familyName().m(), mu.familyName().u()), - new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.gender().m(), mu.gender().u()), - new 
LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.dob().m(), mu.dob().u()), - new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.city().m(), mu.city().u()), - new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.phoneNumber().m(), mu.phoneNumber().u()), - new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.nationalId().m(), mu.nationalId().u())); + new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.customLinkMU().givenName().m(), mu.customLinkMU().givenName().u()), + new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.customLinkMU().familyName().m(), mu.customLinkMU().familyName().u()), + new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.customLinkMU().gender().m(), mu.customLinkMU().gender().u()), + new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.customLinkMU().dob().m(), mu.customLinkMU().dob().u()), + new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.customLinkMU().city().m(), mu.customLinkMU().city().u()), + new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.customLinkMU().phoneNumber().m(), mu.customLinkMU().phoneNumber().u()), + new LinkerProbabilistic.Field(JARO_WINKLER_SIMILARITY, List.of(0.92F), mu.customLinkMU().nationalId().m(), mu.customLinkMU().nationalId().u())); } } @@ -110,6 +110,7 @@ public static void checkUpdatedLinkMU() { CustomLinkerProbabilistic.currentLinkFields = updatedLinkFields; updatedLinkFields = null; } + } } diff --git a/devops/linux/docker/helper/log/logs-jempi-linker.sh b/devops/linux/docker/helper/log/logs-jempi-linker.sh index 451f24ccc..58e374a16 100755 --- a/devops/linux/docker/helper/log/logs-jempi-linker.sh +++ b/devops/linux/docker/helper/log/logs-jempi-linker.sh @@ -4,5 +4,6 @@ set -e set -u source ../../conf.env -docker service logs --follow --raw ${STACK_NAME}_linker +#docker service logs --follow 
--raw ${STACK_NAME}_linker +docker service logs --follow --raw ${STACK_NAME}_linker | lnav echo