{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"blink","owner":"cleanzr","isFork":false,"description":"This is main code for Steorts (2015), which is also on CRAN. Please cite the paper/code if you find this useful.","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-10T00:23:14.680Z"}},{"type":"Public","name":"exchanger","owner":"cleanzr","isFork":false,"description":"Bayesian Entity Resolution with Exchangeable Random Partition Priors","allTopics":["clustering","record-linkage","entity-resolution","bayesian-inference","r-package","mcmc"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":0,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-07T22:44:01.336Z"}},{"type":"Public","name":"clevr","owner":"cleanzr","isFork":false,"description":"Clustering and Link Prediction Evaluation in R","allTopics":["record-linkage","entity-resolution","r-package","evaluation-metrics","clustering-evaluation","link-prediction"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":1,"starsCount":10,"forksCount":3,"license":"GNU General Public License v2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-23T11:38:07.616Z"}},{"type":"Public","name":"representr","owner":"cleanzr","isFork":false,"description":"Create representative records post-record linkage","allTopics":["record-linkage","post-linkage-analysis","downstream-tasks"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-05T18:26:10.155Z"}},{"type":"Public","name":"exchanger-experiments","owner":"cleanzr","isFork":false,"description":"Scripts for reproducing the experiments in our JSSAM article on Bayesian Graphical Entity Resolution","allTopics":[],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-01-24T06:57:53.376Z"}},{"type":"Public","name":"microclustr","owner":"cleanzr","isFork":false,"description":"Package for Betancourt, Zanella, and Steorts","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":1,"issueCount":0,"starsCount":2,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-08-22T21:36:29.513Z"}},{"type":"Public","name":"dblink-experiments","owner":"cleanzr","isFork":false,"description":"Details for reproducing the experiments in our d-blink paper","allTopics":["entity-resolution","reproducible-experiments"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-06-10T05:24:02.002Z"}},{"type":"Public","name":"dblinkR","owner":"cleanzr","isFork":false,"description":"An R interface for the dblink Spark application","allTopics":["record-linkage","entity-resolution","r-package"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":2,"starsCount":5,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-06-10T05:20:01.937Z"}},{"type":"Public","name":"dblink","owner":"cleanzr","isFork":false,"description":"Distributed Bayesian Entity Resolution in Apache Spark","allTopics":["apache-spark","record-linkage","entity-resolution","bayesian-inference","mcmc","distributed-machine-learning"],"primaryLanguage":{"name":"Scala","color":"#c22d40"},"pullRequestCount":0,"issueCount":4,"starsCount":57,"forksCount":9,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-06-10T05:09:09.107Z"}},{"type":"Public","name":"italy","owner":"cleanzr","isFork":false,"description":"A sample survey conducted by the Bank of Italy every two years containing duplicated data.","allTopics":["data","linkage"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-04-19T16:09:05.694Z"}},{"type":"Public","name":"restaurant","owner":"cleanzr","isFork":false,"description":"Restaurant data set for entity resolution","allTopics":["data","linkage"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-04-19T16:07:05.194Z"}},{"type":"Public","name":"cora","owner":"cleanzr","isFork":false,"description":"Cora data set for Entity Resolution","allTopics":["data","linkage"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":2,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-04-19T16:05:59.568Z"}},{"type":"Public","name":"klsh","owner":"cleanzr","isFork":false,"description":"Blocking for record linkage","allTopics":[],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-04-19T16:03:08.810Z"}},{"type":"Public","name":"tlsh","owner":"cleanzr","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-04-19T15:59:02.889Z"}},{"type":"Public","name":"BDD","owner":"cleanzr","isFork":false,"description":"Duplicate detection in R using a Bayesian partitioning approach ","allTopics":["record-linkage","entity-resolution","bayesian-methods","r-package","deduplication","duplicate-detection"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":1,"starsCount":1,"forksCount":0,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-01-24T10:12:04.697Z"}},{"type":"Public","name":"record-linkage-tutorial","owner":"cleanzr","isFork":false,"description":"A tutorial on entity resolution (record linkage or de-duplication)","allTopics":[],"primaryLanguage":{"name":"TeX","color":"#3D6117"},"pullRequestCount":0,"issueCount":0,"starsCount":61,"forksCount":15,"license":"GNU General Public License v2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-06-30T02:32:12.693Z"}},{"type":"Public","name":"posters","owner":"cleanzr","isFork":false,"description":"Posters on Data Cleanzing","allTopics":[],"primaryLanguage":{"name":"TeX","color":"#3D6117"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-12-04T21:12:08.152Z"}},{"type":"Public","name":"fasthash","owner":"cleanzr","isFork":false,"description":"Performs unique entity estimation corresponding to Chen, Shrivastava, Steorts (2018). ","allTopics":["hashing","record-linkage","entity-resolution","blocking","entity-estimation"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":14,"forksCount":3,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-02-21T02:21:53.890Z"}},{"type":"Public","name":"RLdata","owner":"cleanzr","isFork":false,"description":"","allTopics":["entity-resolution","data-set"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":1,"issueCount":0,"starsCount":1,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-31T18:31:46.095Z"}},{"type":"Public","name":"cd","owner":"cleanzr","isFork":false,"description":"CD dataset for Entity Resolution","allTopics":["data","linkage"],"primaryLanguage":{"name":"R","color":"#198CE7"},"pullRequestCount":0,"issueCount":1,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-26T13:47:04.633Z"}},{"type":"Public","name":"smered","owner":"cleanzr","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"GNU Lesser General Public License v2.1","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-04-04T18:22:50.352Z"}}],"repositoryCount":21,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"cleanzr repositories"}