forked from larsjuhljensen/tagger
-
Notifications
You must be signed in to change notification settings - Fork 0
/
environments.cxx
47 lines (40 loc) · 1.44 KB
/
environments.cxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include <cstdio>

#include "document.h"
#include "match_handlers.h"
#include "batch_tagger.h"
using namespace std;
class SimpleBatchHandler : public BatchHandler
{
public:
void on_match(Document& document, Match* match)
{
char replaced = document.text[match->stop+1];
document.text[match->stop+1] = '\0';
Entity* entity = match->entities;
for (int i = 0; i < match->size; i++) {
if (entity->id.serial >= 10000 && entity->id.serial < 10200) {
printf("%s\t%d\t%d\t%s\tENVO:%07d\n", document.name, match->start, match->stop, (const char*)(document.text+match->start), entity->id.serial);
}
else {
printf("%s\t%d\t%d\t%s\tENVO:%08d\n", document.name, match->start, match->stop, (const char*)(document.text+match->start), entity->id.serial);
}
entity++;
}
document.text[match->stop+1] = replaced;
};
};
////////////////////////////////////////////////////////////////////////////////
/**
 * Program entry point.
 *
 * Loads the names, groups and global files for entity type -27, then runs
 * the batch tagger over the documents supplied by a DirectoryDocumentReader
 * built from the first command-line argument, reporting matches through
 * SimpleBatchHandler.
 *
 * @param argc Number of command-line arguments; at least 2 is required.
 * @param argv argv[1] is passed to DirectoryDocumentReader (presumably the
 *             path of a directory of documents; confirm against the reader).
 * @return 0 after processing, 1 when the required argument is missing.
 */
int main (int argc, char *argv[])
{
    // Explicit check instead of assert(): an assert is compiled out under
    // NDEBUG, which would let argv[1] be read even when it is absent.
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <document_directory>\n",
                (argc > 0 && argv[0]) ? argv[0] : "environments");
        return 1;
    }

    // The same entity type (-27) is used for the names file, the groups
    // file and the entity_types filter below.
    BatchTagger batch_tagger;
    batch_tagger.load_names(-27, "environments_names.tsv");
    batch_tagger.load_groups(-27, "environments_groups.tsv");
    batch_tagger.load_global("environments_global.tsv");

    DirectoryDocumentReader document_reader(argv[1]);

    GetMatchesParams params;
    params.auto_detect = false;
    params.entity_types.push_back(-27);
    params.max_tokens = 6;

    SimpleBatchHandler batch_handler;
    batch_tagger.process(&document_reader, params, &batch_handler);
    return 0;
}