Skip to content

Commit

Permalink
Added a 'kingdom' filter (closes #8).
Browse files Browse the repository at this point in the history
  • Loading branch information
gaurav committed Oct 30, 2013
1 parent fd1a999 commit c29676f
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
1 change: 1 addition & 0 deletions reconciliation-wrapper/README.mediawiki
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* 0.1-dev10 (October 29, 2013)
** Fixed URLs which still referred to uat.gbif.org.
** Went back to the API call which searches for the query name across all of GBIF.
** Added a 'kingdom'-based filter to exclude matches from other kingdoms.
* 0.1-dev9 (October 15, 2013)
** Fixed a bug which caused 500 errors with names containing UTF-8 characters.
** Changed URLs to GBIF API v0.9.
Expand Down
40 changes: 38 additions & 2 deletions reconciliation-wrapper/agrew.pl
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,26 @@ sub process_query {
my %query = %$query_ref;

# Right now, we only use 'query' and 'properties'. Look up https://github.com/OpenRefine/OpenRefine/wiki/Reconciliation-Service-API#single-query-mode for other options.
# Ideas:
# - use 'Family' for high-level filtering.
my $name = $query{'query'};
say STDERR "Query: '$name'";

# Load up properties; once we're done, you can access
# $properties{lc 'p'} = $properties{lc 'pid'} = $value;
my %properties;
if(exists $query{'properties'}) {
my @props = @{$query{'properties'}};

foreach my $prop (@props) {
my $value = $prop->{'v'};
$properties{lc $prop->{'p'}} = $value
if exists $prop->{'p'};
$properties{lc $prop->{'pid'}} = $value
if exists $prop->{'pid'};
}
}

# say STDERR "Props: " . Dumper(\%properties);

my $request_time_start = time;
my @results = get_gbif_match_all($name);
my $time_taken = (time - $request_time_start);
Expand All @@ -133,10 +148,31 @@ sub process_query {
printf STDERR " Retrieved %d matches for '$name' in %.4f ms.\n", (scalar @results), $time_taken*1000;
}

# Filter out on the basis of kingdom.
my @filtered;
if(exists($properties{'kingdom'}) and ($properties{'kingdom'} ne '')) {
my $kingdom = $properties{'kingdom'};

foreach my $result (@results) {
if(not exists $result->{'kingdom'}) {
push @filtered, $result;
} elsif(lc($result->{'kingdom'}) eq lc($kingdom)) {
push @filtered, $result;
} else {
# Filter it out.
# push @filtered, $result;
}
}

@results = @filtered;
}

# Summarize results.
my @summarized = summarize_name_usages(@results);
$time_taken = (time - $request_time_start);
printf STDERR " Summarized '$name' to %d matches in %.4f ms.\n", (scalar @summarized), $time_taken*1000;

# Sort results.
my @sorted_results = sort { $b->{'score'} <=> $a->{'score'} } @summarized;

# Add a dummy result so we know that all results are getting through.
Expand Down

0 comments on commit c29676f

Please sign in to comment.