Skip to content

Commit

Permalink
[New] Make sure that only transformed datasets are deduplicated
Browse files Browse the repository at this point in the history
I.e., do not touch any dataset whose IRI does not start with "http://onto.fel.cvut.cz/dataset--20".
  • Loading branch information
blcham committed Oct 10, 2023
1 parent 281386a commit 3e4ee07
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ NETRC_FILE=../private/netrc
SERVER_URL="https://graphdb.onto.fel.cvut.cz"
GRAPHDB_REPOSITORY_URL="${SERVER_URL}/repositories/${REPOSITORY_ID}"

curl --netrc-file "$NETRC_FILE" $GRAPHDB_REPOSITORY_URL/contexts 2>/dev/null | tail +2 | sed -e "s/\r//g" | sort -r > private/graphs.csv
# List the repository's contexts (named graphs), keep only the transformed
# datasets (those containing "dataset--20"), strip carriage returns and store
# them newest-first in private/graphs.csv.
#
# Order matters: the header row must be skipped BEFORE filtering. grep already
# discards the header (it does not contain "dataset--20"), so running
# `tail +2` after grep would silently drop the first real dataset instead.
# NOTE(review): assumes the first line of the response is a header row, as the
# pre-existing `tail +2` implies - confirm against the endpoint's output.
curl --netrc-file "$NETRC_FILE" "${GRAPHDB_REPOSITORY_URL}/contexts" 2>/dev/null \
  | tail -n +2 \
  | grep -F 'dataset--20' \
  | sed -e "s/\r//g" \
  | sort -r > private/graphs.csv

cat private/graphs.csv | while read GRAPH; do
rm ./queries/deduplicate-triples.uq
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ DELETE {
GRAPH ?olderG {
?s ?p ?o .
}
FILTER(strstarts(str(?olderG),"http://onto.fel.cvut.cz/dataset--20"))
FILTER(str(?olderG) < str(?newerG))
} LIMIT 100000
}
Expand Down

0 comments on commit 3e4ee07

Please sign in to comment.