diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f07556b3..a020b36b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,6 +15,11 @@ # * Uses site:stage to collect the documentation for multi-module projects. # * Publishes the documentation for `master` branch. + +services: + - name: docker:dind + command: ["--tls=false"] + variables: # This will suppress any download for dependencies and plugins or upload messages which would clutter the console log. # `showDateTime` will show the passed time in milliseconds. You need to specify `--batch-mode` to make this work. @@ -23,6 +28,12 @@ variables: # when running from the command line. # `installAtEnd` and `deployAtEnd` are only effective with recent version of the corresponding plugins. MAVEN_CLI_OPTS: "--batch-mode --errors --fail-at-end --show-version -DinstallAtEnd=false -DdeployAtEnd=false" + # Instruct Testcontainers to use the daemon of DinD; use port 2375 for non-TLS connections. + DOCKER_HOST: "tcp://docker:2375" + # Instruct Docker not to start over TLS. + DOCKER_TLS_CERTDIR: "" + # Improve performance with overlayfs. + DOCKER_DRIVER: overlay2 # Cache downloaded dependencies and plugins between builds. # To keep cache across branches add 'key: "$CI_JOB_NAME"' @@ -30,7 +41,7 @@ cache: paths: - .m2/repository -# This will only the project. + # This will only build the project. .build: &build stage: build script: diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b39326f..de5c19aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [2.5.0] - 2023-06-02 + +### Added +* Support for relational databases using JDBC. +* Parameter `parallelism` setting the maximum number of parallel operations. +* Script `change-version.sh` to update the version of RMLStreamer in required files. 
+ +### Fixed +* Updated Function Agent to v1.1.0 +* Updated GREL Functions to v0.9.1 +* Updated IDLab Functions to v0.2.0 +* Use `<release>` property in `pom.xml` to set Java version to 11. +* Allow a relative path (to the working dir) as output directory when writing to file. +* Fixed bug in extracting namespaces from XML element (internal [issue #161](https://gitlab.ilabt.imec.be/rml/proc/rml-streamer/-/issues/161)) + ## [2.4.2] - 2022-10-10 ### Fixed @@ -214,3 +229,4 @@ can be set with the program argument `--baseIRI`. [2.4.0]: https://github.com/RMLio/RMLStreamer/compare/v2.3.0...v2.4.0 [2.4.1]: https://github.com/RMLio/RMLStreamer/compare/v2.4.0...v2.4.1 [2.4.2]: https://github.com/RMLio/RMLStreamer/compare/v2.4.1...v2.4.2 +[2.5.0]: https://github.com/RMLio/RMLStreamer/compare/v2.4.2...v2.5.0 diff --git a/README.md b/README.md index 8d7e32e0..8a530a2f 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,24 @@ If you go to the directory where your data and mappings are, you can run something like (change tag to appropriate version): ``` -$ docker run -v $PWD:/data --rm rmlstreamer:2.4.1 toFile -m /data/mapping.ttl -o /data/output.ttl +$ docker run -v $PWD:/data --rm rmlstreamer:v2.5.0 toFile -m /data/mapping.ttl -o /data/output.ttl +``` + +There are more options for the script, if you want to use specific tags or push to Docker Hub: +``` +$ ./buildDocker.sh -h + +Build and push Docker images for RMLStreamer + +buildDocker.sh [-h] +buildDocker.sh [-a][-n][-p][-u <username>][-v <version>] +options: +-a Build for platforms linux/arm64 and linux/amd64. Default: perform a standard 'docker build' +-h Print this help and exit. +-n Do NOT (re)build RMLStreamer before building the Docker image. This is risky because the Docker build needs a stand-alone version of RMLStreamer. +-u Add a username to the tag name as on Docker Hub, like <username>/rmlstreamer:<version>. +-p Push to Docker Hub repo. You must be logged in for this to succeed. +-v Override the version in the tag name, like <username>/rmlstreamer:<version>. 
If not given, use the current version found in pom.xml. ``` ### Moderately quick start (Docker - the recommended way) @@ -98,6 +115,9 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be $ mvn clean package -DskipTests -P 'stand-alone' ``` +**Note**: If you want to update the version of RMLStreamer (e.g. when developing or releasing), run the script +`change-version.sh <version>`. It updates the version in the relevant places in the repository. + ### Executing RML Mappings *This section assumes the use of a CLI. If you want to use Flink's web interface, check out @@ -134,22 +154,26 @@ $FLINK_BIN run toKafka --broker-list --top #### Complete RMLStreamer usage: ``` -Usage: RMLStreamer [toFile|toKafka|toTCPSocket|noOutput] [options] +Usage: RMLStreamer [toFile|toKafka|toTCPSocket|toMQTT|noOutput] [options] - -f, --function-descriptions ,... - An optional list of paths to function description files (in RDF using FnO). A path can be a file location or a URL. -j, --job-name The name to assign to the job on the Flink cluster. Put some semantics in here ;) -i, --base-iri The base IRI as defined in the R2RML spec. --disable-local-parallel By default input records are spread over the available task slots within a task manager to optimise parallel processing,at the cost of losing the order of the records throughout the process. This option disables this behaviour to guarantee that the output order is the same as the input order. + -p, --parallelism + Sets the maximum operator parallelism (~nr of task slots used) -m, --mapping-file REQUIRED. The path to an RML mapping file. The path must be accessible on the Flink cluster. --json-ld Write the output as JSON-LD instead of N-Quads. An object contains all RDF generated from one input record. Note: this is slower than using the default N-Quads format. --bulk Write all triples generated from one input record at once, instead of writing triples the moment they are generated. --checkpoint-interval