Skip to content

Commit

Permalink
Feature: Timestamp support for JSONSchema generated schemas (#95)
Browse files Browse the repository at this point in the history
Problem
=======

Closes: #93 

Solution
========

Add checks for `format == 'date-time'` inside the string type check
for `string` and `array of string`

Change summary:
---------------
* Added format check for string fields (and string arrays)
* Check for JSONSchema property `format` with value `date-time`
* Updated jsonschema tests and updated the snapshots

Steps to Verify:
----------------
1. Generate a JSON Schema or use an existing one
2. Add `"format":"date-time"` to the field which should have a Date/Time
value
3. Ensure that the value is a valid `Date` object
4. Enjoy
  • Loading branch information
noxify authored Jul 25, 2023
1 parent 43732c5 commit ac5257d
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 12 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ var schema = new parquet.ParquetSchema.fromJsonSchema({
"type": "number"
},
"date": {
"type": "string"
"type": "string",
"format": "date-time"
},
"in_stock": {
"type": "boolean"
Expand Down
6 changes: 6 additions & 0 deletions lib/jsonSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ const fromJsonSchemaArray = (fieldValue: SupportedJSONSchema4, optionalFieldList

switch (fieldValue.items.type) {
case 'string':
if (fieldValue.items.format && fieldValue.items.format == 'date-time') {
return fields.createListField('TIMESTAMP_MILLIS', optionalFieldList);
}
return fields.createListField('UTF8', optionalFieldList);
case 'integer':
case 'number':
Expand All @@ -88,6 +91,9 @@ const fromJsonSchemaField = (jsonSchema: JSONSchema4) => (fieldName: string, fie

switch (fieldValue.type) {
case 'string':
if (fieldValue.format && fieldValue.format == 'date-time') {
return fields.createTimestampField(optional);
}
return fields.createStringField(optional);
case 'integer':
case 'number':
Expand Down
18 changes: 15 additions & 3 deletions test/jsonSchema.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,18 @@ describe("Json Schema Conversion Test File", async function () {
"items": { "type": "string" },
"additionalItems": false
},
"timestamp_field": { "type": "string" },
"timestamp_array_field": {
"type": "array",
"items": {
"type": "string",
"format": "date-time"
},
"additionalItems": false,
},
"timestamp_field": {
"type": "string",
"format": "date-time"
},
"obj_field": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -107,7 +118,9 @@ describe("Json Schema Conversion Test File", async function () {
const row1 = {
string_field: 'string value',
int_field: 10n,
timestamp_field: new Date("2023-01-01 GMT").toUTCString(),
timestamp_array_field: { list: [{ element: new Date("2023-01-01 GMT") }] },

timestamp_field: new Date("2023-01-01 GMT"),

array_field: {
list: [{ element: 'array_field val1' }, { element: 'array_field val2' }],
Expand Down Expand Up @@ -162,7 +175,6 @@ describe("Json Schema Conversion Test File", async function () {
const row = await cursor.next();
const rowData = {
...row1,
timestamp_field: "Sun, 01 Jan 2023 00:00:00 GMT",
};
assert.deepEqual(row, rowData);
});
Expand Down
42 changes: 39 additions & 3 deletions test/test-files/json-schema-test-file.result.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"type_length": null,
"repetition_type": null,
"name": "root",
"num_children": 6,
"num_children": 7,
"converted_type": null,
"scale": null,
"precision": null,
Expand Down Expand Up @@ -72,12 +72,48 @@
"logicalType": null
},
{
"type": 6,
"type": null,
"type_length": null,
"repetition_type": 1,
"name": "timestamp_array_field",
"num_children": 1,
"converted_type": 3,
"scale": null,
"precision": null,
"field_id": null,
"logicalType": null
},
{
"type": null,
"type_length": null,
"repetition_type": 2,
"name": "list",
"num_children": 1,
"converted_type": null,
"scale": null,
"precision": null,
"field_id": null,
"logicalType": null
},
{
"type": 2,
"type_length": null,
"repetition_type": 1,
"name": "element",
"num_children": null,
"converted_type": 9,
"scale": null,
"precision": null,
"field_id": null,
"logicalType": null
},
{
"type": 2,
"type_length": null,
"repetition_type": 1,
"name": "timestamp_field",
"num_children": null,
"converted_type": 0,
"converted_type": 9,
"scale": null,
"precision": null,
"field_id": null,
Expand Down
157 changes: 152 additions & 5 deletions test/test-files/json-schema-test-file.schema.result.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,26 @@
}
}
},
"timestamp_array_field": {
"type": "LIST",
"optional": true,
"fields": {
"list": {
"repeated": true,
"fields": {
"element": {
"optional": true,
"type": "TIMESTAMP_MILLIS",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED"
}
}
}
}
},
"timestamp_field": {
"optional": true,
"type": "UTF8",
"type": "TIMESTAMP_MILLIS",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED"
},
Expand Down Expand Up @@ -183,10 +200,53 @@
},
"originalType": "LIST"
},
"timestamp_array_field": {
"name": "timestamp_array_field",
"path": [
"timestamp_array_field"
],
"repetitionType": "OPTIONAL",
"rLevelMax": 0,
"dLevelMax": 1,
"isNested": true,
"fieldCount": 1,
"fields": {
"list": {
"name": "list",
"path": [
"timestamp_array_field",
"list"
],
"repetitionType": "REPEATED",
"rLevelMax": 1,
"dLevelMax": 2,
"isNested": true,
"fieldCount": 1,
"fields": {
"element": {
"name": "element",
"primitiveType": "INT64",
"originalType": "TIMESTAMP_MILLIS",
"path": [
"timestamp_array_field",
"list",
"element"
],
"repetitionType": "OPTIONAL",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED",
"rLevelMax": 1,
"dLevelMax": 3
}
}
}
},
"originalType": "LIST"
},
"timestamp_field": {
"name": "timestamp_field",
"primitiveType": "BYTE_ARRAY",
"originalType": "UTF8",
"primitiveType": "INT64",
"originalType": "TIMESTAMP_MILLIS",
"path": [
"timestamp_field"
],
Expand Down Expand Up @@ -528,10 +588,97 @@
"rLevelMax": 1,
"dLevelMax": 3
},
{
"name": "timestamp_array_field",
"path": [
"timestamp_array_field"
],
"repetitionType": "OPTIONAL",
"rLevelMax": 0,
"dLevelMax": 1,
"isNested": true,
"fieldCount": 1,
"fields": {
"list": {
"name": "list",
"path": [
"timestamp_array_field",
"list"
],
"repetitionType": "REPEATED",
"rLevelMax": 1,
"dLevelMax": 2,
"isNested": true,
"fieldCount": 1,
"fields": {
"element": {
"name": "element",
"primitiveType": "INT64",
"originalType": "TIMESTAMP_MILLIS",
"path": [
"timestamp_array_field",
"list",
"element"
],
"repetitionType": "OPTIONAL",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED",
"rLevelMax": 1,
"dLevelMax": 3
}
}
}
},
"originalType": "LIST"
},
{
"name": "list",
"path": [
"timestamp_array_field",
"list"
],
"repetitionType": "REPEATED",
"rLevelMax": 1,
"dLevelMax": 2,
"isNested": true,
"fieldCount": 1,
"fields": {
"element": {
"name": "element",
"primitiveType": "INT64",
"originalType": "TIMESTAMP_MILLIS",
"path": [
"timestamp_array_field",
"list",
"element"
],
"repetitionType": "OPTIONAL",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED",
"rLevelMax": 1,
"dLevelMax": 3
}
}
},
{
"name": "element",
"primitiveType": "INT64",
"originalType": "TIMESTAMP_MILLIS",
"path": [
"timestamp_array_field",
"list",
"element"
],
"repetitionType": "OPTIONAL",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED",
"rLevelMax": 1,
"dLevelMax": 3
},
{
"name": "timestamp_field",
"primitiveType": "BYTE_ARRAY",
"originalType": "UTF8",
"primitiveType": "INT64",
"originalType": "TIMESTAMP_MILLIS",
"path": [
"timestamp_field"
],
Expand Down

0 comments on commit ac5257d

Please sign in to comment.