summaryrefslogtreecommitdiffstats
path: root/docs/sections/components/data-formats.rst
diff options
context:
space:
mode:
Diffstat (limited to 'docs/sections/components/data-formats.rst')
-rwxr-xr-xdocs/sections/components/data-formats.rst418
1 files changed, 418 insertions, 0 deletions
diff --git a/docs/sections/components/data-formats.rst b/docs/sections/components/data-formats.rst
new file mode 100755
index 00000000..ecb019c2
--- /dev/null
+++ b/docs/sections/components/data-formats.rst
@@ -0,0 +1,418 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+.. _data-formats:
+
+Data Formats
+============
+
+| Because the DCAE designer composes your component with others at
+ service design time, in most cases you do not know what specific
+ component(s) your component will send data to during runtime. Thus, it
+ is vital that DCAE has a language of describing the data passed
+ between components, so that it is known which components are
+ composable with others. Data formats are descriptions of data—they are
+ the data contract between your component and other components. You
+ need to describe the available outputs and assumed inputs of your
+ components as data formats. These data descriptions are onboarded into
+ ASDC, and each receives a UUID. If component X outputs data format
+ DF-Y, and another component Z specifies DF-Y as their input data
+ format, then X is said to be *composable* with that component. The
+ data formats are referenced in the component specifications by the
+ data format’s id and version.
+| The vision is to have a repository of shared data formats that
+ developers and teams can re-use and also provide them the means to
+ extend and create new custom data formats.
+
+.. _dataformat_metadata:
+
+Meta Schema Definition
+----------------------
+
+The “Meta Schema” implementation defines how data format JSON schemas
+can be written to define user input. It is itself a JSON schema (thus it
+is a “meta schema”). It requires the name of the data format entry, the
+data format entry version and allows a description under “self” object.
+The meta schema version must be specified as the value of the
+“dataformatversion” key. Then the input schema itself is described.
+There are four types of schema descriptions objects - jsonschema for
+inline standard JSON Schema definitions of JSON inputs, delimitedschema
+for delimited data input using a defined JSON description, unstructured
+for unstructured text, and reference that allows a pointer to another
+artifact for a schema. The reference allows for XML schema, but can be
+used as a pointer to JSON, Delimited Format, and Unstructured schemas as
+well.
+
+The current Meta Schema implementation is defined below:
+
+::
+
+ {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "title": "Data format specification schema Version 1.0",
+ "type": "object",
+ "oneOf": [{
+ "properties": {
+ "self": {
+ "$ref": "#/definitions/self"
+ },
+ "dataformatversion": {
+ "$ref": "#/definitions/dataformatversion"
+ },
+ "reference": {
+
+ "type": "object",
+ "description": "A reference to an external schema - name/version is used to access the artifact",
+ "properties": {
+ "name": {
+ "$ref": "#/definitions/name"
+ },
+ "version": {
+ "$ref": "#/definitions/version"
+ },
+ "format": {
+ "$ref": "#/definitions/format"
+ }
+ },
+ "required": [
+ "name",
+ "version",
+ "format"
+ ],
+ "additionalProperties": false
+ }
+ },
+ "required": ["self", "dataformatversion", "reference"],
+ "additionalProperties": false
+ }, {
+ "properties": {
+ "self": {
+ "$ref": "#/definitions/self"
+ },
+ "dataformatversion": {
+ "$ref": "#/definitions/dataformatversion"
+ },
+ "jsonschema": {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "description": "The actual JSON schema for this data format"
+ }
+
+ },
+ "required": ["self", "dataformatversion", "jsonschema"],
+ "additionalProperties": false
+ }, {
+ "properties": {
+ "self": {
+ "$ref": "#/definitions/self"
+ },
+ "dataformatversion": {
+ "$ref": "#/definitions/dataformatversion"
+ },
+ "delimitedschema": {
+ "type": "object",
+ "description": "A JSON schema for delimited files",
+ "properties": {
+ "delimiter": {
+ "enum": [",", "|", "\t", ";"]
+ },
+ "fields": {
+ "type": "array",
+ "description": "Array of field descriptions",
+ "items": {
+ "$ref": "#/definitions/field"
+ }
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": ["self", "dataformatversion", "delimitedschema"],
+ "additionalProperties": false
+ }, {
+ "properties": {
+ "self": {
+ "$ref": "#/definitions/self"
+ },
+ "dataformatversion": {
+ "$ref": "#/definitions/dataformatversion"
+ },
+ "unstructured": {
+ "type": "object",
+ "description": "A JSON schema for unstructured text",
+ "properties": {
+ "encoding": {
+ "type": "string",
+ "enum": ["ASCII", "UTF-8", "UTF-16", "UTF-32"]
+ }
+ },
+ "additionalProperties": false
+
+ }
+ },
+ "required": ["self", "dataformatversion", "unstructured"],
+ "additionalProperties": false
+ }],
+ "definitions": {
+ "name": {
+ "type": "string"
+ },
+ "version": {
+ "type": "string",
+ "pattern": "^(\\d+\\.)(\\d+\\.)(\\*|\\d+)$"
+ },
+ "self": {
+ "description": "Identifying Information for the Data Format - name/version can be used to access the artifact",
+ "type": "object",
+ "properties": {
+ "name": {
+ "$ref": "#/definitions/name"
+ },
+ "version": {
+ "$ref": "#/definitions/version"
+ },
+ "description": {
+ "type": "string"
+ }
+ },
+ "required": [
+ "name",
+ "version"
+ ],
+ "additionalProperties": false
+ },
+ "format": {
+ "description": "Reference schema type",
+ "type": "string",
+ "enum": [
+ "JSON",
+ "Delimited Format",
+ "XML",
+ "Unstructured"
+ ]
+ },
+ "field": {
+ "description": "A field definition for the delimited schema",
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "fieldtype": {
+ "description": "the field type - from the XML schema types",
+ "type": "string",
+ "enum": ["string", "boolean",
+ "decimal", "float", "double",
+ "duration", "dateTime", "time",
+ "date", "gYearMonth", "gYear",
+ "gMonthDay", "gDay", "gMonth",
+ "hexBinary", "base64Binary",
+ "anyURI", "QName", "NOTATION",
+ "normalizedString", "token",
+ "language", "IDREFS", "ENTITIES",
+ "NMTOKEN", "NMTOKENS", "Name",
+ "NCName", "ID", "IDREF", "ENTITY",
+ "integer", "nonPositiveInteger",
+ "negativeInteger", "long", "int",
+ "short", "byte",
+ "nonNegativeInteger", "unsignedLong",
+ "unsignedInt", "unsignedShort",
+ "unsignedByte", "positiveInteger"
+
+ ]
+ },
+ "fieldPattern": {
+ "description": "Regular expression that defines the field format",
+ "type": "integer"
+ },
+ "fieldMaxLength": {
+ "description": "The maximum length of the field",
+ "type": "integer"
+ },
+ "fieldMinLength": {
+ "description": "The minimum length of the field",
+ "type": "integer"
+ },
+ "fieldMinimum": {
+ "description": "The minimum numeric value of the field",
+ "type": "integer"
+ },
+ "fieldMaximum": {
+ "description": "The maximum numeric value of the field",
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false
+ },
+ "dataformatversion": {
+ "type": "string",
+ "enum": ["1.0.0"]
+ }
+ }
+ }
+
+Examples
+-----------
+
+By reference example - Common Event Format
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+First the full JSON schema description of the Common Event Format would
+be loaded with a name of “Common Event Format” and the current version
+of “25.0.0”.
+
+Then the data format description is loaded by this schema:
+
+::
+
+ {
+ "self": {
+ "name": "Common Event Format Definition",
+ "version": "25.0.0",
+ "description": "Common Event Format Definition"
+
+ },
+ "dataformatversion": "1.0.0",
+ "reference": {
+ "name": "Common Event Format",
+ "format": "JSON",
+ "version": "25.0.0"
+ }
+ }
+
+
+
+Simple JSON Example
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+::
+
+ {
+ "self": {
+ "name": "Simple JSON Example",
+ "version": "1.0.0",
+ "description": "An example of unnested JSON schema for Input and output"
+
+ },
+ "dataformatversion": "1.0.0",
+ "jsonschema": {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "type": "object",
+ "properties": {
+ "raw-text": {
+ "type": "string"
+ }
+ },
+ "required": ["raw-text"],
+ "additionalProperties": false
+ }
+ }
+
+Nested JSON Example
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ {
+ "self": {
+ "name": "Nested JSON Example",
+ "version": "1.0.0",
+ "description": "An example of nested JSON schema for Input and output"
+
+ },
+ "dataformatversion": "1.0.0",
+ "jsonschema": {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "properties": {
+ "numFound": {
+ "type": "integer"
+ },
+ "start": {
+ "type": "integer"
+ },
+ "engagements": {
+ "type": "array",
+ "items": {
+ "properties": {
+ "engagementID": {
+ "type": "string",
+ "transcript": {
+ "type": "array",
+ "items": {
+ "type": {
+ "type": "string"
+ },
+ "content": {
+ "type": "string"
+ },
+ "senderName": {
+ "type": "string"
+ },
+ "iso": {
+ "type": "string"
+ },
+ "timestamp": {
+ "type": "integer"
+ },
+ "senderId": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "additionalProperties": false
+ }
+ }
+
+Unstructured Example
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ {
+ "self": {
+ "name": "Unstructured Text Example",
+ "version": "25.0.0",
+ "description": "An example of a unstructured text used for both input and output for "
+
+ },
+ "dataformatversion": "1.0.0",
+ "unstructured": {
+ "encoding": "UTF-8"
+ }
+ }
+
+
+An example of a delimited schema
+--------------------------------
+
+::
+
+ {
+ "self": {
+ "name": "Delimited Format Example",
+ "version": "1.0.0",
+ "description": "Delimited format example just for testing"
+
+ },
+ "dataformatversion": "1.0.0",
+ "delimitedschema": {
+ "delimiter": "|",
+ "fields": [{
+ "name": "field1",
+ "description": "test field1",
+ "fieldtype": "string"
+ }, {
+ "name": "field2",
+ "description": "test field2",
+ "fieldtype": "boolean"
+ }]
+ }
+ }