From 1fca6acb2918beef86a5a78dc659683830908cd2 Mon Sep 17 00:00:00 2001 From: Ralph Knag Date: Tue, 5 Dec 2017 12:05:57 -0500 Subject: DCAE Controller documentation DCAEGEN2-213 Issue-ID: DCAEGEN2-213 Change-Id: I7f2023b7f88b73eef852eca0bbf9086e14903cd6 Signed-off-by: Ralph Knag --- docs/sections/components/data-formats.rst | 418 ++++++++++++++++++++++++++++++ 1 file changed, 418 insertions(+) create mode 100755 docs/sections/components/data-formats.rst (limited to 'docs/sections/components/data-formats.rst') diff --git a/docs/sections/components/data-formats.rst b/docs/sections/components/data-formats.rst new file mode 100755 index 00000000..ecb019c2 --- /dev/null +++ b/docs/sections/components/data-formats.rst @@ -0,0 +1,418 @@ +.. This work is licensed under a Creative Commons Attribution 4.0 International License. +.. http://creativecommons.org/licenses/by/4.0 + +.. _data-formats: + +Data Formats +============ + +| Because the DCAE designer composes your component with others at + service design time, in most cases you do not know what specific + component(s) your component will send data to during runtime. Thus, it + is vital that DCAE has a language of describing the data passed + between components, so that it is known which components are + composable with others. Data formats are descriptions of data—they are + the data contract between your component and other components. You + need to describe the available outputs and assumed inputs of your + components as data formats. These data descriptions are onboarded into + ASDC, and each receives a UUID. If component X outputs data format + DF-Y, and another component Z specifies DF-Y as their input data + format, then X is said to be *composable* with that component. The + data formats are referenced in the component specifications by the + data format’s id and version. +| The vision is to have a repository of shared data formats that + developers and teams can re-use and also provide them the means to + extend and create new custom data formats. + +.. _dataformat_metadata: + +Meta Schema Definition +---------------------- + +The “Meta Schema” implementation defines how data format JSON schemas +can be written to define user input. It is itself a JSON schema (thus it +is a “meta schema”). It requires the name of the data format entry, the +data format entry version and allows a description under “self” object. +The meta schema version must be specified as the value of the +“dataformatversion” key. Then the input schema itself is described. +There are four types of schema descriptions objects - jsonschema for +inline standard JSON Schema definitions of JSON inputs, delimitedschema +for delimited data input using a defined JSON description, unstructured +for unstructured text, and reference that allows a pointer to another +artifact for a schema. The reference allows for XML schema, but can be +used as a pointer to JSON, Delimited Format, and Unstructured schemas as +well. + +The current Meta Schema implementation is defined below: + +:: + + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "Data format specification schema Version 1.0", + "type": "object", + "oneOf": [{ + "properties": { + "self": { + "$ref": "#/definitions/self" + }, + "dataformatversion": { + "$ref": "#/definitions/dataformatversion" + }, + "reference": { + + "type": "object", + "description": "A reference to an external schema - name/version is used to access the artifact", + "properties": { + "name": { + "$ref": "#/definitions/name" + }, + "version": { + "$ref": "#/definitions/version" + }, + "format": { + "$ref": "#/definitions/format" + } + }, + "required": [ + "name", + "version", + "format" + ], + "additionalProperties": false + } + }, + "required": ["self", "dataformatversion", "reference"], + "additionalProperties": false + }, { + "properties": { + "self": { + "$ref": "#/definitions/self" + }, + "dataformatversion": { + "$ref": "#/definitions/dataformatversion" + }, + "jsonschema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "The actual JSON schema for this data format" + } + + }, + "required": ["self", "dataformatversion", "jsonschema"], + "additionalProperties": false + }, { + "properties": { + "self": { + "$ref": "#/definitions/self" + }, + "dataformatversion": { + "$ref": "#/definitions/dataformatversion" + }, + "delimitedschema": { + "type": "object", + "description": "A JSON schema for delimited files", + "properties": { + "delimiter": { + "enum": [",", "|", "\t", ";"] + }, + "fields": { + "type": "array", + "description": "Array of field descriptions", + "items": { + "$ref": "#/definitions/field" + } + } + }, + "additionalProperties": false + } + }, + "required": ["self", "dataformatversion", "delimitedschema"], + "additionalProperties": false + }, { + "properties": { + "self": { + "$ref": "#/definitions/self" + }, + "dataformatversion": { + "$ref": "#/definitions/dataformatversion" + }, + "unstructured": { + "type": "object", + "description": "A JSON schema for unstructured text", + "properties": { + "encoding": { + "type": "string", + "enum": ["ASCII", "UTF-8", "UTF-16", "UTF-32"] + } + }, + "additionalProperties": false + + } + }, + "required": ["self", "dataformatversion", "unstructured"], + "additionalProperties": false + }], + "definitions": { + "name": { + "type": "string" + }, + "version": { + "type": "string", + "pattern": "^(\\d+\\.)(\\d+\\.)(\\*|\\d+)$" + }, + "self": { + "description": "Identifying Information for the Data Format - name/version can be used to access the artifact", + "type": "object", + "properties": { + "name": { + "$ref": "#/definitions/name" + }, + "version": { + "$ref": "#/definitions/version" + }, + "description": { + "type": "string" + } + }, + "required": [ + "name", + "version" + ], + "additionalProperties": false + }, + "format": { + "description": "Reference schema type", + "type": "string", + "enum": [ + "JSON", + "Delimited Format", + "XML", + "Unstructured" + ] + }, + "field": { + "description": "A field definition for the delimited schema", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "fieldtype": { + "description": "the field type - from the XML schema types", + "type": "string", + "enum": ["string", "boolean", + "decimal", "float", "double", + "duration", "dateTime", "time", + "date", "gYearMonth", "gYear", + "gMonthDay", "gDay", "gMonth", + "hexBinary", "base64Binary", + "anyURI", "QName", "NOTATION", + "normalizedString", "token", + "language", "IDREFS", "ENTITIES", + "NMTOKEN", "NMTOKENS", "Name", + "NCName", "ID", "IDREF", "ENTITY", + "integer", "nonPositiveInteger", + "negativeInteger", "long", "int", + "short", "byte", + "nonNegativeInteger", "unsignedLong", + "unsignedInt", "unsignedShort", + "unsignedByte", "positiveInteger" + + ] + }, + "fieldPattern": { + "description": "Regular expression that defines the field format", + "type": "integer" + }, + "fieldMaxLength": { + "description": "The maximum length of the field", + "type": "integer" + }, + "fieldMinLength": { + "description": "The minimum length of the field", + "type": "integer" + }, + "fieldMinimum": { + "description": "The minimum numeric value of the field", + "type": "integer" + }, + "fieldMaximum": { + "description": "The maximum numeric value of the field", + "type": "integer" + } + }, + "additionalProperties": false + }, + "dataformatversion": { + "type": "string", + "enum": ["1.0.0"] + } + } + } + +Examples +----------- + +By reference example - Common Event Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First the full JSON schema description of the Common Event Format would +be loaded with a name of “Common Event Format” and the current version +of “25.0.0”. + +Then the data format description is loaded by this schema: + +:: + + { + "self": { + "name": "Common Event Format Definition", + "version": "25.0.0", + "description": "Common Event Format Definition" + + }, + "dataformatversion": "1.0.0", + "reference": { + "name": "Common Event Format", + "format": "JSON", + "version": "25.0.0" + } + } + + + +Simple JSON Example +~~~~~~~~~~~~~~~~~~~~~~~~ + + +:: + + { + "self": { + "name": "Simple JSON Example", + "version": "1.0.0", + "description": "An example of unnested JSON schema for Input and output" + + }, + "dataformatversion": "1.0.0", + "jsonschema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "raw-text": { + "type": "string" + } + }, + "required": ["raw-text"], + "additionalProperties": false + } + } + +Nested JSON Example +~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + { + "self": { + "name": "Nested JSON Example", + "version": "1.0.0", + "description": "An example of nested JSON schema for Input and output" + + }, + "dataformatversion": "1.0.0", + "jsonschema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "properties": { + "numFound": { + "type": "integer" + }, + "start": { + "type": "integer" + }, + "engagements": { + "type": "array", + "items": { + "properties": { + "engagementID": { + "type": "string", + "transcript": { + "type": "array", + "items": { + "type": { + "type": "string" + }, + "content": { + "type": "string" + }, + "senderName": { + "type": "string" + }, + "iso": { + "type": "string" + }, + "timestamp": { + "type": "integer" + }, + "senderId": { + "type": "string" + } + } + } + } + } + } + } + }, + "additionalProperties": false + } + } + +Unstructured Example +~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + { + "self": { + "name": "Unstructured Text Example", + "version": "25.0.0", + "description": "An example of a unstructured text used for both input and output for " + + }, + "dataformatversion": "1.0.0", + "unstructured": { + "encoding": "UTF-8" + } + } + + +An example of a delimited schema +-------------------------------- + +:: + + { + "self": { + "name": "Delimited Format Example", + "version": "1.0.0", + "description": "Delimited format example just for testing" + + }, + "dataformatversion": "1.0.0", + "delimitedschema": { + "delimiter": "|", + "fields": [{ + "name": "field1", + "description": "test field1", + "fieldtype": "string" + }, { + "name": "field2", + "description": "test field2", + "fieldtype": "boolean" + }] + } + } -- cgit 1.2.3-korg