diff options
Diffstat (limited to 'dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink')
4 files changed, 358 insertions, 0 deletions
diff --git a/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMROutputFormat.java b/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMROutputFormat.java new file mode 100644 index 0000000..c89f424 --- /dev/null +++ b/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMROutputFormat.java @@ -0,0 +1,94 @@ +/* + * ===============================LICENSE_START====================================== + * dcae-analytics + * ================================================================================ + * Copyright © 2017 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================LICENSE_END=========================================== + */ + +package org.openecomp.dcae.apod.analytics.cdap.plugins.batch.sink.dmaap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.openecomp.dcae.apod.analytics.cdap.plugins.utils.DMaaPSinkConfigMapper; +import org.openecomp.dcae.apod.analytics.dmaap.DMaaPMRFactory; +import org.openecomp.dcae.apod.analytics.dmaap.domain.config.DMaaPMRPublisherConfig; +import org.openecomp.dcae.apod.analytics.dmaap.service.publisher.DMaaPMRPublisher; + +import java.io.IOException; + +/** + * DMaaP MR Output format used by DMaaP MR Sink Plugin to create a MR Publisher and pass to custom {@link + * DMaaPMRRecordWriter} + * <p> + * @author Rajiv Singla . Creation Date: 1/27/2017. + */ +public class DMaaPMROutputFormat extends OutputFormat<String, NullWritable> { + + @Override + public RecordWriter<String, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, + InterruptedException { + final Configuration configuration = context.getConfiguration(); + final DMaaPMRPublisherConfig publisherConfig = DMaaPSinkConfigMapper.map(configuration); + final DMaaPMRPublisher publisher = DMaaPMRFactory.create().createPublisher(publisherConfig); + return new DMaaPMRRecordWriter(publisher); + } + + @Override + public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { + // do nothing + } + + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { + return new NoOpOutputCommitter(); + } + + /** + * A dummy implementation for {@link OutputCommitter} that does nothing. + */ + protected static class NoOpOutputCommitter extends OutputCommitter { + + @Override + public void setupJob(JobContext jobContext) throws IOException { + // no op + } + + @Override + public void setupTask(TaskAttemptContext taskContext) throws IOException { + // no op + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { + return false; + } + + @Override + public void commitTask(TaskAttemptContext taskContext) throws IOException { + // no op + } + + @Override + public void abortTask(TaskAttemptContext taskContext) throws IOException { + // no op + } + } +} diff --git a/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMROutputFormatProvider.java b/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMROutputFormatProvider.java new file mode 100644 index 0000000..a78d42f --- /dev/null +++ b/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMROutputFormatProvider.java @@ -0,0 +1,116 @@ +/* + * ===============================LICENSE_START====================================== + * dcae-analytics + * ================================================================================ + * Copyright © 2017 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================LICENSE_END=========================================== + */ + +package org.openecomp.dcae.apod.analytics.cdap.plugins.batch.sink.dmaap; + +import co.cask.cdap.api.data.batch.OutputFormatProvider; +import org.openecomp.dcae.apod.analytics.cdap.common.CDAPPluginConstants.DMaaPMRSinkHadoopConfigFields; +import org.openecomp.dcae.apod.analytics.cdap.common.utils.ValidationUtils; +import org.openecomp.dcae.apod.analytics.cdap.plugins.domain.config.dmaap.DMaaPMRSinkPluginConfig; +import org.openecomp.dcae.apod.analytics.common.AnalyticsConstants; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * DMaaP MR Output Format Provider used to create Batch Sink Plugin + * <p> + * @author Rajiv Singla . Creation Date: 1/27/2017. + */ +public class DMaaPMROutputFormatProvider implements OutputFormatProvider { + + private final Map<String, String> sinkConfig; + + + public DMaaPMROutputFormatProvider(DMaaPMRSinkPluginConfig sinkPluginConfig) { + + // initialize Sink Config - with DMaaP MR Publisher config values + sinkConfig = new LinkedHashMap<>(); + + // Required fields for sink config + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.HOST_NAME, sinkPluginConfig.getHostName()); + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.TOPIC_NAME, sinkPluginConfig.getTopicName()); + + final Integer configPortNumber = sinkPluginConfig.getPortNumber(); + if (configPortNumber != null) { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.PORT_NUMBER, configPortNumber.toString()); + } else { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.PORT_NUMBER, + AnalyticsConstants.DEFAULT_PORT_NUMBER.toString()); + } + + final String configProtocol = sinkPluginConfig.getProtocol(); + if (ValidationUtils.isPresent(configProtocol)) { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.PROTOCOL, configProtocol); + } else { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.PROTOCOL, AnalyticsConstants.DEFAULT_PROTOCOL); + } + + + final String configUserName = sinkPluginConfig.getUserName(); + if (ValidationUtils.isPresent(configUserName)) { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.USER_NAME, configUserName); + } else { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.USER_NAME, AnalyticsConstants.DEFAULT_USER_NAME); + } + + final String configUserPass = sinkPluginConfig.getUserPassword(); + if (ValidationUtils.isPresent(configUserPass)) { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.USER_PASS, configUserPass); + } else { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.USER_PASS, AnalyticsConstants.DEFAULT_USER_PASSWORD); + } + + final String configContentType = sinkPluginConfig.getContentType(); + if (ValidationUtils.isPresent(configContentType)) { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.CONTENT_TYPE, configContentType); + } else { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.CONTENT_TYPE, AnalyticsConstants.DEFAULT_CONTENT_TYPE); + } + + + final Integer configMaxBatchSize = sinkPluginConfig.getMaxBatchSize(); + if (configMaxBatchSize != null) { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.MAX_BATCH_SIZE, configMaxBatchSize.toString()); + } else { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.MAX_BATCH_SIZE, + String.valueOf(AnalyticsConstants.DEFAULT_PUBLISHER_MAX_BATCH_SIZE)); + } + + final Integer configMaxRecoveryQueueSize = sinkPluginConfig.getMaxRecoveryQueueSize(); + if (configMaxRecoveryQueueSize != null) { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.MAX_RECOVER_QUEUE_SIZE, configMaxRecoveryQueueSize.toString()); + } else { + sinkConfig.put(DMaaPMRSinkHadoopConfigFields.MAX_RECOVER_QUEUE_SIZE, + String.valueOf(AnalyticsConstants.DEFAULT_PUBLISHER_MAX_RECOVERY_QUEUE_SIZE)); + } + + } + + @Override + public String getOutputFormatClassName() { + return DMaaPMROutputFormat.class.getName(); + } + + @Override + public Map<String, String> getOutputFormatConfiguration() { + return sinkConfig; + } +} diff --git a/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMRRecordWriter.java b/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMRRecordWriter.java new file mode 100644 index 0000000..ec0aded --- /dev/null +++ b/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMRRecordWriter.java @@ -0,0 +1,58 @@ +/* + * ===============================LICENSE_START====================================== + * dcae-analytics + * ================================================================================ + * Copyright © 2017 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================LICENSE_END=========================================== + */ + +package org.openecomp.dcae.apod.analytics.cdap.plugins.batch.sink.dmaap; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.openecomp.dcae.apod.analytics.dmaap.service.publisher.DMaaPMRPublisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Arrays; + +/** + * A simple implementation of {@link RecordWriter} which writes messages to DMaaP MR topic + * <p> + * @author Rajiv Singla . Creation Date: 1/27/2017. + */ +public class DMaaPMRRecordWriter extends RecordWriter<String, NullWritable> { + + private static final Logger LOG = LoggerFactory.getLogger(DMaaPMRRecordWriter.class); + + private final DMaaPMRPublisher dMaaPMRPublisher; + + public DMaaPMRRecordWriter(DMaaPMRPublisher dMaaPMRPublisher) { + this.dMaaPMRPublisher = dMaaPMRPublisher; + } + + @Override + public void write(String message, NullWritable value) throws IOException, InterruptedException { + LOG.debug("Writing message to DMaaP MR Topic: {}", message); + dMaaPMRPublisher.publish(Arrays.asList(message)); + } + + @Override + public void close(TaskAttemptContext context) throws IOException, InterruptedException { + dMaaPMRPublisher.flush(); + } +} diff --git a/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMRSink.java b/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMRSink.java new file mode 100644 index 0000000..32ec251 --- /dev/null +++ b/dcae-analytics-cdap-plugins/src/main/java/org/openecomp/dcae/apod/analytics/cdap/plugins/batch/sink/dmaap/DMaaPMRSink.java @@ -0,0 +1,90 @@ +/* + * ===============================LICENSE_START====================================== + * dcae-analytics + * ================================================================================ + * Copyright © 2017 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================LICENSE_END=========================================== + */ + +package org.openecomp.dcae.apod.analytics.cdap.plugins.batch.sink.dmaap; + +import co.cask.cdap.api.annotation.Description; +import co.cask.cdap.api.annotation.Name; +import co.cask.cdap.api.annotation.Plugin; +import co.cask.cdap.api.data.batch.Output; +import co.cask.cdap.api.data.format.StructuredRecord; +import co.cask.cdap.api.data.schema.Schema; +import co.cask.cdap.api.dataset.lib.KeyValue; +import co.cask.cdap.etl.api.Emitter; +import co.cask.cdap.etl.api.PipelineConfigurer; +import co.cask.cdap.etl.api.batch.BatchSink; +import co.cask.cdap.etl.api.batch.BatchSinkContext; +import org.apache.hadoop.io.NullWritable; +import org.openecomp.dcae.apod.analytics.cdap.common.utils.ValidationUtils; +import org.openecomp.dcae.apod.analytics.cdap.plugins.domain.config.dmaap.DMaaPMRSinkPluginConfig; +import org.openecomp.dcae.apod.analytics.cdap.plugins.utils.CDAPPluginUtils; +import org.openecomp.dcae.apod.analytics.cdap.plugins.validator.DMaaPMRSinkPluginConfigValidator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @author Rajiv Singla . Creation Date: 1/26/2017. + */ +@Plugin(type = BatchSink.PLUGIN_TYPE) +@Name("DMaaPMRSink") +@Description("A batch sink Plugin that publishes messages to DMaaP MR Topic.") +public class DMaaPMRSink extends BatchSink<StructuredRecord, String, NullWritable> { + + private static final Logger LOG = LoggerFactory.getLogger(DMaaPMRSink.class); + + private final DMaaPMRSinkPluginConfig pluginConfig; + + public DMaaPMRSink(final DMaaPMRSinkPluginConfig pluginConfig) { + LOG.debug("Creating DMaaP MR Sink Plugin with plugin Config: {}", pluginConfig); + this.pluginConfig = pluginConfig; + } + + @Override + public void configurePipeline(final PipelineConfigurer pipelineConfigurer) { + super.configurePipeline(pipelineConfigurer); + ValidationUtils.validateSettings(pluginConfig, new DMaaPMRSinkPluginConfigValidator()); + // validates that input schema contains the field provided in Sink Message Column Name property + final Schema inputSchema = pipelineConfigurer.getStageConfigurer().getInputSchema(); + CDAPPluginUtils.validateSchemaContainsFields(inputSchema, pluginConfig.getMessageColumnName()); + } + + + @Override + public void prepareRun(BatchSinkContext context) throws Exception { + context.addOutput(Output.of(pluginConfig.getReferenceName(), new DMaaPMROutputFormatProvider(pluginConfig))); + } + + @Override + public void transform(StructuredRecord structuredRecord, + Emitter<KeyValue<String, NullWritable>> emitter) throws Exception { + // get incoming message from structured record + final String incomingMessage = structuredRecord.get(pluginConfig.getMessageColumnName()); + + // if incoming messages does not have message column name log warning as it should not happen + if (incomingMessage == null) { + LOG.warn("Column Name: {}, contains no message.Skipped for DMaaP MR Publishing....", + pluginConfig.getMessageColumnName()); + } else { + + // emit the messages as key + emitter.emit(new KeyValue<String, NullWritable>(incomingMessage, null)); + } + } +} |