19 files changed, 162 insertions, 27 deletions
diff --git a/cdap3vm/config/cdap-config-template/cdap-site.xml b/cdap3vm/config/cdap-config-template/cdap-site.xml
index c0eabda..1ce75bd 100644
--- a/cdap3vm/config/cdap-config-template/cdap-site.xml
+++ b/cdap3vm/config/cdap-config-template/cdap-site.xml
@@ -57,5 +57,23 @@
         </description>
     </property>
 
+<!-- [171660] Tuning parameter changes for log saver and master memory -->
+<property>
+    <name>log.saver.run.memory.megs</name>
+    <value>1024</value>
+    <description>Memory in megabytes allocated for log saver instances to run in YARN</description>
+</property>
+
+<property>
+    <name>log.saver.num.instances</name>
+    <value>2</value>
+    <description>Number of log saver instances to run in YARN</description>
+</property>
+
+<property>
+    <name>master.service.memory.mb</name>
+    <value>2048</value>
+    <description>Memory in megabytes for each master service instance</description>
+</property>
 </configuration>
diff --git a/cdap3vm/config/cdap-config-template/common/common.sh b/cdap3vm/config/cdap-config-template/common/common.sh
index c58fb6a..394602e 100755
--- a/cdap3vm/config/cdap-config-template/common/common.sh
+++ b/cdap3vm/config/cdap-config-template/common/common.sh
@@ -15,11 +15,10 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
-# checks if there exists a PID that is already running. return 0 idempotently
-
 export JAVA_HOME=__JAVA_HOME__
 PATH=$PATH:__NODEJS_BIN__
 
+# checks if there exists a PID that is already running. return 0 idempotently
 cdap_check_before_start() {
   if [ -f ${pid} ]; then
     if kill -0 $(<${pid}) > /dev/null 2>&1; then
@@ -169,7 +168,10 @@ cdap_set_hbase() {
       hbasecompat="${CDAP_HOME}/hbase-compat-1.0/lib/*"
       ;;
     1.1*)
-      hbasecompat="$CDAP_HOME/hbase-compat-1.1/lib/*"
+      hbasecompat="${CDAP_HOME}/hbase-compat-1.1/lib/*"
+      ;;
+    1.2-cdh*)
+      hbasecompat="${CDAP_HOME}/hbase-compat-1.2-cdh5.7.0/lib/*"
       ;;
     *)
       echo "ERROR: Unknown/unsupported version of HBase found: ${HBASE_VERSION}"
@@ -207,7 +209,7 @@ cdap_set_classpath() {
   # In order to ensure that we can do hacks, need to make sure classpath is sorted
   # so that cdap jars are placed earlier in the classpath than twill or hadoop jars
-  COMP_LIB=$(find -L "${COMP_HOME}/lib" -type f | sort | tr '\n' ':')
+  COMP_LIB=$(find -L "${COMP_HOME}/lib" -type f 2>/dev/null | sort | tr '\n' ':')
 
   if [ -n "${HBASE_CP}" ]; then
     CP="${COMP_LIB}:${HBASE_CP}:${CCONF}/:${COMP_HOME}/conf/:${EXTRA_CLASSPATH}"
@@ -242,9 +244,16 @@ cdap_set_hive_classpath() {
       cdap_kinit || return 1
     fi
 
-    if [[ $(which hive 2>/dev/null) ]]; then
+    # Use ${HIVE_HOME} if set
+    if [ -n "${HIVE_HOME}" ]; then
+      HIVE_CMD=${HIVE_HOME}/bin/hive
+    else
+      HIVE_CMD=hive
+    fi
+
+    if [[ $(which ${HIVE_CMD} 2>/dev/null) ]]; then
       ERR_FILE=$(mktemp)
-      HIVE_VAR_OUT=$(hive -e 'set -v' 2>${ERR_FILE})
+      HIVE_VAR_OUT=$(${HIVE_CMD} -e 'set -v' 2>${ERR_FILE})
       __ret=$?
      HIVE_ERR_MSG=$(< ${ERR_FILE})
      rm ${ERR_FILE}
@@ -261,6 +270,7 @@ cdap_set_hive_classpath() {
       HIVE_HOME=${HIVE_HOME:-$(echo -e "${HIVE_VARS}" | grep '^env:HIVE_HOME=' | cut -d= -f2)}
       HIVE_CONF_DIR=${HIVE_CONF_DIR:-$(echo -e "${HIVE_VARS}" | grep '^env:HIVE_CONF_DIR=' | cut -d= -f2)}
       HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$(echo -e "${HIVE_VARS}" | grep '^env:HADOOP_CONF_DIR=' | cut -d= -f2)}
+      HIVE_EXEC_ENGINE=${HIVE_EXEC_ENGINE:-$(echo -e "${HIVE_VARS}" | grep '^hive.execution.engine=' | cut -d= -f2)}
     fi
   fi
@@ -269,11 +279,49 @@ cdap_set_hive_classpath() {
   if [ -n "${HIVE_HOME}" -a -n "${HIVE_CONF_DIR}" -a -n "${HADOOP_CONF_DIR}" ]; then
     EXPLORE_CONF_FILES=$(ls -1dF ${HIVE_CONF_DIR}/* ${HADOOP_CONF_DIR}/* | sed -e '/\/$/d' | tr '\n' ':')
     EXPLORE_CLASSPATH=$(ls -1 ${HIVE_HOME}/lib/hive-exec-* ${HIVE_HOME}/lib/*.jar | tr '\n' ':')
+    if [ -n "${TEZ_HOME}" -a -n "${TEZ_CONF_DIR}" ]; then
+      # tez-site.xml also needs to be passed to the explore service
+      EXPLORE_CONF_FILES=${EXPLORE_CONF_FILES}:${TEZ_CONF_DIR}/tez-site.xml:
+    fi
+    if [[ "${HIVE_EXEC_ENGINE}" == "spark" ]]; then
+      # We require SPARK_HOME to be set for CDAP to include the Spark assembly JAR for Explore
+      cdap_set_spark || die "Unable to get SPARK_HOME, but default Hive engine is Spark"
+    fi
     export EXPLORE_CONF_FILES EXPLORE_CLASSPATH
   fi
 fi
 }
 
+# Get SPARK_HOME
+cdap_set_spark() {
+  local readonly __saved_stty=$(stty -g 2>/dev/null)
+  # First, see if we're set to something sane
+  if [ -n "${SPARK_HOME}" -a -d "${SPARK_HOME}" ]; then
+    export SPARK_HOME
+    return 0 # SPARK_HOME is set, already
+  else
+    if [[ $(which spark-shell 2>/dev/null) ]]; then
+      ERR_FILE=$(mktemp)
+      SPARK_VAR_OUT=$(echo 'for ((key, value) <- sys.env) println (key + "=" + value); exit' | spark-shell --master local 2>${ERR_FILE})
+      __ret=$?
+      # spark-shell invocation above does not properly restore the stty.
+      stty ${__saved_stty}
+      SPARK_ERR_MSG=$(< ${ERR_FILE})
+      rm ${ERR_FILE}
+      if [ ${__ret} -ne 0 ]; then
+        echo "ERROR - While determining Spark home, failed to get Spark settings using: spark-shell --master local"
+        echo "stderr:"
+        echo "${SPARK_ERR_MSG}"
+        return 1
+      fi
+      SPARK_HOME=$(echo -e "${SPARK_VAR_OUT}" | grep ^SPARK_HOME= | cut -d= -f2)
+      export SPARK_HOME
+      return 0
+    fi
+    return 1
+  fi
+}
+
 # Check that directory /var/tmp/cdap exists in the master node, or create it
 cdap_check_or_create_master_local_dir() {
   mkdir -p "${LOCAL_DIR}"
diff --git a/cdap3vm/config/cdap-config-template/logback-container.xml b/cdap3vm/config/cdap-config-template/logback-container.xml
index f1957d8..c693ace 100644
--- a/cdap3vm/config/cdap-config-template/logback-container.xml
+++ b/cdap3vm/config/cdap-config-template/logback-container.xml
@@ -2,13 +2,13 @@
 <!--
   Copyright © 2015 Cask Data, Inc.
-  
+
   Licensed under the Apache License, Version 2.0 (the "License"); you may not
   use this file except in compliance with the License. You may obtain a copy of
   the License at
-  
+
   http://www.apache.org/licenses/LICENSE-2.0
-  
+
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -43,19 +43,18 @@
     <logger name="Explore.stdout" level="INFO"/>
     <logger name="Explore.stderr" level="INFO"/>
 
-
-  <!-- quick workaround suggested by CASK for ticket #666 [DE257314] -->
-  <logger name="org.apache.hadoop.io.retry.RetryInvocationHandler" level="ERROR"/>
+    <!-- quick workaround suggested by CASK for ticket #666 [DE257314] -->
+    <logger name="org.apache.hadoop.io.retry.RetryInvocationHandler" level="ERROR"/>
 
     <appender name="Rolling" class="ch.qos.logback.core.rolling.RollingFileAppender">
-      <!-- LOG_DIRS is the environment variable set by YARN for container logs -->
-      <file>${LOG_DIRS}/program.log</file>
+      <!-- CDAP_LOG_DIR is the environment variable set by CDAP for logs -->
+      <file>${CDAP_LOG_DIR}/program.log</file>
       <encoder>
         <pattern>%d{ISO8601} - %-5p [%t:%logger{1}@%L] - %m%n</pattern>
       </encoder>
       <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
         <!-- Daily rollover at midnight-->
-        <fileNamePattern>${LOG_DIRS}/program.%d.log</fileNamePattern>
+        <fileNamePattern>${CDAP_LOG_DIR}/program.%d.log</fileNamePattern>
         <!-- Keep 2 weeks of history -->
         <maxHistory>14</maxHistory>
diff --git a/cdap3vm/config/hadoop-cluster-conf-file.sh b/cdap3vm/config/hadoop-cluster-conf-file.sh
index a5fcf3b..22c8600 100644
--- a/cdap3vm/config/hadoop-cluster-conf-file.sh
+++ b/cdap3vm/config/hadoop-cluster-conf-file.sh
@@ -57,7 +57,7 @@
 __YARN_PID_DIR__=/var/run/hadoop/yarn
 
 # Directory to store the MapReduce daemon logs.
-__MAPRED_LOG_DIR__=/opt/data/log/hadoop/mapred
+__MAPRED_LOG_DIR__=/opt/data/log/mapred
 
 # Directory to store the mapreduce jobhistory process ID.
 __MAPRED_PID_DIR__=/var/run/hadoop/mapred
diff --git a/cdap3vm/config/hadoop-cluster-config-template/core_hadoop/hadoop-env.sh b/cdap3vm/config/hadoop-cluster-config-template/core_hadoop/hadoop-env.sh
index f4a200c..fc30e92 100644
--- a/cdap3vm/config/hadoop-cluster-config-template/core_hadoop/hadoop-env.sh
+++ b/cdap3vm/config/hadoop-cluster-config-template/core_hadoop/hadoop-env.sh
@@ -59,7 +59,8 @@ export HADOOP_SSH_OPTS="-o ConnectTimeout=5 -o SendEnv=HADOOP_CONF_DIR"
 export HADOOP_LOG_DIR=${HADOOP_LOG_MAIN}/$USER
 
 # History server logs
-export HADOOP_MAPRED_LOG_DIR=${HADOOP_LOG_MAIN}-mapreduce/$USER
+# [173931] now using __MAPRED_USER__ directory
+export HADOOP_MAPRED_LOG_DIR=${HADOOP_LOG_MAIN}/__MAPRED_USER__
 
 # Where log files are stored in the secure data environment.
 export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_MAIN}/$HADOOP_SECURE_DN_USER
@@ -80,7 +81,8 @@
 export HADOOP_PID_DIR=/var/run/hadoop/$USER
 export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop/$HADOOP_SECURE_DN_USER
 
 # History server pid
-export HADOOP_MAPRED_PID_DIR=/var/run/hadoop-mapreduce/$USER
+# [173931] now using __MAPRED_USER__ directory
+export HADOOP_MAPRED_PID_DIR=/var/run/hadoop/__MAPRED_USER__
 
 YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY"
@@ -123,3 +125,7 @@ export HADOOP_LIBEXEC_DIR=/usr/hdp/current/hadoop-client/libexec
 export JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}
 
 export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS"
+
+# [US171516] Fix Hadoop Spark config
+export SPARK_HOME="/usr/hdp/current/spark-historyserver/"
+
diff --git a/cdap3vm/config/hadoop-cluster-config-template/core_hadoop/hdfs-site.xml b/cdap3vm/config/hadoop-cluster-config-template/core_hadoop/hdfs-site.xml
index a9c9c0f..8aa483d 100644
--- a/cdap3vm/config/hadoop-cluster-config-template/core_hadoop/hdfs-site.xml
+++ b/cdap3vm/config/hadoop-cluster-config-template/core_hadoop/hdfs-site.xml
@@ -244,7 +244,8 @@
 
     <property>
       <name>dfs.replication</name>
-      <value>3</value>
+      <!-- [DE261906] set replication to 2 on 3-node cluster -->
+      <value>2</value>
     </property>
 
     <property>
diff --git a/cdap3vm/config/services-setup-templates/cdap-start.sh b/cdap3vm/config/services-setup-templates/cdap-start.sh
index 5b7965b..0efdf3b 100644
--- a/cdap3vm/config/services-setup-templates/cdap-start.sh
+++ b/cdap3vm/config/services-setup-templates/cdap-start.sh
@@ -4,8 +4,10 @@ export JAVA_HOME=__JAVA_HOME__
 
 /etc/init.d/cdap-auth-server start
 /etc/init.d/cdap-kafka-server start
-/etc/init.d/cdap-master start
+
+# [186049] start router before master
 /etc/init.d/cdap-router start
+/etc/init.d/cdap-master start
 
 PATH=$PATH:__NODEJS_BIN__
 /etc/init.d/cdap-ui start
diff --git a/cdap3vm/config/services-setup-templates/job-history-server-start.sh b/cdap3vm/config/services-setup-templates/job-history-server-start.sh
new file mode 100644
index 0000000..1e0a30e
--- /dev/null
+++ b/cdap3vm/config/services-setup-templates/job-history-server-start.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# [173931] Start MapReduce History Server during Hadoop install
+
+__HDP_CURRENT_FOLDER__/hadoop-mapreduce-historyserver/sbin/mr-jobhistory-daemon.sh --config __HADOOP_CONF_DIR__ start historyserver
+
diff --git a/cdap3vm/config/services-templates/boot-time-cdap-vm-N0.sh b/cdap3vm/config/services-templates/boot-time-cdap-vm-N0.sh
index c5c04ca..c8dc46d 100644
--- a/cdap3vm/config/services-templates/boot-time-cdap-vm-N0.sh
+++ b/cdap3vm/config/services-templates/boot-time-cdap-vm-N0.sh
@@ -25,6 +25,9 @@ MAINDIR=__SERVICE_CONFIG_FOLDER__
 
 \. $MAINDIR/utility-scripts.sh
 
+# set umask expected for log creation
+umask 0022
+
 ACTION="$1"
 
 case "$ACTION" in
diff --git a/cdap3vm/config/services-templates/boot-time-cdap-vm-N1.sh b/cdap3vm/config/services-templates/boot-time-cdap-vm-N1.sh
index 83145ad..e440f6e 100644
--- a/cdap3vm/config/services-templates/boot-time-cdap-vm-N1.sh
+++ b/cdap3vm/config/services-templates/boot-time-cdap-vm-N1.sh
@@ -24,6 +24,8 @@ MAINDIR=__SERVICE_CONFIG_FOLDER__
 
 \. $MAINDIR/utility-scripts.sh
 
+# set umask expected for log creation
+umask 0022
 
 ACTION="$1"
 
diff --git a/cdap3vm/config/services-templates/boot-time-cdap-vm-N2.sh b/cdap3vm/config/services-templates/boot-time-cdap-vm-N2.sh
index 7eebfa0..ffa5fab 100644
--- a/cdap3vm/config/services-templates/boot-time-cdap-vm-N2.sh
+++ b/cdap3vm/config/services-templates/boot-time-cdap-vm-N2.sh
@@ -24,6 +24,9 @@ MAINDIR=__SERVICE_CONFIG_FOLDER__
 
 \. $MAINDIR/utility-scripts.sh
 
+# set umask expected for log creation
+umask 0022
+
 ACTION="$1"
 
 case "$ACTION" in
diff --git a/cdap3vm/config/services-templates/cdap.sh b/cdap3vm/config/services-templates/cdap.sh
index 24c79df..2a210ff 100644
--- a/cdap3vm/config/services-templates/cdap.sh
+++ b/cdap3vm/config/services-templates/cdap.sh
@@ -26,8 +26,9 @@ case "$ACTION" in
     start|stop|status )
         /etc/init.d/cdap-auth-server $ACTION
         /etc/init.d/cdap-kafka-server $ACTION
-        /etc/init.d/cdap-master $ACTION
+        # [173581] start router before master
         /etc/init.d/cdap-router $ACTION
+        /etc/init.d/cdap-master $ACTION
         /etc/init.d/cdap-ui $ACTION
         ;;
     * ) echo "Usage: $0 <start|stop|status>"
diff --git a/cdap3vm/config/services-templates/job-history-server.sh b/cdap3vm/config/services-templates/job-history-server.sh
index 8f39aba..ba1296c 100644
--- a/cdap3vm/config/services-templates/job-history-server.sh
+++ b/cdap3vm/config/services-templates/job-history-server.sh
@@ -20,11 +20,13 @@
 
 # Job History Server
 
+# [173931] now running as __MAPRED_USER__
+
 ACTION="$1"
 
 case "$ACTION" in
     start|stop )
-        su "__HDFS_USER__" -c "__HDP_CURRENT_FOLDER__/hadoop-mapreduce-historyserver/sbin/mr-jobhistory-daemon.sh --config __HADOOP_CONF_DIR__ $ACTION historyserver" ;;
+        su "__MAPRED_USER__" -c "__HDP_CURRENT_FOLDER__/hadoop-mapreduce-historyserver/sbin/mr-jobhistory-daemon.sh --config __HADOOP_CONF_DIR__ $ACTION historyserver" ;;
     * ) echo "Usage: $0 <start|stop>"
         exit -1 ;;
diff --git a/cdap3vm/install-steps/01-generate-host-ids-configs.sh b/cdap3vm/install-steps/01-generate-host-ids-configs.sh
index 3a53078..b71ca79 100644
--- a/cdap3vm/install-steps/01-generate-host-ids-configs.sh
+++ b/cdap3vm/install-steps/01-generate-host-ids-configs.sh
@@ -139,7 +139,7 @@ process_hadoop_service_setup_scripts \
     resource-manager-start.sh node-manager-start.sh datanode-start.sh \
     job-history-setup-01-as-root.sh job-history-setup-02-as-hdfs.sh \
     datanode-start.sh hbase-master-start.sh hbase-regionserver-start.sh \
-    cdap-setup.sh cdap-start.sh
+    cdap-setup.sh cdap-start.sh job-history-server-start.sh
 
 chmod -R o+r ${HADOOP_SCRIPT_FOLDER}/..
diff --git a/cdap3vm/install-steps/04-folder-creation.sh b/cdap3vm/install-steps/04-folder-creation.sh
index 59ca5cf..3b68fd2 100644
--- a/cdap3vm/install-steps/04-folder-creation.sh
+++ b/cdap3vm/install-steps/04-folder-creation.sh
@@ -68,4 +68,14 @@ chmod +x /etc/init.d/cdap-vm-services
 
 # update-rc.d cdap-vm-services defaults
 
+# Disable CDAP component init.d entries installed by Debian packages
+# [171525] Remove init.d run level links for CDAP processes
+#
+echo '### Disable CDAP component init.d entries installed by Debian packages:'
+for init in cdap-auth-server cdap-kafka-server cdap-master cdap-router cdap-ui
+  do echo Disabling $init...
+     echo update-rc.d -n -f $init remove
+     sudo update-rc.d -f $init remove
+done
+
 \. ./utils/cdap-nodetype-${NODETYPE}.sh
diff --git a/cdap3vm/install-steps/install-hortonworks-hadoop.sh b/cdap3vm/install-steps/install-hortonworks-hadoop.sh
index 9e73eeb..b855d41 100755
--- a/cdap3vm/install-steps/install-hortonworks-hadoop.sh
+++ b/cdap3vm/install-steps/install-hortonworks-hadoop.sh
@@ -37,6 +37,7 @@ sudo apt-get -y update
 
 # sudo apt-cache search hadoop
 sudo apt-cache search hbase
+sudo apt-cache search spark
 sudo apt-cache search zookeeper
 
 ## Install HortonWorks Hadoop packages:
@@ -50,6 +51,7 @@ sudo JAVA_HOME=/opt/app/java/jdk/jdk170 apt-get -y install \
     hadoop-hdfs-zkfc hadoop-mapreduce hadoop-mapreduce-historyserver \
     hadoop-yarn hadoop-yarn-nodemanager \
     hbase hbase-master hbase-regionserver \
+    spark-master spark-python \
     zookeeper libhdfs0
 
 ## Fix file permissions for domain sockets
diff --git a/cdap3vm/install-steps/utils/folder-creation-utils.sh b/cdap3vm/install-steps/utils/folder-creation-utils.sh
index 8a8f82a..33d4ac1 100644
--- a/cdap3vm/install-steps/utils/folder-creation-utils.sh
+++ b/cdap3vm/install-steps/utils/folder-creation-utils.sh
@@ -1,5 +1,3 @@
-#!/bin/bash
-
 # ============LICENSE_START==========================================
 # ===================================================================
 # Copyright © 2017 AT&T Intellectual Property. All rights reserved.
@@ -18,6 +16,8 @@
 # ============LICENSE_END============================================
 # ECOMP and OpenECOMP are trademarks and service marks of AT&T Intellectual Property.
 
+#!/bin/bash
+
 copy_hadoop_conf_files() {
     srcfolder=${HADOOP_CONF_FOLDER}/"$1"
     destfolder="$2"
@@ -151,6 +151,8 @@ inst_job_history_server() {
     run_service_setup_script job-history-setup-01-as-root.sh # no need for username then
     run_service_setup_script job-history-setup-02-as-hdfs.sh ${__HDFS_USER__}
     setup_hadoop_service_scripts job-history-server.sh
+    # [173931] Start MapReduce History Server during Hadoop install
+    run_service_setup_script job-history-server-start.sh ${__MAPRED_USER__}
 }
 
 inst_cdap() {
diff --git a/cdap3vm/install.sh b/cdap3vm/install.sh
index d2947ac..290540c 100644
--- a/cdap3vm/install.sh
+++ b/cdap3vm/install.sh
@@ -21,12 +21,41 @@
 # Default will be a production installation
 # Any test installation requires passing arguments to different steps
 
+umask 0022
+CDAPLOG=$(dirname $0)/cdap-hadoop-install.log
+
+(
+echo -n "### Start at: "
+date
+
 cd $(dirname $0)/install-steps
+pwd
+echo
+echo '## 01-generate-host-ids-configs.sh'
 bash 01-generate-host-ids-configs.sh
+
+echo
+echo '## 02-user-creation.sh'
 bash 02-user-creation.sh
+
 #bash 03-hadoop-rpms.sh
+
+echo
+echo '## install-hortonworks-hadoop.sh'
 bash install-hortonworks-hadoop.sh
+
+echo
+echo '## install-cdap-pkgs.sh'
 bash install-cdap-pkgs.sh
+
+echo
+echo '## 04-folder-creation.sh'
 bash 04-folder-creation.sh
+echo
+echo -n "### End at: "
+date
+
+) 2>&1 | tee -a $CDAPLOG
+
diff --git a/cdap3vm/pkgs/ubuntu-files/create_pid_dirs.sh b/cdap3vm/pkgs/ubuntu-files/create_pid_dirs.sh
index 2812b31..3e18417 100644
--- a/cdap3vm/pkgs/ubuntu-files/create_pid_dirs.sh
+++ b/cdap3vm/pkgs/ubuntu-files/create_pid_dirs.sh
@@ -43,6 +43,7 @@ sudo chown yarn:hadoop /var/run/hadoop/yarn/yarn
 sudo mkdir -p -m0755 /var/run/hadoop/mapred
 sudo chown mapred:hadoop /var/run/hadoop/mapred
 
+# [173931] Not used
 # mapreduce:
-sudo mkdir -p -m0755 /var/run/hadoop/mapreduce
-sudo chown mapred:hadoop /var/run/hadoop/mapreduce
+# sudo mkdir -p -m0755 /var/run/hadoop/mapreduce
+# sudo chown mapred:hadoop /var/run/hadoop/mapreduce
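
Notes on a few of the shell techniques in this change set follow, with small illustrative sketches. Paths and names in the sketches are placeholders, not files from this repo.

The guard that common.sh runs before every service start relies on kill -0, which sends no signal at all: it only tests whether the PID recorded in the pidfile is alive and signalable, so the check has no side effects and is safe to repeat. A minimal standalone sketch, assuming a hypothetical pidfile path and message:

    #!/bin/bash
    pid=/var/run/cdap/example-service.pid    # hypothetical pidfile

    check_before_start() {
      if [ -f "${pid}" ]; then
        if kill -0 "$(<${pid})" > /dev/null 2>&1; then
          echo "Already running as PID $(<${pid}); stop it first." >&2
          return 1
        fi
      fi
      return 0    # missing or stale pidfile: safe to start
    }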
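
The new cdap_set_spark helper discovers SPARK_HOME by piping a Scala one-liner into spark-shell --master local and grepping the printed environment; because spark-shell can leave terminal modes corrupted, the function saves them with stty -g up front and restores them after the call. A hedged usage sketch; the install path of common.sh below is an assumption:

    #!/bin/bash
    . /opt/cdap/conf/common/common.sh    # assumed location of common.sh
    if cdap_set_spark; then
      echo "Spark found at: ${SPARK_HOME}"
    else
      echo "No Spark on this node; hive.execution.engine=spark would fail" >&2
    fi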
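
The dfs.replication change from 3 to 2 applies only to files written after HDFS picks up the new hdfs-site.xml; blocks already written keep their old replication factor. One way to verify the live value, and to rewrite existing paths if desired, using the stock hdfs CLI (the path is illustrative):

    hdfs getconf -confKey dfs.replication     # expect: 2 after restart
    # hdfs dfs -setrep -w 2 /existing/path    # re-replicate old files if needed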
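
In 04-folder-creation.sh, the loop first echoes the command text (including update-rc.d's dry-run flag -n) into the install log, then executes the real removal with -f, which deletes the /etc/rc?.d symlinks even though the /etc/init.d scripts remain in place. Isolated, the two forms look like this:

    sudo update-rc.d -n -f cdap-master remove    # dry run: report what would be removed
    sudo update-rc.d -f cdap-master remove       # actually remove the rc?.d links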
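
Finally, install.sh (like the boot-time scripts) sets umask 0022, so files created during the run come out 644 and directories 755, and it wraps the whole step sequence in a subshell teed to cdap-hadoop-install.log. One caveat with that pattern: the pipeline's exit status is tee's, not the subshell's, so a caller that needs the real result should read PIPESTATUS. A sketch with hypothetical step scripts:

    #!/bin/bash
    ( bash step-one.sh && bash step-two.sh ) 2>&1 | tee -a install.log
    rc=${PIPESTATUS[0]}    # exit status of the subshell, not of tee (bash-only)
    exit $rc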