#!/bin/bash

# Command tracing stays disabled by default: with xtrace on, bash prints every
# expanded command, which includes the Confluence credentials used below.
#set -x # uncomment for bash script debugging
echo "c2m -------------------------------------------------------------"
### ============================================================================
### Licensed under the Apache License, Version 2.0 (the "License");
### you may not use this file except in compliance with the License.
### You may obtain a copy of the License at
###
###       http://www.apache.org/licenses/LICENSE-2.0
###
### Unless required by applicable law or agreed to in writing, software
### distributed under the License is distributed on an "AS IS" BASIS,
### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
### See the License for the specific language governing permissions and
### limitations under the License.
### ============LICENSE_END=====================================================


###
### c2m
###
### AUTHOR(S):
### Thomas Kulik, Deutsche Telekom AG, 2020
###
### DESCRIPTION:
### c2m automates additional tasks required in case you want to export and
### convert a set of wiki pages. the export and first conversion to markdown is
### done by confluence2md, provided by viaboxx.
### c2m processes a list of (to be exported) wiki pages, creates corresponding
### export directories, exports and converts pages (in various formats if
### required), opens an editor and cleans up afterwards.
### c2m checks also for problematic content in the export and creates a warning
### in case of detection.
###
### ISSUES:
### - markdown (md) output of confluence2md contains sometimes tags that are
###   somehow "merged" with the topic headline; manual edit is required here
###
### OPEN:
### - confluence2md does not support all of the currently used confluence page
###   types (structured-macros) - result for unsupported pages is
###   "not satisfying"; enhancements (java) are required
### - opt: toc creation in root document in case you export a tree of documents
###   to separate files
### - opt: remove wiki credentials from script
###
### REQUIRED:
### - pandoc, retext, confluence2md, java (older version for confluence2md),
###   login for the confluence wiki
###
### SEE ALSO:
### - https://www.viaboxx.de/code/confluence2md/
### - https://github.com/viaboxxsystems/confluence2md
###


###
### CHANGELOG (LATEST ON TOP)
###
### 1.2.0 (2021-08-02) Corrections to http/https proxy handling and support to
###                    get Confluence credentials from env variables instead of
###                    directly from the code.
### 1.1.0 (2020-03-10) added support for http/https proxy and anonymous wiki
###                    access. thx to eric, nicolas and sylvain (orange, france)
###                    confluence2md jar file now has to be in the same path as
###                    c2m.
### 1.0.0 (2020-03-09) initial release
###


###
### c2m example pagelist
###
### example pagelist (field descriptions below); it uses the delimiter "|" for
### the four fields per line.
### copy/paste page id and title from wiki; to get the wiki page_id you have to
### login to the wiki, open the page and choose e.g. the history.
### depth: use depth to follow down the child-pages hierarchy if required:
### -1=infinite, 0=no children, #=number of child-pages to follow.
### every hierarchy "0" entry will lead into the creation of a dedicated working
### directory where the page and child-pages are stored.
### for better readability you can add spaces to the list, but use "|" as a
### delimiter. lines starting with a # are filtered by c2m.
###
### hierarchy | page_id  | page_title                      | depth
###
### 0         |  1018748 | ONAP Portal                     |  0
### 1.1       |  1018759 | ONAP Portal for users           |  0
### 1.2       |  1018762 | ONAP Portal for administrators  |  0
### 1.2.1     |  1018764 | Admins                          |  0
### 1.2.2     |  1018811 | Users                           |  0
### 1.2.3     |  1018821 | Portal Admins                   |  0
### 1.2.4     |  1018826 | Application Onboarding          |  0
### 1.2.5     |  1018832 | Widget Onboarding               |  0
### 1.2.6     |  1018835 | Edit Functional Menu            |  0
### 1.2.7     | 16004953 | Portal Microservices Onboarding |  0
###
### in case you want to export to only one single output page (that contains all
### child-pages of the above example) use:
###
### 0         |  1018748 | ONAP Portal                     | -1
###


###
### some initial variables
###

script_version="1.2.0 (2021-08-02)"

# Both credential variables are mandatory. Leave with a non-zero status when one
# of them is missing so that calling scripts can detect the failure.
if [[ -z "$CONFLUENCE_USERNAME" || -z "$CONFLUENCE_PASSWORD" ]]
then
    echo "Mandatory environment variables:"
    echo "  CONFLUENCE_USERNAME: Confluence username"
    echo "  CONFLUENCE_PASSWORD: Confluence password."
    echo "Be aware! Setting bash debuging on will print credentials."
    exit 1
fi

user="${CONFLUENCE_USERNAME}";
passwd="${CONFLUENCE_PASSWORD}";
# "user:password" pair handed to confluence2md via its -u option
credentials="${user}:${passwd}";
server="https://wiki.onap.org";
# the editor command may be preset from the environment; default to retext
: "${rst_editor:=retext --preview}"

# remove credentials for those using anonymous access
# (both variables set to the literal placeholder "*****")
test "${credentials}" = "*****:*****" && credentials=""

# explicit script dir to locate jar file; "&&" keeps pwd from reporting an
# unrelated directory when the cd fails
basedir="$(cd "$(dirname -- "$0")" && pwd)"

###
### some initial tasks after script has been started
###

###
### print script version, date and time
###

echo "INFO ***************************************************************************"
echo "INFO c2m Version ${script_version}, started $(date)";

###
### simple script argument handling
###

# the first (and only) argument is the path of the pagelist file
page_list=$1;

# check if there is an argument at all
if [[ -z "$page_list" ]] ; then
    echo 'Usage: c2m [PAGELIST]'
    exit 1
fi

# check if argument is a file; the expansion is quoted so that a path
# containing blanks is tested as one name instead of breaking the test
if [[ ! -f "$page_list" ]] ; then
    echo "Error: can't find pagelist \"$page_list\""
    exit 1
fi

###
### declare the functions of this script
###

###
### function: create working directory; save (only the last) existing one; remove older versions; do some error handling
###

function create_working_dir {
  # Creates the directory "output/<page_title>" below the directory the script
  # was started from and changes into it. An already existing directory of the
  # same name is kept once as "<name>.old"; an older ".old" copy is removed.
  #
  # Globals read:    page_title
  # Globals written: base_dir, working_dir, existing_working_dirs,
  #                  c2m_start_dir
  # Exit status:     255 if the same working directory is requested twice,
  #                  1 if a directory cannot be created, saved or entered

  base_dir="output"

  # Remember where the first call happened and always return there. Without
  # this, a second call would run inside the previous working directory and
  # create a nested "output/<first>/output/<second>" tree.
  if [[ -z "${c2m_start_dir:-}" ]]; then
    c2m_start_dir="${PWD}"
  fi
  if ! cd "${c2m_start_dir}"; then
    echo "ERRR ***************************************************************************"
    echo "ERRR can't change to start directory \"${c2m_start_dir}\""
    exit 1
  fi

  if [[ ! -d "${base_dir}" ]] && ! mkdir "${base_dir}"; then
    echo "ERRR ***************************************************************************"
    echo "ERRR can't create base directory \"${base_dir}\""
    exit 1
  fi

  # compose name for working directory
  working_dir="${base_dir}/${page_title}";
  echo "INFO ***************************************************************************"
  echo "INFO working directory \"$working_dir\" will be created"

  # check if current working directory is already in the list
  if [[ " ${existing_working_dirs[*]} " == *" ${working_dir} "* ]]; then
    echo "ERRR ***************************************************************************"
    echo "ERRR working directory \"${working_dir}\" already exists - check entries in page_list for duplicates"
    echo "ERRR exiting ..."
    # same status the former "exit -1" produced
    exit 255
  else
    # store working_dir name for error handling
    existing_working_dirs+=("${working_dir}")
  fi

  # check existence of working directory
  if [[ -d "${working_dir}" ]]; then
    # remove the old saved working directory, if there is one
    if [[ -d "${working_dir}.old" ]]; then
      rm -r -- "${working_dir}.old";
    fi
    # save (only) the latest working directory
    if ! mv -- "${working_dir}" "${working_dir}.old"; then
      echo "ERRR ***************************************************************************"
      echo "ERRR can't save existing working directory \"${working_dir}\""
      exit 1
    fi
  fi

  # finally create the working directory and cd into it; stop on failure so
  # that later steps never write to (or clean up) an unintended directory
  if ! mkdir -- "${working_dir}" || ! cd -- "${working_dir}"; then
    echo "ERRR ***************************************************************************"
    echo "ERRR can't create or enter working directory \"${working_dir}\""
    exit 1
  fi
}

###
### function: pull pages from wiki - currently we are testing some export variations
###

function pull_pages_from_wiki {
  # Exports one wiki page (plus child pages, see depth) as markdown by running
  # the confluence2md jar located in ${basedir}.
  #
  # Globals read:    page_title, page_id, depth, credentials, server, basedir,
  #                  http_proxy, https_proxy, java_options (optional extra
  #                  JVM options, split on whitespace)
  # Globals written: out_file (base name of the export, used by later steps)

  # define outfile name
  out_file="${page_title}";

  local proxy_to_parse=""
  local proxy_var
  local host_re='^([^:]+)'
  local port_re=':([0-9]+)$'
  local -a jvm_args=()

  # Build the JVM options in a local array on every call. Appending to the
  # global java_options would repeat the proxy settings once per exported page.
  read -r -a jvm_args <<< "${java_options:-}"

  # set proxy if needed (http_proxy takes precedence over https_proxy)
  for proxy_var in http_proxy https_proxy; do
    if [[ -n "${!proxy_var:-}" ]]; then
      proxy_to_parse="${!proxy_var}"
      # reduce "scheme://user:pass@host:port/path" to "host:port"; any scheme
      # is accepted because https_proxy often carries an http:// URL
      proxy_to_parse="${proxy_to_parse#*://}"
      proxy_to_parse="${proxy_to_parse%%/*}"
      proxy_to_parse="${proxy_to_parse##*@}"
      echo "${proxy_var} is set to \"${proxy_to_parse}\"";
      break
    fi
  done

  # the host may be a name or an IP address
  if [[ ${proxy_to_parse} =~ ${host_re} ]]; then
    jvm_args+=("-Dhttps.proxyHost=${BASH_REMATCH[1]}" "-Dhttp.proxyHost=${BASH_REMATCH[1]}")
    echo "${jvm_args[*]}"
  fi
  if [[ ${proxy_to_parse} =~ ${port_re} ]]; then
    jvm_args+=("-Dhttps.proxyPort=${BASH_REMATCH[1]}" "-Dhttp.proxyPort=${BASH_REMATCH[1]}")
    echo "${jvm_args[*]}"
  fi

  # TODO: -depth
  # pull pages from wiki and convert to markdown (as a source for conversion by pandoc)
  java "${jvm_args[@]}" -jar "${basedir}/confluence2md-2.1-fat.jar" \
    +H true +T false +RootPageTitle false +FootNotes true \
    -maxHeaderDepth 7 -depth "${depth}" -v true \
    -o "${out_file}.md" -u "${credentials}" -server "${server}" "${page_id}"
}

###
### function: simple search and (red colored) warning if special terms are detected in the md output file
###

function detect_unwanted_content_in_md_outfile {
  # Searches "${out_file}.md" for a fixed list of unwanted terms. Matching
  # lines are printed by grep, followed by a red warning per detected term.
  #
  # Globals read: out_file
  local search_term

  for search_term in "ecomp" "wiki.onap.com" "10.53.199.7" "at&t"
  do
    # -F: take the term literally (a "." must not match any character)
    # -i: also find differently capitalised spellings such as "ECOMP"
    if grep -i -F -- "${search_term}" "${out_file}.md"; then
      echo -e "\e[31mWARN ***************************************************************************\e[39m";
      echo -e "\e[31mWARN term \"${search_term}\" detected in ${out_file}.md\e[39m";
    fi
  done
}

###
### function: pandoc conversion from md (variants) to rst - currently testing some conversion formats
###

function convert_md_outfile_to_rst {
  # Converts "${out_file}.md" into "${out_file}.rst" with pandoc.
  # The result depends on the markdown dialect given as source format (--from);
  # markdown_mmd and markdown_strict as well as adding --toc were tried before,
  # markdown_phpextra without --toc is the variant currently in use.
  #
  # Globals read: out_file
  local -a conversion_opts=(
    -s
    --toc-depth=5
    --from markdown_phpextra
    --to rst
  )
  local md_source="${out_file}.md"
  local rst_target="${out_file}.rst"

  pandoc "${conversion_opts[@]}" "${md_source}" -o "${rst_target}"
}

###
### function: check results in rst editor
###

function open_rst_editor {
  # Starts the configured editor in the background for every file matching
  # "${out_file}*.rst".
  #
  # Globals read: rst_editor (command plus options, split on whitespace),
  #               out_file
  local -a editor_cmd=()

  # split the editor command into words on purpose, e.g. "retext --preview"
  read -r -a editor_cmd <<< "${rst_editor}"

  # only the trailing "*.rst" is a glob; the quoted out_file part is taken
  # literally, so names containing blanks are not split into several words
  "${editor_cmd[@]}" "${out_file}"*.rst &
}

###
### function: clean up export directories from files no longer needed
###

function clean_up {
  # Deletes the intermediate files of an export from the current directory:
  # all markdown files and the (visible and hidden) json files below
  # attachments/. Error messages of rm, e.g. for patterns without a match,
  # are discarded.
  {
    rm *.md
    rm attachments/*.json
    rm attachments/.*.json
  } 2>/dev/null
}

###
### main: let's start the work ...
###

# read in pagelist file, filter lines starting with a comment and create an array that contains all (uncommented) lines of the file

# sample code
# IFS=',' read -r -a page_array <<< "$page_list" # in case $page_list was defined as a variable in this script; use "," as the delimiter
#readarray -t page_array < $page_list; # old version

# read the pagelist, skip lines starting with a comment sign and store every
# remaining line as one element of page_array
readarray -t page_array < <(grep -v "^#" "$page_list");

# INFO: show list of pages by printing every line of the array
echo "INFO ***************************************************************************"
for line in "${page_array[@]}"
do
    echo "INFO $line"
done

# the main loop reads the page_array line by line and processes the content
for line in "${page_array[@]}"
do
    # empty or whitespace-only lines carry no page information
    if [[ -z "${line//[[:space:]]/}" ]]; then
        continue
    fi

    echo "INFO - bupp $line"

    # split the current line at the "|" delimiter; the line is never expanded
    # unquoted, so characters like "*" in a title are not replaced by file
    # names; fields beyond the fourth are ignored
    IFS='|' read -r hierarchy page_id page_title depth _ <<< "$line"

    # remove leading and trailing spaces from variables
    hierarchy="$(printf '%s\n' "${hierarchy}"   | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
      page_id="$(printf '%s\n' "${page_id}"     | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
   page_title="$(printf '%s\n' "${page_title}"  | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
        depth="$(printf '%s\n' "${depth}"       | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";

    # substitute every run of blanks in page_title with one minus sign
    page_title="$(printf '%s\n' "${page_title}" | sed -e 's/[[:blank:]]\{1,\}/-/g')";
    echo "DBUG page_title=\"$page_title\""

    # convert page_title to lowercase
    page_title="$(printf '%s\n' "${page_title}" | tr '[:upper:]' '[:lower:]')";

    # remove all characters from page_title which may cause problems in the shell ... or are reserved by conventions of this script
    page_title="$(printf '%s\n' "${page_title}" | sed -e 's/[^A-Za-z0-9-]//g')";
    echo "DBUG page_title=\"$page_title\""

    # INFO: print variables to check content
    echo "INFO ***************************************************************************"
    echo "INFO hierarchy  = \"$hierarchy\""
    echo "INFO page_id    = \"$page_id\""
    echo "INFO page_title = \"$page_title\""
    echo "INFO depth      = \"$depth\""

    # a line without page id, usable title or depth cannot be exported
    if [[ -z "$page_id" || -z "$page_title" || -z "$depth" ]]; then
        echo "WARN ***************************************************************************"
        echo "WARN skipping incomplete pagelist entry \"$line\""
        continue
    fi

    # create working directory - done for every! "hierarchy 0" entry of page_list
    if [[ "$hierarchy" == "0" ]]
    then
      create_working_dir
    fi

    # call functions to process page
    pull_pages_from_wiki
    detect_unwanted_content_in_md_outfile
    convert_md_outfile_to_rst
    open_rst_editor
    clean_up

# main loop end
done

# bye!
echo "INFO ***************************************************************************"
echo "INFO c2m Version ${script_version}, ended $(date)"
echo ""
exit 0