summaryrefslogtreecommitdiffstats
path: root/tools/c2m.sh
diff options
context:
space:
mode:
Diffstat (limited to 'tools/c2m.sh')
-rwxr-xr-xtools/c2m.sh338
1 files changed, 338 insertions, 0 deletions
diff --git a/tools/c2m.sh b/tools/c2m.sh
new file mode 100755
index 000000000..0861eda84
--- /dev/null
+++ b/tools/c2m.sh
@@ -0,0 +1,338 @@
+#!/bin/bash
+
+#set -x # uncomment for bash script debugging
+
+### ============================================================================
+### Licensed under the Apache License, Version 2.0 (the "License");
+### you may not use this file except in compliance with the License.
+### You may obtain a copy of the License at
+###
+### http://www.apache.org/licenses/LICENSE-2.0
+###
+### Unless required by applicable law or agreed to in writing, software
+### distributed under the License is distributed on an "AS IS" BASIS,
+### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+### See the License for the specific language governing permissions and
+### limitations under the License.
+### ============LICENSE_END=====================================================
+
+
+###
+### c2m
+###
+### AUTHOR(S):
+### Thomas Kulik, Deutsche Telekom AG, 2020
+###
+### DESCRIPTION:
+### c2m automates additional tasks required in case you want to export and
+### convert a set of wiki pages. the export and first conversion to markdown is
+### done by confluence2md, provided by viaboxx.
+### c2m processes a list of (to be exported) wiki pages, creates corresponding
+### export directories, exports and converts pages (in various formats if
+### required), opens an editor and cleans up afterwards.
+### c2m checks also for problematic content in the export and creates a warning
+### in case of detection.
+###
+### ISSUES:
+### - markdown (md) output of confluence2md contains sometimes tags that are
+### somehow "merged" with the topic headline; manual edit is required here
+###
+### OPEN:
+### - confluence2md does not support all of the currently used confluence page
+### types (structured-macros) - result for unsupported pages is
+### "not satisfying"; enhancements (java) are required
+### - opt: toc creation in root document in case you export a tree of documents
+### to separate files
+### - opt: remove wiki credentials from script
+###
+### REQUIRED:
+### - pandoc, retext, confluence2md, java (older version for confluence2md),
+### login for the confluence wiki
+###
+### SEE ALSO:
+### - https://www.viaboxx.de/code/confluence2md/
+### - https://github.com/viaboxxsystems/confluence2md
+###
+
+
+###
+### CHANGELOG (LATEST ON TOP)
+###
+### 1.1.0 (2020-03-10) added support for http/https proxy and anonymous wiki
+### access. thx to eric, nicolas and sylvain (orange, france)
+### confluence2md jar file now has to be in the same path as
+### c2m.
+### 1.0.0 (2020-03-09) initial release
+###
+
+
+###
+### c2m example pagelist
+###
+### example pagelist (field descriptions below); it uses the delimiter "|" for
+### the four fields per line.
+### copy/paste page id and title from wiki; to get the wiki page_id you have to
+### login to the wiki, open the page and choose e.g. the history.
+### depth: use depth to follow down the child-pages hierarchy if required:
+### -1=infinte, 0=no children, #=number of child-pages to follow.
+### every hierarchy "0" entry will lead into the creation of a dedicated working
+### directory where the page and child-pages are stored.
+### for better readability you can add spaces to the list, but use "|" as a
+### delimiter. lines starting with a # are filtered by c2m.
+###
+### hierarchy | page_id | page_title | depth
+###
+### 0 | 1018748 | ONAP Portal | 0
+### 1.1 | 1018759 | ONAP Portal for users | 0
+### 1.2 | 1018762 | ONAP Portal for administrators | 0
+### 1.2.1 | 1018764 | Admins | 0
+### 1.2.2 | 1018811 | Users | 0
+### 1.2.3 | 1018821 | Portal Admins | 0
+### 1.2.4 | 1018826 | Application Onboarding | 0
+### 1.2.5 | 1018832 | Widget Onboarding | 0
+### 1.2.6 | 1018835 | Edit Functional Menu | 0
+### 1.2.7 | 16004953 | Portal Microservices Onboarding | 0
+###
+### in case you want to export to only one single output page (that contains all
+### child-pages of the above example) use:
+###
+### 0 | 1018748 | ONAP Portal | -1
+###
+
+
+###
+### some initial variables
+###
+
+script_version="1.1.0 (2020-03-10)"
+
+ user="*****"; # replace ***** with your wiki login name
+ passwd="*****"; # replace ***** with your wiki password
+ credentials="${user}":"${passwd}";
+ server="https://wiki.onap.org";
+ rst_editor="retext --preview";
+
+# remove credentials for those using anonymous access
+test "${credentials}" = "*****:*****" && credentials=""
+
+# explicit script dir to locate jar file
+basedir="$(cd "$(dirname "$0")"; pwd)"
+
+###
+### some inital tasks after script has been started
+###
+
+###
+### print script version, date and time
+###
+
+echo "INFO ***************************************************************************"
+echo "INFO c2m Version ${script_version}, started $(date)";
+
+###
+### simple script argument handling
+###
+
+page_list=$1;
+
+# check if there is an argument at all
+if [[ "$page_list" == "" ]] ; then
+ echo 'Usage: c2m [PAGELIST]'
+ exit 1
+fi
+
+# check if argument is a file
+if [ ! -f $page_list ] ; then
+ echo "Error: can't find pagelist \"$page_list\""
+ exit 1
+fi
+
+###
+### declare the functions of this script
+###
+
+###
+### function: create working directory; save (only the last) existing one; remove older versions; do some error handling
+###
+
+function create_working_dir {
+
+ # compose name for working directory
+ #working_dir="${page_id}-${page_title}";
+ #working_dir="${page_title}-id${page_id}";
+ working_dir="${page_title}";
+ echo "INFO ***************************************************************************"
+ echo "INFO working directory \"$working_dir\" will be created"
+
+ # check if current working directory is already in the list
+ if [[ " ${existing_working_dirs[@]} " =~ " ${working_dir} " ]]; then
+ echo "ERRR ***************************************************************************"
+ echo "ERRR working directory \"${working_dir}\" already exists - check entries in page_list for duplicates"
+ echo "ERRR exiting ..."
+ exit -1
+ else
+ # store working_dir name for error handling
+ existing_working_dirs+=(${working_dir})
+ fi
+
+ # sample code
+ #if [[ ! " ${array[@]} " =~ " ${value} " ]]; then
+ # # whatever you want to do when arr doesn't contain value
+ #fi
+
+ # check existence of working directory
+ if [ -d "$working_dir" ]; then
+ # check existence of old saved working directory
+ if [ -d "${working_dir}.old" ]; then
+ # remove the old saved working directory
+ rm -r "${working_dir}.old";
+ fi
+ # save (only) the latest working directory
+ mv $working_dir "$working_dir.old";
+ fi
+ # finally create the working directory and cd into it
+ mkdir $working_dir;
+ cd $working_dir;
+}
+
+###
+### function: pull pages from wiki - currently we are testing some export variations
+###
+
+function pull_pages_from_wiki {
+
+ # define outfile name
+ #out_file="${page_title}-id${page_id}";
+ out_file="${page_title}";
+
+ # set proxy for those who need
+ test -n "${http_proxy}" && proxy="$(echo $http_proxy |sed -e 's,http://,-Dhttp.proxyHost=,' -e 's/:/ -Dhttp.proxyPort=/' -e 's:/$::')"
+ test -n "${https_proxy}" && proxy="$proxy $(echo $https_proxy |sed -e 's,http://,-Dhttps.proxyHost=,' -e 's/:/ -Dhttps.proxyPort=/' -e 's:/$::')"
+
+ # pull pages from wiki and convert to markdown (as a source for conversion by pandoc)
+ java $proxy -jar "${basedir}"/confluence2md-2.1-fat.jar +H true +T false +RootPageTitle false +FootNotes true -maxHeaderDepth 7 -depth $depth -v true -o ${out_file}.md -u "${credentials}" -server $server $page_id
+}
+
+###
+### function: simple search and (red colored) warning if special terms are detected in the md output file
+###
+
+function detect_unwanted_content_in_md_outfile {
+for search_term in "ecomp" "wiki.onap.com" "10.53.199.7" "at&t"
+do
+ if grep $search_term ${out_file}.md; then
+ echo -e "\e[31mWARN ***************************************************************************\e[39m";
+ echo -e "\e[31mWARN term \"${search_term}\" detected in ${out_file}.md\e[39m";
+ fi
+done
+}
+
+###
+### function: pandoc conversion from md (variants) to rst - currenty testing some conversion formats
+###
+
+function convert_md_outfile_to_rst {
+ #depending on the given source format (--from) the results may vary
+ #pandoc -s --toc --toc-depth=5 --from markdown_mmd --to rst "${out_file}.md" -o "${out_file}-markdown_mmd.rst"
+ #pandoc -s --toc --toc-depth=5 --from markdown_strict --to rst "${out_file}.md" -o "${out_file}-markdown_strict.rst"
+ #pandoc -s --toc --toc-depth=5 --from markdown_phpextra --to rst "${out_file}.md" -o "${out_file}-markdown_phpextra.rst"
+ #pandoc -s --toc-depth=5 --from markdown_phpextra --to rst "${out_file}.md" -o "${out_file}-markdown_phpextra.rst"
+ pandoc -s --toc-depth=5 --from markdown_phpextra --to rst "${out_file}.md" -o "${out_file}.rst"
+}
+
+###
+### function: check results in rst editor
+###
+
+function open_rst_editor {
+ #echo "DBUG ***************************************************************************"
+ #echo "DBUG open \"${out_file}\*.rst\" with rst editor"
+ $rst_editor ${out_file}*.rst &
+}
+
+###
+### function: clean up export directories from files no longer needed
+###
+
+function clean_up {
+ rm *.md 2>/dev/null
+ rm attachments/*.json 2>/dev/null
+ rm attachments/.*.json 2>/dev/null
+}
+
+###
+### main: let's start the work ...
+###
+
+# read in pagelist file, filter lines starting with a comment and create an array that contains all (uncommented) lines of the file
+
+# sample code
+# IFS=',' read -r -a page_array <<< "$page_list" # in case $page_list was defined as a varable in this script; use "," as the delimiter
+#readarray -t page_array < $page_list; # old version
+
+readarray -t page_array < <(grep -v "^#" $page_list); # new version which skips line with comments
+
+# INFO: show list of pages by printing every line of the array
+echo "INFO ***************************************************************************"
+for line in "${page_array[@]}"
+do
+ echo "INFO $line"
+done
+
+# the main loop reads the page_array line by line and processes the content
+for line in "${page_array[@]}"
+do
+
+ # cut out values from the current line (delimiter is now the "|") and assign them to the correct variables
+ hierarchy=$(echo $line | cut -f1 -d\|)
+ page_id=$(echo $line | cut -f2 -d\|)
+ page_title=$(echo $line | cut -f3 -d\|)
+ depth=$(echo $line | cut -f4 -d\|)
+
+ # remove leading and trailing spaces from variables
+ hierarchy="$(echo -e "${hierarchy}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
+ page_id="$(echo -e "${page_id}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
+ page_title="$(echo -e "${page_title}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
+ depth="$(echo -e "${depth}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
+
+ # substitude all blanks in page_title with a minus sign
+ page_title=$(echo -e ${page_title} | tr '[:blank:]' '-');
+ echo "DBUG page_title=\"$page_title\""
+
+ # convert page_title to lowercase
+ page_title=$(echo -e ${page_title} | tr '[:upper:]' '[:lower:]');
+ #echo "DBUG page_title=\"$page_title\""
+
+ # remove all characters from page_title which may cause problems in the shell ... or are reserved by conventions of this script
+ #page_title="$(echo -e "${page_title}" | sed -e 's/[^A-Za-z0-9._-]//g')"; # a less strict version
+ page_title="$(echo -e "${page_title}" | sed -e 's/[^A-Za-z0-9-]//g')";
+ echo "DBUG page_title=\"$page_title\""
+
+ # INFO: print variables to check content
+ echo "INFO ***************************************************************************"
+ echo "INFO hierarchy = \"$hierarchy\""
+ echo "INFO page_id = \"$page_id\""
+ echo "INFO page_title = \"$page_title\""
+ echo "INFO depth = \"$depth\""
+
+ # create working directory - done for every! "hierarchy 0" entry of page_list
+ if [ "$hierarchy" == "0" ]
+ then
+ create_working_dir
+ fi
+
+ # call functions to process page
+ pull_pages_from_wiki
+ detect_unwanted_content_in_md_outfile
+ convert_md_outfile_to_rst
+ open_rst_editor
+ clean_up
+
+# main loop end
+done
+
+# bye!
+echo "INFO ***************************************************************************"
+echo "INFO c2m Version ${script_version}, ended $(date)"
+echo ""
+exit 0