1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
|
#!/bin/bash
#set -x # uncomment for bash script debugging
### ============================================================================
### Licensed under the Apache License, Version 2.0 (the "License");
### you may not use this file except in compliance with the License.
### You may obtain a copy of the License at
###
### http://www.apache.org/licenses/LICENSE-2.0
###
### Unless required by applicable law or agreed to in writing, software
### distributed under the License is distributed on an "AS IS" BASIS,
### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
### See the License for the specific language governing permissions and
### limitations under the License.
### ============LICENSE_END=====================================================
###
### c2m
###
### AUTHOR(S):
### Thomas Kulik, Deutsche Telekom AG, 2020
###
### DESCRIPTION:
### c2m automates additional tasks required in case you want to export and
### convert a set of wiki pages. the export and first conversion to markdown is
### done by confluence2md, provided by viaboxx.
### c2m processes a list of (to be exported) wiki pages, creates corresponding
### export directories, exports and converts pages (in various formats if
### required), opens an editor and cleans up afterwards.
### c2m checks also for problematic content in the export and creates a warning
### in case of detection.
###
### ISSUES:
### - markdown (md) output of confluence2md contains sometimes tags that are
### somehow "merged" with the topic headline; manual edit is required here
###
### OPEN:
### - confluence2md does not support all of the currently used confluence page
### types (structured-macros) - result for unsupported pages is
### "not satisfying"; enhancements (java) are required
### - opt: toc creation in root document in case you export a tree of documents
### to separate files
### - opt: remove wiki credentials from script
###
### REQUIRED:
### - pandoc, retext, confluence2md, java (older version for confluence2md),
### login for the confluence wiki
###
### SEE ALSO:
### - https://www.viaboxx.de/code/confluence2md/
### - https://github.com/viaboxxsystems/confluence2md
###
###
### CHANGELOG (LATEST ON TOP)
###
### 1.1.0 (2020-03-10) added support for http/https proxy and anonymous wiki
### access. thx to eric, nicolas and sylvain (orange, france)
### confluence2md jar file now has to be in the same path as
### c2m.
### 1.0.0 (2020-03-09) initial release
###
###
### c2m example pagelist
###
### example pagelist (field descriptions below); it uses the delimiter "|" for
### the four fields per line.
### copy/paste page id and title from wiki; to get the wiki page_id you have to
### login to the wiki, open the page and choose e.g. the history.
### depth: use depth to follow down the child-pages hierarchy if required:
### -1=infinite, 0=no children, #=number of child-pages to follow.
### every hierarchy "0" entry will lead into the creation of a dedicated working
### directory where the page and child-pages are stored.
### for better readability you can add spaces to the list, but use "|" as a
### delimiter. lines starting with a # are filtered by c2m.
###
### hierarchy | page_id | page_title | depth
###
### 0 | 1018748 | ONAP Portal | 0
### 1.1 | 1018759 | ONAP Portal for users | 0
### 1.2 | 1018762 | ONAP Portal for administrators | 0
### 1.2.1 | 1018764 | Admins | 0
### 1.2.2 | 1018811 | Users | 0
### 1.2.3 | 1018821 | Portal Admins | 0
### 1.2.4 | 1018826 | Application Onboarding | 0
### 1.2.5 | 1018832 | Widget Onboarding | 0
### 1.2.6 | 1018835 | Edit Functional Menu | 0
### 1.2.7 | 16004953 | Portal Microservices Onboarding | 0
###
### in case you want to export to only one single output page (that contains all
### child-pages of the above example) use:
###
### 0 | 1018748 | ONAP Portal | -1
###
###
### some initial variables
###
script_version="1.1.0 (2020-03-10)"
user="*****"; # replace ***** with your wiki login name
passwd="*****"; # replace ***** with your wiki password
credentials="${user}":"${passwd}";
server="https://wiki.onap.org";
rst_editor="retext --preview";
# remove credentials for those using anonymous access (placeholders untouched)
test "${credentials}" = "*****:*****" && credentials=""
# explicit script dir to locate jar file; "&&" makes a failing cd visible
# instead of silently returning the current directory
basedir="$(cd "$(dirname "$0")" && pwd)"
###
### some initial tasks after script has been started
###
###
### print script version, date and time
###
echo "INFO ***************************************************************************"
echo "INFO c2m Version ${script_version}, started $(date)";
###
### simple script argument handling
###
page_list=$1;
# check if there is an argument at all
if [[ "$page_list" == "" ]] ; then
echo 'Usage: c2m [PAGELIST]'
exit 1
fi
# check if argument is a file; quoted so pagelist paths with spaces work
if [ ! -f "$page_list" ] ; then
echo "Error: can't find pagelist \"$page_list\""
exit 1
fi
###
### declare the functions of this script
###
###
### function: create working directory; save (only the last) existing one; remove older versions; do some error handling
###
function create_working_dir {
  # compose the name of the working directory from the (sanitized) page title
  working_dir="${page_title}";
  echo "INFO ***************************************************************************"
  echo "INFO working directory \"$working_dir\" will be created"
  # abort if this name was already used in this run - the page_list then
  # contains duplicate "hierarchy 0" entries
  if [[ " ${existing_working_dirs[*]} " =~ " ${working_dir} " ]]; then
    echo "ERRR ***************************************************************************"
    echo "ERRR working directory \"${working_dir}\" already exists - check entries in page_list for duplicates"
    echo "ERRR exiting ..."
    # exit 1 instead of exit -1: valid shell exit codes are 0-255
    exit 1
  else
    # remember this name for the duplicate check of later entries
    existing_working_dirs+=("${working_dir}")
  fi
  # keep exactly one backup generation: drop <dir>.old, then save the
  # current <dir> as <dir>.old
  if [ -d "$working_dir" ]; then
    if [ -d "${working_dir}.old" ]; then
      rm -r "${working_dir}.old";
    fi
    mv "$working_dir" "$working_dir.old";
  fi
  # finally create the working directory and cd into it; abort on failure so
  # the export does not end up in the wrong directory
  mkdir "$working_dir" || exit 1
  cd "$working_dir" || exit 1
}
###
### function: pull pages from wiki - currently we are testing some export variations
###
function pull_pages_from_wiki {
  # outfile name is derived from the (sanitized) page title
  out_file="${page_title}";
  # translate the http(s)_proxy environment variables (http[s]://host:port[/])
  # into java -Dhttp(s).proxyHost / -Dhttp(s).proxyPort options;
  # "https\?://" strips either scheme so the following ":" really separates
  # host and port (the original only stripped "http://")
  proxy=""
  test -n "${http_proxy}" && proxy="$(echo "$http_proxy" | sed -e 's,https\?://,-Dhttp.proxyHost=,' -e 's/:/ -Dhttp.proxyPort=/' -e 's:/$::')"
  test -n "${https_proxy}" && proxy="$proxy $(echo "$https_proxy" | sed -e 's,https\?://,-Dhttps.proxyHost=,' -e 's/:/ -Dhttps.proxyPort=/' -e 's:/$::')"
  # pull pages from the wiki and convert them to markdown (input for pandoc);
  # $proxy is deliberately unquoted - it may expand to several java options
  java $proxy -jar "${basedir}/confluence2md-2.1-fat.jar" +H true +T false +RootPageTitle false +FootNotes true -maxHeaderDepth 7 -depth "$depth" -v true -o "${out_file}.md" -u "${credentials}" -server "$server" "$page_id"
}
###
### function: simple search and (red colored) warning if special terms are detected in the md output file
###
function detect_unwanted_content_in_md_outfile {
  # scan the exported markdown for terms that must not show up in the output;
  # -F matches fixed strings (so the dots in "10.53.199.7" are literal, not
  # regex wildcards) and "--" protects against patterns starting with "-";
  # the matching lines are printed by grep for inspection
  for search_term in "ecomp" "wiki.onap.com" "10.53.199.7" "at&t"
  do
    if grep -F -- "$search_term" "${out_file}.md"; then
      echo -e "\e[31mWARN ***************************************************************************\e[39m";
      echo -e "\e[31mWARN term \"${search_term}\" detected in ${out_file}.md\e[39m";
    fi
  done
}
###
### function: pandoc conversion from md (variants) to rst - currently testing some conversion formats
###
function convert_md_outfile_to_rst {
  # convert the markdown export to reStructuredText via pandoc; the source
  # format markdown_phpextra produced the best results of the tested variants
  # (markdown_mmd and markdown_strict gave inferior output)
  local pandoc_opts=(-s --toc-depth=5 --from markdown_phpextra --to rst)
  pandoc "${pandoc_opts[@]}" "${out_file}.md" -o "${out_file}.rst"
}
###
### function: check results in rst editor
###
function open_rst_editor {
  # open all rst files generated for this page in the configured editor
  # (runs in the background so the script can continue);
  # $rst_editor stays unquoted on purpose - it is a command plus its options;
  # the quoted prefix before the glob keeps unusual outfile names intact
  $rst_editor "${out_file}"*.rst &
}
###
### function: clean up export directories from files no longer needed
###
function clean_up {
  # remove the intermediate markdown file and the exported attachment
  # metadata from the current working directory;
  # -f keeps rm silent and successful when a glob matches nothing,
  # replacing the previous blanket "2>/dev/null" error suppression
  rm -f -- *.md
  rm -f -- attachments/*.json
  rm -f -- attachments/.*.json
}
###
### main: let's start the work ...
###
# read the pagelist file, filter lines starting with a comment ("#") and
# store every remaining line as one element of page_array; the filename is
# quoted so pagelist paths with spaces work
readarray -t page_array < <(grep -v "^#" "$page_list");
# INFO: show list of pages by printing every line of the array
echo "INFO ***************************************************************************"
for line in "${page_array[@]}"
do
  echo "INFO $line"
done
# the main loop reads the page_array line by line and processes the content
for line in "${page_array[@]}"
do
  # cut out values from the current line (delimiter is the "|") and assign
  # them to the correct variables; everything is quoted, and plain echo
  # (no -e) keeps backslashes in page titles literal
  hierarchy=$(echo "$line" | cut -f1 -d\|)
  page_id=$(echo "$line" | cut -f2 -d\|)
  page_title=$(echo "$line" | cut -f3 -d\|)
  depth=$(echo "$line" | cut -f4 -d\|)
  # remove leading and trailing spaces from all four fields
  hierarchy="$(echo "${hierarchy}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
  page_id="$(echo "${page_id}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
  page_title="$(echo "${page_title}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
  depth="$(echo "${depth}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')";
  # substitute all blanks in page_title with a minus sign
  page_title=$(echo "${page_title}" | tr '[:blank:]' '-');
  echo "DBUG page_title=\"$page_title\""
  # convert page_title to lowercase
  page_title=$(echo "${page_title}" | tr '[:upper:]' '[:lower:]');
  # remove all characters from page_title which may cause problems in the
  # shell or are reserved by conventions of this script
  page_title="$(echo "${page_title}" | sed -e 's/[^A-Za-z0-9-]//g')";
  echo "DBUG page_title=\"$page_title\""
  # INFO: print variables to check content
  echo "INFO ***************************************************************************"
  echo "INFO hierarchy = \"$hierarchy\""
  echo "INFO page_id = \"$page_id\""
  echo "INFO page_title = \"$page_title\""
  echo "INFO depth = \"$depth\""
  # create working directory - done for every! "hierarchy 0" entry of page_list
  if [ "$hierarchy" == "0" ]
  then
    create_working_dir
  fi
  # call functions to process page
  pull_pages_from_wiki
  detect_unwanted_content_in_md_outfile
  convert_md_outfile_to_rst
  open_rst_editor
  clean_up
# main loop end
done
# bye!
echo "INFO ***************************************************************************"
echo "INFO c2m Version ${script_version}, ended $(date)"
echo ""
exit 0
|