aboutsummaryrefslogtreecommitdiffstats
path: root/src/main/scripts/dataGrooming.sh
blob: a6b5f4f481a9295131bb876b294b9ed0188f156c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/ksh
#
# The script invokes the dataGrooming java class to run some tests and generate a report and
#     potentially do some auto-deleting.
#
# Here are the allowed Parameters.  Note - they are all optional and can be mixed and matched.
#
#  -f oldFileName  (see note below)
#  -autoFix 
#  -sleepMinutes nn
#  -edgesOnly
#  -skipEdgeChecks
#  -timeWindowMinutes nn
#  -dontFixOrphans
#  -maxFix nn
#  -skipHostCheck
#  -singleCommits
#  -dupeCheckOff
#  -dupeFixOn
#  -ghost2CheckOff
#  -ghost2FixOn
#
#
#
#
# NOTES:
# -f  The name of a previous report can optionally be passed in with the "-f" option. 
#     Just the filename --  ie. "dataGrooming.sh -f dataGrooming.201504272106.out"   
#     The file will be assumed to be in the directory that it was created in.
#     If a filename is passed, then the "deleteCandidate" vertex-id's and bad edges
#     listed inside that report file will be deleted on this run if they are encountered as
#     bad nodes/edges again.
#     
# -autoFix  If you don't use the "-f" option, you could choose to use "-autoFix" which will
#           automatically run the script twice: once to look for problems, then after
#           sleeping for a few minutes, it will re-run with the initial run's output as
#           an input file.
#
# -maxFix   When using autoFix, you might want to limit how many 'bad' records get fixed.
#           This is a safeguard against accidentally deleting too many records automatically.
#           It has a default value set in AAIConstants:  AAI_GROOMING_DEFAULT_MAX_FIX = 15;
#           If there are more than maxFix candidates found -- then none will be deleted (ie. 
#           someone needs to look into it)
# 
# -sleepMinutes   When using autoFix, this defines how many minutes we sleep before the second run.
#           It has a default value set in AAIConstants:  AAI_GROOMING_DEFAULT_SLEEP_MINUTES = 7;
#           The reason we sleep at all between runs is that our DB is "eventually consistent", so
#           we want to give it time to resolve itself if possible.
#
# -edgesOnly    Can be used any time you want to limit this tool so it only looks at edges.
#            Note - as of 1710, we have not been seeing many purely bad edges, 
#            (ie. not associated with a phantom node) so this option is not used often.
#           
# -skipEdgeChecks  Use it to bypass checks for bad Edges (which are pretty rare).
#
# -timeWindowMinutes   Use it to limit the nodes looked at to ones whose update-timestamp
#           tells us that they were last updated less than this many minutes ago.
#           Note - this is usually used along with the skipEdgeChecks option.
#
# -dontFixOrphans   Since there can sometimes be a lot of orphan nodes, and they don't 
#           harm processing as much as phantom-nodes or bad-edges, it is useful to be
#           able to ignore them when fixing things.  
#
# -skipHostCheck    By default, the grooming tool will check to see that it is running
#           on the host that is the first one in the list found in:
#               aaiconfig.properties  aai.primary.filetransfer.serverlist
#           This is so that when run from the cron, it only runs on one machine.
#           This option lets you turn that checking off.
#
# -singleCommits    By default, the grooming tool will do all of its processing and then do
#           a commit of all the changes at once.  This option (which maybe could have been named
#           better) lets the user override the default behavior and do a commit for each
#           individual 'remove' one by one as they are encountered by the grooming logic.
#           NOTE - this only applies when using either the "-f" or "-autoFix" options since 
#           those are the only two that make changes to the database.
#
# -dupeCheckOff    By default, we will check all of our nodes for duplicates.  This parameter lets
#           us turn this check off if we don't want to do it for some reason.
#
# -dupeFixOn    When we're fixing data, by default we will NOT fix duplicates.  This parameter lets us turn
#           that fixing ON when we are comfortable that it can pick the correct duplicate to preserve. 
#
# -ghost2CheckOff    By default, we will check for the "new" kind of ghost that we saw on
#           Production in early February 2016.  This parameter lets us turn this check off if we 
#           don't want to do it for some reason.
#
# -ghost2FixOn    When we're fixing data, by default we will NOT try to fix the "new" ghost nodes.  
#           This parameter lets us turn that fixing ON if we want to try to fix them. 
#
#
# Resolve the physical directory that contains this script ("pwd -P" prints
# the path with symlinked directories resolved) so the shared helper library
# can be found regardless of the caller's working directory.
COMMON_ENV_PATH=$( cd "$(dirname "$0")" ; pwd -P )

# Load the shared helpers.  The path is quoted so that an install directory
# containing whitespace or glob characters is still handed to "." as one word.
. "${COMMON_ENV_PATH}/common_functions.sh"

# TODO: There is a better way where you can pass in the function
# and then let the common functions check if the function exist and invoke it
# So this all can be templated out

# start_date and check_user are not defined in this file; presumably they are
# provided by common_functions.sh -- confirm there for their exact behavior.
start_date;
check_user;

# Make sure that it's not already running.
#
# find_running_data_grooming prints every "ps -ef" line that mentions the
# DataGrooming java class, and prints nothing when there is none.
#   - The bracket expression in '[D]ataGrooming' keeps grep from matching its
#     own entry in the process list.
#   - grep is case-sensitive here, so this script's own name (dataGrooming.sh,
#     lower-case "d") is not matched.
find_running_data_grooming() {
	ps -ef | grep '[D]ataGrooming'
}

# One check is sufficient: it matches any process whose command line contains
# "DataGrooming", whatever java package the class lives in or which user owns
# the process.
processStat=$(find_running_data_grooming)
if [ -n "$processStat" ]; then
	# Quoted so the ps output is reported verbatim; unquoted it would be
	# word-split and any "*" in it (e.g. a "lib/*" classpath) glob-expanded.
	printf '%s %s\n' "Found dataGrooming is already running: " "$processStat"
	exit 1
fi

# source_profile, execute_spring_jar and end_date are not defined in this
# file; presumably they come from the common_functions.sh sourced earlier.
source_profile;

# Launch the DataGrooming class, handing it the logback configuration and
# every command-line option given to this script ("$@" forwards them
# unchanged, one word per original argument).  The logback path is quoted so
# a PROJECT_HOME containing whitespace is not split into several arguments.
# PROJECT_HOME is not set in this file -- presumably by source_profile or the
# calling environment; verify.
execute_spring_jar org.onap.aai.datagrooming.DataGrooming "${PROJECT_HOME}/resources/logback.xml" "$@"
end_date;

# NOTE(review): this reports success unconditionally once the helpers above
# return; a failed run is only visible to the caller if execute_spring_jar
# exits the shell itself -- confirm in common_functions.sh.
exit 0