about | summary | refs | log | tree | commit | diff | stats
path: root/policyhandler/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'policyhandler/utils.py')
-rw-r--r-- policyhandler/utils.py 303
1 files changed, 303 insertions, 0 deletions
diff --git a/policyhandler/utils.py b/policyhandler/utils.py
new file mode 100644
index 0000000..d728e48
--- /dev/null
+++ b/policyhandler/utils.py
@@ -0,0 +1,303 @@
+# ================================================================================
+# Copyright (c) 2018-2019 AT&T Intellectual Property. All rights reserved.
+# ================================================================================
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============LICENSE_END=========================================================
+#
+
+"""utils and conversions"""
+
+import json
+import logging
+import os
+from copy import deepcopy
+from typing import Pattern
+
class ToBeImplementedException(Exception):
    """Raised for policy-handler features that are yet to be implemented."""
+
+
class Utils(object):
    """General purpose utils: logger naming, tolerant json parsing, deep comparison."""
    _logger = logging.getLogger("policy_handler.utils")

    @staticmethod
    def get_logger(file_path):
        """
        Get the logger for the module located at file_path (pass __file__).

        The logger name is "policy_handler" followed by the dot-separated folder
        names between the "policyhandler" folder and the file, followed by the
        file name without its extension.  A "tests" folder is reported as
        "unit_test" and stops the walk up the folder tree.

        :param file_path: path to the python file, usually __file__
        :return: logging.Logger named after the location of the file
        """
        file_path = os.path.realpath(file_path)
        # splitext copes with any extension (.py, .pyc, none) - not only 3 chars
        module_name = os.path.splitext(os.path.basename(file_path))[0]
        logger_path = [module_name]

        while file_path:
            file_path = os.path.dirname(file_path)
            folder_name = os.path.basename(file_path)
            # an empty folder name means the root of the file system is reached;
            # stopping here avoids empty components in the logger name
            if not folder_name:
                break
            if folder_name == "policyhandler" or len(logger_path) > 5:
                break
            if folder_name == "tests":
                logger_path.append("unit_test")
                break
            logger_path.append(folder_name)

        logger_path.append("policy_handler")
        return logging.getLogger(".".join(reversed(logger_path)))

    @staticmethod
    def safe_json_parse(json_str):
        """
        Try parsing json without raising - returns json_str back when parsing fails.

        :param json_str: text (or any other value) to parse as json
        :return: the parsed object, or json_str itself when it is empty
                 or cannot be parsed
        """
        if not json_str:
            return json_str
        try:
            return json.loads(json_str)
        except (ValueError, TypeError) as err:
            # json_str may be of a type that has no len() (int for instance) -
            # reporting the failure must not raise on its own
            try:
                json_len = len(json_str)
            except TypeError:
                json_len = None
            Utils._logger.warning("unexpected json error(%s): len(%s) str[:100]: (%s)",
                                  str(err), json_len, str(json_str)[:100])
            return json_str

    @staticmethod
    def _debug_difference(message, body_1, body_2, json_dumps=None):
        """log the found difference at debug level without ever raising on dumping"""
        if not Utils._logger.isEnabledFor(logging.DEBUG):
            # skip the potentially expensive dumps when nobody reads the result
            return
        if json_dumps:
            try:
                body_1, body_2 = json_dumps(body_1), json_dumps(body_2)
            except (TypeError, ValueError):
                # not serializable - log the raw values instead
                pass
        Utils._logger.debug(message, body_1, body_2)

    @staticmethod
    def are_the_same(body_1, body_2, json_dumps=None):
        """
        Check whether both objects are the same.

        Lists are compared element by element in order, dicts key by key,
        anything else with ==.  The first difference found is logged at
        debug level.

        :param body_1: first object to compare
        :param body_2: second object to compare
        :param json_dumps: optional replacement for json.dumps used only for
                           logging the lists and dicts that differ
        :return: True when the objects are the same, False otherwise
        """
        if not json_dumps:
            json_dumps = json.dumps

        # exactly one of the two is empty (falsy)
        if bool(body_1) != bool(body_2):
            Utils._debug_difference("only one is empty %s != %s", body_1, body_2)
            return False

        if body_1 is None and body_2 is None:
            return True

        if isinstance(body_1, list) and isinstance(body_2, list):
            if len(body_1) != len(body_2):
                Utils._debug_difference("len %s != %s", body_1, body_2, json_dumps)
                return False
            return all(
                Utils.are_the_same(val_1, val_2, json_dumps)
                for val_1, val_2 in zip(body_1, body_2)
            )

        if isinstance(body_1, dict) and isinstance(body_2, dict):
            # symmetric difference is non-empty when the sets of keys differ
            if body_1.keys() ^ body_2.keys():
                Utils._debug_difference("keys %s != %s", body_1, body_2, json_dumps)
                return False
            return all(
                Utils.are_the_same(val_1, body_2[key], json_dumps)
                for key, val_1 in body_1.items()
            )

        # ... here when primitive values or mismatched types ...
        the_same_values = (body_1 == body_2)
        if not the_same_values:
            Utils._debug_difference("values %s != %s", body_1, body_2)
        return the_same_values
+
class RegexCoarser(object):
    """
    Utility to combine or coarsen a collection of regex patterns
    into a single regex that is at least not narrower (wider or the same)
    than the collected regexes.

    The patterns are tokenized and stored in a trie (dict of dicts) keyed by
    token; the end of each pattern is marked with the ENDER key.  Shared
    prefixes are therefore stored once and rendered as "prefix(?:a|b)".

    inspired by https://github.com/spadgos/regex-combiner in js
    """
    ENDER = '***'                               # trie key marking the end of a pattern
    GROUPERS = {'{': '}', '[': ']', '(': ')'}   # group opener -> group closer
    MODIFIERS = '*?+'                           # glued to the preceding token
    CHOICE_STARTER = '('
    HIDDEN_CHOICE_STARTER = '(?:'
    ANY_CHARS = '.*'
    LINE_START = '^'

    def __init__(self, regex_patterns=None):
        """
        :param regex_patterns: optional list (or tuple) of regex patterns as
                               strings or compiled patterns
        """
        self.trie = {}
        self.patterns = []
        self.add_regex_patterns(regex_patterns)

    def get_combined_regex_pattern(self):
        """
        Get the single pattern that combines all the added patterns.

        :return: pattern string, '' when no pattern was added
        """
        # compress works in place - keep self.trie intact for further additions
        trie = deepcopy(self.trie)
        RegexCoarser._compress(trie)
        return RegexCoarser._trie_to_pattern(trie)

    def get_coarse_regex_patterns(self, max_length=100):
        """
        Get the list of coarse patterns: every top-level pattern that branches
        into a choice is cut at the choice and finished with ".*".

        :param max_length: patterns shorter than that are joined into
                           "(?:a|b)" choices, see _join_patterns
        :return: list of pattern strings, [] when no pattern was added
        """
        trie = deepcopy(self.trie)
        RegexCoarser._compress(trie)
        patterns = RegexCoarser._trie_to_pattern(trie, True)

        root_patterns = []
        for pattern in patterns:
            # NOTE(review): partition splits at the first '(' of any kind,
            # including an escaped one or one that came with the source pattern
            left, _, choice = pattern.partition(RegexCoarser.CHOICE_STARTER)
            if choice and left and left.strip() != RegexCoarser.LINE_START and not left.isspace():
                pattern = left + RegexCoarser.ANY_CHARS
            root_patterns.append(pattern)
        root_patterns = RegexCoarser._join_patterns(root_patterns, max_length)

        if not root_patterns or root_patterns == ['']:
            return []
        return root_patterns

    def add_regex_patterns(self, new_regex_patterns):
        """
        Add the new regex patterns to the coarser.

        :param new_regex_patterns: list or tuple of patterns;
                                   anything else (including None) is ignored
        """
        if not new_regex_patterns or not isinstance(new_regex_patterns, (list, tuple)):
            return
        for new_regex_pattern in new_regex_patterns:
            self.add_regex_pattern(new_regex_pattern)

    def add_regex_pattern(self, new_regex_pattern):
        """
        Add the single new regex pattern to the coarser.

        :param new_regex_pattern: string or compiled pattern;
                                  empty or unsupported values are ignored
        """
        new_regex_pattern = RegexCoarser._regex_pattern_to_string(new_regex_pattern)
        if not new_regex_pattern:
            return

        self.patterns.append(new_regex_pattern)

        trie_node = self.trie
        for token in RegexCoarser._tokenize(new_regex_pattern):
            trie_node = trie_node.setdefault(token, {})
        # the pattern is not empty, hence trie_node is the node of the last token
        trie_node[RegexCoarser.ENDER] = {}

    @staticmethod
    def _regex_pattern_to_string(regex_pattern):
        """convert regex pattern to string - '' for empty, None for unsupported type"""
        if not regex_pattern:
            return ''

        if isinstance(regex_pattern, str):
            return regex_pattern

        if isinstance(regex_pattern, Pattern):
            return regex_pattern.pattern
        return None

    @staticmethod
    def _append_to_last(tokens, piece):
        """glue the piece to the last token - or start the tokens with it when empty"""
        if tokens:
            tokens[-1] += piece
        else:
            tokens.append(piece)

    @staticmethod
    def _tokenize(regex_pattern):
        """
        Tokenize the regex pattern for trie assignment.

        A token is a single char, an escape sequence (backslash + char), or
        a whole group in (), [] or {} - each followed by its modifiers (*?+)
        and {} quantifier.  Nested parentheses stay inside the token of the
        outer group.  Concatenating the tokens gives the pattern back.

        :param regex_pattern: pattern string
        :return: list of tokens
        """
        tokens = []
        token = ''
        group_starter = None
        group_ender = None
        nested_groups = 0       # depth of the ( opened inside the current ( group
        use_next = False        # True right after a backslash

        for char in regex_pattern:
            if use_next:
                # escaped char always belongs to the current token
                use_next = False
                token += char
            elif char == '\\':
                use_next = True
                token += char
                continue
            elif not group_ender and char in RegexCoarser.GROUPERS:
                group_starter = char
                group_ender = RegexCoarser.GROUPERS[char]
                nested_groups = 0
                token = char
            elif (group_ender and group_starter == RegexCoarser.CHOICE_STARTER
                  and char == group_starter):
                nested_groups += 1
                token += char
            elif char == group_ender:
                token += char
                if nested_groups:
                    # closes the nested group - the outer group goes on
                    nested_groups -= 1
                else:
                    group_starter = None
                    group_ender = None
                    if char == '}':  # this group is a modifier
                        RegexCoarser._append_to_last(tokens, token)
                        token = ''
                        continue
            elif char in RegexCoarser.MODIFIERS:
                if group_ender:
                    token += char
                else:
                    RegexCoarser._append_to_last(tokens, char)
                    continue
            else:
                token += char

            if not group_ender:
                tokens.append(token)
                token = ''

        # unfinished group or trailing backslash
        if token:
            tokens.append(token)
        return tokens

    @staticmethod
    def _compress(trie):
        """
        Compress the trie in place into the shortest leaves: every node that
        has a single child is merged with that child under the joined key.
        """
        # iterate over a snapshot - the keys of the trie are replaced in the loop
        # and changing a dict while iterating over it raises RuntimeError
        for key, subtrie in list(trie.items()):
            RegexCoarser._compress(subtrie)
            if len(subtrie) == 1:
                (subkey, grandtrie), = subtrie.items()
                del trie[key]
                trie[key + subkey] = grandtrie

    @staticmethod
    def _trie_to_pattern(trie, top_keep=False):
        """
        Convert the trie to the regex pattern.

        :param trie: (compressed) trie of tokens
        :param top_keep: True to get the list of patterns of the top level keys
                         instead of joining them into the single pattern
        :return: list of patterns when top_keep, the single pattern string otherwise
        """
        patterns = [
            key.replace(RegexCoarser.ENDER, '') + RegexCoarser._trie_to_pattern(subtrie)
            for key, subtrie in trie.items()
        ]

        if top_keep:
            return patterns

        return RegexCoarser._join_patterns(patterns)[0]

    @staticmethod
    def _join_patterns(patterns, max_length=0):
        """
        Convert the list of patterns to the segmented list of dense regex patterns.

        :param patterns: list of pattern strings
        :param max_length: 0 to join everything into the single "(?:a|b|c)";
                           otherwise the patterns are packed into choices while
                           the sum of the lengths of the members stays below
                           max_length, and the patterns of max_length or longer
                           are returned unchanged after the choices
        :return: list of patterns - [''] for empty input
        """
        if not patterns:
            return ['']

        if len(patterns) == 1:
            return patterns

        if not max_length:
            return [RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns) + ')']

        long_patterns = []
        join_patterns = []      # list of (patterns_to_join, summed length of them)
        for pattern in patterns:
            len_pattern = len(pattern)
            if not len_pattern:
                continue
            if len_pattern >= max_length:
                long_patterns.append(pattern)
                continue

            for idx, (patterns_to_join, len_patterns_to_join) in enumerate(join_patterns):
                if len_pattern + len_patterns_to_join < max_length:
                    patterns_to_join.append(pattern)
                    join_patterns[idx] = (patterns_to_join, len_patterns_to_join + len_pattern)
                    break
            else:
                # no bucket has enough room - open the new one
                join_patterns.append(([pattern], len_pattern))
            # the shortest bucket is tried first for the next pattern
            join_patterns.sort(key=lambda x: x[1])

        joined_patterns = [
            RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns_to_join) + ')'
            for patterns_to_join, _ in join_patterns
        ]

        return joined_patterns + long_patterns