# ================================================================================
# Copyright (c) 2018-2020 AT&T Intellectual Property. All rights reserved.
# Copyright 2020 Deutsche Telekom. All rights reserved.
# ================================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============LICENSE_END=========================================================
#
"""utils and conversions"""

import json
import logging
import os
import re
from copy import deepcopy


class Utils(object):
    """general purpose utils"""
    _logger = logging.getLogger("policy_handler.utils")

    @staticmethod
    def get_logger(file_path):
        """get the logger for the file_path == __file__

        Builds a dotted logger name rooted at "policy_handler" from the folders
        between the file and its "policyhandler" package folder, e.g.
        .../policyhandler/a/b.py -> "policy_handler.a.b".
        Files under a "tests" folder get the segment "unit_test" instead.
        """
        logger_path = []
        file_path = os.path.realpath(file_path)
        # strip the trailing ".py" from the module file name
        logger_path.append(os.path.basename(file_path)[:-3])

        while file_path:
            file_path = os.path.dirname(file_path)
            folder_name = os.path.basename(file_path)
            # stop at the package root; cap the depth at 5 segments as a safety net
            if folder_name == "policyhandler" or len(logger_path) > 5:
                break
            if folder_name == "tests":
                logger_path.append("unit_test")
                break
            logger_path.append(folder_name)

        logger_path.append("policy_handler")
        return logging.getLogger(".".join(reversed(logger_path)))

    @staticmethod
    def safe_json_parse(json_str):
        """try parsing json without exception - returns the json_str back if fails"""
        if not json_str:
            return json_str
        try:
            return json.loads(json_str)
        except (ValueError, TypeError) as err:
            Utils._logger.warning("unexpected json error(%s): len(%s) str[:100]: (%s)",
                                  str(err), len(json_str), str(json_str)[:100])
        return json_str

    @staticmethod
    def are_the_same(body_1, body_2, json_dumps=None):
        """check whether both objects are the same

        Deep-compares lists element-wise (order matters) and dicts key-wise.
        Logs the first found difference at debug level using :json_dumps:
        (defaults to json.dumps) to render the differing parts.
        NOTE: a falsy value paired with a truthy one is an immediate mismatch;
        two different falsy values (e.g. 0 vs "") fall through to the
        primitive comparison at the bottom.
        """
        if not json_dumps:
            json_dumps = json.dumps
        if (body_1 and not body_2) or (not body_1 and body_2):
            Utils._logger.debug("only one is empty %s != %s", body_1, body_2)
            return False

        if body_1 is None and body_2 is None:
            return True

        if isinstance(body_1, list) and isinstance(body_2, list):
            if len(body_1) != len(body_2):
                Utils._logger.debug("len %s != %s", json_dumps(body_1), json_dumps(body_2))
                return False

            for val_1, val_2 in zip(body_1, body_2):
                if not Utils.are_the_same(val_1, val_2, json_dumps):
                    return False
            return True

        if isinstance(body_1, dict) and isinstance(body_2, dict):
            # symmetric difference of the key sets == any key present on one side only
            if body_1.keys() ^ body_2.keys():
                Utils._logger.debug("keys %s != %s", json_dumps(body_1), json_dumps(body_2))
                return False

            for key, val_1 in body_1.items():
                val_2 = body_2[key]
                if isinstance(val_1, str) or isinstance(val_2, str):
                    # short-circuit for strings: compare directly instead of recursing
                    if val_1 != val_2:
                        Utils._logger.debug("key-values %s != %s",
                                            json_dumps({key: val_1}), json_dumps({key: val_2}))
                        return False
                    continue
                if not Utils.are_the_same(val_1, val_2, json_dumps):
                    return False
            return True

        # ... here when primitive values or mismatched types ...
        the_same_values = (body_1 == body_2)
        if not the_same_values:
            Utils._logger.debug("values %s != %s", body_1, body_2)
        return the_same_values


class RegexCoarser(object):
    """
    utility to combine or coarse the collection of regex patterns
    into a single regex that is at least not narrower (wider or the same)
    than the collection regexes

    inspired by https://github.com/spadgos/regex-combiner in js
    """
    ENDER = '***'               # trie marker for "a pattern ends here"
    GROUPERS = {'{': '}', '[': ']', '(': ')'}
    MODIFIERS = '*?+'
    CHOICE_STARTER = '('
    HIDDEN_CHOICE_STARTER = '(?:'
    ANY_CHARS = '.*'
    LINE_START = '^'

    def __init__(self, regex_patterns=None):
        """regex coarser

        :regex_patterns: optional list of regex patterns (str or re.Pattern)
        """
        self.trie = {}
        self.patterns = []
        self.add_regex_patterns(regex_patterns)

    def get_combined_regex_pattern(self):
        """gets the pattern for the combined regex"""
        trie = deepcopy(self.trie)
        RegexCoarser._compress(trie)
        return RegexCoarser._trie_to_pattern(trie)

    def get_coarse_regex_patterns(self, max_length=100):
        """gets the patterns for the coarse regex

        Patterns that start a choice group right after a non-trivial prefix
        are widened to "<prefix>.*". The result is greedily packed into
        alternation groups no longer than :max_length: each.
        """
        trie = deepcopy(self.trie)
        RegexCoarser._compress(trie)
        patterns = RegexCoarser._trie_to_pattern(trie, True)

        root_patterns = []
        for pattern in patterns:
            left, _, choice = pattern.partition(RegexCoarser.CHOICE_STARTER)
            if choice and left and left.strip() != RegexCoarser.LINE_START and not left.isspace():
                # widen: keep only the literal prefix and match anything after it
                pattern = left + RegexCoarser.ANY_CHARS
            root_patterns.append(pattern)
        root_patterns = RegexCoarser._join_patterns(root_patterns, max_length)

        if not root_patterns or root_patterns == ['']:
            return []
        return root_patterns

    def add_regex_patterns(self, new_regex_patterns):
        """adds the new_regex patterns to RegexPatternCoarser"""
        if not new_regex_patterns or not isinstance(new_regex_patterns, list):
            return
        for new_regex_pattern in new_regex_patterns:
            self.add_regex_pattern(new_regex_pattern)

    def add_regex_pattern(self, new_regex_pattern):
        """adds the new_regex to RegexPatternCoarser"""
        new_regex_pattern = RegexCoarser._regex_pattern_to_string(new_regex_pattern)
        if not new_regex_pattern:
            return

        self.patterns.append(new_regex_pattern)

        # insert the token sequence into the trie, marking the last token with ENDER
        tokens = RegexCoarser._tokenize(new_regex_pattern)
        last_token_idx = len(tokens) - 1
        trie_node = self.trie
        for idx, token in enumerate(tokens):
            if token not in trie_node:
                trie_node[token] = {}
            if idx == last_token_idx:
                trie_node[token][RegexCoarser.ENDER] = {}
            trie_node = trie_node[token]

    @staticmethod
    def _regex_pattern_to_string(regex_pattern):
        """convert regex pattern (str or compiled re.Pattern) to string

        Returns '' for falsy input and None for unsupported types.
        """
        if not regex_pattern:
            return ''

        if isinstance(regex_pattern, str):
            return regex_pattern

        # re.Pattern replaces typing.Pattern, which was removed in Python 3.12
        if isinstance(regex_pattern, re.Pattern):
            return regex_pattern.pattern
        return None

    @staticmethod
    def _tokenize(regex_pattern):
        """tokenize the regex pattern for trie assignment

        A token is a single char, an escaped pair ("\\x"), or a whole group
        "(...)", "[...]" with trailing modifiers. A "{...}" group is a
        quantifier and is glued onto the previous token.
        """
        tokens = []
        token = ''
        group_ender = None
        use_next = False

        for char in regex_pattern:
            if use_next:
                # previous char was a backslash - consume this char literally
                use_next = False
                token += char
                char = None

            if char == '\\':
                use_next = True
                token += char
                continue

            if not group_ender and char in RegexCoarser.GROUPERS:
                group_ender = RegexCoarser.GROUPERS[char]
                token = char
                char = None

            if char is None:
                pass
            elif char == group_ender:
                token += char
                group_ender = None
                if char == '}':  # this group is a modifier
                    tokens[-1] += token
                    token = ''
                    continue
            elif char in RegexCoarser.MODIFIERS:
                if group_ender:
                    token += char       # modifier inside a group stays in the group token
                else:
                    tokens[-1] += char  # modifier outside a group glues to prior token
                continue
            else:
                token += char

            if not group_ender:
                tokens.append(token)
                token = ''

        if token:
            tokens.append(token)
        return tokens

    @staticmethod
    def _compress(trie):
        """compress trie into shortest leaves

        Bottom-up: a node with exactly one child is merged with that child
        by concatenating the keys.
        """
        for key, subtrie in trie.copy().items():
            RegexCoarser._compress(subtrie)
            subkeys = list(subtrie.keys())
            if len(subkeys) == 1:
                trie[key + subkeys[0]] = subtrie[subkeys[0]]
                del trie[key]

    @staticmethod
    def _trie_to_pattern(trie, top_keep=False):
        """convert trie to the regex pattern

        :top_keep: True returns the list of top-level patterns unjoined
        """
        patterns = [
            key.replace(RegexCoarser.ENDER, '') + RegexCoarser._trie_to_pattern(subtrie)
            for key, subtrie in trie.items()
        ]

        if top_keep:
            return patterns

        return RegexCoarser._join_patterns(patterns)[0]

    @staticmethod
    def _join_patterns(patterns, max_length=0):
        """convert list of patterns to the segmented list of dense regex patterns

        With max_length == 0 everything is joined into one "(?:a|b|...)".
        Otherwise patterns are first-fit packed into groups shorter than
        :max_length:; patterns already >= max_length are passed through as-is.
        """
        if not patterns:
            return ['']

        if len(patterns) == 1:
            return patterns

        if not max_length:
            return [RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns) + ')']

        long_patterns = []
        join_patterns = []
        for pattern in patterns:
            len_pattern = len(pattern)
            if not len_pattern:
                continue
            if len_pattern >= max_length:
                long_patterns.append(pattern)
                continue

            # first-fit: drop the pattern into the first group it fits into
            for idx, patterns_to_join in enumerate(join_patterns):
                patterns_to_join, len_patterns_to_join = patterns_to_join
                if len_pattern + len_patterns_to_join < max_length:
                    patterns_to_join.append(pattern)
                    len_patterns_to_join += len_pattern
                    join_patterns[idx] = (patterns_to_join, len_patterns_to_join)
                    len_pattern = 0
                    break
            if len_pattern:
                join_patterns.append(([pattern], len_pattern))
            # keep groups ordered by current length so small groups fill first
            join_patterns.sort(key=lambda x: x[1])

        if join_patterns:
            join_patterns = [
                RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns_to_join) + ')'
                for patterns_to_join, _ in join_patterns
            ]

        return join_patterns + long_patterns