View file File name : dictdiffer.py Content :""" Calculate the difference between two dictionaries as: (1) items added (2) items removed (3) keys same in both but changed values (4) keys same in both and unchanged values Originally posted at http://stackoverflow.com/questions/1165352/fast-comparison-between-two-python-dictionary/1165552#1165552 Available at repository: https://github.com/hughdbrown/dictdiffer Added the ability to recursively compare dictionaries """ import copy from collections.abc import Mapping def diff(current_dict, past_dict): return DictDiffer(current_dict, past_dict) class DictDiffer: """ Calculate the difference between two dictionaries as: (1) items added (2) items removed (3) keys same in both but changed values (4) keys same in both and unchanged values """ def __init__(self, current_dict, past_dict): self.current_dict, self.past_dict = current_dict, past_dict self.set_current, self.set_past = set(list(current_dict)), set(list(past_dict)) self.intersect = self.set_current.intersection(self.set_past) def added(self): return self.set_current - self.intersect def removed(self): return self.set_past - self.intersect def changed(self): return {o for o in self.intersect if self.past_dict[o] != self.current_dict[o]} def unchanged(self): return {o for o in self.intersect if self.past_dict[o] == self.current_dict[o]} def deep_diff(old, new, ignore=None): ignore = ignore or [] res = {} old = copy.deepcopy(old) new = copy.deepcopy(new) stack = [(old, new, False)] while stack: tmps = [] tmp_old, tmp_new, reentrant = stack.pop() for key in set(list(tmp_old) + list(tmp_new)): if key in tmp_old and key in tmp_new and tmp_old[key] == tmp_new[key]: del tmp_old[key] del tmp_new[key] continue if not reentrant: if key in tmp_old and key in ignore: del tmp_old[key] if key in tmp_new and key in ignore: del tmp_new[key] if isinstance(tmp_old.get(key), Mapping) and isinstance( tmp_new.get(key), Mapping ): tmps.append((tmp_old[key], tmp_new[key], False)) if tmps: stack.extend([(tmp_old, tmp_new, True)] + tmps) if old: res["old"] = old if new: res["new"] = new return res def recursive_diff(past_dict, current_dict, ignore_missing_keys=True): """ Returns a RecursiveDictDiffer object that computes the recursive diffs between two dictionaries past_dict Past dictionary current_dict Current dictionary ignore_missing_keys Flag specifying whether to ignore keys that no longer exist in the current_dict, but exist in the past_dict. If true, the diff will not contain the missing keys. Default is True. """ return RecursiveDictDiffer(past_dict, current_dict, ignore_missing_keys) class RecursiveDictDiffer(DictDiffer): """ Calculates a recursive diff between the current_dict and the past_dict creating a diff in the format {'new': new_value, 'old': old_value} It recursively searches differences in common keys whose values are dictionaries creating a diff dict in the format {'common_key' : {'new': new_value, 'old': old_value} The class overrides all DictDiffer methods, returning lists of keys and subkeys using the . notation (i.e 'common_key1.common_key2.changed_key') The class provides access to: (1) the added, removed, changes keys and subkeys (using the . notation) ``added``, ``removed``, ``changed`` methods (2) the diffs in the format above (diff property) ``diffs`` property (3) a dict with the new changed values only (new_values property) ``new_values`` property (4) a dict with the old changed values only (old_values property) ``old_values`` property (5) a string representation of the changes in the format: ``changes_str`` property Note: The <_null_> value is a reserved value .. code-block:: text common_key1: common_key2: changed_key1 from '<old_str>' to '<new_str>' changed_key2 from '[<old_elem1>, ..]' to '[<new_elem1>, ..]' common_key3: changed_key3 from <old_int> to <new_int> """ NONE_VALUE = "<_null_>" def __init__(self, past_dict, current_dict, ignore_missing_keys): """ past_dict Past dictionary. current_dict Current dictionary. ignore_missing_keys Flag specifying whether to ignore keys that no longer exist in the current_dict, but exist in the past_dict. If true, the diff will not contain the missing keys. """ super().__init__(current_dict, past_dict) self._diffs = self._get_diffs( self.current_dict, self.past_dict, ignore_missing_keys ) # Ignores unet values when assessing the changes self.ignore_unset_values = True @classmethod def _get_diffs(cls, dict1, dict2, ignore_missing_keys): """ Returns a dict with the differences between dict1 and dict2 Notes: Keys that only exist in dict2 are not included in the diff if ignore_missing_keys is True, otherwise they are Simple compares are done on lists """ ret_dict = {} for p in dict1.keys(): if p not in dict2: ret_dict.update({p: {"new": dict1[p], "old": cls.NONE_VALUE}}) elif dict1[p] != dict2[p]: if isinstance(dict1[p], dict) and isinstance(dict2[p], dict): sub_diff_dict = cls._get_diffs( dict1[p], dict2[p], ignore_missing_keys ) if sub_diff_dict: ret_dict.update({p: sub_diff_dict}) else: ret_dict.update({p: {"new": dict1[p], "old": dict2[p]}}) if not ignore_missing_keys: for p in dict2.keys(): if p not in dict1.keys(): ret_dict.update({p: {"new": cls.NONE_VALUE, "old": dict2[p]}}) return ret_dict @classmethod def _get_values(cls, diff_dict, type="new"): """ Returns a dictionaries with the 'new' values in a diff dict. type Which values to return, 'new' or 'old' """ ret_dict = {} for p in diff_dict.keys(): if type in diff_dict[p].keys(): ret_dict.update({p: diff_dict[p][type]}) else: ret_dict.update({p: cls._get_values(diff_dict[p], type=type)}) return ret_dict @classmethod def _get_changes(cls, diff_dict): """ Returns a list of string message with the differences in a diff dict. Each inner difference is tabulated two space deeper """ changes_strings = [] for p in sorted(diff_dict.keys()): if sorted(diff_dict[p].keys()) == ["new", "old"]: # Some string formatting old_value = diff_dict[p]["old"] if diff_dict[p]["old"] == cls.NONE_VALUE: old_value = "nothing" elif isinstance(diff_dict[p]["old"], str): old_value = "'{}'".format(diff_dict[p]["old"]) elif isinstance(diff_dict[p]["old"], list): old_value = "'{}'".format(", ".join(diff_dict[p]["old"])) new_value = diff_dict[p]["new"] if diff_dict[p]["new"] == cls.NONE_VALUE: new_value = "nothing" elif isinstance(diff_dict[p]["new"], str): new_value = "'{}'".format(diff_dict[p]["new"]) elif isinstance(diff_dict[p]["new"], list): new_value = "'{}'".format(", ".join(diff_dict[p]["new"])) changes_strings.append( "{} from {} to {}".format(p, old_value, new_value) ) else: sub_changes = cls._get_changes(diff_dict[p]) if sub_changes: changes_strings.append("{}:".format(p)) changes_strings.extend([" {}".format(c) for c in sub_changes]) return changes_strings def added(self): """ Returns all keys that have been added. If the keys are in child dictionaries they will be represented with . notation """ def _added(diffs, prefix): keys = [] for key in diffs.keys(): if isinstance(diffs[key], dict) and "old" not in diffs[key]: keys.extend(_added(diffs[key], prefix="{}{}.".format(prefix, key))) elif diffs[key]["old"] == self.NONE_VALUE: if isinstance(diffs[key]["new"], dict): keys.extend( _added( diffs[key]["new"], prefix="{}{}.".format(prefix, key) ) ) else: keys.append("{}{}".format(prefix, key)) return keys return sorted(_added(self._diffs, prefix="")) def removed(self): """ Returns all keys that have been removed. If the keys are in child dictionaries they will be represented with . notation """ def _removed(diffs, prefix): keys = [] for key in diffs.keys(): if isinstance(diffs[key], dict) and "old" not in diffs[key]: keys.extend( _removed(diffs[key], prefix="{}{}.".format(prefix, key)) ) elif diffs[key]["new"] == self.NONE_VALUE: keys.append("{}{}".format(prefix, key)) elif isinstance(diffs[key]["new"], dict): keys.extend( _removed(diffs[key]["new"], prefix="{}{}.".format(prefix, key)) ) return keys return sorted(_removed(self._diffs, prefix="")) def changed(self): """ Returns all keys that have been changed. If the keys are in child dictionaries they will be represented with . notation """ def _changed(diffs, prefix): keys = [] for key in diffs.keys(): if not isinstance(diffs[key], dict): continue if isinstance(diffs[key], dict) and "old" not in diffs[key]: keys.extend( _changed(diffs[key], prefix="{}{}.".format(prefix, key)) ) continue if self.ignore_unset_values: if ( "old" in diffs[key] and "new" in diffs[key] and diffs[key]["old"] != self.NONE_VALUE and diffs[key]["new"] != self.NONE_VALUE ): if isinstance(diffs[key]["new"], dict): keys.extend( _changed( diffs[key]["new"], prefix="{}{}.".format(prefix, key), ) ) else: keys.append("{}{}".format(prefix, key)) elif isinstance(diffs[key], dict): keys.extend( _changed(diffs[key], prefix="{}{}.".format(prefix, key)) ) else: if "old" in diffs[key] and "new" in diffs[key]: if isinstance(diffs[key]["new"], dict): keys.extend( _changed( diffs[key]["new"], prefix="{}{}.".format(prefix, key), ) ) else: keys.append("{}{}".format(prefix, key)) elif isinstance(diffs[key], dict): keys.extend( _changed(diffs[key], prefix="{}{}.".format(prefix, key)) ) return keys return sorted(_changed(self._diffs, prefix="")) def unchanged(self): """ Returns all keys that have been unchanged. If the keys are in child dictionaries they will be represented with . notation """ def _unchanged(current_dict, diffs, prefix): keys = [] for key in current_dict.keys(): if key not in diffs: keys.append("{}{}".format(prefix, key)) elif isinstance(current_dict[key], dict): if "new" in diffs[key]: # There is a diff continue else: keys.extend( _unchanged( current_dict[key], diffs[key], prefix="{}{}.".format(prefix, key), ) ) return keys return sorted(_unchanged(self.current_dict, self._diffs, prefix="")) @property def diffs(self): """Returns a dict with the recursive diffs current_dict - past_dict""" return self._diffs @property def new_values(self): """Returns a dictionary with the new values""" return self._get_values(self._diffs, type="new") @property def old_values(self): """Returns a dictionary with the old values""" return self._get_values(self._diffs, type="old") @property def changes_str(self): """Returns a string describing the changes""" return "\n".join(self._get_changes(self._diffs))