Benjamin Renard committed on 2015-05-26 15:53:00
Showing 1 changed files, with 174 additions and 0 deletions.
| ... | ... |
@@ -0,0 +1,174 @@ |
| 1 |
+#!/usr/bin/python |
|
| 2 |
+# -*- coding: utf-8 -*- |
|
| 3 |
+# |
|
| 4 |
+# My hash mapping library |
|
| 5 |
+# |
|
| 6 |
+# Mapping configuration |
|
| 7 |
+# {
|
|
| 8 |
+# '[dst key 1]': { # Key name in the result
|
|
| 9 |
+# |
|
| 10 |
+# 'order': [int], # Processing order between destinations keys |
|
| 11 |
+# |
|
| 12 |
+# # Source values |
|
| 13 |
+# 'other_key': [key], # Other key of the destination to use as source of values |
|
| 14 |
+# 'key' : '[src key]', # Key of source hash to get source values |
|
| 15 |
+# 'keys' : ['[sk1]', '[sk2]', ...], # List of source hash's keys to get source values |
|
| 16 |
+# |
|
| 17 |
+# # Clean / convert values |
|
| 18 |
+# 'cleanRegex': '[regex]', # Regex used to remove unwanted characters. Ex : [^0-9+] |
|
| 19 |
+# 'convert': [function], # Function to use to convert value : Original value will be passed |
|
| 20 |
+# # as argument and the returned value will replace the source value in |
|
| 21 |
+# # the result |
|
| 22 |
+# # Ex : |
|
| 23 |
+# # lambda x: x.strip() |
|
| 24 |
+# # lambda x: "myformat : %s" % x |
|
| 25 |
+# # Deduplicate / check values |
|
| 26 |
+# 'deduplicate': [bool], # If True, source values will be deduplicated |
|
| 27 |
+# 'check': [function], # Function to use to check source value : Source value will be passed |
|
| 28 |
+# # as argument and if the function returns True, the value will be preserved |
|
| 29 |
+# # Ex : |
|
| 30 |
+# # lambda x: x in my_global_hash |
|
| 31 |
+# # Join values |
|
| 32 |
+# 'join': '[glue]', # If present, source values will be joined using the "glue" |
|
| 33 |
+# |
|
| 34 |
+# # Alternative mapping |
|
| 35 |
+# 'or': { [map configuration] } # If this mapping case does not retrieve any value, try to get value(s)
|
|
| 36 |
+# # with this other mapping configuration |
|
| 37 |
+# }, |
|
| 38 |
+# '[dst key 2]': {
|
|
| 39 |
+# [...] |
|
| 40 |
+# } |
|
| 41 |
+# } |
|
| 42 |
+# |
|
| 43 |
+# Return format : |
|
| 44 |
+# {
|
|
| 45 |
+# '[dst key 1]': ['v1','v2', ...], |
|
| 46 |
+# '[dst key 2]': [ ... ], |
|
| 47 |
+# [...] |
|
| 48 |
+# } |
|
| 49 |
+ |
|
| 50 |
+import logging, re |
|
| 51 |
+ |
|
| 52 |
def clean_value(value):
    """Return ``value`` as a UTF-8 encoded byte string.

    Byte strings are returned unchanged, text (unicode) strings are encoded
    to UTF-8 and any other value (int, float, ...) is first converted with
    ``str()`` and then encoded.
    """
    # Already encoded: calling .encode() on a byte string makes Python 2
    # decode it as ASCII first (UnicodeDecodeError on accented characters),
    # and Python 3 bytes objects have no .encode() method at all.
    if isinstance(value, bytes):
        return value
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3.
    if not isinstance(value, type(u'')):
        value = str(value)
    return value.encode('utf8')
|
|
| 56 |
+ |
|
| 57 |
def map(map_keys, src, dst=None):
    """Fill a destination hash from ``src`` following ``map_keys``.

    map_keys -- mapping configuration: one entry per destination key, each
                entry being a dict of options ('order', 'other_key', 'key',
                'keys', 'cleanRegex', 'convert', 'deduplicate', 'check',
                'join', 'or', 'required').
    src      -- source hash the values are read from.
    dst      -- optional destination hash to complete; it is modified in
                place. A new dict is created when omitted.

    Destination keys are processed by ascending 'order' (0 when absent).
    Keys for which no value could be retrieved are left out of the result.

    Return the destination hash ({dst key: [values]}), or False as soon as
    a key flagged 'required' gets no value (keys processed before it have
    already been stored in ``dst`` at that point).
    """
    # Create the dict per call: a ``{}`` default is evaluated once and would
    # be shared (and accumulate results) between successive calls.
    if dst is None:
        dst = {}

    # Values rejected by a 'check' function, grouped by destination key.
    # Only kept for the duration of the call.
    invalid_values = {}

    def get_values(dst_key, src, m):
        """Return the list of values produced by the mapping case ``m``."""
        # Extract source values. The other key's list is copied so that the
        # appends below never alter values already stored in dst.
        values = []
        if 'other_key' in m and m['other_key'] in dst:
            values = list(dst[m['other_key']])

        src_keys = []
        if 'key' in m:
            src_keys.append(m['key'])
        src_keys.extend(m.get('keys', []))
        for key in src_keys:
            if key in src and src[key] != '':
                values.append(clean_value(src[key]))

        # Clean and convert values (values that end up empty are dropped)
        if 'cleanRegex' in m and values:
            cleaned = [re.sub(m['cleanRegex'], '', v) for v in values]
            values = [v for v in cleaned if v != '']

        if 'convert' in m and values:
            converted = [m['convert'](v) for v in values]
            values = [v for v in converted if v != '']

        # Deduplicate values, keeping the first occurrence. A list is used
        # rather than a set because converted values may not be hashable.
        if m.get('deduplicate') and len(values) > 1:
            unique = []
            for v in values:
                if v not in unique:
                    unique.append(v)
            values = unique

        # Check values
        if 'check' in m and values:
            valid = []
            for v in values:
                if m['check'](v):
                    valid.append(v)
                    continue
                logging.debug('Invalid value %s for key %s' % (v, dst_key))
                rejected = invalid_values.setdefault(dst_key, [])
                if v not in rejected:
                    rejected.append(v)
            values = valid

        # Join values
        if 'join' in m and len(values) > 1:
            values = [m['join'].join(values)]

        # Manage alternative mapping case
        if not values and 'or' in m:
            values = get_values(dst_key, src, m['or'])

        return values

    for dst_key in sorted(map_keys, key=lambda k: map_keys[k].get('order', 0)):
        values = get_values(dst_key, src, map_keys[dst_key])

        if values:
            dst[dst_key] = values
        elif map_keys[dst_key].get('required'):
            logging.debug('Destination key %s could not be filled from source but is required' % dst_key)
            return False

    return dst
|
| 135 |
+ |
|
| 136 |
+ |
|
| 137 |
if __name__ == '__main__':
    # Self-test: map a sample source hash and log the configuration and the
    # result at DEBUG level.
    logging.basicConfig(level=logging.DEBUG)

    src = {
        'uid': 'hmartin',
        'firstname': 'Martin',
        'lastname': 'Martin',
        'disp_name': 'Henri Martin',
        'line_1': '3 rue de Paris',
        'line_2': 'Pour Pierre',
        'zip_text': '92 120',
        'city_text': 'Montrouge',
        'line_city': '92120 Montrouge',
        'tel1': '01 00 00 00 00',
        'tel2': '09 00 00 00 00',
        'mobile': '06 00 00 00 00',
        'fax': '01 00 00 00 00',
        'email': 'H.MARTIN@GMAIL.COM',
    }

    map_c = {
        'uid': {'order': 0, 'key': 'uid', 'required': True},
        'givenName': {'order': 1, 'key': 'firstname'},
        'sn': {'order': 2, 'key': 'lastname'},
        # The fallback must use 'keys': 'attrs' is not an option read by map()
        'cn': {'order': 3, 'key': 'disp_name', 'required': True,
               'or': {'keys': ['firstname', 'lastname'], 'join': ' '}},
        # Reuse the values already mapped to 'cn' (a key referencing itself
        # can never be filled since it is not in the destination yet).
        # NOTE(review): 'cn' is assumed to be the intended source - confirm.
        'displayName': {'order': 4, 'other_key': 'cn'},
        # Source keys are named line_* in src (not ligne_*)
        'street': {'order': 5, 'join': ' / ', 'keys': ['line_1', 'line_2']},
        'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
        'l': {'order': 7, 'key': 'city_text'},
        'postalAddress': {'order': 8, 'join': '$', 'keys': ['line_1', 'line_2', 'line_city']},
        'telephoneNumber': {'order': 9, 'keys': ['tel1', 'tel2'], 'cleanRegex': '[^0-9+]', 'deduplicate': True},
        'mobile': {'order': 10, 'key': 'mobile'},
        'facsimileTelephoneNumber': {'order': 11, 'key': 'fax'},
        'mail': {'order': 12, 'key': 'email', 'convert': lambda x: x.lower().strip()},
    }

    logging.debug('[TEST] Map src=%s / config= %s' % (src, map_c))
    logging.debug('[TEST] Result : %s' % map(map_c, src))
|
|
| 0 | 175 |