Benjamin Renard commited on 2015-05-26 15:53:00
Showing 1 changed files, with 174 additions and 0 deletions.
... | ... |
@@ -0,0 +1,174 @@ |
1 |
+#!/usr/bin/python |
|
2 |
+# -*- coding: utf-8 -*- |
|
3 |
+# |
|
4 |
+# My hash mapping library |
|
5 |
+# |
|
6 |
+# Mapping configuration |
|
7 |
+# { |
|
8 |
+# '[dst key 1]': { # Key name in the result |
|
9 |
+# |
|
10 |
+# 'order': [int], # Processing order between destinations keys |
|
11 |
+# |
|
12 |
+# # Source values |
|
13 |
+# 'other_key': [key], # Other key of the destination to use as source of values |
|
14 |
+# 'key' : '[src key]', # Key of source hash to get source values |
|
15 |
+# 'keys' : ['[sk1]', '[sk2]', ...], # List of source hash's keys to get source values |
|
16 |
+# |
|
17 |
+# # Clean / convert values |
|
18 |
+# 'cleanRegex': '[regex]', # Regex used to remove unwanted characters. Ex : [^0-9+] |
|
19 |
+# 'convert': [function], # Function to use to convert value : Original value will be passed |
|
20 |
+# # as argument and the value retrieve will replace source value in |
|
21 |
+# # the result |
|
22 |
+# # Ex : |
|
23 |
+# # lambda x: x.strip() |
|
24 |
+# # lambda x: "myformat : %s" % x |
|
25 |
+# # Deduplicate / check values |
|
26 |
+# 'deduplicate': [bool], # If True, source values will be deduplicated |
|
27 |
+# 'check': [function], # Function to use to check source value : Source value will be passed |
|
28 |
+# # as argument and if function return True, the value will be preserved |
|
29 |
+# # Ex : |
|
30 |
+# # lambda x: x in my_global_hash |
|
31 |
+# # Join values |
|
32 |
+# 'join': '[glue]', # If present, source values will be joined using the "glue" |
|
33 |
+# |
|
34 |
+# # Alternative mapping |
|
35 |
+# 'or': { [map configuration] } # If this mapping case does not retrieve any value, try to get value(s) |
|
36 |
+# # with this other mapping configuration |
|
37 |
+# }, |
|
38 |
+# '[dst key 2]': { |
|
39 |
+# [...] |
|
40 |
+# } |
|
41 |
+# } |
|
42 |
+# |
|
43 |
+# Return format : |
|
44 |
+# { |
|
45 |
+# '[dst key 1]': ['v1','v2', ...], |
|
46 |
+# '[dst key 2]': [ ... ], |
|
47 |
+# [...] |
|
48 |
+# } |
|
49 |
+ |
|
50 |
+import logging, re |
|
51 |
+ |
|
52 |
def clean_value(value):
    """Return ``value`` as a UTF-8 encoded byte string.

    - Byte strings (``str`` on Python 2, ``bytes`` on Python 3) are returned
      unchanged: calling ``.encode()`` on them would trigger an implicit
      ASCII decode on Python 2 (failing on non-ASCII data) and does not
      exist on Python 3.
    - Text strings are encoded to UTF-8.
    - Any other object (int, float, ...) is converted with ``str()`` first.
    """
    if isinstance(value, bytes):
        return value
    if not hasattr(value, 'encode'):
        # Not a string at all: build its textual representation.
        value = str(value)
        if isinstance(value, bytes):
            # Python 2: str() already produced a byte string.
            return value
    return value.encode('utf8')
|
56 |
+ |
|
57 |
def map(map_keys, src, dst=None, invalid_values=None):
    """Build the destination hash ``dst`` from ``src`` using ``map_keys``.

    Destination keys are processed by ascending ``'order'`` (0 when the
    option is absent). For each destination key, the mapping case may use:

    - ``'other_key'``: reuse the values already stored in ``dst`` under
      that key;
    - ``'key'`` / ``'keys'``: read one or several keys of ``src`` (empty
      strings and ``None`` are ignored, other values go through
      ``clean_value``);
    - ``'cleanRegex'``: regex whose matches are removed from each value;
    - ``'convert'``: function applied to each value;
    - ``'deduplicate'``: if true, drop duplicated values (order is kept);
    - ``'check'``: function that must return a true value for a value to
      be kept;
    - ``'join'``: glue used to join several values into a single one;
    - ``'or'``: alternative mapping case tried when no value was found;
    - ``'required'``: if true and no value was found, the mapping fails.

    Values that become empty strings after cleaning or converting are
    dropped.

    :param map_keys: mapping configuration, ``{dst_key: mapping_case}``
    :param src: source hash
    :param dst: optional hash to fill (updated in place); a new one is
                created for each call when omitted
    :param invalid_values: optional dict that receives, per destination
                key, the list of values rejected by the ``'check'``
                function
    :return: ``dst``, or ``False`` when a required key could not be filled
             (``dst`` may then have been partially filled)
    """
    # Fresh containers on every call: a mutable default would be shared
    # between calls and leak results from one call into the next.
    if dst is None:
        dst = {}
    if invalid_values is None:
        invalid_values = {}

    def get_values(dst_key, src, m):
        """Return the list of values produced by mapping case ``m``."""
        # Extract source values
        values = []
        if 'other_key' in m and m['other_key'] in dst:
            # Copy the list: appending to it below must not alter the
            # values stored in dst for the other key.
            values = list(dst[m['other_key']])

        src_keys = []
        if 'key' in m:
            src_keys.append(m['key'])
        if 'keys' in m:
            src_keys.extend(m['keys'])
        for key in src_keys:
            if key in src and src[key] is not None and src[key] != '':
                values.append(clean_value(src[key]))

        # Clean and convert values (results that end up empty are dropped)
        if 'cleanRegex' in m and values:
            cleaned = [re.sub(m['cleanRegex'], '', v) for v in values]
            values = [v for v in cleaned if v != '']

        if 'convert' in m and values:
            converted = [m['convert'](v) for v in values]
            values = [v for v in converted if v != '']

        # Deduplicate values, keeping the first occurrence of each one.
        # A list scan is used on purpose: converted values may not be
        # hashable.
        if m.get('deduplicate') and len(values) > 1:
            unique = []
            for v in values:
                if v not in unique:
                    unique.append(v)
            values = unique

        # Check values
        if 'check' in m and values:
            valid = []
            for v in values:
                if m['check'](v):
                    valid.append(v)
                    continue
                logging.debug('Invalid value %s for key %s', v, dst_key)
                rejected = invalid_values.setdefault(dst_key, [])
                if v not in rejected:
                    rejected.append(v)
            values = valid

        # Join values
        if 'join' in m and len(values) > 1:
            values = [m['join'].join(values)]

        # Manage alternative mapping case
        if not values and 'or' in m:
            values = get_values(dst_key, src, m['or'])

        return values

    ordered_keys = sorted(map_keys, key=lambda k: map_keys[k].get('order', 0))
    for dst_key in ordered_keys:
        values = get_values(dst_key, src, map_keys[dst_key])

        if not values:
            if map_keys[dst_key].get('required'):
                logging.debug(
                    'Destination key %s could not be filled from source '
                    'but is required', dst_key
                )
                return False
            continue

        dst[dst_key] = values
    return dst
|
135 |
+ |
|
136 |
+ |
|
137 |
if __name__ == '__main__':
    # Demo: map a sample source record to LDAP-like attribute names and log
    # both the inputs and the result at DEBUG level.
    logging.basicConfig(level=logging.DEBUG)

    src = {
        'uid': 'hmartin',
        'firstname': 'Martin',
        'lastname': 'Martin',
        'disp_name': 'Henri Martin',
        'line_1': '3 rue de Paris',
        'line_2': 'Pour Pierre',
        'zip_text': '92 120',
        'city_text': 'Montrouge',
        'line_city': '92120 Montrouge',
        'tel1': '01 00 00 00 00',
        'tel2': '09 00 00 00 00',
        'mobile': '06 00 00 00 00',
        'fax': '01 00 00 00 00',
        'email': 'H.MARTIN@GMAIL.COM',
    }

    map_c = {
        'uid': {'order': 0, 'key': 'uid', 'required': True},
        'givenName': {'order': 1, 'key': 'firstname'},
        'sn': {'order': 2, 'key': 'lastname'},
        # The fallback must use 'keys': 'attrs' is not an option read by
        # the mapping code, so the alternative would never yield a value.
        'cn': {'order': 3, 'key': 'disp_name', 'required': True,
               'or': {'keys': ['firstname', 'lastname'], 'join': ' '}},
        # Reuse the values computed for 'cn'; referencing 'displayName'
        # itself could never produce a value since it is not filled yet.
        'displayName': {'order': 4, 'other_key': 'cn'},
        # Source keys are named line_* in src (not ligne_*).
        'street': {'order': 5, 'join': ' / ', 'keys': ['line_1', 'line_2']},
        'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
        'l': {'order': 7, 'key': 'city_text'},
        'postalAddress': {'order': 8, 'join': '$',
                          'keys': ['line_1', 'line_2', 'line_city']},
        'telephoneNumber': {'order': 9, 'keys': ['tel1', 'tel2'],
                            'cleanRegex': '[^0-9+]', 'deduplicate': True},
        'mobile': {'order': 10, 'key': 'mobile'},
        'facsimileTelephoneNumber': {'order': 11, 'key': 'fax'},
        'mail': {'order': 12, 'key': 'email',
                 'convert': lambda x: x.lower().strip()},
    }

    logging.debug('[TEST] Map src=%s / config= %s' % (src, map_c))
    logging.debug('[TEST] Result : %s' % map(map_c, src))
|
0 | 175 |