-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_data.py
61 lines (49 loc) · 1.69 KB
/
extract_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import psycopg2
import os
import pandas as pd
from pdb import set_trace as b
import json
import base64
import threading
# Load the whole export table into a DataFrame and set up the shared state
# used by process_frame (output root, failure counter and its lock).

# Column metadata (table name, column name, data type) is kept in ``tl`` for
# inspection. ORDER BY ordinal_position makes the rows follow the table's
# declared column order instead of an unspecified one.
fields_query = (
    "SELECT table_name, column_name, data_type FROM information_schema.columns "
    "WHERE table_name = 'zzz_export_ud_w_passport' ORDER BY ordinal_position;"
)
# To debug a single record, narrow this query with: where identif='<id>'
select_query = "SELECT * from zzz_export_ud_w_passport;"

conn = psycopg2.connect("dbname=police user=postgres")
try:
    cur = conn.cursor()
    cur.execute(fields_query)
    tl = cur.fetchall()
    cur.execute(select_query)
    # Take the column names from the result set itself: cursor.description is
    # guaranteed to match the order and count of the fetched tuples, whereas
    # the information_schema listing is not tied to this particular query
    # (and may include same-named tables from other schemas).
    fields = [column[0] for column in cur.description]
    tuples = cur.fetchall()
    cur.close()
finally:
    # Everything needed is in memory now; do not hold the connection open
    # while the records are written to disk.
    conn.close()

df = pd.DataFrame(tuples, columns=fields)
base_dir = "/mnt/f/projects/pokemongo_aws/celeb_db"
# Number of records whose image could not be base64-decoded; updated by
# process_frame while holding ``lock``.
not_decodable = 0
lock = threading.Lock()
def process_frame(row: pd.Series) -> None:
    """Export one record into its own directory under ``base_dir``.

    Creates ``<base_dir>/<identif>/`` and writes ``personal_data.json`` there,
    containing every non-null field of ``row``. When ``row.image`` decodes as
    base64, the decoded bytes are also written to ``original.jpg`` and the
    JSON ``image`` field is ``'Present'``. Otherwise no image file is written,
    the JSON ``image`` field is ``'Could not be decoded'`` and the
    module-level ``not_decodable`` counter is incremented.

    Args:
        row: One record. Must provide ``identif`` (a string, used as the
            directory name) and ``image``. The row itself is not modified.
    """
    # Required: without it ``not_decodable += 1`` would make the name local
    # and raise UnboundLocalError on the first undecodable image.
    global not_decodable

    record_id = row.identif
    record_dir = os.path.join(base_dir, record_id)
    os.makedirs(record_dir, exist_ok=True)

    try:
        bin_data = base64.b64decode(row.image)
    except (TypeError, ValueError):
        # binascii.Error (bad padding / invalid data) is a ValueError;
        # TypeError covers missing or non-text values such as None or NaN.
        image_status = 'Could not be decoded'
        with lock:
            not_decodable += 1
    else:
        image_status = 'Present'
        # Only write the image when decoding succeeded, so bin_data is
        # always bound here.
        with open(os.path.join(record_dir, "original.jpg"), 'wb') as f:
            f.write(bin_data)

    # Replace the raw base64 payload with a short status so the JSON stays
    # small; build a separate dict instead of mutating the caller's row.
    record = row.dropna().to_dict()
    record['image'] = image_status
    json_object = json.dumps([record], indent=4, sort_keys=True, default=str, ensure_ascii=False)
    # ensure_ascii=False emits non-ASCII text verbatim, so pin the encoding
    # rather than depending on the platform default.
    with open(os.path.join(record_dir, "personal_data.json"), "w", encoding="utf-8") as j:
        j.write(json_object)
    print("Done:" + record_id)
# Export every record in turn. process_frame is called only for its side
# effects (files on disk, failure counter), so its return value is ignored.
for _, record in df.iterrows():
    process_frame(record)
# Summary of how many records had an image that failed to decode.
print(f"Unable to decode:{not_decodable} images")