-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcaptcha_generate.py
79 lines (62 loc) · 3.03 KB
/
captcha_generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
from pathlib import Path
# from captcha.image import ImageCaptcha
from captcha_generator.image import ImageCaptcha
import string
import numpy as np
import threading
import logging
def _list_dir_paths(directory):
    """Return ``directory`` joined with every entry name ``os.listdir`` reports for it."""
    # Entries are not filtered: whatever the directory contains ends up in the list.
    return [os.path.join(directory, entry) for entry in os.listdir(directory)]


# Font directories, relative to the current working directory.
font_case_sensitive_dir = Path('data/fonts/case_sensitive')
font_non_case_sensitive_dir = Path('data/fonts/non_case_sensitive')

# Path lists are built once at import time, so both directories must already exist.
font_case_sensitive_paths = _list_dir_paths(font_case_sensitive_dir)
font_non_case_sensitive_paths = _list_dir_paths(font_non_case_sensitive_dir)
# Character pools that captcha text is sampled from; every array element is a
# single-character string.
# Lowercase ASCII letters followed by the digits 0-9.
all_case_sensitive_chars = np.array([*string.ascii_lowercase, *string.digits])
# Uppercase ASCII letters followed by the digits 0-9.
all_non_case_sensitive_chars = np.array([*string.ascii_uppercase, *string.digits])
# The digits 0-9 only.
only_digits_chars = np.array([*string.digits])
def gen_captcha(fonts: tuple, chars: tuple, n_captcha, output_dir, min_n_chars=5, max_n_chars=15):
    """Render random-text captcha images into ``output_dir``.

    ``fonts`` and ``chars`` are walked in lockstep: every non-empty font group
    is combined with the character array at the same position. For each text
    length from ``min_n_chars`` to ``max_n_chars`` (inclusive) the same number
    of images is written; that number is a ceiling division of ``n_captcha``
    by (number of lengths * number of non-empty font groups), so the total
    number of write attempts may slightly exceed ``n_captcha``.

    Each image is saved as ``<text>.png``. Drawing the same text twice
    overwrites the earlier file, so the directory can end up holding fewer
    files than the number of attempts.

    Args:
        fonts: Font groups; each group is a sized collection handed to
            ``ImageCaptcha`` as its ``fonts`` argument. Empty groups are skipped.
        chars: One 1-D numpy array of characters per font group; text is
            sampled from it uniformly with replacement.
        n_captcha: Target number of images across all groups and lengths.
        output_dir: Directory the ``.png`` files are written to.
        min_n_chars: Shortest text length to generate.
        max_n_chars: Longest text length to generate (inclusive).

    Returns:
        None. When there is no non-empty font group, or the length range is
        empty, the function returns without generating anything.
    """
    n_lengths = max_n_chars - min_n_chars + 1
    n_font_groups = sum(1 for f in fonts if len(f) != 0)
    # Without usable fonts or lengths there is nothing to render, and the
    # ceiling division below would otherwise divide by zero.
    if n_lengths <= 0 or n_font_groups == 0:
        return

    # Ceiling division: images per (font group, text length) combination.
    n_captcha_per_n = (n_captcha - 1) // (n_lengths * n_font_groups) + 1

    for font_group, alphabet in zip(fonts, chars):
        if len(font_group) == 0:
            continue
        n_chars = alphabet.shape[0]
        for n_char in range(min_n_chars, max_n_chars + 1):
            img_captcha_gen = ImageCaptcha(height=40, pad_height=16, pad_width=20, fonts=font_group)
            i = 0
            while i < n_captcha_per_n:
                # Sample n_char indices with replacement and join the picked characters.
                text = ''.join(
                    alphabet[np.random.randint(0, n_chars, size=n_char)]).strip()
                # The text doubles as the file name (and thus the label).
                img_captcha_gen.write(text, output=os.path.join(output_dir, f'{text}.png'))
                i += 1
def test_font(font: str):
    """Render a preview image containing every ASCII letter and digit in one font.

    The image is written to ``data/test_images/<font file name>.png``.

    Args:
        font: Path to the font file to preview.
    """
    text = string.ascii_letters + string.digits
    # os.path.basename honours the platform's path separator. The font paths in
    # this module are built with os.path.join, which uses backslashes on
    # Windows, so splitting on '/' would leave the directory in the name there.
    font_name = os.path.basename(font)
    # Image width is set to 15 units per character of the sample text.
    img_captcha_gen = ImageCaptcha(width=len(text) * 15, fonts=[font])
    img_captcha_gen.write(text, f'data/test_images/{font_name}.png')
def gen_data(output_dir, n_captcha):
    """Generate captcha images into ``output_dir`` using 25 worker threads.

    Every thread runs ``gen_captcha`` with a ceiling-divided share of
    ``n_captcha``, text lengths 5 through 8, and three font/character pairings:
    case-sensitive fonts with the case-sensitive pool, non-case-sensitive fonts
    with the non-case-sensitive pool, and case-sensitive fonts with the
    digits-only pool. The call returns once all threads have been joined.

    Args:
        output_dir: Directory passed through to ``gen_captcha``.
        n_captcha: Total number of images to aim for across all threads.
    """
    logging.basicConfig(format="%(asctime)s: %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")

    n_threads = 25
    # Ceiling division so the per-thread shares add up to at least n_captcha.
    n_captcha_per_thread = (n_captcha - 1) // n_threads + 1

    logging.info(f'Creating {n_threads} threads')
    threads = []
    for _ in range(n_threads):
        worker_kwargs = {
            'fonts': (font_case_sensitive_paths, font_non_case_sensitive_paths, font_case_sensitive_paths),
            'chars': (all_case_sensitive_chars, all_non_case_sensitive_chars, only_digits_chars),
            'n_captcha': n_captcha_per_thread,
            'min_n_chars': 5,
            'max_n_chars': 8,
            'output_dir': output_dir,
        }
        threads.append(threading.Thread(target=gen_captcha, kwargs=worker_kwargs))

    # Start every worker before joining any of them so they run concurrently.
    for idx, worker in enumerate(threads):
        logging.info(f'Starting thread {idx}')
        worker.start()

    for idx, worker in enumerate(threads):
        logging.info(f'Joining thread {idx}')
        worker.join()
def test_data():
    """Render a preview image for each path in ``font_case_sensitive_paths``."""
    for font_path in font_case_sensitive_paths:
        test_font(font_path)
if __name__ == '__main__':
    # test_data()  # disabled: renders one preview image per case-sensitive font
    # Generate the 5000-image set first, then the 1000-image set.
    for target_dir, n_images in ((Path('data/images'), 5000), (Path('data/test_images'), 1000)):
        gen_data(output_dir=target_dir, n_captcha=n_images)