-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
252 lines (212 loc) · 6.12 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# -*- coding: utf-8 -*-
# import modules
import time
# import argparse
from argparse import (
ArgumentParser, RawTextHelpFormatter, SUPPRESS
)
# import utils
from utils import get_config_attrs, verify_date_argument, \
create_output_data_path
# SerpAPI collector
from serpapi_client import SerpAPICollector
# video downloader
from connections import VideoDownloader
def _help_formatter(prog):
    """Return the help formatter used by the TikSpyder argument parser.

    Args:
        prog: Program name, supplied by ``ArgumentParser`` when it
            instantiates its formatter.

    Returns:
        A ``RawTextHelpFormatter`` with a 2-space indent increment and the
        help column capped at position 52.
    """
    return RawTextHelpFormatter(
        prog,
        indent_increment=2,
        max_help_position=52,
        width=None
    )


def build_parser():
    """Build the TikSpyder command line parser.

    The options are organised in four groups: help, SerpAPI, Google advanced
    search, and optional parameters. Every value option uses an empty
    metavar, which leaves the value placeholder blank in usage/help output.

    Returns:
        The configured ``ArgumentParser``. Only ``--q`` is required.
    """
    # add_help=False: the help flag is registered manually below so that it
    # appears under its own 'Help options' group.
    parser = ArgumentParser(
        prog='TikSpyder',
        description='Command Line Arguments.',
        formatter_class=_help_formatter,
        add_help=False
    )

    # help arguments
    help_arguments = parser.add_argument_group('Help options')
    help_arguments.add_argument(
        '-h',
        '--help',
        action='help',
        default=SUPPRESS,
        help='Show this help message and exit.'
    )

    # SerpAPI arguments
    serpapi_arguments = parser.add_argument_group('SerpAPI options')

    # query
    serpapi_arguments.add_argument(
        '--q',
        type=str,
        required=True,
        metavar='',
        help='The search term or phrase for which to retrieve TikTok data.'
    )

    # user
    serpapi_arguments.add_argument(
        '--user',
        type=str,
        required=False,
        metavar='',
        help='Specify a TikTok user to search for videos from.'
    )

    # google domain
    serpapi_arguments.add_argument(
        '--google-domain',
        type=str,
        required=False,
        default='google.com',
        metavar='',
        help='Defines the Google domain to use. It defaults to google.com.'
    )

    # gl > country
    serpapi_arguments.add_argument(
        '--gl',
        type=str,
        required=False,
        metavar='',
        help=(
            "Defines the country to use for the search. Two-letter country "
            "code."
        )
    )

    # hl > language
    serpapi_arguments.add_argument(
        '--hl',
        type=str,
        required=False,
        metavar='',
        help=(
            "Defines the language to use for the search. Two-letter language "
            "code."
        )
    )

    # cr > multiple countries
    serpapi_arguments.add_argument(
        '--cr',
        type=str,
        required=False,
        metavar='',
        help='Defines one or multiple countries to limit the search to.'
    )

    # lr > one or multiple languages
    serpapi_arguments.add_argument(
        '--lr',
        type=str,
        required=False,
        metavar='',
        help='Defines one or multiple languages to limit the search to.'
    )

    # depth > defines number of iterations for related content
    serpapi_arguments.add_argument(
        '--depth',
        type=int,
        required=False,
        default=3,
        metavar='',
        help='Depth of iterations to follow related content links.'
    )

    # Google advanced search arguments
    google_advanced_search_arguments = parser.add_argument_group(
        'Google advanced search options'
    )

    # search for posts before a given date
    google_advanced_search_arguments.add_argument(
        '--before',
        type=str,
        required=False,
        metavar='',
        help=(
            "Limit results to posts published before the specified date. "
            "Format: YYYY-MM-DD."
        )
    )

    # search for posts after a given date
    google_advanced_search_arguments.add_argument(
        '--after',
        type=str,
        required=False,
        metavar='',
        help=(
            "Limit results to posts published after the specified date. "
            "Format: YYYY-MM-DD."
        )
    )

    # optional arguments
    optional_arguments = parser.add_argument_group(
        'Optional arguments and parameters'
    )

    # output: the default is a directory named after the current Unix
    # timestamp, evaluated when the parser is built
    optional_arguments.add_argument(
        '-o',
        '--output',
        type=str,
        required=False,
        default=f'./data/{int(time.time())}',
        metavar='',
        help=(
            "Specify the output directory path. If not provided, data is "
            "saved in a timestamped subdirectory within the './data/' "
            "directory."
        )
    )

    # max workers > maximum number of threads
    optional_arguments.add_argument(
        '-w',
        '--max-workers',
        type=int,
        required=False,
        metavar='',
        help=(
            "Specify the maximum number of threads to use for downloading "
            "TikTok videos."
        )
    )

    # download TikTok results
    optional_arguments.add_argument(
        '-d',
        '--download',
        action='store_true',
        required=False,
        help='Specify whether to download TikTok videos from SerpAPI response.'
    )

    return parser


def _log_event(label):
    """Print a timestamped '> <label> program at: <time>' banner.

    Args:
        label: Leading word of the banner, e.g. ``'Starting'``.
    """
    # split/join collapses whitespace runs, including the double space that
    # time.ctime() emits before single-digit days.
    message = ' '.join(f'> {label} program at: {time.ctime()}'.split())
    print('\n\n' + message)


def main(argv=None):
    """Parse the command line, collect SerpAPI data and optionally download.

    Args:
        argv: Optional list of command line tokens. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.
    """
    # parse arguments
    args = vars(build_parser().parse_args(argv))

    # merging SerpAPI configuration attrs with the existing arguments;
    # on a key collision the configuration value replaces the CLI value
    args = {**args, **get_config_attrs()}

    # verify provided dates
    for date_key in ('before', 'after'):
        if args[date_key] is not None:
            verify_date_argument(args, date_key)

    # Start process
    _log_event('Starting')

    # create the output data path if not exists
    output = args['output']
    create_output_data_path(output)

    # SerpAPICollector instance
    serp_api_collector = SerpAPICollector(args=args)

    # SerpAPI call
    serp_api_collector.collect_search_data()

    # read SQL database and generate a csv file
    serp_api_collector.generate_data_files()

    # download videos
    if args['download']:
        # NOTE(review): 'max_workers' is parsed but only 'output' is passed
        # to VideoDownloader here - confirm it is consumed elsewhere.
        downloader = VideoDownloader(output=output)

        # get tiktok urls
        collected_videos = serp_api_collector.get_collected_videos()

        # start download
        downloader.start_download(urls=collected_videos)

    # End process
    _log_event('Ending')


if __name__ == '__main__':
    main()