Skip to content

Commit

Permalink
Merge pull request #70 from PeARSearch/cli-indexing
Browse files: browse the repository at this point in the history
Cli indexing
  • Loading branch information
minimalparts authored Feb 1, 2025
2 parents e4f896c + a5066cf commit f1ebbb4
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
9 changes: 8 additions & 1 deletion app/cli/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,14 @@ def index(host_url, filepath):
users = User.query.all()
for user in users:
Path(join(pod_dir,user.username)).mkdir(parents=True, exist_ok=True)
run_indexer_url(filepath, host_url)
with open(filepath, encoding="utf-8") as f:
for line in f:
m = re.match(r"^(.+?);(.+?);;(.+?)$", line)
assert m, "URL file is not formatted correctly!"
url = m.group(1)
pod = m.group(2)
user = m.group(3)
run_indexer_url(url, pod, None, user, host_url)


@pears.cli.command('randomcrawl')
Expand Down
4 changes: 0 additions & 4 deletions app/indexer/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,6 @@ def run_indexer_url(url, theme, note, contributor, host_url):
representations that include entries in the positional
index as well as vectors. A new entry is also
added to the database.
Arguments: the file containing the information provided
by the user about the URL to index (auto-generated by
'index_from_url') and the host name.
"""
print(">> INDEXER: run_indexer_url: Running indexer over suggested URL.")
messages = []
Expand Down

0 comments on commit f1ebbb4

Please sign in to comment.