forked from DingLi23/s2search
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy paths2search_score.py
75 lines (64 loc) · 3.17 KB
/
s2search_score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from s2search.rank import S2Ranker
import os
import json
import numpy as np
from pathlib import Path
# data_dir = './s2search_data'
# Directory handed to S2Ranker by default.
s2_dir = './s2search_data'

# Directory tree that is walked for .json inputs when the script runs.
root_dir = '/Users/yinnnyou/workspace/XAI_PROJECT/data_process/masking'

# Feature names matched (as substrings) against input file names.
features = [
    'title',
    'abstract',
    'venue',
    'authors',
    'year',
    'n_citations',
    'full',
]

# A single hand-written paper record, kept for manual experiments.
papers_example = [
    {
        'title': 'Jumping NLP Curves: A Review of Natural Language Processing Research',
        'abstract': (
            'Natural language processing (NLP) is a theory-motivated range of computational techniques for '
            'the automatic analysis and representation of human language. NLP research has evolved from the '
            'era of punch cards and batch processing (in which the analysis of a sentence could take up to 7 '
            'minutes) to the era of Google and the likes of it (in which millions of webpages can be '
            'processed in less than a second). This review paper draws on recent developments in NLP research '
            'to look at the past, present, and future of NLP technology in a new light. Borrowing the '
            'paradigm of jumping curves from the field of business management and marketing prediction, '
            'this survey article reinterprets the evolution of NLP research as the intersection of three '
            'overlapping curves-namely Syntactics, Semantics, and Pragmatics Curveswhich will eventually lead '
            'NLP research to evolve into natural language understanding.'
        ),
        'venue': 'IEEE Computational intelligence ',
        'authors': ['E Cambria', 'B White'],
        'year': 2014,
        'n_citations': 900,
    },
]
# Rankers that have already been constructed, keyed by the ``file`` argument.
# Reusing them avoids building a new S2Ranker for every scored file.
_S2_RANKER_CACHE = {}


def S2_Rank(related_keywords, paper_dict_list, file=s2_dir):
    """Score a list of papers against a query using an ``S2Ranker``.

    Args:
        related_keywords: Query, passed as the first argument to
            ``S2Ranker.score``.
        paper_dict_list: Paper records, passed as the second argument to
            ``S2Ranker.score``.
        file: Value handed to the ``S2Ranker`` constructor (defaults to
            ``s2_dir``). Must be hashable, since it is used as the cache key.

    Returns:
        Whatever ``S2Ranker.score`` returns for the query and papers.
    """
    ranker = _S2_RANKER_CACHE.get(file)
    if ranker is None:
        # NOTE(review): construction presumably loads model data from
        # ``file`` and is expensive; confirm against the s2search package.
        ranker = S2Ranker(file)
        _S2_RANKER_CACHE[file] = ranker
    return ranker.score(related_keywords, paper_dict_list)
def S2_open_json(path, query='machine learning'):
    """Read a JSON Lines file of papers and score them with ``S2_Rank``.

    Each non-blank line of the file is parsed as one JSON document. Blank
    lines (for example a trailing empty line at the end of the file) are
    skipped instead of raising a decode error.

    Args:
        path: Path of the file to read; one JSON object per line.
        query: Query string forwarded to ``S2_Rank``. Defaults to
            ``'machine learning'``, the value that was previously hard-coded.

    Returns:
        The result of ``S2_Rank(query, papers, s2_dir)``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    papers = []
    # Read as UTF-8 explicitly rather than relying on the platform default.
    with open(path, encoding='utf-8') as f:
        for raw_line in f:
            record = raw_line.strip()
            if not record:
                continue
            # strict=False permits control characters inside JSON strings.
            papers.append(json.loads(record, strict=False))
    return S2_Rank(query, papers, s2_dir)
def S2_save_score_as_np(s2score, feature):
    """Save ``s2score`` with ``np.save`` under the name ``"score" + feature``.

    The target directory is the ``DATA_DIR`` environment variable. When that
    variable is not set, it is set (for the rest of the process) to the
    directory containing this file, and that directory is used.

    Args:
        s2score: Data to store; passed unchanged to ``np.save``.
        feature: Suffix appended to ``"score"`` to form the file name.
    """
    script_dir = str(Path(__file__).resolve().parent)
    # setdefault returns the existing value, or installs and returns ours.
    target_dir = os.environ.setdefault("DATA_DIR", script_dir)
    np.save(os.path.join(target_dir, "score" + feature), s2score)
def S2_get_score(root_dir):
    """Score every feature-named ``.json`` file found under ``root_dir``.

    The tree is walked with ``os.walk``. For each file whose name ends in
    ``.json``, every entry of ``features`` that occurs as a substring of the
    file name triggers one scoring run: the file is passed to
    ``S2_open_json``, the result is converted to a NumPy array, printed, and
    stored via ``S2_save_score_as_np`` under that feature name. A file whose
    name contains several feature names is therefore processed once per
    matching feature.

    Args:
        root_dir: Directory to search recursively.

    Returns:
        None.
    """
    for folder, _subdirs, filenames in os.walk(root_dir):
        for filename in filenames:
            if not filename.endswith(".json"):
                continue
            json_path = os.path.join(folder, filename)
            for feature in features:
                if feature not in filename:
                    continue
                print(json_path)
                scores = np.array(S2_open_json(json_path))
                print(scores)
                S2_save_score_as_np(scores, feature)
# Script entry: this call sits at module level, so the walk over root_dir
# starts whenever this file is executed (or imported).
S2_get_score(root_dir)
# print(S2_Rank('NLP', papers_example, s2_dir))
# score = np.load('/Users/ayuee/Documents/GitHub/XAI_PROJECT/data_process/masking/full_Score.npy')
# print(score, np.shape(score))