-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_generic.py
95 lines (79 loc) · 2.95 KB
/
_generic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# MIT License
# Copyright 2025 @asyncze (Michael Sjöberg)
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# 0 WHITESPACE
# 1 DEFAULT
# 2 KEYWORD
# 3 CLASS
# 4 NAME
# 5 PARAMETER
# 6 LAMBDA
# 7 STRING
# 8 NUMBER
# 9 OPERATOR
# 10 COMMENT
# 11 SPECIAL
# 12 CONDITIONAL
# 13 BUILT_IN
# 14 ERROR
# 15 WARNING
# 16 SUCCESS
from main import TOKEN_MAP # token map is same for all lexers
from pygments import lex
from pygments.lexers import get_lexer_for_filename
from pygments.token import Token
# https://pygments.org/docs/tokens/#module-pygments.token
# Pygments token type -> numeric token id (see the id legend at the top of
# this file). Pygments token types are hierarchical, and Lexer.tokenize
# resolves a type that is not listed here through its ancestors, so a
# family-level entry (e.g. Token.Comment) also covers every subtype that has
# no more specific entry of its own.
TOKEN_MAP_PYGMENTS = {
    Token.Text.Whitespace: 0,
    Token.Text: 1,
    Token.Punctuation: 1,
    Token.Generic: 1,
    Token.Other: 1,
    Token.Keyword: 2,
    Token.Keyword.Declaration: 2,
    Token.Keyword.Namespace: 2,
    Token.Keyword.Reserved: 2,
    Token.Name.Class: 3,
    Token.Name.Function: 4,
    Token.Name.Property: 5,
    Token.Literal: 7,
    Token.Literal.String: 7,
    Token.Literal.String.Single: 7,
    Token.Literal.String.Double: 7,
    Token.Literal.Number: 8,
    Token.Literal.Number.Integer: 8,
    Token.Literal.Number.Float: 8,
    Token.Operator: 9,
    Token.Literal.Date: 9,
    Token.Comment: 10,
    Token.Comment.Single: 10,
    Token.Comment.Multiline: 10,
    Token.Keyword.Type: 11,
    # This key used to be listed twice (first as 2, then as 12). A dict
    # literal keeps only the last duplicate, so 12 was always the effective
    # value; the dead first entry has been dropped.
    Token.Keyword.Constant: 12,
    Token.Name.Builtin: 13,
    Token.Error: 14,
}

# Id used when neither a token type nor any of its ancestors is mapped
# (1 == DEFAULT in the legend).
_DEFAULT_TOKEN_ID = 1


def _resolve_token_id(ttype):
    """Return the numeric token id for a Pygments token type.

    The type itself is tried first, then each parent in turn (for example
    Token.Literal.String.Doc -> Token.Literal.String -> Token.Literal), so
    subtypes inherit the id of their nearest mapped ancestor. Falls back to
    _DEFAULT_TOKEN_ID when nothing along the chain is mapped.
    """
    while ttype is not None:
        token_id = TOKEN_MAP_PYGMENTS.get(ttype)
        # Compare against None explicitly: 0 (WHITESPACE) is a valid id.
        if token_id is not None:
            return token_id
        ttype = ttype.parent  # the root Token type has parent None
    return _DEFAULT_TOKEN_ID


class Lexer:
    """Fallback lexer that delegates tokenizing to a Pygments lexer."""

    def __init__(self, filename):
        """Select the Pygments lexer that matches *filename*.

        get_lexer_for_filename raises pygments.util.ClassNotFound when no
        lexer matches; that exception is not handled here.
        """
        self.lexer = get_lexer_for_filename(filename)

    def comment_char(self):
        """Return the line-comment prefix; currently always an empty string."""
        return ""  # todo : create map for comment injection in common un-supported programming languages

    def lexer_name(self):
        """Return the Pygments lexer's name wrapped in angle brackets."""
        return f"<{ self.lexer.name }>"

    def tokenize(self, text):
        """Tokenize *text* into a list of (token_type, start_pos, value).

        token_type is str(TOKEN_MAP[id]), where id comes from
        TOKEN_MAP_PYGMENTS via the token's type or its nearest mapped
        ancestor (DEFAULT when none is mapped). start_pos is the index
        Pygments reports for the token and value is the token's text.
        """
        return [
            (str(TOKEN_MAP[_resolve_token_id(ttype)]), int(start), str(value))
            for start, ttype, value in self.lexer.get_tokens_unprocessed(text)
        ]