-
Notifications
You must be signed in to change notification settings - Fork 0
/
rawlist.py
128 lines (109 loc) · 3.4 KB
/
rawlist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import re
import urllib2
# Pattern for a single menu grid cell (<td class="menugridcell...> ... </td>).
# re.S lets ".*?" span the newlines inside a cell.
# NOTE(review): `p` is not referenced again in the visible file.
p = re.compile("<td class=\"menugridcell.*?</td>", re.S)

# Download the menu page. The response is closed in `finally` so the
# underlying connection is released even if read() raises.
response = urllib2.urlopen("http://menu.ha.ucla.edu/foodpro/default.asp?location=07")
try:
    page_source = response.read()
finally:
    response.close()
# Get a list of all grid cells. The cells are interleaved by meal: consecutive
# cells cycle through the meal periods in order, so each meal's cells are
# picked out with a strided slice.
menuGridCellItemP = re.compile("<td class=\"menugridcell.*?</td>", re.S)
menuGridCellList = menuGridCellItemP.findall(page_source)

breakfastGrids = []
lunchGrids = []
dinnerGrids = []
brunchGrids = []  # for weekends

# Only two layouts are recognised; any other cell count leaves every list empty.
if len(menuGridCellList) == 27:
    # Three meals per cycle: breakfast, lunch, dinner, breakfast, ...
    breakfastGrids = menuGridCellList[0::3]
    lunchGrids = menuGridCellList[1::3]
    dinnerGrids = menuGridCellList[2::3]
elif len(menuGridCellList) == 18:
    # Two meals per cycle: brunch, dinner, brunch, ...
    brunchGrids = menuGridCellList[0::2]
    dinnerGrids = menuGridCellList[1::2]
#for index in range(0, len(breakfastGrids)):
# print breakfastGrids[index]
# print lunchGrids[index]
# print dinnerGrids[index]
# print("----------")
def parseMeal(mealData): #parse the meal grids into readable data
    """Extract menu item names and kitchen titles from raw grid-cell HTML.

    mealData is an iterable of HTML strings (the grid cells belonging to one
    meal); the strings are concatenated before being scanned.

    Returns a list of strings in document order:
      * menu items: the text between the ">" that ends an opening tag
        carrying an onmouseover attribute and the following "</a>";
      * kitchen titles: the contents of <li class="categoryN">...</li>,
        wrapped as ' { "title": <text> }' (the text is inserted as-is,
        neither quoted nor escaped).
    """
    # First pass: keep only the fragments of interest, dropping all other HTML.
    rawItemP = re.compile(r'onmouseover=.*?>.*?</a>|<li class="category\d">.*?</li>', re.S)
    # Second pass: capture just the text. Group 1 is a menu item, group 2 a
    # kitchen title; exactly one of them is set for any match.
    refinedItemP = re.compile(r'>(.*?)</a>|"category\d">(.*?)</li>', re.S)

    rawItems = rawItemP.findall("".join(mealData))

    finalList = []
    for match in refinedItemP.finditer("".join(rawItems)):
        itemName, kitchenTitle = match.groups()
        # Decide by which alternative matched rather than by searching for
        # "</a>": a kitchen title that itself contains a link would otherwise
        # be treated as a menu item and come out mangled.
        if itemName is not None:
            finalList.append(itemName)
        else:
            finalList.append(" { \"title\": " + kitchenTitle + " }")
    return finalList
def _printMeal(label, grids):
    """Parse one meal's grid cells, print a banner and each entry, return the parsed list."""
    parsed = parseMeal(grids)
    print("!!!!!!!!!!!!!!PRINTING FINAL " + label + " LIST!!!!!!!!!")
    for item in parsed:
        # Parenthesised single-argument form prints the same on Python 2 and
        # matches the banner call above.
        print(item)
    return parsed


# Each *Grids name is rebound from raw HTML cells to its parsed entries.
if len(menuGridCellList) == 27:
    breakfastGrids = _printMeal("BREAKFAST", breakfastGrids)
    lunchGrids = _printMeal("LUNCH", lunchGrids)
    dinnerGrids = _printMeal("DINNER", dinnerGrids)
elif len(menuGridCellList) == 18:
    brunchGrids = _printMeal("BRUNCH", brunchGrids)
    dinnerGrids = _printMeal("DINNER", dinnerGrids)