-
Notifications
You must be signed in to change notification settings - Fork 1
/
conneg.py
273 lines (239 loc) · 8.19 KB
/
conneg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Helper class used for content negotiation
Copyright: (c) 2012 by Anusha Ranganathan.
"""
from datetime import datetime
from diskMonitorConfig import FORMATS_SERVED
def skipws(next):
    """Decorate a token-producing callable so it skips one whitespace token.

    The returned wrapper calls ``next`` with whatever positional arguments it
    receives. If the token that comes back consists solely of whitespace
    (``str.isspace()``), it is discarded and ``next`` is called once more;
    that second token is returned unconditionally.

    :param next: callable returning a string token. Any exception it raises
        (for example ``StopIteration`` when it is exhausted) propagates
        unchanged through the wrapper.
    :returns: a wrapper accepting the same positional arguments as ``next``.
    """
    # Imported locally so this helper does not rely on the file's import block.
    import functools

    @functools.wraps(next)  # keep the wrapped callable's name and docstring
    def skipping_next(*args):
        tok = next(*args)
        if tok.isspace():
            # Exactly one token is skipped; the follow-up token is returned
            # as-is, even if it happens to be whitespace as well.
            tok = next(*args)
        return tok

    return skipping_next
class ParseError(Exception):
    """Raised by the parser when it reads a token other than the expected one."""
class MiniLex(object):
    """Minimal tokenizer for header-like strings such as HTTP ``Accept`` values.

    The input is split into runs of plain text, runs of whitespace, single
    separator characters and quoted strings (returned including their
    quotes). ``next`` skips a whitespace token, so callers normally only see
    the other three kinds.

    The object is its own iterator and supports both the Python 2 (``next``)
    and Python 3 (``__next__``) iterator protocols.

    :param data: the string to tokenize.
    :param whitespace: characters that form whitespace runs.
    :param sep: characters that are each returned as a one-character token.
    :param quotes: characters that open and close quoted text.
    :param eof: characters that end tokenizing when met outside quoted text.
    """

    def __init__(self, data,
                 whitespace=" \t",
                 sep="[](){}<>\\/@:;,?=",
                 quotes="\"",
                 eof="\n\r"):
        self.data = data
        self.whitespace = whitespace
        self.separators = sep
        self.quotes = quotes
        self.eof = eof
        # 0 = neutral, 1 = inside a whitespace run,
        # 2 = inside quoted text, 3 = inside plain text
        self.state = 0
        # Characters collected so far for the token being built.
        self.token = []
        # The quote character that opened the current quoted text, or ''.
        self.quoted = ''
        # Index into self.data of the next character to examine.
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        """Return the next token, skipping one whitespace token if present.

        Equivalent to wrapping the raw tokenizer with the module's ``skipws``
        decorator; done inline so the class stands on its own.

        :raises StopIteration: when the input is exhausted.
        """
        tok = self._next_raw()
        if tok.isspace():
            tok = self._next_raw()
        return tok

    # Python 3 iterator protocol.
    __next__ = next

    def _flush(self):
        """Return the pending token and clear it; raise StopIteration if none."""
        if not self.token:
            raise StopIteration
        tok = ''.join(self.token)
        self.token = []
        return tok

    def _next_raw(self):
        """Return the next token, whitespace runs included.

        :raises StopIteration: when no token is left.
        """
        while True:
            if self.pos == len(self.data):
                return self._flush()

            char = self.data[self.pos]
            tok = ''

            if self.quoted and char not in self.quotes:
                # Inside quoted text everything but a quote is taken verbatim.
                self.token.append(char)
                self.pos += 1

            elif char in self.quotes:
                if char == self.quoted:
                    # The quote that opened the text: it closes the token
                    # unless the preceding character is a backslash.
                    escaped = self.data[self.pos - 1] == "\\"
                    self.token.append(char)
                    self.pos += 1
                    if not escaped:
                        tok = ''.join(self.token)
                        self.token = []
                        self.quoted = ''
                        self.state = 0
                        return tok
                elif self.quoted:
                    # A different quote character inside quoted text.
                    self.token.append(char)
                    self.pos += 1
                else:
                    # Begin quoted text; hand back whatever was pending first.
                    if self.token:
                        tok = ''.join(self.token)
                    self.quoted = char
                    self.token = [char]
                    self.pos += 1
                    self.state = 2
                    if tok:
                        return tok

            elif char in self.whitespace:
                if self.state == 1:
                    self.token.append(char)
                else:
                    # A whitespace run starts; the previous token is complete.
                    if self.token:
                        tok = ''.join(self.token)
                    self.state = 1
                    self.token = [char]
                self.pos += 1
                if tok:
                    return tok

            elif char in self.separators:
                # Separators are never joined. If a token is pending, return
                # it now and leave the separator for the following call.
                if self.token:
                    tok = ''.join(self.token)
                else:
                    tok = char
                    self.pos += 1
                self.token = []
                self.state = 0
                return tok

            elif char in self.eof:
                # The position is deliberately not advanced: once an eof
                # character is reached every later call stops too. The
                # pending token must be cleared, otherwise it would be
                # returned again on every subsequent call.
                return self._flush()

            else:
                if self.state == 3:
                    self.token.append(char)
                else:
                    # Plain text starts; the previous token is complete.
                    if self.token:
                        tok = ''.join(self.token)
                    self.token = [char]
                    self.state = 3
                self.pos += 1
                if tok:
                    return tok
class MimeType(object):
    """A media range: major type, minor type, parameters and a q-value."""

    def __init__(self, m1="", m2=""):
        self.qval = 1.0
        self.params = {}
        self.mimetype1 = m1
        self.mimetype2 = m2

    def __str__(self):
        """Serialize as ``major/minor`` followed by any ``;key=value`` pairs.

        The q-value is intentionally left out of the serialization.
        """
        head = "/".join((self.mimetype1, self.mimetype2))
        pairs = ["%s=%s" % item for item in self.params.items()]
        if not pairs:
            return head
        return head + ";" + ";".join(pairs)

    def __repr__(self):
        return "<MimeType: %s>" % (self,)

    def sort2(self):
        """Return a specificity rank; larger means more specific.

        ``*/...`` ranks 0, ``major/*`` ranks 1, a concrete type ranks 2 plus
        the number of parameters it carries.
        """
        if self.mimetype1 == "*":
            return 0
        if self.mimetype2 == "*":
            return 1
        extra = len(self.params) if self.params else 0
        return 2 + extra

    def matches(self, other):
        """Tell whether this media range and ``other`` are compatible.

        Major and minor types must each be equal or have ``*`` on either
        side, and the parameter dictionaries must be equal.
        """
        major_ok = (other.mimetype1 == self.mimetype1
                    or other.mimetype1 == '*'
                    or self.mimetype1 == '*')
        if not major_ok:
            return False
        minor_ok = (other.mimetype2 == self.mimetype2
                    or other.mimetype2 == '*'
                    or self.mimetype2 == '*')
        if not minor_ok:
            return False
        if other.params == self.params:
            return True
        return False
class Parser(object):
    """Turn the tokens of a lexer into a list of ``MimeType`` objects.

    The expected token sequence per entry is::

        type "/" subtype { ";" key "=" value } [ "," ]

    :param ml: a lexer exposing a ``next()`` method that returns string
        tokens and raises ``StopIteration`` when exhausted.
    """

    def __init__(self, ml):
        self.ml = ml

    def process(self):
        """Parse every entry from the lexer.

        :returns: list of ``MimeType`` objects in input order.
        :raises ParseError: on an unexpected token or truncated input.
        :raises ValueError: if a ``q`` parameter is not a valid float.
        """
        mts = []
        mt = self.top()
        while mt is not None:
            if mt.mimetype1 == "*" and mt.mimetype2 == "*" and mt.qval == 1.0:
                # downgrade anything to the lowest, otherwise behaviour is
                # non deterministic. See apache conneg rules.
                mt.qval = 0.001
            mts.append(mt)
            mt = self.top()
        return mts

    def _require(self, expected):
        """Return the next token; running out of input is a ParseError.

        Without this, ``StopIteration`` from the lexer would escape to the
        caller on truncated input such as ``"text"`` or ``"text/html;q"``.
        """
        try:
            return self.ml.next()
        except StopIteration:
            raise ParseError("Expected %s, got end of input" % expected)

    def top(self):
        """Parse one entry.

        :returns: a ``MimeType``, or ``None`` when the lexer has no tokens
            left at the start of an entry.
        :raises ParseError: on an unexpected token or truncated input.
        """
        try:
            tok = self.ml.next()  # e.g. "text"
        except StopIteration:
            return None
        mt = MimeType()
        mt.mimetype1 = tok

        sl = self._require("/")
        if sl != "/":
            raise ParseError("Expected /, got: " + sl)
        mt.mimetype2 = self._require("a subtype")  # e.g. "html"

        while True:
            try:
                tok = self.ml.next()
            except StopIteration:
                # End of input right after a complete entry is fine.
                return mt
            if tok == ',':
                return mt
            elif tok == ';':
                (key, val) = self.param()
                if key == "q":
                    # float() raises ValueError for a non-numeric q-value.
                    mt.qval = float(val)
                else:
                    mt.params[key] = val
            else:
                raise ParseError("Expected , or ; got: %r" % tok)

    def param(self):
        """Parse ``key "=" value`` and return it as a ``(key, value)`` tuple.

        :raises ParseError: if ``=`` is missing or the input ends early.
        """
        key = self._require("a parameter name")
        eq = self._require("=")
        if eq != "=":
            raise ParseError("Expected =, got: " + eq)
        val = self._require("a parameter value")
        return (key, val)
def best(client, server):
    """Pick the client media type the server can satisfy.

    Steps through the client's requested types in order and returns the
    first one that any server type matches.

    :param client: list of mt objects, expected to be sorted by qval
        already (most wanted first).
    :param server: list of mt objects the server can provide; may be
        unsorted. Each must provide ``matches(other)``.
    :returns: the matching entry from ``client``, or ``None`` if nothing
        acceptable matches.

    AFAICT, if the request has any params, they MUST be honored, so if
    there are params and no exact match, the entry is discarded. Hence
    ``*/*;params`` means that the params must be matched.
    """
    for mtc in client:
        # A q-value of 0 means "not acceptable", so such an entry must never
        # be chosen even when the server could provide it. Objects without a
        # qval attribute are treated as fully acceptable.
        if getattr(mtc, "qval", 1.0) <= 0:
            continue
        # this is most wanted, can we provide?
        if any(mts.matches(mtc) for mts in server):
            return mtc
    return None
def parse(data):
    """Parse an Accept-style header into a preference-ordered list.

    :param data: the header value as a string; ``None`` or an empty string
        yields an empty list.
    :returns: list of ``MimeType`` objects, highest q-value first; entries
        with equal q-value are ordered most specific first (``sort2``).
    :raises ParseError: if the header is malformed.
    """
    if not data:
        return []

    mts = Parser(MiniLex(data)).process()
    if not mts:
        # Nothing was parsed (e.g. whitespace only); indexing mts[-1] below
        # would raise IndexError.
        return mts

    # Accept headers added using javascript are appended to the end of the
    # list of default accept headers. This behaviour was observed in
    # Opera 9.80, Chrome 10.0, MSIE 7.0 and MSIE 8.0. In Firefox 3.6.14 and
    # Firefox 3.6.15, only the new headers set in ajax are sent.
    # See doc accessLogEWithHeaderInfo_2011_03_16.
    # So the last accept header is moved to the front when it is one of the
    # formats served (FORMATS_SERVED is presumably a collection of
    # lower-case mime type strings - confirm in diskMonitorConfig).
    if str(mts[-1]).lower() in FORMATS_SERVED:
        mts.insert(0, mts.pop())

    # Two stable passes: specificity first, then q-value, so q-value is the
    # primary key and specificity breaks ties.
    mts.sort(key=lambda mt: mt.sort2(), reverse=True)
    mts.sort(key=lambda mt: mt.qval, reverse=True)
    return mts
if __name__ == '__main__':
    # Manual smoke run: rank a sample client header by q-value (ties broken
    # by specificity), then pick the best match against a sample list of
    # server-side types. The result is left in ``b``.
    ml = MiniLex("text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
                 "text/html;level=2;q=0.4, */*;q=0.2")
    p = Parser(ml)
    mts = p.process()
    for rank in (lambda mt: mt.sort2(), lambda mt: mt.qval):
        # Stable sorts applied in this order make qval the primary key.
        mts.sort(key=rank, reverse=True)

    ml2 = MiniLex("text/xhtml+xml, text/xml, application/atom+xml, "
                  "text/html;level=2")
    p2 = Parser(ml2)
    mts2 = p2.process()

    b = best(mts, mts2)