Skip to content

Commit 611cf11

Browse files
chunpingchunping
authored and committed
add url_parser
1 parent c8c2c4f commit 611cf11

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed

src/url_parser.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import sys
2+
import os
3+
4+
class url_parser(object):
    """Split a URL string into host, path, resource name/type and parameters.

    This is a small hand-rolled splitter, not a full RFC 3986 parser: it
    removes a leading ``http://`` or ``https://``, treats everything up to the
    first ``/`` as the host, and does not handle ports, credentials or
    ``#fragment`` parts specially.

    Attributes:
        url: The input URL with its scheme prefix removed.
        host: Text before the first ``/`` (and before any ``?``),
            e.g. ``"www.deephex.com"``.
        uri_path: Directory part of the path without a trailing slash. For
            ``"www.deephex.com/AI/docs/index.html"`` it is ``"/AI/docs"``.
            When the last path segment contains no ``.`` it is not treated as
            a resource and is appended to the path instead
            (``"www.deephex.com/AI/docs"`` -> ``"/AI/docs"``; a bare host
            gives ``"/"``).
        res_name: Resource name without its type; for ``"index.html"`` this
            is ``"index"``. Empty when the URL names no resource.
        res_type: Text after the last ``.`` of the resource; for
            ``"index.html"`` this is ``"html"``.
        uri_params: Dict of query parameters. For
            ``"www.deephex.com/AI/request.jpg?id=1&name=AIstory&type=pdf"`` it
            is ``{'id': '1', 'name': 'AIstory', 'type': 'pdf'}``. Values are
            always strings; when a name repeats, the first occurrence wins.
    """

    # Scheme prefixes removed from the front of the URL before parsing.
    _SCHEMES = ("http://", "https://")

    def __init__(self, url):
        """Parse *url* and populate the instance attributes.

        Args:
            url: URL string, with or without a leading ``http://`` or
                ``https://``.
        """
        # str.lstrip() strips a *set of characters*, not a prefix, so it would
        # also eat leading 'h', 't', 'p', ':' and '/' characters of the host.
        # Remove the scheme as an exact prefix instead.
        for scheme in self._SCHEMES:
            if url.startswith(scheme):
                url = url[len(scheme):]
                break
        self.url = url

        # A fresh dict per instance: a dict stored on the class would be
        # shared (and mutated) by every parser object.
        self.uri_params = {}

        # Cut the query string off first so that "/" or "." inside a
        # parameter value cannot be mistaken for path or extension separators.
        location, _, query = url.partition("?")

        segments = location.split("/")
        self.host = segments[0]
        if len(segments) > 1:
            directories = segments[1:-1]
            name = segments[-1]
        else:
            # No "/" at all: the only segment is the host, not a resource.
            directories = []
            name = ""
        self.uri_path = "".join("/" + part for part in directories)

        if "." in name:
            # Split on the LAST dot so "archive.tar.gz" keeps its full name
            # ("archive.tar" + "gz") and get_res() can rebuild it losslessly.
            self.res_name, _, self.res_type = name.rpartition(".")
        else:
            # No extension: treat the last segment as part of the path.
            self.res_name = ""
            self.res_type = ""
            self.uri_path = self.uri_path + "/" + name

        # Extract parameters; partition keeps any further "=" in the value.
        for pair in query.split("&"):
            para_name, _, value = pair.partition("=")
            if para_name and para_name not in self.uri_params:
                self.uri_params[para_name] = value

    def get_host(self):
        """Return the host part of the URL."""
        return self.host

    def get_uri_path(self):
        """Return the directory path of the URL (see ``uri_path``)."""
        return self.uri_path

    def get_res(self):
        """Return ``res_name + "." + res_type``.

        Note that this is ``"."`` when the URL names no resource.
        """
        return self.res_name + "." + self.res_type

    def get_res_type(self):
        """Return the resource type (extension), or ``""`` if there is none."""
        return self.res_type

    def get_params(self):
        """Return the dict of query parameters (the live dict, not a copy)."""
        return self.uri_params

    def show(self):
        """Print the parsed components and every query parameter to stdout."""
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3.
        print("host = %s, uri_path = %s, resource=%s.%s" % (
            self.host, self.uri_path, self.res_name, self.res_type))
        for name in self.uri_params:
            print("\'%s\':%s" % (name, self.uri_params[name]))
77+
78+
def test_url_parser(argv):
    """Parse the URL given on the command line and print its components.

    Args:
        argv: Command-line argument list in ``sys.argv`` form; ``argv[1]`` is
            the URL to parse.

    When no URL is supplied, a usage line is written to stderr and the
    function returns without parsing (instead of raising ``IndexError``).
    """
    if len(argv) < 2:
        program = argv[0] if argv else "url_parser.py"
        sys.stderr.write("usage: %s <url>\n" % program)
        return
    url_result = url_parser(argv[1])
    url_result.show()
81+
82+
# Script entry point: when run directly (not imported), hand the raw
# command-line arguments to the demo driver.
if __name__ == "__main__":
    test_url_parser(sys.argv)

0 commit comments

Comments
 (0)