|
| 1 | +import sys |
| 2 | +import os |
| 3 | + |
class url_parser(object):
    """Split a URL string into host, directory path, resource and query parameters.

    The parser is deliberately simple and string-based. It understands URLs of
    the form ``[http://|https://]host/dir/.../name.ext?key=value&key2=value2``.

    Attributes set by the constructor:
        url:        the input URL with a leading http:// or https:// removed.
        host:       everything before the first "/" of the location part.
        uri_path:   the directory part, e.g. "/AI/docs" for
                    "www.deephex.com/AI/docs/index.html" (no trailing slash).
                    When the last path segment has no "." it is treated as a
                    directory and appended, e.g. "host/a/b" -> "/a/b" and
                    "host" or "host/" -> "/".
        res_name:   resource name without its extension ("index").
        res_type:   resource extension, taken after the LAST "." ("html").
        uri_params: dict mapping query parameter names to their string values;
                    for a repeated name the first occurrence wins.
    """

    # Class-level defaults for the immutable string fields. The parameter dict
    # is intentionally NOT declared here: a class-level dict would be shared
    # (and mutated) by every instance.
    url = ""
    host = ""
    uri_path = ""
    res_name = ""
    res_type = ""

    def __init__(self, url):
        """Parse *url* and populate the instance attributes.

        Args:
            url: URL string, with or without an http:// or https:// scheme.
        """
        # Fresh dict per instance so parameters never leak between parsers.
        self.uri_params = {}

        # Remove the scheme as a prefix. str.lstrip() would strip any leading
        # run of the characters h, t, p, ":" and "/", eating into the host.
        lowered = url.lower()
        for scheme in ("http://", "https://"):
            if lowered.startswith(scheme):
                url = url[len(scheme):]
                break
        self.url = url

        # Separate the query first so "/" inside a parameter value cannot be
        # mistaken for a path separator.
        location, _, query = self.url.partition("?")

        segs = location.split("/")
        self.host = segs[0]
        # With a single segment there is only a host and no resource at all.
        name = segs[-1] if len(segs) > 1 else ""
        self.uri_path = "".join("/" + seg for seg in segs[1:-1])

        if "." in name:
            # Split on the last dot so "jquery.min.js" has type "js".
            self.res_name, _, self.res_type = name.rpartition(".")
        else:
            self.res_name = ""
            self.res_type = ""
            # No extension: treat the last segment as part of the path.
            self.uri_path = self.uri_path + "/" + name

        # Extract parameters; partition keeps any further "=" in the value.
        for pair in query.split("&"):
            para_name, _, value = pair.partition("=")
            if para_name and para_name not in self.uri_params:
                self.uri_params[para_name] = value

    def get_host(self):
        """Return the host part of the URL."""
        return self.host

    def get_uri_path(self):
        """Return the directory path of the URL (see class docstring)."""
        return self.uri_path

    def get_res(self):
        """Return the resource as "name.type"; "." when there is no resource."""
        return self.res_name + "." + self.res_type

    def get_res_type(self):
        """Return the resource extension, or "" when there is none."""
        return self.res_type

    def get_params(self):
        """Return the dict of query parameters (the live dict, not a copy)."""
        return self.uri_params

    def show(self):
        """Print the parsed components to standard output."""
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("host = %s, uri_path = %s, resource=%s.%s" % (
            self.host, self.uri_path, self.res_name, self.res_type))
        for name in self.uri_params:
            print("\'%s\':%s" % (name, self.uri_params[name]))
| 77 | + |
def test_url_parser(argv):
    """Parse the URL given as the first command-line argument and print it.

    Args:
        argv: argument list in ``sys.argv`` layout; ``argv[1]`` is the URL.

    When no URL is supplied a usage line is written to stderr and the function
    returns without parsing, instead of failing with an IndexError.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "url_parser"
        sys.stderr.write("usage: %s <url>\n" % prog)
        return
    url_parser(argv[1]).show()


# Script entry point: run the manual check against the command-line arguments.
if __name__ == "__main__":
    test_url_parser(sys.argv)
0 commit comments