99import glob
1010import os
1111import os .path as osp
12+ import re
1213import sys
1314
1415import mmcv
16+ from lxml import etree
1517
# Directory three levels above this file; used as the repository root
# (assumed to resolve to the mmsegmentation checkout — matches the name).
_script_dir = osp.dirname(__file__)
MMSEG_ROOT = osp.dirname(osp.dirname(_script_dir))
1719
1820
19- def dump_yaml_and_check_difference (obj , filename ):
21+ def dump_yaml_and_check_difference (obj , filename , sort_keys = False ):
2022 """Dump object to a yaml file, and check if the file content is different
2123 from the original.
2224
2325 Args:
2426 obj (any): The python object to be dumped.
2527 filename (str): YAML filename to dump the object to.
28+            sort_keys (bool): Whether to sort dictionary keys when dumping. Defaults to False.
2629 Returns:
2730 Bool: If the target YAML file is different from the original.
2831 """
2932
30- str_dump = mmcv .dump (obj , None , file_format = 'yaml' , sort_keys = True )
33+ str_dump = mmcv .dump (obj , None , file_format = 'yaml' , sort_keys = sort_keys )
3134 if osp .isfile (filename ):
3235 file_exists = True
3336 with open (filename , 'r' , encoding = 'utf-8' ) as f :
@@ -54,12 +57,29 @@ def parse_md(md_file):
5457 Returns:
5558 Bool: If the target YAML file is different from the original.
5659 """
57- collection_name = osp .dirname (md_file ). split ( '/' )[ - 1 ]
60+ collection_name = osp .split ( osp . dirname (md_file ))[ 1 ]
5861 configs = os .listdir (osp .dirname (md_file ))
5962
60- collection = dict (Name = collection_name , Metadata = {'Training Data' : []})
63+ collection = dict (
64+ Name = collection_name ,
65+ Metadata = {'Training Data' : []},
66+ Paper = {
67+ 'URL' : '' ,
68+ 'Title' : ''
69+ },
70+ README = md_file ,
71+ Code = {
72+ 'URL' : '' ,
73+ 'Version' : ''
74+ })
75+ collection .update ({'Converted From' : {'Weights' : '' , 'Code' : '' }})
6176 models = []
6277 datasets = []
78+ paper_url = None
79+ paper_title = None
80+ code_url = None
81+ code_version = None
82+ repo_url = None
6383
6484 with open (md_file , 'r' ) as md :
6585 lines = md .readlines ()
@@ -70,7 +90,36 @@ def parse_md(md_file):
7090 if len (line ) == 0 :
7191 i += 1
7292 continue
73- if line [:3 ] == '###' :
93+ if line [:2 ] == '# ' :
94+ paper_title = line .replace ('# ' , '' )
95+ i += 1
96+ elif line [:3 ] == '<a ' :
97+ content = etree .HTML (line )
98+ node = content .xpath ('//a' )[0 ]
99+ if node .text == 'Code Snippet' :
100+ code_url = node .get ('href' , None )
101+ assert code_url is not None , (
102+ f'{ collection_name } hasn\' t code snippet url.' )
103+ # version extraction
104+ filter_str = r'blob/(.*)/mm'
105+ pattern = re .compile (filter_str )
106+ code_version = pattern .findall (code_url )
107+ assert len (code_version ) == 1 , (
108+ f'false regular expression ({ filter_str } ) use.' )
109+ code_version = code_version [0 ]
110+ elif node .text == 'Official Repo' :
111+ repo_url = node .get ('href' , None )
112+ assert repo_url is not None , (
113+ f'{ collection_name } hasn\' t official repo url.' )
114+ i += 1
115+ elif line [:9 ] == '<summary ' :
116+ content = etree .HTML (line )
117+ nodes = content .xpath ('//a' )
118+ assert len (nodes ) == 1 , (
119+ 'summary tag should only have single a tag.' )
120+ paper_url = nodes [0 ].get ('href' , None )
121+ i += 1
122+ elif line [:4 ] == '### ' :
74123 datasets .append (line [4 :])
75124 current_dataset = line [4 :]
76125 i += 2
@@ -113,22 +162,28 @@ def parse_md(md_file):
113162 crop_size = els [crop_size_id ].split ('x' )
114163 assert len (crop_size ) == 2
115164 model = {
116- 'Name' : model_name ,
117- 'In Collection' : collection_name ,
165+ 'Name' :
166+ model_name ,
167+ 'In Collection' :
168+ collection_name ,
118169 'Metadata' : {
119170 'backbone' : els [backbone_id ],
120171 'crop size' : f'({ crop_size [0 ]} ,{ crop_size [1 ]} )' ,
121172 'lr schd' : int (els [lr_schd_id ]),
122173 },
123- 'Results' : {
124- 'Task' : 'Semantic Segmentation' ,
125- 'Dataset' : current_dataset ,
126- 'Metrics' : {
127- 'mIoU' : float (els [ss_id ]),
174+ 'Results' : [
175+ {
176+ 'Task' : 'Semantic Segmentation' ,
177+ 'Dataset' : current_dataset ,
178+ 'Metrics' : {
179+ 'mIoU' : float (els [ss_id ]),
180+ },
128181 },
129- },
130- 'Config' : config ,
131- 'Weights' : weight ,
182+ ],
183+ 'Config' :
184+ config ,
185+ 'Weights' :
186+ weight ,
132187 }
133188 if fps != - 1 :
134189 try :
@@ -152,15 +207,38 @@ def parse_md(md_file):
152207 }]
153208 if mem != - 1 :
154209 model ['Metadata' ]['memory (GB)' ] = float (mem )
210+            # Only the semantic-segmentation task is handled for now.
155211 if ms_id and els [ms_id ] != '-' and els [ms_id ] != '' :
156- model ['Results' ]['Metrics' ]['mIoU(ms+flip)' ] = float (
157- els [ms_id ])
212+ model ['Results' ][0 ]['Metrics' ][
213+ 'mIoU(ms+flip)' ] = float ( els [ms_id ])
158214 models .append (model )
159215 j += 1
160216 i = j
161217 else :
162218 i += 1
219+ flag = (code_url is not None ) and (paper_url is not None ) and (repo_url
220+ is not None )
221+ assert flag , f'{ collection_name } readme error'
163222 collection ['Metadata' ]['Training Data' ] = datasets
223+ collection ['Code' ]['URL' ] = code_url
224+ collection ['Code' ]['Version' ] = code_version
225+ collection ['Paper' ]['URL' ] = paper_url
226+ collection ['Paper' ]['Title' ] = paper_title
227+ collection ['Converted From' ]['Code' ] = repo_url
228+    # ['Converted From']['Weights'] is not filled in here.
229+ # remove empty attribute
230+ check_key_list = ['Code' , 'Paper' , 'Converted From' ]
231+ for check_key in check_key_list :
232+ key_list = list (collection [check_key ].keys ())
233+ for key in key_list :
234+ if check_key not in collection :
235+ break
236+ if collection [check_key ][key ] == '' :
237+ if len (collection [check_key ].keys ()) == 1 :
238+ collection .pop (check_key )
239+ else :
240+ collection [check_key ].pop (key )
241+
164242 result = {'Collections' : [collection ], 'Models' : models }
165243 yml_file = f'{ md_file [:- 9 ]} { collection_name } .yml'
166244 return dump_yaml_and_check_difference (result , yml_file )
0 commit comments