1- import pandas as pd
21import xml .etree .ElementTree as ET
32
3+ import pandas as pd
44from asreview .io .utils import _standardize_dataframe
55
66
@@ -12,7 +12,7 @@ class EndnoteXMLReader:
1212
1313 @classmethod
1414 def read_data (cls , fp ):
15- """Import dataset.
15+ """Import dataset from Endnote XML file .
1616
1717 Arguments
1818 ---------
@@ -28,73 +28,73 @@ def read_data(cls, fp):
2828 root = tree .getroot ()
2929 dataset_list = []
3030 for i , record in enumerate (root [0 ]):
31- record_id = record .find ("rec-number" ).text
31+ try :
32+ record_id = record .find ("rec-number" ).text
33+ except (AttributeError , TypeError ):
34+ record_id = None
3235 try :
3336 ref_type = record .find ("ref-type" ).attrib ["name" ]
34- except :
37+ except ( AttributeError , TypeError ) :
3538 ref_type = None
3639 try :
3740 authors = ", " .join (
3841 author [0 ].text
3942 for author in record .find ("contributors" ).find ("authors" )
4043 )
41- except :
44+ except ( AttributeError , TypeError ) :
4245 authors = None
4346 try :
4447 title = record .find ("titles" ).find ("title" )[0 ].text
45- except :
48+ except ( AttributeError , TypeError ) :
4649 title = None
4750 try :
4851 second_title = record .find ("titles" ).find ("secondary-title" )[0 ].text
49- except :
52+ except ( AttributeError , TypeError ) :
5053 second_title = None
5154 try :
5255 journal = record .find ("periodical" ).find ("full-title" )[0 ].text
53- except :
56+ except ( AttributeError , TypeError ) :
5457 journal = None
5558 try :
5659 doi = record .find ("electronic-resource-num" )[0 ].text
57- except :
60+ except ( AttributeError , TypeError ) :
5861 doi = None
5962 try :
6063 pages = record .find ("pages" )[0 ].text
61- except :
64+ except ( AttributeError , TypeError ) :
6265 pages = None
6366 try :
6467 volume = record .find ("volume" )[0 ].text
65- except :
68+ except ( AttributeError , TypeError ) :
6669 volume = None
6770 try :
68- issue = record .find ("number" )[0 ].text
69- except :
70- issue = None
71+ number = record .find ("number" )[0 ].text
72+ except ( AttributeError , TypeError ) :
73+ number = None
7174 try :
7275 year = record .find ("dates" ).find ("year" )[0 ].text
73- except :
76+ except ( AttributeError , TypeError ) :
7477 year = None
7578 try :
7679 url = record .find ("urls" ).find ("related-urls" ).find ("url" )[0 ].text
77- except :
80+ except ( AttributeError , TypeError ) :
7881 url = None
7982 try :
8083 isbn = record .find ("isbn" )[0 ].text
81- except :
84+ except ( AttributeError , TypeError ) :
8285 isbn = None
8386 try :
8487 abstract = record .find ("abstract" )[0 ].text
85- except :
88+ except ( AttributeError , TypeError ) :
8689 abstract = None
87- try :
88- caption = record .find ("caption" )[0 ].text
89- except :
90- caption = None
91- try :
92- label = record .find ("label" )[0 ].text
93- except :
94- label = None
90+ # try:
91+ # label = record.find("label")[0].text
92+ # except (AttributeError, TypeError):
93+ # label = None
9594 dataset_list .append (
9695 {
97- "record_id" : record_id ,
96+ "recordID" : record_id ,
97+ # record_id is overwritten by ASReview standardize_dataframe
9898 "ref_type" : ref_type ,
9999 "authors" : authors ,
100100 "title" : title ,
@@ -104,12 +104,12 @@ def read_data(cls, fp):
104104 "doi" : doi ,
105105 "pages" : pages ,
106106 "volume" : volume ,
107- "issue " : issue ,
107+ "number " : number ,
108108 "abstract" : abstract ,
109109 "isbn" : isbn ,
110110 "url" : url ,
111- "caption " : caption ,
112- "label" : label ,
111+ # "label ": label ,
112+ # TODO: Handle conflict between Endnote label and ASReview label
113113 }
114114 )
115115
0 commit comments