Skip to content

Commit 6bfbc42

Browse files
committed
Update xml_reader.py
Handle specific exceptions
1 parent 97089c5 commit 6bfbc42

File tree

1 file changed: +30 −30 lines changed

asreviewcontrib/preprocess/io/xml_reader.py

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import pandas as pd
21
import xml.etree.ElementTree as ET
32

3+
import pandas as pd
44
from asreview.io.utils import _standardize_dataframe
55

66

@@ -12,7 +12,7 @@ class EndnoteXMLReader:
1212

1313
@classmethod
1414
def read_data(cls, fp):
15-
"""Import dataset.
15+
"""Import dataset from Endnote XML file.
1616
1717
Arguments
1818
---------
@@ -28,73 +28,73 @@ def read_data(cls, fp):
2828
root = tree.getroot()
2929
dataset_list = []
3030
for i, record in enumerate(root[0]):
31-
record_id = record.find("rec-number").text
31+
try:
32+
record_id = record.find("rec-number").text
33+
except (AttributeError, TypeError):
34+
record_id = None
3235
try:
3336
ref_type = record.find("ref-type").attrib["name"]
34-
except:
37+
except (AttributeError, TypeError):
3538
ref_type = None
3639
try:
3740
authors = ", ".join(
3841
author[0].text
3942
for author in record.find("contributors").find("authors")
4043
)
41-
except:
44+
except (AttributeError, TypeError):
4245
authors = None
4346
try:
4447
title = record.find("titles").find("title")[0].text
45-
except:
48+
except (AttributeError, TypeError):
4649
title = None
4750
try:
4851
second_title = record.find("titles").find("secondary-title")[0].text
49-
except:
52+
except (AttributeError, TypeError):
5053
second_title = None
5154
try:
5255
journal = record.find("periodical").find("full-title")[0].text
53-
except:
56+
except (AttributeError, TypeError):
5457
journal = None
5558
try:
5659
doi = record.find("electronic-resource-num")[0].text
57-
except:
60+
except (AttributeError, TypeError):
5861
doi = None
5962
try:
6063
pages = record.find("pages")[0].text
61-
except:
64+
except (AttributeError, TypeError):
6265
pages = None
6366
try:
6467
volume = record.find("volume")[0].text
65-
except:
68+
except (AttributeError, TypeError):
6669
volume = None
6770
try:
68-
issue = record.find("number")[0].text
69-
except:
70-
issue = None
71+
number = record.find("number")[0].text
72+
except (AttributeError, TypeError):
73+
number = None
7174
try:
7275
year = record.find("dates").find("year")[0].text
73-
except:
76+
except (AttributeError, TypeError):
7477
year = None
7578
try:
7679
url = record.find("urls").find("related-urls").find("url")[0].text
77-
except:
80+
except (AttributeError, TypeError):
7881
url = None
7982
try:
8083
isbn = record.find("isbn")[0].text
81-
except:
84+
except (AttributeError, TypeError):
8285
isbn = None
8386
try:
8487
abstract = record.find("abstract")[0].text
85-
except:
88+
except (AttributeError, TypeError):
8689
abstract = None
87-
try:
88-
caption = record.find("caption")[0].text
89-
except:
90-
caption = None
91-
try:
92-
label = record.find("label")[0].text
93-
except:
94-
label = None
90+
# try:
91+
# label = record.find("label")[0].text
92+
# except (AttributeError, TypeError):
93+
# label = None
9594
dataset_list.append(
9695
{
97-
"record_id": record_id,
96+
"recordID": record_id,
97+
# record_id is overwritten by ASReview standardize_dataframe
9898
"ref_type": ref_type,
9999
"authors": authors,
100100
"title": title,
@@ -104,12 +104,12 @@ def read_data(cls, fp):
104104
"doi": doi,
105105
"pages": pages,
106106
"volume": volume,
107-
"issue": issue,
107+
"number": number,
108108
"abstract": abstract,
109109
"isbn": isbn,
110110
"url": url,
111-
"caption": caption,
112-
"label": label,
111+
# "label": label,
112+
# TODO: Handle conflict between Endnote label and ASReview label
113113
}
114114
)
115115

0 commit comments

Comments
 (0)