Skip to content

Fix case when we have two same prefixes in the name #147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions nameparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ class HumanName(object):

Instantiation assigns to ``full_name``, and assignment to
:py:attr:`full_name` triggers :py:func:`parse_full_name`. After parsing the
name, these instance attributes are available. Alternatively, you can pass
name, these instance attributes are available. Alternatively, you can pass
any of the instance attributes to the constructor method and skip the parsing
process. If any of the instance attributes are passed to the constructor
as keywords, :py:func:`parse_full_name` will not be performed.
process. If any of the instance attributes are passed to the constructor
as keywords, :py:func:`parse_full_name` will not be performed.

**HumanName Instance Attributes**

Expand Down Expand Up @@ -536,9 +536,9 @@ def parse_nicknames(self):
Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`;
`quoted_word`, `double_quotes` and `parenthesis`.
"""

empty_re = re.compile("")

re_quoted_word = self.C.regexes.quoted_word or empty_re
re_double_quotes = self.C.regexes.double_quotes or empty_re
re_parenthesis = self.C.regexes.parenthesis or empty_re
Expand Down Expand Up @@ -906,7 +906,7 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0):
# If it's the first piece and there is more than one rootname, assume it's a first name
continue
next_prefix = next(iter(filter(self.is_prefix, pieces[i + 1:])))
j = pieces.index(next_prefix)
j = pieces.index(next_prefix, i + 1)
if j == i + 1:
# if there are two prefixes in sequence, join to the following piece
j += 1
Expand Down
11 changes: 8 additions & 3 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2071,6 +2071,11 @@ def test_multiple_prefixes(self):
self.m(hn.first, "Mike", hn)
self.m(hn.last, "van der Velt", hn)

def test_2_same_prefixes_in_the_name(self):
hh = HumanName("Vincent van Gogh van Beethoven")
self.m(hh.first, "Vincent", hh)
self.m(hh.middle, "van Gogh", hh)
self.m(hh.last, "van Beethoven", hh)

class HumanNameCapitalizationTestCase(HumanNameTestBase):
def test_capitalization_exception_for_III(self):
Expand Down Expand Up @@ -2343,12 +2348,12 @@ def test_initials_with_prefix_firstname(self):
def test_initials_with_prefix(self):
hn = HumanName("Alex van Johnson")
self.m(hn.initials_list(), ["A", "J"], hn)

def test_constructor_first(self):
hn = HumanName(first="TheName")
self.assertFalse(hn.unparsable)
self.m(hn.first, "TheName", hn)

def test_constructor_middle(self):
hn = HumanName(middle="TheName")
self.assertFalse(hn.unparsable)
Expand Down Expand Up @@ -2380,7 +2385,7 @@ def test_constructor_multiple(self):
self.m(hn.first, "TheName", hn)
self.m(hn.last, "lastname", hn)
self.m(hn.title, "mytitle", hn)


TEST_NAMES = (
"John Doe",
Expand Down