Skip to content

[Cosmos] Hash v1 Key Error Hotfix #41639

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 19, 2025
9 changes: 2 additions & 7 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
## Release History

### 4.13.0b2 (Unreleased)

#### Features Added

#### Breaking Changes
### 4.13.0b2 (2025-06-18)

#### Bugs Fixed

#### Other Changes
- Fixed an issue where a `KeyError` would occur when getting properties from a container using legacy hash v1, because such containers may not always contain the `version` property in the partition key definition. See [PR 41639](https://github.com/Azure/azure-sdk-for-python/pull/41639)

### 4.13.0b1 (2025-06-05)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@
_Empty,
PartitionKey,
_return_undefined_or_empty_partition_key,
NonePartitionKeyValue
NonePartitionKeyValue,
_get_partition_key_from_partition_key_definition
)

PartitionKeyType = Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long
Expand Down Expand Up @@ -3166,11 +3167,12 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]:
# here get the over lapping ranges
# Default to empty Dictionary, but unlikely to be empty as we first check if we have it in kwargs
pk_properties: Union[PartitionKey, Dict] = kwargs.pop("partitionKeyDefinition", {})
partition_key_definition = PartitionKey(
path=pk_properties["paths"],
kind=pk_properties["kind"],
version=pk_properties["version"])
partition_key_value = pk_properties["partition_key"]
partition_key_definition = _get_partition_key_from_partition_key_definition(pk_properties)
partition_key_value: Sequence[
Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] = cast(
Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]],
pk_properties.get("partition_key")
)
feedrangeEPK = partition_key_definition._get_epk_range_for_prefix_partition_key(
partition_key_value
) # cspell:disable-line
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"""
from typing import TYPE_CHECKING, Optional

from azure.cosmos.partition_key import PartitionKey
from azure.cosmos.partition_key import _get_partition_key_from_partition_key_definition
from azure.cosmos._global_partition_endpoint_manager_circuit_breaker_core import \
_GlobalPartitionEndpointManagerForCircuitBreakerCore

Expand Down Expand Up @@ -62,9 +62,7 @@ def create_pk_range_wrapper(self, request: RequestObject) -> Optional[PartitionK
# get relevant information from container cache to get the overlapping ranges
container_link = properties["container_link"]
partition_key_definition = properties["partitionKey"]
partition_key = PartitionKey(path=partition_key_definition["paths"],
kind=partition_key_definition["kind"],
version=partition_key_definition["version"])
partition_key = _get_partition_key_from_partition_key_definition(partition_key_definition)

if HttpHeaders.PartitionKey in request.headers:
partition_key_value = request.headers[HttpHeaders.PartitionKey]
Expand Down
8 changes: 3 additions & 5 deletions sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
NonePartitionKeyValue,
_return_undefined_or_empty_partition_key,
_Empty,
_Undefined, PartitionKey
_Undefined,
_get_partition_key_from_partition_key_definition
)

__all__ = ("ContainerProxy",)
Expand Down Expand Up @@ -153,10 +154,7 @@ async def _get_epk_range_for_partition_key(
feed_options: Optional[Dict[str, Any]] = None) -> Range:
container_properties = await self._get_properties_with_options(feed_options)
partition_key_definition = container_properties["partitionKey"]
partition_key = PartitionKey(
path=partition_key_definition["paths"],
kind=partition_key_definition["kind"],
version=partition_key_definition["version"])
partition_key = _get_partition_key_from_partition_key_definition(partition_key_definition)

return partition_key._get_epk_range_for_partition_key(partition_key_value)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@
from .. import _utils
from ..partition_key import (
_Undefined,
PartitionKey,
_return_undefined_or_empty_partition_key,
NonePartitionKeyValue, _Empty
NonePartitionKeyValue, _Empty,
_get_partition_key_from_partition_key_definition
)
from ._auth_policy_async import AsyncCosmosBearerTokenCredentialPolicy
from .._cosmos_http_logging_policy import CosmosHttpLoggingPolicy
Expand Down Expand Up @@ -2956,9 +2956,7 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]:
partition_key_obj = None
if cont_prop and partition_key_value is not None:
partition_key_definition = cont_prop["partitionKey"]
partition_key_obj = PartitionKey(path=partition_key_definition["paths"],
kind=partition_key_definition["kind"],
version=partition_key_definition["version"])
partition_key_obj = _get_partition_key_from_partition_key_definition(partition_key_definition)
is_prefix_partition_query = partition_key_obj._is_prefix_partition_key(partition_key_value)

if is_prefix_partition_query and partition_key_obj:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"""
from typing import TYPE_CHECKING, Optional

from azure.cosmos import PartitionKey
from azure.cosmos.partition_key import _get_partition_key_from_partition_key_definition
from azure.cosmos._global_partition_endpoint_manager_circuit_breaker_core import \
_GlobalPartitionEndpointManagerForCircuitBreakerCore
from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper, Range
Expand Down Expand Up @@ -60,9 +60,7 @@ async def create_pk_range_wrapper(self, request: RequestObject) -> Optional[Part
# get relevant information from container cache to get the overlapping ranges
container_link = properties["container_link"]
partition_key_definition = properties["partitionKey"]
partition_key = PartitionKey(path=partition_key_definition["paths"],
kind=partition_key_definition["kind"],
version=partition_key_definition["version"])
partition_key = _get_partition_key_from_partition_key_definition(partition_key_definition)

if HttpHeaders.PartitionKey in request.headers:
partition_key_value = request.headers[HttpHeaders.PartitionKey]
Expand Down
8 changes: 3 additions & 5 deletions sdk/cosmos/azure-cosmos/azure/cosmos/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
PartitionKey,
_Empty,
_Undefined,
_return_undefined_or_empty_partition_key
_return_undefined_or_empty_partition_key,
_get_partition_key_from_partition_key_definition
)
from .scripts import ScriptsProxy

Expand All @@ -64,10 +65,7 @@

def get_partition_key_from_properties(container_properties: Dict[str, Any]) -> PartitionKey:
    """Build the PartitionKey described by a container's properties.

    :param container_properties: The container properties; must contain a "partitionKey" entry.
    :type container_properties: Dict[str, Any]
    :return: The PartitionKey created from the "partitionKey" definition.
    :rtype: PartitionKey
    :raises KeyError: If ``container_properties`` has no "partitionKey" entry.
    """
    partition_key_definition = container_properties["partitionKey"]
    # Delegate to the shared helper instead of indexing "paths"/"kind"/"version"
    # directly: direct indexing raises KeyError when the definition omits "version".
    return _get_partition_key_from_partition_key_definition(partition_key_definition)

def is_prefix_partition_key(container_properties: Dict[str, Any], partition_key: PartitionKeyType) -> bool:
partition_key_obj: PartitionKey = get_partition_key_from_properties(container_properties)
Expand Down
47 changes: 43 additions & 4 deletions sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from io import BytesIO
import binascii
import struct
from typing import IO, Sequence, Type, Union, overload, List, cast
from typing import IO, Sequence, Type, Union, overload, List, cast, Dict, Any

from typing_extensions import Literal

from ._cosmos_integers import _UInt32, _UInt64, _UInt128
Expand Down Expand Up @@ -96,9 +97,31 @@ class PartitionKey(dict):
See https://learn.microsoft.com/azure/cosmos-db/partitioning-overview#choose-partitionkey
for information on how to choose partition keys.

:ivar str path: The path of the partition key
:ivar str kind: What kind of partition key is being defined (default: "Hash")
:ivar int version: The version of the partition key (default: 2)
This constructor supports multiple overloads:

1. **Single Partition Key**:

**Parameters**:
- `path` (str): The path of the partition key.
- `kind` (Literal["Hash"], optional): The kind of partition key. Defaults to "Hash".
- `version` (int, optional): The version of the partition key. Defaults to 2.

**Example**:
>>> pk = PartitionKey(path="/id")

2. **Hierarchical Partition Key**:

**Parameters**:
- `path` (List[str]): A list of paths representing the partition key, supports up to three hierarchical levels.
- `kind` (Literal["MultiHash"], optional): The kind of partition key. Defaults to "MultiHash".
- `version` (int, optional): The version of the partition key. Defaults to 2.

**Example**:
>>> pk = PartitionKey(path=["/id", "/category"], kind="MultiHash")

:ivar str path: The path(s) of the partition key.
:ivar str kind: The kind of partition key ("Hash" or "MultiHash") (default: "Hash").
:ivar int version: The version of the partition key (default: 2).
"""

@overload
Expand Down Expand Up @@ -472,3 +495,19 @@ def _write_for_binary_encoding(

elif isinstance(value, _Undefined):
binary_writer.write(bytes([_PartitionKeyComponentType.Undefined]))


def _get_partition_key_from_partition_key_definition(
    partition_key_definition: Union[Dict[str, Any], "PartitionKey"]
) -> "PartitionKey":
    """Internal method to build a PartitionKey from a partition key definition.

    Entries missing from the definition fall back to defaults: ``""`` for "paths",
    ``"Hash"`` for "kind" and ``1`` for "version".

    :param partition_key_definition: A dictionary or PartitionKey object containing the partition key definition.
    :type partition_key_definition: Union[Dict[str, Any], PartitionKey]
    :return: A PartitionKey instance created from the provided definition.
    :rtype: PartitionKey
    """
    # A definition without a "version" entry is treated as version 1.
    version: int = partition_key_definition.get("version", 1)
    return PartitionKey(
        path=partition_key_definition.get("paths", ""),
        kind=partition_key_definition.get("kind", "Hash"),
        version=version,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@

import azure.cosmos.cosmos_client as cosmos_client
import test_config
from azure.cosmos.partition_key import PartitionKey

from azure.cosmos.partition_key import PartitionKey, _get_partition_key_from_partition_key_definition
from azure.cosmos.container import get_epk_range_for_partition_key

@pytest.mark.cosmosEmulator
@pytest.mark.cosmosQuery
class TestChangeFeedPKVariation(unittest.TestCase):
"""Test change feed with different partition key variations."""

Expand Down Expand Up @@ -224,5 +223,67 @@ def test_multiple_physical_partitions(self):
self.validate_changefeed_hpk(container_hpk)
self.db.delete_container(container_hpk.id)

def test_partition_key_version_1_properties(self):
    """Test container with version 1 partition key definition and validate properties.

    The container's ``_get_properties`` is temporarily replaced so that the returned
    partition key definition has no "version" entry; the test then checks that a
    PartitionKey can still be built, EPK ranges can be computed, and the change feed
    returns the inserted items.
    """
    container_id = f"container_test_pk_version_1_properties_{uuid.uuid4()}"
    pk = PartitionKey(path="/pk", kind="Hash", version=1)
    container = self.db.create_container(id=container_id, partition_key=pk)
    original_get_properties = container._get_properties

    def _get_properties_override():
        # Simulate the version key not being in the definition.
        # NOTE: pop() mutates the dict returned by the original getter in place.
        properties = original_get_properties()
        partition_key = properties["partitionKey"]
        partition_key.pop("version", None)  # Remove version key for validation
        return {**properties, "partitionKey": partition_key}

    container._get_properties = _get_properties_override

    try:
        # Get container properties and validate partition key definition
        container_properties = container._get_properties()
        partition_key_definition = container_properties["partitionKey"]
        # Ensure the version key is not included in the definition
        assert "version" not in partition_key_definition, ("Version key should not be included "
                                                           "in the partition key definition.")

        # Create a PartitionKey instance from the definition and validate
        partition_key_instance = _get_partition_key_from_partition_key_definition(partition_key_definition)
        assert partition_key_instance.kind == "Hash", "Partition key kind mismatch."
        assert partition_key_instance.version == 1, "Partition key version mismatch."

        # Upsert items and validate get_epk_range_for_partition_key
        items = [
            {"id": "1", "pk": "value1"},
            {"id": "2", "pk": "value2"},
            {"id": "3", "pk": "value3"}
        ]
        self.insert_items(container, items)

        for item in items:
            # Let unexpected exceptions propagate: wrapping them in a broad
            # ``except Exception: assert False`` hides the original traceback.
            epk_range = get_epk_range_for_partition_key(container_properties, item["pk"])
            assert epk_range is not None, f"EPK range should not be None for partition key {item['pk']}."

        # Query the change feed and validate the results
        change_feed = container.query_items_change_feed(is_start_from_beginning=True)
        change_feed_items = list(change_feed)

        # Ensure the same items are retrieved
        assert len(change_feed_items) == len(items), (
            f"Mismatch in document count: Change feed returned {len(change_feed_items)} items, "
            f"while {len(items)} items were created."
        )
        for index, item in enumerate(items):
            assert item['id'] == change_feed_items[index]['id'], f"Item {item} not found in change feed results."

    finally:
        # Restore the original getter and clean up the container
        container._get_properties = original_get_properties
        self.db.delete_container(container.id)


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
from azure.cosmos.aio import CosmosClient
import test_config
from azure.cosmos.partition_key import PartitionKey
from azure.cosmos.partition_key import PartitionKey, _get_partition_key_from_partition_key_definition

@pytest.mark.cosmosEmulator
@pytest.mark.asyncio
Expand Down Expand Up @@ -222,5 +222,67 @@ async def test_multiple_physical_partitions_async(self):
await self.validate_changefeed_hpk(container_hpk)
await self.db.delete_container(container_hpk.id)

async def test_partition_key_version_1_properties_async(self):
    """Test container with version 1 partition key definition and validate properties (async).

    The container's ``_get_properties`` is temporarily replaced so that the returned
    partition key definition has no "version" entry; the test then checks that a
    PartitionKey can still be built, EPK ranges can be computed, and the change feed
    returns the inserted items.
    """
    container_id = f"container_test_pk_version_1_properties_{uuid.uuid4()}"
    pk = PartitionKey(path="/pk", kind="Hash", version=1)
    container = await self.db.create_container(id=container_id, partition_key=pk)
    original_get_properties = container._get_properties

    async def _get_properties_override(*args, **kwargs):
        # Simulate the version key not being in the definition.
        # Arguments are forwarded untouched so internal callers that pass options
        # keep working while the override is installed.
        # NOTE: pop() mutates the dict returned by the original getter in place.
        properties = await original_get_properties(*args, **kwargs)
        partition_key = properties["partitionKey"]
        partition_key.pop("version", None)  # Remove version key for validation
        return {**properties, "partitionKey": partition_key}

    container._get_properties = _get_properties_override

    try:
        # Get container properties and validate partition key definition
        container_properties = await container._get_properties()
        partition_key_definition = container_properties["partitionKey"]
        # Ensure the version key is not included in the definition
        assert "version" not in partition_key_definition, ("Version key should not be included "
                                                           "in the partition key definition.")

        # Create a PartitionKey instance from the definition and validate
        partition_key_instance = _get_partition_key_from_partition_key_definition(partition_key_definition)
        assert partition_key_instance.kind == "Hash", "Partition key kind mismatch."
        assert partition_key_instance.version == 1, "Partition key version mismatch."

        # Upsert items and validate _get_epk_range_for_partition_key
        items = [
            {"id": "1", "pk": "value1"},
            {"id": "2", "pk": "value2"},
            {"id": "3", "pk": "value3"}
        ]
        await self.insert_items(container, items)

        for item in items:
            # The container method is a coroutine taking the partition key value
            # (not the container properties) as its first argument. It must be
            # awaited: an un-awaited coroutine object is never None, which would
            # make the assertion below pass without computing anything.
            epk_range = await container._get_epk_range_for_partition_key(item["pk"])
            assert epk_range is not None, f"EPK range should not be None for partition key {item['pk']}."

        # Query the change feed and validate the results
        change_feed = container.query_items_change_feed(is_start_from_beginning=True)
        change_feed_items = [item async for item in change_feed]

        # Ensure the same items are retrieved
        assert len(change_feed_items) == len(items), (
            f"Mismatch in document count: Change feed returned {len(change_feed_items)} items, "
            f"while {len(items)} items were created."
        )
        for index, item in enumerate(items):
            assert item['id'] == change_feed_items[index]['id'], f"Item {item} not found in change feed results."

    finally:
        # Restore the original getter and clean up the container
        container._get_properties = original_get_properties
        await self.db.delete_container(container.id)


if __name__ == '__main__':
unittest.main()
unittest.main()