-
Hi, if (_encryption)
{
parquet::FileEncryptionProperties::Builder encryption_builder(_encryption_key); encryption_builder.footer_key_metadata("kf");
encryption_builder.algorithm(parquet::ParquetCipher::AES_GCM_V1)->build();
std::shared_ptr<parquet::FileEncryptionProperties> encryption_props = encryption_builder.build();
parquetBuilder.encryption (encryption_props);
}
Python code snippet used to decrypt:
kms_connection_config = pe.KmsConnectionConfig(custom_kms_conf={"kf": private_key})
def kms_factory(kms_connection_configuration):
return InMemoryKmsClient(kms_connection_configuration)
crypto_factory = pe.CryptoFactory(kms_factory)
# decryption
decryption_properties = (
crypto_factory.file_decryption_properties(
kms_connection_config))
parquet_file = pq.ParquetFile(
r'example.parquet',
decryption_properties=decryption_properties)
print(parquet_file.read().to_pandas())
With the above Python code I get the error: OSError: failed to parse key metadata kf
Whereas the C++ code below works fine for decryption:
const arrow::Status ReadEncryptedParquetFile(const std::string& file_path, const std::string& decryption_key) {
try {
//Set up decryption properties
std::shared_ptr<parquet::FileDecryptionProperties> decryption_props = parquet::FileDecryptionProperties::Builder()
.footer_key(decryption_key) // Use the appropriate method to set the key
->build();
// Configure reader properties with decryption properties
arrow::MemoryPool* pool = arrow::default_memory_pool();
// Configure general Parquet reader settings
auto reader_properties = parquet::ReaderProperties (pool); reader_properties.file_decryption_properties (decryption_props);
parquet::arrow::FileReaderBuilder reader_builder; PARQUET_THROW_NOT_OK(
reader_builder.OpenFile(file_path, /*memory_map=*/false, reader_properties));
std::unique_ptr<parquet::arrow::FileReader> arrow_reader; ARROW_ASSIGN_OR_RAISE(arrow_reader, reader_builder.Build());
// Read the entire file as a table
std::shared_ptr<arrow::Table> table;
PARQUET_THROW_NOT_OK(arrow_reader->ReadTable(&table));
// Print the table (for demonstration purposes)
std::cout << "Read table:" << std::endl;
std::cout << table->ToString() << std::endl;
} catch (const std::exception& e) {
}
}
std::cerr << "Error reading encrypted parquet file: " << e.what() << std::endl;
Can anyone please suggest how to decrypt this using Python? |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments
-
Hi @basu45. In your C++ example you are directly setting a footer key using the low-level encryption API. This isn't compatible with PyArrow, which only exposes the higher level "key management tools" API. With this API, the private_key you have configured in your InMemoryKmsClient isn't the key that decrypts Parquet data, but is instead used to decrypt keys that have been randomly generated by the Arrow library and stored encrypted in the key metadata. There is a design document that describes this API at https://docs.google.com/document/d/1bEu903840yb95k9q2X-BlsYKuXoygE4VnMDl9xz_zhk/edit?usp=sharing. You'll need to also use the key management tools API when writing files in C++ if you need compatibility with PyArrow. I don't think there are any examples or documentation on how to use it, but there are some tests here that could be helpful: https://github.com/apache/arrow/blob/main/cpp/src/parquet/encryption/key_management_test.cc |
Beta Was this translation helpful? Give feedback.
-
Hi @adamreeve , |
Beta Was this translation helpful? Give feedback.
Hi @basu45. In your C++ example you are directly setting a footer key using the low-level encryption API. This isn't compatible with PyArrow, which only exposes the higher level "key management tools" API. With this API, the `private_key` you have configured in your `InMemoryKmsClient` isn't the key that decrypts Parquet data, but is instead used to decrypt keys that have been randomly generated by the Arrow library and stored encrypted in the key metadata.
There is a design document that describes this API at https://docs.google.com/document/d/1bEu903840yb95k9q2X-BlsYKuXoygE4VnMDl9xz_zhk/edit?usp=sharing.
You'll need to also use the key management tools API when writing files in C++ if you n…