elastic
diff --git a/‎docs/reference/enrich-processor/index.md
Lines changed: 3 additions & 0 deletions b/‎docs/reference/enrich-processor/index.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/reference/enrich-processor/toc.yml
Lines changed: 1 addition & 0 deletions b/‎docs/reference/enrich-processor/toc.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/reference/enrich-processor/xml-processor.md
Lines changed: 281 additions & 0 deletions b/‎docs/reference/enrich-processor/xml-processor.md
Lines changed: 281 additions & 0 deletions
diff --git a/‎modules/ingest-common/src/main/java/module-info.java
Lines changed: 2 additions & 0 deletions b/‎modules/ingest-common/src/main/java/module-info.java
Lines changed: 2 additions & 0 deletions
diff --git a/‎modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java
Lines changed: 2 additions & 1 deletion b/‎modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java
Lines changed: 2 additions & 1 deletion
@@ -159,6 +159,9 @@ Refer to [Enrich your data](docs-content://manage-data/ingest/transform-enrich/d
 [`split` processor](/reference/enrich-processor/split-processor.md)
 :   Splits a field into an array of values.
 
+[`xml` processor](/reference/enrich-processor/xml-processor.md)
+:   Parses XML documents and converts them to JSON objects.
+
 [`trim` processor](/reference/enrich-processor/trim-processor.md)
 :   Trims whitespace from field.
 
 
@@ -46,3 +46,4 @@ toc:
   - file: urldecode-processor.md
   - file: uri-parts-processor.md
   - file: user-agent-processor.md
+  - file: xml-processor.md
@@ -0,0 +1,281 @@
+---
+navigation_title: "XML"
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/xml-processor.html
+---
+
+# XML processor [xml-processor]
+
+
+Parses XML documents and converts them to JSON objects using a streaming XML parser. Because the XML is read as a stream of events, the processor converts it without first building an intermediate in-memory tree of the whole document.
+
+$$$xml-options$$$
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `field` | yes | - | The field containing the XML string to be parsed. |
+| `target_field` | no | `field` | The field that the converted structured object will be written into. Any existing content in this field will be overwritten. |
+| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document. |
+| `ignore_failure` | no | `false` | Ignore failures for the processor. See [Handling pipeline failures](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#handling-pipeline-failures). |
+| `to_lower` | no | `false` | If `true`, the processor converts XML element names to lowercase. |
+| `ignore_empty_value` | no | `false` | If `true`, the processor will filter out null and empty values from the parsed XML structure, including empty elements, elements with null values, and elements with whitespace-only content. |
+| `description` | no | - | Description of the processor. Useful for describing the purpose of the processor or its configuration. |
+| `if` | no | - | Conditionally execute the processor. See [Conditionally run a processor](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#conditionally-run-processor). |
+| `on_failure` | no | - | Handle failures for the processor. See [Handling pipeline failures](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#handling-pipeline-failures). |
+| `tag` | no | - | Identifier for the processor. Useful for debugging and metrics. |
+
+## Configuration
+
+```js
+{
+  "xml": {
+    "field": "xml_field",
+    "target_field": "parsed_xml",
+    "ignore_empty_value": true
+  }
+}
+```
+
+## Examples
+
+### Basic XML parsing
+
+```console
+POST _ingest/pipeline/_simulate
+{
+  "pipeline": {
+    "processors": [
+      {
+        "xml": {
+          "field": "xml_content"
+        }
+      }
+    ]
+  },
+  "docs": [
+    {
+      "_source": {
+        "xml_content": "<catalog><book><author>William H. Gaddis</author><title>The Recognitions</title><review>One of the great seminal American novels.</review></book></catalog>"
+      }
+    }
+  ]
+}
+```
+
+Result:
+
+```console-result
+{
+  "docs": [
+    {
+      "doc": {
+        "_index": "_index",
+        "_id": "_id",
+        "_version": "-3",
+        "_source": {
+          "xml_content": {
+            "catalog": {
+              "book": {
+                "author": "William H. Gaddis",
+                "title": "The Recognitions",
+                "review": "One of the great seminal American novels."
+              }
+            }
+          }
+        },
+        "_ingest": {
+          "timestamp": "2019-03-11T21:54:37.909224Z"
+        }
+      }
+    }
+  ]
+}
+```
+
+### Filtering empty values
+
+When `ignore_empty_value` is set to `true`, the processor will remove empty elements from the parsed XML:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+  "pipeline": {
+    "processors": [
+      {
+        "xml": {
+          "field": "xml_content",
+          "target_field": "parsed_xml",
+          "ignore_empty_value": true
+        }
+      }
+    ]
+  },
+  "docs": [
+    {
+      "_source": {
+        "xml_content": "<catalog><book><author>William H. Gaddis</author><title></title><review>One of the great seminal American novels.</review><empty/><nested><empty_text>   </empty_text><valid_content>Some content</valid_content></nested></book><empty_book></empty_book></catalog>"
+      }
+    }
+  ]
+}
+```
+
+Result with empty elements filtered out:
+
+```console-result
+{
+  "docs": [
+    {
+      "doc": {
+        "_index": "_index",
+        "_id": "_id",
+        "_version": "-3",
+        "_source": {
+          "xml_content": "<catalog><book><author>William H. Gaddis</author><title></title><review>One of the great seminal American novels.</review><empty/><nested><empty_text>   </empty_text><valid_content>Some content</valid_content></nested></book><empty_book></empty_book></catalog>",
+          "parsed_xml": {
+            "catalog": {
+              "book": {
+                "author": "William H. Gaddis",
+                "review": "One of the great seminal American novels.",
+                "nested": {
+                  "valid_content": "Some content"
+                }
+              }
+            }
+          }
+        },
+        "_ingest": {
+          "timestamp": "2019-03-11T21:54:37.909224Z"
+        }
+      }
+    }
+  ]
+}
+```
+
+### Converting element names to lowercase
+
+```console
+POST _ingest/pipeline/_simulate
+{
+  "pipeline": {
+    "processors": [
+      {
+        "xml": {
+          "field": "xml_content",
+          "to_lower": true
+        }
+      }
+    ]
+  },
+  "docs": [
+    {
+      "_source": {
+        "xml_content": "<Catalog><Book><Author>William H. Gaddis</Author><Title>The Recognitions</Title></Book></Catalog>"
+      }
+    }
+  ]
+}
+```
+
+Result:
+
+```console-result
+{
+  "docs": [
+    {
+      "doc": {
+        "_index": "_index",
+        "_id": "_id",
+        "_version": "-3",
+        "_source": {
+          "xml_content": {
+            "catalog": {
+              "book": {
+                "author": "William H. Gaddis",
+                "title": "The Recognitions"
+              }
+            }
+          }
+        },
+        "_ingest": {
+          "timestamp": "2019-03-11T21:54:37.909224Z"
+        }
+      }
+    }
+  ]
+}
+```
+
+### Handling XML attributes
+
+XML attributes are included as properties in the resulting JSON object alongside element content:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+  "pipeline": {
+    "processors": [
+      {
+        "xml": {
+          "field": "xml_content"
+        }
+      }
+    ]
+  },
+  "docs": [
+    {
+      "_source": {
+        "xml_content": "<catalog version=\"1.0\"><book id=\"123\" isbn=\"978-0-684-80335-9\"><title lang=\"en\">The Recognitions</title><author nationality=\"American\">William H. Gaddis</author></book></catalog>"
+      }
+    }
+  ]
+}
+```
+
+Result:
+
+```console-result
+{
+  "docs": [
+    {
+      "doc": {
+        "_index": "_index",
+        "_id": "_id",
+        "_version": "-3",
+        "_source": {
+          "xml_content": {
+            "catalog": {
+              "version": "1.0",
+              "book": {
+                "id": "123",
+                "isbn": "978-0-684-80335-9",
+                "title": {
+                  "lang": "en",
+                  "#text": "The Recognitions"
+                },
+                "author": {
+                  "nationality": "American",
+                  "#text": "William H. Gaddis"
+                }
+              }
+            }
+          }
+        },
+        "_ingest": {
+          "timestamp": "2019-03-11T21:54:37.909224Z"
+        }
+      }
+    }
+  ]
+}
+```
+
+## XML features
+
+The XML processor supports:
+
+- **Elements with text content**: Converted to key-value pairs where the element name is the key and text content is the value
+- **Nested elements**: Converted to nested JSON objects
+- **Empty elements**: Converted to `null` values (can be filtered with `ignore_empty_value`)
+- **Repeated elements**: Converted to arrays when multiple elements with the same name exist at the same level
+- **XML attributes**: Included as properties in the JSON object alongside element content. When an element has both attributes and text content, the text is stored under a special `#text` key
+- **Mixed content**: Elements with both text and child elements include text under a special `#text` key while attributes and child elements become object properties
+- **Namespaces**: Local names are used; namespace prefixes are ignored
@@ -19,6 +19,8 @@
     requires org.apache.logging.log4j;
     requires org.apache.lucene.analysis.common;
     requires org.jruby.joni;
+    
+    requires java.xml;
 
     exports org.elasticsearch.ingest.common; // for painless
 
 
@@ -74,7 +74,8 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
             entry(TrimProcessor.TYPE, new TrimProcessor.Factory()),
             entry(URLDecodeProcessor.TYPE, new URLDecodeProcessor.Factory()),
             entry(UppercaseProcessor.TYPE, new UppercaseProcessor.Factory()),
-            entry(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory())
+            entry(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory()),
+            entry(XmlProcessor.TYPE, new XmlProcessor.Factory())
         );
     }
Original file line number	Diff line number	Diff line change
`@@ -74,7 +74,8 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet`
`74`	`74`	`entry(TrimProcessor.TYPE, new TrimProcessor.Factory()),`
`75`	`75`	`entry(URLDecodeProcessor.TYPE, new URLDecodeProcessor.Factory()),`
`76`	`76`	`entry(UppercaseProcessor.TYPE, new UppercaseProcessor.Factory()),`
`77`		`- entry(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory())`
	`77`	`+ entry(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory()),`
	`78`	`+ entry(XmlProcessor.TYPE, new XmlProcessor.Factory())`
`78`	`79`	`);`
`79`	`80`	`}`
`80`	`81`