@@ -71,6 +71,7 @@ python -m docext.app.app --concurrency_limit 10
71
71
import pandas as pd
72
72
import concurrent.futures
73
73
from gradio_client import Client, handle_file
74
+ from docext.core.file_converters.pdf_converter import PDFConverter
74
75
75
76
76
77
def dataframe_to_custom_dict (df : pd.DataFrame) -> dict :
@@ -110,6 +111,12 @@ fields_and_tables = dataframe_to_custom_dict(pd.DataFrame([
110
111
{" name" : " item_description" , " type" : " table" , " description" : " Item/Product description" }
111
112
# add more fields and table columns as needed
112
113
]))
114
+ # The client URL can be either the local host or a public Gradio URL such as `https://6986bdd23daef6f7eb.gradio.live/`
115
+ CLIENT_URL = " http://localhost:7860"
116
+
117
+
118
+
119
+ # # ======= Image Inputs =======
113
120
114
121
file_inputs = [
115
122
{
@@ -119,21 +126,34 @@ file_inputs = [
119
126
]
120
127
121
128
# # send single request
122
- # ## client url can be the local host or the public url like `https://6986bdd23daef6f7eb.gradio.live/`
123
129
fields_df, tables_df = get_extracted_fields_and_tables(
124
- " http://localhost:7860 " , " admin" , " admin" , " hosted_vllm/Qwen/Qwen2.5-VL-7B-Instruct-AWQ" , fields_and_tables, file_inputs
130
+ CLIENT_URL , " admin" , " admin" , " hosted_vllm/Qwen/Qwen2.5-VL-7B-Instruct-AWQ" , fields_and_tables, file_inputs
125
131
)
126
132
print (" ========Fields:=========" )
127
133
print (fields_df)
128
134
print (" ========Tables:=========" )
129
135
print (tables_df)
130
136
131
137
138
+ # # ======= PDF Inputs =======
139
+
140
+ pdf_converter = PDFConverter()
141
+ document_pages = pdf_converter.convert_and_save_images(" assets/invoice_test.pdf" )
142
+ file_inputs = [{" image" : handle_file(page)} for page in document_pages]
143
+
144
+ fields_df, tables_df = get_extracted_fields_and_tables(
145
+ CLIENT_URL , " admin" , " admin" , " hosted_vllm/Qwen/Qwen2.5-VL-7B-Instruct-AWQ" , fields_and_tables, file_inputs
146
+ )
147
+ print (" ========Fields:=========" )
148
+ print (fields_df)
149
+ print (" ========Tables:=========" )
150
+ print (tables_df)
151
+
132
152
# # send multiple requests in parallel
133
153
# Define a wrapper function for parallel execution
134
154
def run_request ():
135
155
return get_extracted_fields_and_tables(
136
- " http://localhost:7860 " , " admin" , " admin" , " hosted_vllm/Qwen/Qwen2.5-VL-7B-Instruct-AWQ" , fields_and_tables, file_inputs
156
+ CLIENT_URL , " admin" , " admin" , " hosted_vllm/Qwen/Qwen2.5-VL-7B-Instruct-AWQ" , fields_and_tables, file_inputs
137
157
)
138
158
139
159
# Use ThreadPoolExecutor to send 10 requests in parallel
0 commit comments