File tree Expand file tree Collapse file tree 3 files changed +45
-2
lines changed Expand file tree Collapse file tree 3 files changed +45
-2
lines changed Original file line number Diff line number Diff line change 14
14
from llmstack .data .sources .base import BaseSource , DataDocument
15
15
from llmstack .data .sources .utils import (
16
16
create_source_document_asset ,
17
+ get_document_data_uri_from_objref ,
17
18
get_source_document_asset_by_objref ,
18
19
)
19
20
@@ -74,10 +75,15 @@ def provider_slug(cls):
74
75
return "promptly"
75
76
76
77
def get_data_documents (self , ** kwargs ) -> List [DataDocument ]:
78
+ archive_file = self .file
79
+ # If objref:// is present, get the data URI from the objref
80
+ if archive_file and archive_file .startswith ("objref://" ):
81
+ archive_file = get_document_data_uri_from_objref (archive_file , datasource_uuid = kwargs ["datasource_uuid" ])
82
+
77
83
if self .split_files :
78
- files = extract_archive_files (* validate_parse_data_uri (self . file ))
84
+ files = extract_archive_files (* validate_parse_data_uri (archive_file ))
79
85
else :
80
- files = [self . file ]
86
+ files = [archive_file ]
81
87
82
88
documents = []
83
89
for file in files :
Original file line number Diff line number Diff line change 10
10
from llmstack .data .sources .base import BaseSource , DataDocument
11
11
from llmstack .data .sources .utils import (
12
12
create_source_document_asset ,
13
+ get_document_data_uri_from_objref ,
13
14
get_source_document_asset_by_objref ,
14
15
)
15
16
@@ -48,6 +49,21 @@ def provider_slug(cls):
48
49
49
50
def get_data_documents (self , ** kwargs ) -> List [DataDocument ]:
50
51
files = self .file .split ("|" )
52
+ files = list (
53
+ filter (
54
+ lambda entry : entry is not None ,
55
+ list (
56
+ map (
57
+ lambda entry : (
58
+ get_document_data_uri_from_objref (file_objref , datasource_uuid = kwargs ["datasource_uuid" ])
59
+ if entry .startswith ("objref://" )
60
+ else entry
61
+ ),
62
+ files ,
63
+ )
64
+ ),
65
+ )
66
+ )
51
67
documents = []
52
68
for file in files :
53
69
file_id = str (uuid .uuid4 ())
Original file line number Diff line number Diff line change @@ -71,3 +71,24 @@ def get_source_document_asset_by_objref(objref):
71
71
pass
72
72
73
73
return asset
74
+
75
+
76
def get_document_data_uri_from_objref(objref, datasource_uuid):
    """Resolve an ``objref://`` reference to the data URI of the stored file.

    Args:
        objref: Reference string shaped like ``objref://<segment>/<uuid>``.
            The first path segment is ignored; the second is used to look up
            the ``DataSourceEntryFiles`` row by ``uuid``.
        datasource_uuid: Identifier that must equal the ``datasource_uuid``
            value stored in the asset's metadata for the lookup to succeed.

    Returns:
        The result of ``DataSourceEntryFiles.get_asset_data_uri`` (called with
        ``include_name=True``), or ``None`` when the reference is empty or
        malformed, the asset is recorded against a different datasource, or
        the lookup fails for any reason.
    """
    import logging

    logger = logging.getLogger(__name__)

    if not objref:
        return None

    # Validate the reference shape before touching the database so that bad
    # input is rejected cheaply and reported explicitly.
    try:
        _, asset_uuid = objref.strip().split("//")[1].split("/")
    except (AttributeError, IndexError, ValueError):
        logger.warning("Ignoring malformed objref: %r", objref)
        return None

    from llmstack.data.models import DataSourceEntryFiles

    try:
        asset_obj = DataSourceEntryFiles.objects.get(uuid=asset_uuid)

        # Refuse to hand out files recorded against another datasource.
        if asset_obj.metadata.get("datasource_uuid") != datasource_uuid:
            return None

        return DataSourceEntryFiles.get_asset_data_uri(asset_obj, include_name=True)
    except Exception:
        # Still best-effort (callers receive None), but leave a trace instead
        # of hiding lookup/storage failures completely.
        logger.warning("Could not resolve objref %r to a data URI", objref, exc_info=True)
        return None
You can’t perform that action at this time.
0 commit comments