Merge pull request #419 from djarecka/ref_bids2nidm

dbkeator · web-flow · commit e5fc96bf0e2b · 2025-11-17T15:26:12.000-08:00
bids2nidm refactoring
diff --git a/src/nidm/experiment/Utils.py b/src/nidm/experiment/Utils.py
@@ -3071,9 +3071,9 @@ def addGitAnnexSources(obj, bids_root, filepath=None):
             obj.add_attributes({Constants.PROV["Location"]: URIRef(source)})
 
         return len(sources)
-    except Exception:
-        # if "No annex found at" not in str(e):
-        #    print("Warning, error with AnnexRepo (Utils.py, addGitAnnexSources):", e)
+    except Exception as e:
+        if "No annex found at" not in str(e):
+            print("Warning, error with AnnexRepo (Utils.py, addGitAnnexSources):", e)
         return 0
 
 
diff --git a/src/nidm/experiment/tools/bidsmri2nidm.py b/src/nidm/experiment/tools/bidsmri2nidm.py
@@ -167,36 +167,20 @@ def main():
 
     # if args.outputfile was defined by user then use it else use default which is args.directory/nidm.ttl
     if args.outputfile == "nidm.ttl":
-        # if we're choosing json-ld, make sure file extension is .json
-        # if args.jsonld:
-        #     outputfile=os.path.join(directory,os.path.splitext(args.outputfile)[0]+".json")
-        # if flag set to add to .bidsignore then add
-        #     if (args.bidsignore):
-        #         addbidsignore(directory,os.path.splitext(args.outputfile)[0]+".json")
-
         outputfile = os.path.join(directory, args.outputfile)
-        if args.bidsignore:
-            addbidsignore(directory, args.outputfile)
-        rdf_graph.serialize(destination=outputfile, format="turtle")
-
-        # else:
-        #     outputfile=os.path.join(directory,args.outputfile)
-        #     if (args.bidsignore):
-        #         addbidsignore(directory,args.outputfile)
     else:
-        # if we're choosing json-ld, make sure file extension is .json
-        # if args.jsonld:
-        #     outputfile = os.path.splitext(args.outputfile)[0]+".json"
-        #     if (args.bidsignore):
-        #         addbidsignore(directory,os.path.splitext(args.outputfile)[0]+".json")
-        #  else:
-        #     outputfile = args.outputfile
-        #     if (args.bidsignore):
-        #         addbidsignore(directory,args.outputfile)
         outputfile = args.outputfile
-        if args.bidsignore:
-            addbidsignore(directory, args.outputfile)
-        rdf_graph.serialize(destination=outputfile, format="turtle")
+
+    # if we're choosing json-ld, make sure file extension is .json
+    # if args.jsonld:
+    #     outputfile=os.path.join(directory,os.path.splitext(args.outputfile)[0]+".json")
+    # if flag set to add to .bidsignore then add
+    #     if (args.bidsignore):
+    #         addbidsignore(directory,os.path.splitext(args.outputfile)[0]+".json")
+
+    if args.bidsignore:
+        addbidsignore(directory, args.outputfile)
+    rdf_graph.serialize(destination=outputfile, format="turtle")
 
     # serialize NIDM file
     # with open(outputfile,'w', encoding="utf-8") as f:
@@ -1072,82 +1056,30 @@ def bidsmri2project(directory, args):
                 # defaults to participants.json because here we're mapping the participants.tsv file variables to terms
                 # if participants.json file doesn't exist then run without json mapping file
                 if not os.path.isfile(os.path.join(directory, "participants.json")):
-                    # temporary data frame of variables we need to create data dictionaries for
-                    temp = DataFrame(columns=mapping_list)
-                    # create data dictionary without concept mapping
-                    if args.no_concepts:
-                        column_to_terms, cde = map_variables_to_terms(
-                            directory=directory,
-                            assessment_name="participants.tsv",
-                            df=temp,
-                            output_file=os.path.join(directory, "participants.json"),
-                            bids=True,
-                            associate_concepts=False,
-                            dataset_identifier=dataset_doi,
-                        )
-                    # create data dictionary with concept mapping
-                    else:
-                        column_to_terms, cde = map_variables_to_terms(
-                            directory=directory,
-                            assessment_name="participants.tsv",
-                            df=temp,
-                            output_file=os.path.join(directory, "participants.json"),
-                            bids=True,
-                            dataset_identifier=dataset_doi,
-                        )
+                    json_source = None
                 else:
-                    # temporary data frame of variables we need to create data dictionaries for
-                    temp = DataFrame(columns=mapping_list)
-                    # create data dictionary without concept mapping
-                    if args.no_concepts:
-                        column_to_terms, cde = map_variables_to_terms(
-                            directory=directory,
-                            assessment_name="participants.tsv",
-                            df=temp,
-                            output_file=os.path.join(directory, "participants.json"),
-                            json_source=os.path.join(directory, "participants.json"),
-                            bids=True,
-                            associate_concepts=False,
-                            dataset_identifier=dataset_doi,
-                        )
-                    # create data dictionary with concept mapping
-                    else:
-                        column_to_terms, cde = map_variables_to_terms(
-                            directory=directory,
-                            assessment_name="participants.tsv",
-                            df=temp,
-                            output_file=os.path.join(directory, "participants.json"),
-                            json_source=os.path.join(directory, "participants.json"),
-                            bids=True,
-                            dataset_identifier=dataset_doi,
-                        )
-            # if user supplied a JSON data dictionary then use it
-            else:
-                # temporary data frame of variables we need to create data dictionaries for
-                temp = DataFrame(columns=mapping_list)
-                # create data dictionary without concept mapping
-                if args.no_concepts:
-                    column_to_terms, cde = map_variables_to_terms(
-                        directory=directory,
-                        assessment_name="participants.tsv",
-                        df=temp,
-                        output_file=os.path.join(directory, "participants.json"),
-                        json_source=args.json_map,
-                        bids=True,
-                        associate_concepts=False,
-                        dataset_identifier=dataset_doi,
-                    )
-                # create data dictionary with concept mapping
-                else:
-                    column_to_terms, cde = map_variables_to_terms(
-                        directory=directory,
-                        assessment_name="participants.tsv",
-                        df=temp,
-                        output_file=os.path.join(directory, "participants.json"),
-                        json_source=args.json_map,
-                        bids=True,
-                        dataset_identifier=dataset_doi,
-                    )
+                    json_source = os.path.join(directory, "participants.json")
+            else:  # if user supplied a JSON data dictionary then use it
+                json_source = args.json_map
+            # create data dictionary without concept mapping
+            if args.no_concepts:
+                associate_concepts = False
+            else:  # create data dictionary with concept mapping
+                associate_concepts = True
+
+            # temporary data frame of variables we need to create data dictionaries for
+            temp = DataFrame(columns=mapping_list)
+
+            column_to_terms, cde = map_variables_to_terms(
+                directory=directory,
+                assessment_name="participants.tsv",
+                df=temp,
+                output_file=os.path.join(directory, "participants.json"),
+                json_source=json_source,
+                bids=True,
+                associate_concepts=associate_concepts,
+                dataset_identifier=dataset_doi,
+            )
 
             # iterate over rows in participants.tsv file and create NIDM objects for sessions and acquisitions
             for row in participants_data:
@@ -1386,76 +1318,33 @@ def bidsmri2project(directory, args):
                     # add column to list for column_to_terms mapping
                     mapping_list.append(field)
 
-            # if user didn't supply a json data dictionary file
-            # create an empty one for column_to_terms to use
+            # if the user didn't supply a JSON data dictionary file but we're still doing variable-to-term mapping,
+            # create an empty one for column_to_terms to use
             if args.json_map is False:
                 # defaults to participants.json because here we're mapping the participants.tsv file variables to terms
                 # if participants.json file doesn't exist then run without json mapping file
                 if not os.path.isfile(os.path.splitext(tsv_file)[0] + ".json"):
-                    # maps variables in CSV file to terms
-                    temp = DataFrame(columns=mapping_list)
-                    if args.no_concepts:
-                        column_to_terms_pheno, cde_tmp = map_variables_to_terms(
-                            directory=directory,
-                            assessment_name=tsv_file,
-                            df=temp,
-                            output_file=os.path.splitext(tsv_file)[0] + ".json",
-                            bids=True,
-                            associate_concepts=False,
-                        )
-                    else:
-                        column_to_terms_pheno, cde_tmp = map_variables_to_terms(
-                            directory=directory,
-                            assessment_name=tsv_file,
-                            df=temp,
-                            output_file=os.path.splitext(tsv_file)[0] + ".json",
-                            bids=True,
-                        )
-                else:
-                    # maps variables in CSV file to terms
-                    temp = DataFrame(columns=mapping_list)
-                    if args.no_concepts:
-                        column_to_terms_pheno, cde_tmp = map_variables_to_terms(
-                            directory=directory,
-                            assessment_name=tsv_file,
-                            df=temp,
-                            output_file=os.path.splitext(tsv_file)[0] + ".json",
-                            json_source=os.path.splitext(tsv_file)[0] + ".json",
-                            bids=True,
-                            associate_concepts=False,
-                        )
-                    else:
-                        column_to_terms_pheno, cde_tmp = map_variables_to_terms(
-                            directory=directory,
-                            assessment_name=tsv_file,
-                            df=temp,
-                            output_file=os.path.splitext(tsv_file)[0] + ".json",
-                            json_source=os.path.splitext(tsv_file)[0] + ".json",
-                            bids=True,
-                        )
-            # else user did supply a json data dictionary so use it
-            else:
-                # maps variables in CSV file to terms
-                temp = DataFrame(columns=mapping_list)
-                if args.no_concepts:
-                    column_to_terms_pheno, cde_tmp = map_variables_to_terms(
-                        directory=directory,
-                        assessment_name=tsv_file,
-                        df=temp,
-                        output_file=os.path.splitext(tsv_file)[0] + ".json",
-                        json_source=args.json_map,
-                        bids=True,
-                        associate_concepts=False,
-                    )
+                    json_source = None
                 else:
-                    column_to_terms_pheno, cde_tmp = map_variables_to_terms(
-                        directory=directory,
-                        assessment_name=tsv_file,
-                        df=temp,
-                        output_file=os.path.splitext(tsv_file)[0] + ".json",
-                        json_source=args.json_map,
-                        bids=True,
-                    )
+                    json_source = os.path.splitext(tsv_file)[0] + ".json"
+            else:  # if user supplied a JSON data dictionary then use it
+                json_source = args.json_map
+            # create data dictionary without concept mapping
+            if args.no_concepts:
+                associate_concepts = False
+            else:  # create data dictionary with concept mapping
+                associate_concepts = True
+            # maps variables in CSV file to terms
+            temp = DataFrame(columns=mapping_list)
+            column_to_terms_pheno, cde_tmp = map_variables_to_terms(
+                directory=directory,
+                assessment_name=tsv_file,
+                df=temp,
+                output_file=os.path.splitext(tsv_file)[0] + ".json",
+                json_source=json_source,
+                bids=True,
+                associate_concepts=associate_concepts,
+            )
 
             for row in pheno_data:
                 subjid = row["participant_id"].split("-")