Commit 9197adf

v0.9
1 parent c888b70 commit 9197adf

18 files changed: +2306, -464 lines

llm2clip/eva_clip/model_configs/EVA02-CLIP-L-14-336.json

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 {
     "embed_dim": 1280,
     "vision_cfg": {
-        "image_size": 336,
+        "image_size": 448,
         "layers": 24,
         "width": 1024,
         "drop_path_rate": 0,

llm2clip/run.sh

Lines changed: 8 additions & 8 deletions

@@ -1,11 +1,11 @@
 MODEL=EVA02-CLIP-L-14-336
 PRETRAINED=eva_clip
-python -m torch.distributed.launch --nproc_per_node=2 \
+python -m torch.distributed.launch --nproc_per_node=4 \
     --use_env training/main.py \
     --enable-deepspeed \
     --grad-checkpointing \
-    --name="mimic_B16448_8b_local" \
-    --save-frequency 10 \
+    --name="final_llm2clip_caption" \
+    --save-frequency 2 \
     --local-loss \
     --zeroshot-frequency 2 \
     --report-to="tensorboard, wandb" \
@@ -16,8 +16,8 @@ python -m torch.distributed.launch --nproc_per_node=2 \
     --pretrained=${PRETRAINED} \
     --precision "fp16" \
     --warmup 0 \
-    --batch-size=160 \
-    --eval-batch-size=160 \
+    --batch-size=128 \
+    --eval-batch-size=128 \
     --log-every-n-steps 200 \
     --epochs=20 \
     --lr=1e-5 \
@@ -35,14 +35,14 @@ python -m torch.distributed.launch --nproc_per_node=2 \
     --model=${MODEL} \
     --seed 4096 \
     --gather-with-grad \
-    --text-base="meta-llama/Meta-Llama-3.1-8B-Instruct" \
-    --llm2vec-path="/data/research/tmp/checkpoint-llama8b2_old/" \
+    --text-base="/model/llm2clip/llm2vec/8b_special/mntp/checkpoint-5779/" \
+    --llm2vec-path="/model/llm2clip/llm2vec/8b_special/supervised/checkpoint-12535/" \
     --force-custom-clip \
     --optimizer="ap_adamw" \
     --zero-stage=1 \
     --dataset-type "cxr" \
     --csv-img-key "img_path" \
-    --csv-caption-key "caption" \
+    --csv-caption-key "caption2_lite" \
     --rsna "/data/research/csv/rsna_test.csv" \
     --siim "/data/research/csv/siim_test.csv" \
     --openi "/data/csv/llm2clip/openi_clip_val.csv" \
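Note: the launch moves from 2 GPUs at a per-GPU batch of 160 to 4 GPUs at 128, so the effective global batch grows from 320 to 512 samples per step (assuming one process per GPU and no gradient accumulation); num_processes in llm_caption_contrastive/ac_zero2.yaml, at the end of this commit, gets the same 2-to-4 bump, presumably to match. --text-base also now points at a local MNTP checkpoint rather than the Hugging Face model id, pairing with the two-stage adapter loading added in training/main.py below.

# Worked arithmetic for the global batch size change
# (assumes one process per GPU and no gradient accumulation).
old_global = 2 * 160   # nproc_per_node=2, --batch-size=160 -> 320
new_global = 4 * 128   # nproc_per_node=4, --batch-size=128 -> 512
print(old_global, new_global)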

llm2clip/training/data.py

Lines changed: 9 additions & 5 deletions

@@ -51,17 +51,21 @@
 from PIL import Image
 import torch
 
-def apply_dropout(text):
+def apply_dropout(text, age_dropout=0.3, view_dropout=0.3, gender_dropout=0.3, bmi_dropout=0.3):
     # 30% chance to drop each attribute
-    if random.random() < 0.3:
+    if random.random() < view_dropout:
         # Replace view position
         text = re.sub(r"This is a (\w+) view", "This is a unknown view", text)
 
-    if random.random() < 0.3:
+    if random.random() < age_dropout:
         # Replace age
-        text = re.sub(r"The patient is (\d+) years old", "The patient is unknown years old", text)
+        text = re.sub(r"The patient's age is (\d+)", "The patient's age is unknown", text)
+
+    if random.random() < bmi_dropout:
+        # Replace BMI
+        text = re.sub(r"The patient's bmi is (\d+)", "The patient's bmi is unknown", text)
 
-    if random.random() < 0.3:
+    if random.random() < gender_dropout:
         # Replace gender
         text = re.sub(r"The patient's gender is (\w+)", "The patient's gender is unknown", text)
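For reference, a self-contained sketch of the updated function (reconstructed from the hunk above; the random/re imports and the trailing return are assumed, since the hunk cuts off before the function's tail). The hard-coded 0.3 probabilities become per-attribute parameters, and the age regex is reworded to track the new captions selected by --csv-caption-key "caption2_lite" in run.sh:

import random
import re

def apply_dropout(text, age_dropout=0.3, view_dropout=0.3,
                  gender_dropout=0.3, bmi_dropout=0.3):
    # Independently mask each patient attribute with its own probability.
    if random.random() < view_dropout:
        text = re.sub(r"This is a (\w+) view", "This is a unknown view", text)
    if random.random() < age_dropout:
        text = re.sub(r"The patient's age is (\d+)", "The patient's age is unknown", text)
    if random.random() < bmi_dropout:
        text = re.sub(r"The patient's bmi is (\d+)", "The patient's bmi is unknown", text)
    if random.random() < gender_dropout:
        text = re.sub(r"The patient's gender is (\w+)", "The patient's gender is unknown", text)
    return text  # assumed: the hunk ends before the function returns

# Example:
print(apply_dropout("This is a frontal view. The patient's age is 61."))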

llm2clip/training/main.py

Lines changed: 9 additions & 3 deletions

@@ -4,7 +4,7 @@
 import random
 from datetime import datetime
 sys.path.append(os.getcwd())
-
+from peft import PeftModel
 import numpy as np
 import torch
 from torch.cuda.amp import GradScaler
@@ -143,19 +143,25 @@ def main(args):
         cache_dir=args.cache_dir,
         skip_list=args.skip_list,
     )
-
+    logging.info("text_model is loading...")
     random_seed(args.seed, args.rank)
     if args.llm2vec_path:
         print("Using LLM2Vec")
         text_model = LLM2Vec.from_pretrained(
             base_model_name_or_path=args.text_base,
             enable_bidirectional=True,
-            peft_model_name_or_path=args.llm2vec_path,
+            peft_model_name_or_path=args.text_base,
             merge_peft=True,
             pooling_mode="mean",
             max_length=512,
             torch_dtype=torch.bfloat16,
         )
+        text_model.model = PeftModel.from_pretrained(
+            text_model.model,
+            args.llm2vec_path,
+        )
+
+        text_model.model = text_model.model.merge_and_unload()
     # Add a trainable projection layer
     projection_layer = nn.Sequential(
         nn.LayerNorm(text_model.config.hidden_size),
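Note: the text tower is now assembled in two stages, following the llm2vec recipe of an MNTP adapter followed by a supervised contrastive adapter. LLM2Vec.from_pretrained loads and merges the MNTP LoRA from --text-base (the checkpoint directory appears to hold the base config alongside that adapter, hence it is passed for both path arguments), and the supervised LoRA from --llm2vec-path is then applied with peft and folded into the weights. Condensed, the new flow is (a sketch; paths are the ones from run.sh above):

from llm2vec import LLM2Vec
from peft import PeftModel
import torch

mntp_ckpt = "/model/llm2clip/llm2vec/8b_special/mntp/checkpoint-5779/"
supervised_ckpt = "/model/llm2clip/llm2vec/8b_special/supervised/checkpoint-12535/"

# Stage 1: base weights + MNTP LoRA, merged at load time.
text_model = LLM2Vec.from_pretrained(
    base_model_name_or_path=mntp_ckpt,
    peft_model_name_or_path=mntp_ckpt,
    enable_bidirectional=True,
    merge_peft=True,
    pooling_mode="mean",
    max_length=512,
    torch_dtype=torch.bfloat16,
)

# Stage 2: supervised contrastive LoRA applied on top, then merged so
# downstream code sees a plain transformer without adapter indirection.
text_model.model = PeftModel.from_pretrained(text_model.model, supervised_ckpt)
text_model.model = text_model.model.merge_and_unload()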

llm2clip/training/train.py

Lines changed: 9 additions & 9 deletions

@@ -259,12 +259,12 @@ def evaluate_iter(model, tokenizer, data, iter_nums, epoch, args, tb_writer=None):
     model.eval()
     l2v = LLM2Vec(model.text.model, tokenizer, pooling_mode="mean", max_length=512) #TODO: modify this
     print('evaluating retrieval')
-    with torch.no_grad():
-        retrieval_zero_shot_metrics = retrieval_eval(model, l2v, data, epoch, args)
-        metrics.update(retrieval_zero_shot_metrics)
-        zero_shot_metrics = zero_shot_eval(model, l2v, data, epoch, args)
-        metrics.update(zero_shot_metrics)
-        print(zero_shot_metrics)
+    # with torch.no_grad():
+    #     retrieval_zero_shot_metrics = retrieval_eval(model, l2v, data, epoch, args)
+    #     metrics.update(retrieval_zero_shot_metrics)
+    #     zero_shot_metrics = zero_shot_eval(model, l2v, data, epoch, args)
+    #     metrics.update(zero_shot_metrics)
+    #     print(zero_shot_metrics)
     autocast = get_autocast(args.precision)
     cast_dtype = get_cast_dtype(args.precision)
     if 'val' in data:
@@ -356,9 +356,9 @@ def evaluate(model, tokenizer, data, epoch, args, tb_writer=None):
     l2v = LLM2Vec(model.text.model, tokenizer, pooling_mode="mean", max_length=512) #TODO: modify this
     retrieval_zero_shot_metrics = retrieval_eval(model, l2v, data, epoch, args)
     metrics.update(retrieval_zero_shot_metrics)
-    zero_shot_metrics = zero_shot_eval(model, l2v, data, epoch, args)
-    metrics.update(zero_shot_metrics)
-    print(zero_shot_metrics)
+    # zero_shot_metrics = zero_shot_eval(model, l2v, data, epoch, args)
+    # metrics.update(zero_shot_metrics)
+    # print(zero_shot_metrics)
     autocast = get_autocast(args.precision)
     cast_dtype = get_cast_dtype(args.precision)

llm2clip/training/zero_shot.py

Lines changed: 2 additions & 2 deletions

@@ -160,7 +160,7 @@ def zero_shot_eval(model, l2v, data, epoch, args):
     # Add medical condition evaluation
     if 'rsna' in data:
         text_categories = {
-            'pneumonia': ["Detected abnormalities : There is pneumonia.", "Detected abnormalities : There is no pneumonia"],
+            'pneumonia': ["pneumonia is present", "there is no pneumonia"],
         }
 
         logging.info('Building medical zero-shot classifier')
@@ -175,7 +175,7 @@ def zero_shot_eval(model, l2v, data, epoch, args):
 
     if 'siim' in data:
         text_categories = {
-            'pneumothorax': ['Detected abnormalities : There is pneumothorax.', 'Detected abnormalities : There is no pneumothorax.']
+            'pneumothorax': ['pneumothorax is present', 'there is no pneumothorax']
        }
         logging.info('Building medical zero-shot classifier')
         medical_classifier = zero_shot_classifier_medical(model, text_categories, l2v, args)
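Note: both prompt pairs drop the templated "Detected abnormalities :" prefix in favor of plain positive/negative statements. In a binary zero-shot setup like this, classification typically reduces to embedding the two prompts and taking the higher cosine similarity per image; a hedged sketch of that pattern (the actual zero_shot_classifier_medical may differ):

import torch
import torch.nn.functional as F

def binary_zero_shot(image_features: torch.Tensor,
                     prompt_features: torch.Tensor) -> torch.Tensor:
    # image_features: (N, D); prompt_features: (2, D), ordered as
    # [condition present, condition absent].
    img = F.normalize(image_features, dim=-1)
    txt = F.normalize(prompt_features, dim=-1)
    logits = img @ txt.t()           # cosine similarity to each prompt
    return logits.argmax(dim=-1)     # 0 = present, 1 = absent

# Example with random features (D=768 chosen only for illustration):
preds = binary_zero_shot(torch.randn(4, 768), torch.randn(2, 768))
print(preds)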

llm_caption_contrastive/ac_zero2.yaml

Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@ machine_rank: 0
 main_training_function: main
 mixed_precision: bf16
 num_machines: 1
-num_processes: 2
+num_processes: 4
 rdzv_backend: static
 same_network: true
 tpu_env: []
