|
5 | 5 | import pandas as pd
|
6 | 6 | import numpy as np
|
7 | 7 | from app.models.request_models import Example, Example_eval
|
8 |
| -from app.core.config import UseCase, Technique, ModelFamily, get_model_family,USE_CASE_CONFIGS, LENDING_DATA_PROMPT |
| 8 | +from app.core.config import UseCase, Technique, ModelFamily, get_model_family,USE_CASE_CONFIGS, LENDING_DATA_PROMPT, USE_CASE_CONFIGS_EVALS |
9 | 9 | from app.core.data_loader import DataLoader
|
10 | 10 | from app.core.data_analyser import DataAnalyser
|
11 | 11 | from app.core.summary_formatter import SummaryFormatter
|
|
73 | 73 | );
|
74 | 74 | """
|
75 | 75 |
|
| 76 | + |
| 77 | + |
76 | 78 | DEFAULT_freeform_TEXT2SQL_PROMPT = """Requirements:
|
77 | 79 | - Each solution must be a working SQL query
|
78 | 80 | - Include explanations where needed
|
|
92 | 94 | - "question"
|
93 | 95 | - "solution"""
|
94 | 96 |
|
| 97 | +Default_freeform_lending_data_prompt = """ |
| 98 | + You need to create profile data for the LendingClub company which specialises in lending various types of loans to urban customers. |
| 99 | + |
| 100 | +
|
| 101 | +You need to generate the data in the same order for the following fields (description of each field is followed after the colon): |
| 102 | +
|
| 103 | +loan_amnt: The listed amount of the loan applied for by the borrower. If at some point in time, the credit department reduces the loan amount, then it will be reflected in this value. |
| 104 | +term: The number of payments on the loan. Values are in months and can be either 36 months or 60 months. |
| 105 | +int_rate: Interest Rate on the loan |
| 106 | +installment: The monthly payment owed by the borrower if the loan originates. |
| 107 | +grade: LC assigned loan grade (Possible values: A, B, C, D, E, F, G) |
| 108 | +sub_grade: LC assigned loan subgrade (Possible sub-values: 1-5 i.e A5) |
| 109 | +emp_title: The job title supplied by the Borrower when applying for the loan. |
| 110 | +emp_length: Employment length in years. Possible values are between 0 and 10 where 0 means less than one year and 10 means ten or more years. |
| 111 | +home_ownership: The home ownership status provided by the borrower during registration or obtained from the credit report. Our values are: RENT, OWN, MORTGAGE, OTHER |
| 112 | +annual_inc: The self-reported annual income provided by the borrower during registration. |
| 113 | +verification_status: Indicates if income was verified by LC, not verified, or if the income source was verified |
| 114 | +issue_d: The month which the loan was funded |
| 115 | +loan_status: Current status of the loan |
| 116 | +purpose: A category provided by the borrower for the loan request. |
| 117 | +title: The loan title provided by the borrower |
| 118 | +dti: A ratio calculated using the borrower’s total monthly debt payments on the total debt obligations, excluding mortgage and the requested LC loan, divided by the borrower’s self-reported monthly income. |
| 119 | +earliest_cr_line: The month the borrower's earliest reported credit line was opened |
| 120 | +open_acc: The number of open credit lines in the borrower's credit file. |
| 121 | +pub_rec: Number of derogatory public records |
| 122 | +revol_bal: Total credit revolving balance |
| 123 | +revol_util: Revolving line utilization rate, or the amount of credit the borrower is using relative to all available revolving credit. |
| 124 | +total_acc: The total number of credit lines currently in the borrower's credit file |
| 125 | +initial_list_status: The initial listing status of the loan. Possible values are – W, F |
| 126 | +application_type: Indicates whether the loan is an individual application or a joint application with two co-borrowers |
| 127 | +mort_acc: Number of mortgage accounts. |
| 128 | +pub_rec_bankruptcies: Number of public record bankruptcies |
| 129 | +address: The physical address of the person |
| 130 | +
|
| 131 | +Ensure PII from examples such as addresses are not used in the generated data to minimize any privacy concerns. |
| 132 | +""" |
| 133 | + |
95 | 134 | DEFAULT_TEXT2SQL_PROMPT = """Requirements:
|
96 | 135 | - Each solution must be a working SQL query
|
97 | 136 | - Include explanations where needed
|
@@ -135,9 +174,10 @@ class PromptHandler:
|
135 | 174 | def format_examples(examples: List[Example]) -> str:
|
136 | 175 | """Format examples as JSON string"""
|
137 | 176 | return [
|
138 |
| - {"question": example.question, "solution": example.solution} |
139 |
| - for example in (examples) |
140 |
| - ] |
| 177 | + {"question": example.question, "solution": example.solution} |
| 178 | + for example in (examples) |
| 179 | + ] |
| 180 | + |
141 | 181 | @staticmethod
|
142 | 182 | def format_examples_eval(examples: List[Example_eval]) -> str:
|
143 | 183 | """Format examples as JSON string"""
|
@@ -196,17 +236,7 @@ def get_freeform_default_custom_prompt(use_case:UseCase, custom_prompt):
|
196 | 236 | @staticmethod
|
197 | 237 | def get_default_custom_eval_prompt(use_case:UseCase, custom_prompt):
|
198 | 238 | if custom_prompt == None:
|
199 |
| - if use_case == UseCase.TEXT2SQL: |
200 |
| - custom_prompt = DEFAULT_TEXT2SQL_EVAL_PROMPT |
201 |
| - |
202 |
| - return custom_prompt |
203 |
| - elif use_case == UseCase.CODE_GENERATION: |
204 |
| - custom_prompt = DEFAULT_CODE_GENERATION_EVAL_PROMPT |
205 |
| - return custom_prompt |
206 |
| - |
207 |
| - elif use_case == UseCase.CUSTOM: |
208 |
| - custom_prompt = " " |
209 |
| - return custom_prompt |
| 239 | + return USE_CASE_CONFIGS_EVALS[use_case].prompt |
210 | 240 | else:
|
211 | 241 | return custom_prompt
|
212 | 242 | @staticmethod
|
@@ -536,7 +566,13 @@ def get_freeform_eval_prompt(model_id: str,
|
536 | 566 | custom_prompt = Optional[str]
|
537 | 567 | ) -> str:
|
538 | 568 | custom_prompt_str = PromptHandler.get_default_custom_eval_prompt(use_case, custom_prompt)
|
539 |
| - examples_str = PromptHandler.get_default_eval_example(use_case, examples) |
| 569 | + #examples_str = PromptHandler.get_default_eval_example(use_case, examples) |
| 570 | + |
| 571 | + if examples: |
| 572 | + examples_str = PromptHandler.format_examples_eval(examples) |
| 573 | + |
| 574 | + elif examples == [] or examples == None: |
| 575 | + examples_str = PromptHandler.format_examples_eval(USE_CASE_CONFIGS_EVALS[use_case].default_examples) |
540 | 576 |
|
541 | 577 | base_prompt = """ You are a brilliant judge on evaluating a set of data with fields and corresponding values
|
542 | 578 | Follow the given instructions to understand the structure of given data and evaluate it based on parameters defined for you."""
|
@@ -1003,11 +1039,14 @@ def json_serializable(obj):
|
1003 | 1039 | examples_str = json.dumps(example_custom, indent=2)
|
1004 | 1040 |
|
1005 | 1041 | else:
|
1006 |
| - if use_case == UseCase.CODE_GENERATION or use_case == UseCase.TEXT2SQL: |
1007 |
| - examples_str = json.dumps(USE_CASE_CONFIGS[use_case].default_examples) |
1008 |
| - else: |
1009 |
| - examples_str = None |
1010 |
| - custom_prompt_default = PromptHandler.get_freeform_default_custom_prompt(use_case, custom_prompt) |
| 1042 | + #if use_case == UseCase.CODE_GENERATION or use_case == UseCase.TEXT2SQL or use_case == UseCase.LENDING_DATA: |
| 1043 | + examples_str = json.dumps(USE_CASE_CONFIGS[use_case].default_examples) |
| 1044 | + |
| 1045 | + if custom_prompt is None: |
| 1046 | + custom_prompt_default = USE_CASE_CONFIGS[use_case].prompt |
| 1047 | + else: |
| 1048 | + custom_prompt_default = custom_prompt |
| 1049 | + #custom_prompt_default = PromptHandler.get_freeform_default_custom_prompt(use_case, custom_prompt) |
1011 | 1050 | schema_str = PromptHandler.get_default_schema(use_case, schema)
|
1012 | 1051 | if use_case ==UseCase.TEXT2SQL:
|
1013 | 1052 | custom_prompt_str = f"""Using this database schema:
|
|
0 commit comments