Skip to content

Commit bd71a8b

Browse files
committed
Tweaks
1 parent 1c9c266 commit bd71a8b

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

corpus.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919

2020

2121
def ask_contextful_prompt(prompt, embeddings):
22+
print(f"supplied prompt: {prompt}")
2223
relevant = order_document_sections_by_query_similarity(prompt, embeddings)[:5]
23-
print("relevant", relevant)
24+
print(f"relevant: {relevant}")
2425

2526
chosen_sections = []
2627
chosen_sections_len = 0
@@ -33,10 +34,9 @@ def ask_contextful_prompt(prompt, embeddings):
3334
# so just working around with this for simplicity sake,
3435
# probably because I was doing something silly earlier but tbh
3536
# don't feel like reworking it :-)
36-
df_context = build_corpus()
37+
df_context = build_corpus()
3738
by_key = { (r.title, r.heading): r for _, r in df_context.iterrows()}
3839

39-
4040
for _, section_index in relevant:
4141
# Add contexts until we run out of space.
4242
document_section = by_key[section_index]
@@ -46,7 +46,7 @@ def ask_contextful_prompt(prompt, embeddings):
4646
break
4747
chosen_sections.append(SEPARATOR + document_section.content.replace("\n", " "))
4848
chosen_sections_indexes.append(str(section_index))
49-
49+
5050
return ask_prompt(prompt, context="".join(chosen_sections))
5151

5252

@@ -55,7 +55,7 @@ def ask_prompt(prompt, context=None):
5555
context_str = ""
5656
if context:
5757
context_str = f"\nContext:\n {context}"
58-
58+
5959
templated_prompt = f"""Answer the question as truthfully as possible, and if you're unsure of the answer, say "Sorry, I don't know".
6060
{context_str}
6161
@@ -65,7 +65,7 @@ def ask_prompt(prompt, context=None):
6565
A:
6666
"""
6767

68-
print(templated_prompt)
68+
print("\n",templated_prompt)
6969

7070
resp = openai.Completion.create(
7171
prompt=templated_prompt,
@@ -94,7 +94,7 @@ def order_document_sections_by_query_similarity(query: str, contexts: dict[(str,
9494
document_similarities = sorted([
9595
(vector_similarity(query_embedding, doc_embedding), doc_index) for doc_index, doc_embedding in contexts.items()
9696
], reverse=True)
97-
97+
9898
return document_similarities
9999

100100

@@ -177,7 +177,7 @@ def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
177177
pickle.dump(EMBEDDINGS_CACHE, embedding_cache_file)
178178

179179
return EMBEDDINGS_CACHE[key]
180-
180+
181181

182182
def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
183183
"""
@@ -257,9 +257,9 @@ def get_filepaths(directories):
257257
document_embeddings = get_document_embeddings()
258258

259259
prompts = [
260-
"What do staff engineers do?",
260+
"When should I promote internal candidates versus hiring externally?",
261261
"How should I get an engineering executive job?",
262-
262+
"What do staff engineers do?",
263263
]
264264

265265
for prompt in prompts[:1]:

0 commit comments

Comments
 (0)