Commit b177fc4

Add Groq to README & specs
1 parent b060d86 commit b177fc4

File tree: 4 files changed (+189, −9 lines)

README.md

Lines changed: 23 additions & 1 deletion
@@ -27,6 +27,7 @@ Stream text with GPT-4, transcribe and translate audio with Whisper, or create i
 - [Faraday middleware](#faraday-middleware)
 - [Azure](#azure)
 - [Ollama](#ollama)
+- [Groq](#groq)
 - [Counting Tokens](#counting-tokens)
 - [Models](#models)
 - [Chat](#chat)
@@ -239,6 +240,27 @@ client.chat(
 # => Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat?
 ```

+#### Groq
+
+[Groq API Chat](https://console.groq.com/docs/quickstart) is broadly compatible with the OpenAI API, with a [few minor differences](https://console.groq.com/docs/openai). Get an access token from [here](https://console.groq.com/keys), then:
+
+```ruby
+client = OpenAI::Client.new(
+  access_token: "groq_access_token_goes_here",
+  uri_base: "https://api.groq.com/openai"
+)
+
+client.chat(
+  parameters: {
+    model: "llama3-8b-8192", # Required.
+    messages: [{ role: "user", content: "Hello!" }], # Required.
+    temperature: 0.7,
+    stream: proc do |chunk, _bytesize|
+      print chunk.dig("choices", 0, "delta", "content")
+    end
+  })
+```
+
 ### Counting Tokens

 OpenAI parses prompt text into [tokens](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them), which are words or portions of words. (These tokens are unrelated to your API access_token.) Counting tokens can help you estimate your [costs](https://openai.com/pricing). It can also help you ensure your prompt text size is within the max-token limits of your model's context window, and choose an appropriate [`max_tokens`](https://platform.openai.com/docs/api-reference/chat/create#chat/create-max_tokens) completion parameter so your response will fit as well.
@@ -247,7 +269,7 @@ To estimate the token-count of your text:

 ```ruby
 OpenAI.rough_token_count("Your text")
 ```

 If you need a more accurate count, try [tiktoken_ruby](https://github.com/IAPark/tiktoken_ruby).
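As a quick sanity check, a widely cited rule of thumb is that English text averages roughly four characters per token. The sketch below implements only that heuristic — it is an illustrative assumption, not the gem's `rough_token_count` algorithm and not tiktoken's exact tokenizer:

```ruby
# Illustrative heuristic only: English text averages ~4 characters per token.
# This is NOT the gem's exact algorithm; use OpenAI.rough_token_count or
# tiktoken_ruby when you need a real estimate.
def approx_token_count(text)
  (text.length / 4.0).ceil
end

puts approx_token_count("Your text") # => 3 (9 chars / 4, rounded up)
puts approx_token_count("Hello! It's nice to meet you.") # => 8
```

Because it is a pure character count, the heuristic drifts for code, non-English text, and long words, which is exactly why the README points at tiktoken_ruby for accurate counts.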
253275
Lines changed: 126 additions & 0 deletions (new file)

@@ -0,0 +1,126 @@
---
http_interactions:
- request:
    method: post
    uri: https://api.groq.com/openai/v1/chat/completions
    body:
      encoding: UTF-8
      string: '{"model":"llama3-8b-8192","messages":[{"role":"user","content":"Hello!"}],"stream":true}'
    headers:
      Content-Type:
      - application/json
      Authorization:
      - Bearer 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
      Accept-Encoding:
      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
      Accept:
      - "*/*"
      User-Agent:
      - Ruby
  response:
    status:
      code: 200
      message: OK
    headers:
      Date:
      - Sun, 28 Apr 2024 12:58:48 GMT
      Content-Type:
      - text/event-stream
      Transfer-Encoding:
      - chunked
      Connection:
      - keep-alive
      Cache-Control:
      - no-cache
      Vary:
      - Origin
      X-Ratelimit-Limit-Requests:
      - '14400'
      X-Ratelimit-Limit-Tokens:
      - '12000'
      X-Ratelimit-Remaining-Requests:
      - '14399'
      X-Ratelimit-Remaining-Tokens:
      - '11994'
      X-Ratelimit-Reset-Requests:
      - 6s
      X-Ratelimit-Reset-Tokens:
      - 30ms
      X-Request-Id:
      - req_01hwjcjqzaebarxz5f2vm7fz34
      Via:
      - 1.1 google
      Alt-Svc:
      - h3=":443"; ma=86400
      Cf-Cache-Status:
      - DYNAMIC
      Set-Cookie:
      - __cf_bm=.ZNFRsf2P9DwyqjhO3cuf9Pu_3fVQzLXYXtUcJ_UMkI-1714309128-1.0.1.1-w6OhIRMxkM4Ouz9NIqUiuv8Kd1o9nuCib2x07qAHI12yNx193y6_5ol27ayr.mfxkd0ME.JLgC3W5DGsXoA3SA;
        path=/; expires=Sun, 28-Apr-24 13:28:48 GMT; domain=.groq.com; HttpOnly; Secure;
        SameSite=None
      Server:
      - cloudflare
      Cf-Ray:
      - 87b739d22ceb1c23-SOF
    body:
      encoding: UTF-8
      string: |+
        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"x_groq":{"id":"req_01hwjcjqzaebarxz5f2vm7fz34"}}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" It"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":"'s"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" nice"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" meet"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" Is"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" there"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" something"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" help"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" with"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" or"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" would"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" like"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":" chat"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]}

        data: {"id":"chatcmpl-020c62b8-ef08-4c4f-8d11-5fe27c0ad788","object":"chat.completion.chunk","created":1714309128,"model":"llama3-8b-8192","system_fingerprint":"fp_dadc9d6142","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"x_groq":{"id":"req_01hwjcjqzaebarxz5f2vm7fz34","usage":{"queue_time":0.085418399,"prompt_tokens":12,"prompt_time":0.005,"completion_tokens":25,"completion_time":0.029,"total_tokens":37,"total_time":0.034}}}

        data: [DONE]

  recorded_at: Sun, 28 Apr 2024 12:58:48 GMT
recorded_with: VCR 6.1.0
...
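The cassette body above is a server-sent-events stream: each `data:` line carries one JSON chunk whose `choices[0].delta.content` holds the next text fragment, and the stream ends with `data: [DONE]`. As a standalone sketch (using shortened sample chunks in the same shape as the cassette, not the client library itself), the full reply can be reassembled like this:

```ruby
require "json"

# Two sample SSE chunks in the same shape as the cassette above
# (ids and extra fields omitted for brevity).
raw_stream = <<~SSE
  data: {"choices":[{"index":0,"delta":{"content":"Hello"}}]}

  data: {"choices":[{"index":0,"delta":{"content":"!"}}]}

  data: [DONE]
SSE

# Reassemble the assistant's reply by concatenating each chunk's delta,
# skipping blank keep-alive lines and the [DONE] sentinel.
reply = raw_stream.each_line.filter_map do |line|
  payload = line.sub(/\Adata: /, "").strip
  next if payload.empty? || payload == "[DONE]"
  JSON.parse(payload).dig("choices", 0, "delta", "content")
end.join

puts reply # => "Hello!"
```

This is essentially what the `stream:` proc in the README example receives: the client parses each `data:` payload and hands the resulting hash to your proc one chunk at a time.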

spec/fixtures/cassettes/llama3_chat.yml renamed to spec/fixtures/cassettes/ollama_llama3_chat.yml

Lines changed: 7 additions & 7 deletions

spec/openai/client/chat_spec.rb

Lines changed: 33 additions & 1 deletion
@@ -11,7 +11,8 @@
     end
     let(:parameters) { { model: model, messages: messages, stream: stream } }
     let(:content) { response.dig("choices", 0, "message", "content") }
-    let(:cassette) { "#{model} #{'streamed' if stream} chat".downcase }
+    let(:provider) { nil }
+    let(:cassette) { "#{"#{provider}_" if provider}#{model} #{'streamed' if stream} chat".downcase }

     context "with model: gpt-3.5-turbo" do
       let(:model) { "gpt-3.5-turbo" }
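The updated `cassette` helper only prepends a provider prefix when a `provider` is set, so existing OpenAI cassette names are unchanged. A standalone method mirroring the same interpolation (hypothetical wrapper, not part of the spec) shows the resulting names:

```ruby
# Mirrors the spec's cassette-name interpolation: the "provider_" prefix is
# only included when a provider is set, and the whole name is downcased.
def cassette_name(model:, provider: nil, stream: false)
  "#{"#{provider}_" if provider}#{model} #{'streamed' if stream} chat".downcase
end

puts cassette_name(model: "llama3-8b-8192", provider: "groq", stream: true)
# => "groq_llama3-8b-8192 streamed chat"
puts cassette_name(model: "llama3", provider: "ollama", stream: true)
# => "ollama_llama3 streamed chat"
```

This matches why the old fixture was renamed from `llama3_chat.yml` to `ollama_llama3_chat.yml`: the Ollama context now sets `provider`, so its cassette names gain the `ollama_` prefix.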
@@ -176,6 +177,7 @@ def call(chunk)

       context "with Ollama + model: llama3" do
         let(:uri_base) { "http://localhost:11434" }
+        let(:provider) { "ollama" }
         let(:model) { "llama3" }

         it "succeeds" do
@@ -190,6 +192,36 @@ def call(chunk)
           end
         end
       end
+
+      context "with Groq + model: llama3" do
+        let(:uri_base) { "https://api.groq.com/openai" }
+        let(:provider) { "groq" }
+        let(:model) { "llama3-8b-8192" }
+        let(:access_token) { ENV.fetch("GROQ_ACCESS_TOKEN", nil) }
+        let(:response) do
+          OpenAI::Client.new({ uri_base: uri_base, access_token: access_token }).chat(
+            parameters: parameters
+          )
+        end
+        let(:chunks) { [] }
+        let(:stream) do
+          proc do |chunk, _bytesize|
+            print chunk["choices"][0]["delta"]["content"]
+            chunks << chunk
+          end
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            unless access_token
+              pending "This test needs a `GROQ_ACCESS_TOKEN` environment variable to run"
+            end
+
+            response
+            expect(chunks.dig(0, "choices", 0, "index")).to eq(0)
+          end
+        end
+      end
     end
   end
 end
