-
Notifications
You must be signed in to change notification settings - Fork 444
Llava api #563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Llava api #563
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
fc42471
Add llava_binaries, update all binaries to make the test
SignalRT 6307a2f
Llava API + LlavaTest
SignalRT b1fe9ab
First prototype of Load + Unit Test
SignalRT 042d6d1
Temporary run test con branch LlavaAPI
SignalRT de01e2c
Disable Embed test to review the rest of the test
SignalRT 2f730dc
Restore Embedding test
SignalRT 384fcef
Use BatchThread to eval image embeddings
SignalRT fcf60b4
Rename test file
SignalRT 8418a33
Update action versions
SignalRT 71a1ff5
Test only one method, no release embeddings
SignalRT fd467ad
Revert "Test only one method, no release embeddings"
SignalRT a13b3c1
Correct API call
SignalRT 2d75de3
Only test llava related functionality
SignalRT 0110745
Cuda and Cblast binaries
SignalRT da8b3fa
Restore build policy
SignalRT cc0bcfe
Changes related with code review
SignalRT d04bfc4
Add SafeHandles
SignalRT d87f355
Set overwrite to upload-artifact@v4
SignalRT 5fc2264
Revert to upload-artifact@v3
SignalRT 72fba18
revert to upload-artifact@v3
SignalRT File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Llava API + LlavaTest
Preliminary
- Loading branch information
commit 6307a2f635153c82da05d30a0e4c21f0b74695af
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
using LLama.Common; | ||
using LLama.Native; | ||
|
||
namespace LLama.Unittest | ||
{ | ||
/// <summary>
/// Loads a set of llama weights together with LLaVA (multimodal projector) weights,
/// creates a context from the llama weights and checks context properties and tokenization.
/// </summary>
public sealed class LLavaWeightTests
    : IDisposable
{
    private readonly LLamaWeights _llamaWeights;
    private readonly LLavaWeights _lLavaWeights;
    private readonly LLamaContext _context;

    /// <summary>
    /// Loads both sets of weights and creates the context shared by every test in this class.
    /// </summary>
    public LLavaWeightTests()
    {
        // NOTE(review): the llama weights are loaded from the same path as the multimodal
        // projector (Constants.LLavaMmpPath). Presumably this should point at the language
        // model file instead - confirm which constant holds that path.
        var @params = new ModelParams(Constants.LLavaMmpPath)
        {
            // Llava models require a big context
            ContextSize = 4096,
        };
        _llamaWeights = LLamaWeights.LoadFromFile(@params);
        _lLavaWeights = LLavaWeights.LoadFromFile(Constants.LLavaMmpPath);

        _context = _llamaWeights.CreateContext(@params);
    }

    /// <summary>
    /// Releases everything created by the constructor.
    /// </summary>
    public void Dispose()
    {
        // The context was created from the llama weights, so release it before them.
        _context.Dispose();
        _llamaWeights.Dispose();
        _lLavaWeights.Dispose();
    }

    [Fact]
    public void CheckProperties()
    {
        // NOTE(review): the constructor requests ContextSize = 4096 but 768 is asserted here;
        // confirm which value is intended.
        Assert.Equal(768u, _context.ContextSize);
        Assert.Equal(4096, _context.EmbeddingSize);
        Assert.Equal(32000, _context.VocabCount);
    }

    [Fact]
    public void Tokenize()
    {
        var tokens = _context.Tokenize("The quick brown fox", true);

        Assert.Equal(new LLamaToken[] { 1, 450, 4996, 17354, 1701, 29916 }, tokens);
    }

    [Fact]
    public void TokenizeNewline()
    {
        var tokens = _context.Tokenize("\n", false, false);

        Assert.Equal(new LLamaToken[] { 29871, 13 }, tokens);
    }

    [Fact]
    public void TokenizeRoundtripSpecialStrings()
    {
        var strings = new[]
        {
            "\t", "\t\t", "\t\t\t",
            "\n\n", "\n\n\n", "\n\n\n\n",
            "\t\n", "\t\n\t\n\n\n\n\t\t",
            "\b", "\v", "\0"
        };

        foreach (var s in strings)
        {
            var tokens = _context.Tokenize(s, false, false);
            var decoder = new StreamingTokenDecoder(_context);
            decoder.AddRange(tokens);

            var str = decoder.Read();

            // Leading spaces in the decoded text are ignored for the comparison.
            Assert.Equal(s, str.TrimStart(' '));
        }
    }

    [Fact]
    public void TokenizeWithoutBOS()
    {
        var tokens = _context.Tokenize("The quick brown fox", false);

        Assert.Equal(new LLamaToken[] { 450, 4996, 17354, 1701, 29916 }, tokens);
    }

    [Fact]
    public void TokenizeEmpty()
    {
        var tokens = _context.Tokenize("", false);

        Assert.Equal(Array.Empty<LLamaToken>(), tokens);
    }
}
} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
|
||
using System; | ||
using LLama.Native; | ||
|
||
namespace LLama; | ||
|
||
/// <summary>
/// Managed wrapper around a loaded LLaVA model handle.
/// </summary>
public sealed class LLavaWeights : IDisposable
{
    /// <summary>
    /// The native handle that every call on this object is forwarded to.
    /// </summary>
    public SafeLlavaModelHandle NativeClipHandle { get; }

    /// <summary>
    /// Wrap an already loaded handle.
    /// </summary>
    /// <param name="weights">Handle to wrap; it is disposed together with this object</param>
    internal LLavaWeights(SafeLlavaModelHandle weights) => NativeClipHandle = weights;

    /// <summary>
    /// Load the model from the given file (the native loader is called with verbosity 1).
    /// </summary>
    /// <param name="mmProject">Path of the file to load</param>
    /// <returns>A new <see cref="LLavaWeights"/> owning the loaded handle</returns>
    public static LLavaWeights LoadFromFile(string mmProject)
        => new LLavaWeights(SafeLlavaModelHandle.LoadFromFile(mmProject, 1));

    /// <summary>
    /// Embed an image, given as a file path, into the llama context.
    /// </summary>
    /// <param name="ctxLlama">Context that receives the image</param>
    /// <param name="Image">Path of the image file</param>
    /// <param name="n_past">Position value reported back by the native handle</param>
    /// <returns>The result reported by the native handle</returns>
    public bool EmbedImage(LLamaContext ctxLlama, string Image, out int n_past)
        => NativeClipHandle.EmbedImage(ctxLlama, Image, out n_past);

    /// <summary>
    /// Embed an image, given as raw bytes, into the llama context.
    /// </summary>
    /// <param name="ctxLlama">Context that receives the image</param>
    /// <param name="Image">Bytes of the image</param>
    /// <param name="n_past">Position value reported back by the native handle</param>
    /// <returns>The result reported by the native handle</returns>
    public bool EmbedImage(LLamaContext ctxLlama, Byte[] Image, out int n_past)
        => NativeClipHandle.EmbedImage(ctxLlama, Image, out n_past);

    /// <summary>
    /// Dispose the wrapped native handle.
    /// </summary>
    public void Dispose() => NativeClipHandle.Dispose();
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
using System; | ||
using System.Runtime.InteropServices; | ||
|
||
namespace LLama.Native; | ||
|
||
using clip_ctx = IntPtr; | ||
/// <summary>
/// P/Invoke declarations for the native clip / llava library.
/// </summary>
public static unsafe partial class NativeApi
{

    /*/// <summary>
    /// Clip Vision Parameters
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct clip_vision_hparams
    {
        public Int32 image_size;
        public Int32 patch_size;
        public Int32 hidden_size;
        public Int32 n_intermediate;
        public Int32 projection_dim;
        public Int32 n_head;
        public Int32 n_layer;
        public float eps;
    };*/

    /// <summary>
    /// LLaVa Image embeddings
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct llava_image_embed
    {
        public float* embed;
        public int n_image_pos;
    }

    /// <summary>
    /// Load MULTI MODAL PROJECTIONS model / Clip Model
    /// </summary>
    /// <param name="mmProj"> Model path/file</param>
    /// <param name="verbosity">Verbosity level</param>
    /// <returns>The native clip context (callers in this project treat IntPtr.Zero as failure)</returns>
    [DllImport(llavaLibraryName, EntryPoint = "clip_model_load", CallingConvention = CallingConvention.Cdecl)]
    public static extern clip_ctx clip_model_load(string mmProj, int verbosity);

    /// <summary>
    /// Frees MULTI MODAL PROJECTIONS model / Clip Model
    /// </summary>
    /// <param name="ctx">Clip context previously returned by <see cref="clip_model_load"/></param>
    [DllImport(llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)]
    public static extern void clip_free(clip_ctx ctx);

    /// <summary>
    /// Sanity check for clip and llava embed size match
    /// </summary>
    /// <param name="ctxLlama">Llama context</param>
    /// <param name="ctxClip">Clip context</param>
    /// <returns>The native result of the check</returns>
    // The native function returns a one byte C bool; without U1 the marshaller assumes a 4 byte BOOL.
    [DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)]
    [return: MarshalAs(UnmanagedType.U1)]
    public static extern bool llava_validate_embed_size(SafeLLamaContextHandle ctxLlama, clip_ctx ctxClip);

    /// <summary>
    /// Build an image embed from image file bytes
    /// </summary>
    /// <param name="ctx_clip">Clip context</param>
    /// <param name="n_threads">Number of threads to use</param>
    /// <param name="image_bytes">Bytes of the image</param>
    /// <param name="image_bytes_length">Number of bytes in <paramref name="image_bytes"/></param>
    /// <returns>Pointer to the new embed; release it with <see cref="llava_image_embed_free"/></returns>
    [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes",
        CallingConvention = CallingConvention.Cdecl)]
    public static extern llava_image_embed* llava_image_embed_make_with_bytes(clip_ctx ctx_clip, int n_threads,
        byte[] image_bytes, int image_bytes_length);

    /// <summary>
    /// Build an image embed from a path to an image filename
    /// </summary>
    /// <param name="ctx_clip">Clip context</param>
    /// <param name="n_threads">Number of threads to use</param>
    /// <param name="image_path">Path of the image file</param>
    /// <returns>Pointer to the new embed; release it with <see cref="llava_image_embed_free"/></returns>
    [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename",
        CallingConvention = CallingConvention.Cdecl)]
    public static extern llava_image_embed* llava_image_embed_make_with_filename(clip_ctx ctx_clip, int n_threads,
        [MarshalAs(UnmanagedType.LPStr)] string image_path);

    /// <summary>
    /// Free an embedding made with llava_image_embed_make_*
    /// </summary>
    /// <param name="embed">Embed to free</param>
    /// <returns></returns>
    // NOTE(review): llava.h appears to declare this function as returning void, in which case the
    // returned pointer is meaningless and should not be used - confirm against the native header.
    [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)]
    public static extern llava_image_embed* llava_image_embed_free(llava_image_embed* embed);

    /// <summary>
    /// Write the image represented by embed into the llama context with batch size n_batch, starting at context
    /// pos n_past. on completion, n_past points to the next position in the context after the image embed.
    /// </summary>
    /// <param name="ctc_llama">Llama context to write into</param>
    /// <param name="embed">Image embed to write</param>
    /// <param name="n_batch">Batch size</param>
    /// <param name="n_past">Position in the context; see summary</param>
    /// <returns>The native result of the evaluation</returns>
    // NOTE(review): the summary describes n_past as an in/out value, but it is declared "out" here,
    // so a caller cannot pass a starting position - confirm whether "ref" was intended.
    // The native function returns a one byte C bool; without U1 the marshaller assumes a 4 byte BOOL.
    [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)]
    [return: MarshalAs(UnmanagedType.U1)]
    public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, llava_image_embed* embed,
        int n_batch, out int n_past);

}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using LLama; | ||
using LLama.Exceptions; | ||
|
||
|
||
namespace LLama.Native | ||
{ | ||
/// <summary> | ||
/// A reference to a set of llava model weights | ||
/// </summary> | ||
/// <summary>
/// A reference to a set of llava model weights
/// </summary>
public sealed class SafeLlavaModelHandle
    : SafeLLamaHandleBase
{

    /// <summary>
    /// Wrap a native clip context pointer; the base class is told that this handle owns it.
    /// </summary>
    /// <param name="handle">Pointer returned by the native model loader</param>
    protected internal SafeLlavaModelHandle(IntPtr handle)
        : base(handle, true)
    {
    }

    /// <inheritdoc />
    protected override bool ReleaseHandle()
    {
        NativeApi.clip_free(DangerousGetHandle());
        SetHandle(IntPtr.Zero);
        return true;
    }

    /// <summary>
    /// Load a model from the given file path into memory
    /// </summary>
    /// <param name="modelPath">Path of the model file</param>
    /// <param name="verbosity">Verbosity level passed to the native loader</param>
    /// <returns>A handle owning the loaded model</returns>
    /// <exception cref="RuntimeError">The native loader returned a null pointer</exception>
    public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity )
    {
        var ctxContext = NativeApi.clip_model_load(modelPath, verbosity);
        if (ctxContext == IntPtr.Zero)
        {
            throw new RuntimeError($"Failed to load LLaVa model {modelPath}.");
        }

        return new SafeLlavaModelHandle(ctxContext);
    }

    /// <summary>
    /// Build an image embed from the given file. The embed is not stored or returned,
    /// so it is released again immediately instead of being leaked.
    /// </summary>
    /// <param name="imagePath">Path of the image file</param>
    /// <param name="threads">Number of threads passed to the native call</param>
    public void LoadImage( string imagePath, int threads )
    {
        unsafe
        {
            var ptrImageEmbed = NativeApi.llava_image_embed_make_with_filename(this.handle, threads, imagePath);
            if (ptrImageEmbed != null)
            {
                NativeApi.llava_image_embed_free(ptrImageEmbed);
            }
        }
    }

    /// <summary>
    /// Embed the image from file in llama context
    /// </summary>
    /// <param name="ctxLlama">Context that receives the image</param>
    /// <param name="image">Path of the image file</param>
    /// <param name="n_past">Position reported by the native evaluation; 0 if no embed could be built</param>
    /// <returns>The native evaluation result, or false if no embed could be built</returns>
    /// <exception cref="ArgumentNullException">An argument is null</exception>
    public bool EmbedImage(LLamaContext ctxLlama, string image, out int n_past)
    {
        if (ctxLlama == null)
        {
            throw new ArgumentNullException(nameof(ctxLlama));
        }
        if (image == null)
        {
            throw new ArgumentNullException(nameof(image));
        }

        unsafe
        {
            var ptrImageEmbed = NativeApi.llava_image_embed_make_with_filename(this.handle, (int) ctxLlama.Params.Threads, image);
            return EvalAndFreeEmbed(ctxLlama, ptrImageEmbed, out n_past);
        }
    }

    /// <summary>
    /// Embed the image from binary in llama context
    /// </summary>
    /// <param name="ctxLlama">Context that receives the image</param>
    /// <param name="image">Bytes of the image</param>
    /// <param name="n_past">Position reported by the native evaluation; 0 if no embed could be built</param>
    /// <returns>The native evaluation result, or false if no embed could be built</returns>
    /// <exception cref="ArgumentNullException">An argument is null</exception>
    public bool EmbedImage(LLamaContext ctxLlama, Byte[] image, out int n_past )
    {
        if (ctxLlama == null)
        {
            throw new ArgumentNullException(nameof(ctxLlama));
        }
        if (image == null)
        {
            throw new ArgumentNullException(nameof(image));
        }

        unsafe
        {
            // The array is passed directly; copying it first adds nothing for the native call.
            var ptrImageEmbed = NativeApi.llava_image_embed_make_with_bytes(this.handle, (int) ctxLlama.Params.Threads, image, image.Length);
            return EvalAndFreeEmbed(ctxLlama, ptrImageEmbed, out n_past);
        }
    }

    /// <summary>
    /// Evaluate an embed into the context and always release the embed afterwards.
    /// A null embed is reported as failure without calling into native code.
    /// </summary>
    private static unsafe bool EvalAndFreeEmbed(LLamaContext ctxLlama, NativeApi.llava_image_embed* ptrImageEmbed, out int n_past)
    {
        if (ptrImageEmbed == null)
        {
            n_past = 0;
            return false;
        }

        try
        {
            return NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ptrImageEmbed, (int)ctxLlama.Params.BatchSize, out n_past);
        }
        finally
        {
            // Release the embed even when the evaluation throws.
            NativeApi.llava_image_embed_free(ptrImageEmbed);
        }
    }
}
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.