Skip to content

Llava API #563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Llava API + LlavaTest
Preliminary
  • Loading branch information
SignalRT committed Mar 6, 2024
commit 6307a2f635153c82da05d30a0e4c21f0b74695af
2 changes: 2 additions & 0 deletions LLama.Unittest/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@
internal static class Constants
{
    /// <summary>Path to the base LLaMA chat model used by most unit tests.</summary>
    public const string ModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";

    /// <summary>Path to the LLaVa language-model weights used by the LLaVa tests.</summary>
    public const string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";

    /// <summary>Path to the LLaVa multi-modal projection (mmproj / CLIP) weights.</summary>
    public const string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
}
}
5 changes: 3 additions & 2 deletions LLama.Unittest/LLama.Unittest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
</ItemGroup>

<Target Name="DownloadContentFiles" BeforeTargets="Build">
  <!-- NOTE: Hugging Face URLs must use "/resolve/main/" (raw file download).
       "/blob/main/" serves the HTML viewer page, so the downloaded .gguf would be junk. -->
  <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile>
  <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true"></DownloadFile>
  <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true"></DownloadFile>
</Target>

<ItemGroup>
Expand Down
96 changes: 96 additions & 0 deletions LLama.Unittest/LLavaWeigthsTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
using LLama.Common;
using LLama.Native;

namespace LLama.Unittest
{
/// <summary>
/// Tests loading LLaVa weights (language model + multi-modal projection) and
/// exercising the resulting context's tokenizer.
/// </summary>
public sealed class LLavaWeightTests
    : IDisposable
{
    // Language-model weights (the LLaVa LLM itself).
    private readonly LLamaWeights _llamaWeights;
    // Multi-modal projection (mmproj / CLIP) weights.
    private readonly LLavaWeights _lLavaWeights;
    private readonly LLamaContext _context;

    public LLavaWeightTests()
    {
        // FIX: the language model must be loaded from the LLaVa *model* file.
        // The original loaded it from the multi-modal projection (mmproj/CLIP)
        // file, which is not a language model.
        var @params = new ModelParams(Constants.LLavaModelPath)
        {
            // Llava models requires big context
            ContextSize = 4096,
        };
        _llamaWeights = LLamaWeights.LoadFromFile(@params);
        _lLavaWeights = LLavaWeights.LoadFromFile(Constants.LLavaMmpPath);

        _context = _llamaWeights.CreateContext(@params);
    }

    public void Dispose()
    {
        // Dispose the context before the weights it was created from.
        _context.Dispose();
        _llamaWeights.Dispose();
        _lLavaWeights.Dispose();
    }

    [Fact]
    public void CheckProperties()
    {
        // FIX: the context was created with ContextSize = 4096 (see constructor),
        // so that is the value the property must report. 768u was copied from a
        // different test fixture.
        Assert.Equal(4096u, _context.ContextSize);
        Assert.Equal(4096, _context.EmbeddingSize);
        Assert.Equal(32000, _context.VocabCount);
    }

    [Fact]
    public void Tokenize()
    {
        // NOTE(review): token ids assume the model's 32000-entry SentencePiece
        // vocabulary — confirm against the llava-1.6-mistral tokenizer.
        var tokens = _context.Tokenize("The quick brown fox", true);

        Assert.Equal(new LLamaToken[] { 1, 450, 4996, 17354, 1701, 29916 }, tokens);
    }

    [Fact]
    public void TokenizeNewline()
    {
        var tokens = _context.Tokenize("\n", false, false);

        Assert.Equal(new LLamaToken[] { 29871, 13 }, tokens);
    }

    [Fact]
    public void TokenizeRoundtripSpecialStrings()
    {
        var strings = new[]
        {
            "\t", "\t\t", "\t\t\t",
            "\n\n", "\n\n\n", "\n\n\n\n",
            "\t\n", "\t\n\t\n\n\n\n\t\t",
            "\b", "\v", "\0"
        };

        foreach (var s in strings)
        {
            var tokens = _context.Tokenize(s, false, false);
            var decoder = new StreamingTokenDecoder(_context);
            decoder.AddRange(tokens);

            var str = decoder.Read();

            // SentencePiece may prepend a space marker; strip it for comparison.
            Assert.Equal(s, str.TrimStart(' '));
        }
    }

    [Fact]
    public void TokenizeWithoutBOS()
    {
        var tokens = _context.Tokenize("The quick brown fox", false);

        Assert.Equal(new LLamaToken[] { 450, 4996, 17354, 1701, 29916 }, tokens);
    }

    [Fact]
    public void TokenizeEmpty()
    {
        var tokens = _context.Tokenize("", false);

        Assert.Equal(Array.Empty<LLamaToken>(), tokens);
    }
}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
51 changes: 51 additions & 0 deletions LLama/LLavaWeights.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

using System;
using LLama.Native;

namespace LLama;

/// <summary>
/// A set of LLaVa multi-modal projection (CLIP) weights, used to embed images
/// into a llama context.
/// </summary>
public sealed class LLavaWeights : IDisposable
{
    /// <summary>
    /// The native handle to the underlying CLIP model.
    /// </summary>
    public SafeLlavaModelHandle NativeClipHandle { get; }

    internal LLavaWeights(SafeLlavaModelHandle weights)
    {
        NativeClipHandle = weights;
    }

    /// <summary>
    /// Load multi-modal projection weights from a file.
    /// </summary>
    /// <param name="mmProject">Path to the multi-modal projection (mmproj) file</param>
    /// <param name="verbosity">Native logging verbosity. Defaults to 1, matching the previously hard-coded value.</param>
    /// <returns>The loaded weights</returns>
    public static LLavaWeights LoadFromFile(string mmProject, int verbosity = 1)
    {
        var weights = SafeLlavaModelHandle.LoadFromFile(mmProject, verbosity);
        return new LLavaWeights(weights);
    }

    /// <summary>
    /// Embed the image from file into llama context
    /// </summary>
    /// <param name="ctxLlama">The llama context to write the image embedding into</param>
    /// <param name="Image">Path to the image file</param>
    /// <param name="n_past">Set to the next context position after the image embed</param>
    /// <returns>True if the image was successfully embedded</returns>
    public bool EmbedImage(LLamaContext ctxLlama, string Image, out int n_past)
    {
        return NativeClipHandle.EmbedImage(ctxLlama, Image, out n_past);
    }

    /// <summary>
    /// Embed the image from binary into llama context.
    /// </summary>
    /// <param name="ctxLlama">The llama context to write the image embedding into</param>
    /// <param name="Image">Raw image file bytes</param>
    /// <param name="n_past">Set to the next context position after the image embed</param>
    /// <returns>True if the image was successfully embedded</returns>
    public bool EmbedImage(LLamaContext ctxLlama, byte[] Image, out int n_past)
    {
        return NativeClipHandle.EmbedImage(ctxLlama, Image, out n_past);
    }

    /// <inheritdoc />
    public void Dispose()
    {
        NativeClipHandle.Dispose();
    }
}
103 changes: 103 additions & 0 deletions LLama/Native/NativeApi.LLava.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
using System;
using System.Runtime.InteropServices;

namespace LLama.Native;

using clip_ctx = IntPtr;
public static unsafe partial class NativeApi
{
    /// <summary>
    /// LLaVa Image embeddings. Mirrors the native llava_image_embed struct.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct llava_image_embed
    {
        /// <summary>Pointer to the native embedding buffer (owned by the native side).</summary>
        public float* embed;
        /// <summary>Number of image positions occupied by the embedding.</summary>
        public int n_image_pos;
    }

    /// <summary>
    /// Load MULTI MODAL PROJECTIONS model / Clip Model
    /// </summary>
    /// <param name="mmProj">Model path/file</param>
    /// <param name="verbosity">Verbosity level</param>
    /// <returns>Native clip context pointer, or IntPtr.Zero on failure</returns>
    [DllImport(llavaLibraryName, EntryPoint = "clip_model_load", CallingConvention = CallingConvention.Cdecl)]
    public static extern clip_ctx clip_model_load(string mmProj, int verbosity);

    /// <summary>
    /// Frees MULTI MODAL PROJECTIONS model / Clip Model
    /// </summary>
    /// <param name="ctx">Clip context pointer returned by clip_model_load</param>
    [DllImport(llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)]
    public static extern void clip_free(clip_ctx ctx);

    /// <summary>
    /// Sanity check for clip &lt;-&gt; llava embed size match
    /// </summary>
    /// <param name="ctxLlama">Llama context</param>
    /// <param name="ctxClip">Clip context</param>
    /// <returns>True if the embedding sizes match</returns>
    // NOTE(review): native C `bool` is 1 byte; the default .NET marshaling treats
    // bool as a 4-byte BOOL, so mark the return explicitly — confirm against the
    // native build.
    [DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)]
    [return: MarshalAs(UnmanagedType.U1)]
    public static extern bool llava_validate_embed_size(SafeLLamaContextHandle ctxLlama, clip_ctx ctxClip);

    /// <summary>
    /// Build an image embed from image file bytes
    /// </summary>
    /// <param name="ctx_clip">Clip context</param>
    /// <param name="n_threads">Number of threads to use</param>
    /// <param name="image_bytes">Raw image file bytes (e.g. a JPEG/PNG file read into memory)</param>
    /// <param name="image_bytes_length">Length of <paramref name="image_bytes"/></param>
    /// <returns>Pointer to a native embedding; free with llava_image_embed_free</returns>
    [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes",
        CallingConvention = CallingConvention.Cdecl)]
    public static extern llava_image_embed* llava_image_embed_make_with_bytes(clip_ctx ctx_clip, int n_threads,
        byte[] image_bytes, int image_bytes_length);

    /// <summary>
    /// Build an image embed from a path to an image filename
    /// </summary>
    /// <param name="ctx_clip">Clip context</param>
    /// <param name="n_threads">Number of threads to use</param>
    /// <param name="image_path">Path to the image file</param>
    /// <returns>Pointer to a native embedding; free with llava_image_embed_free</returns>
    [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename",
        CallingConvention = CallingConvention.Cdecl)]
    public static extern llava_image_embed* llava_image_embed_make_with_filename(clip_ctx ctx_clip, int n_threads,
        [MarshalAs(UnmanagedType.LPStr)] string image_path);

    /// <summary>
    /// Free an embedding made with llava_image_embed_make_*
    /// </summary>
    /// <param name="embed">Embedding to free</param>
    // FIX: the native function returns void; the previous declaration returned a
    // pointer, which reads an undefined value back from the native call.
    [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)]
    public static extern void llava_image_embed_free(llava_image_embed* embed);

    /// <summary>
    /// Write the image represented by embed into the llama context with batch size n_batch, starting at context
    /// pos n_past. on completion, n_past points to the next position in the context after the image embed.
    /// </summary>
    /// <param name="ctx_llama">Llama context to write the embedding into</param>
    /// <param name="embed">Image embedding to evaluate</param>
    /// <param name="n_batch">Batch size</param>
    /// <param name="n_past">Updated to the next context position after the image embed</param>
    /// <returns>True on success</returns>
    [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)]
    [return: MarshalAs(UnmanagedType.U1)]
    public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctx_llama, llava_image_embed* embed,
        int n_batch, out int n_past);
}
1 change: 1 addition & 0 deletions LLama/Native/NativeApi.Load.cs
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ string TryFindPath(string filename)
}

// Base name of the native llama library; resolved to a platform-specific file at load time.
internal const string libraryName = "llama";
// Base name of the native llava shared library (multi-modal / CLIP entry points).
internal const string llavaLibraryName = "llava_shared";
// File read to determine which CUDA version the bundled binaries target.
private const string cudaVersionFile = "version.json";
// Prefix applied to native-loader log messages.
private const string loggingPrefix = "[LLamaSharp Native]";
// Whether native-loader logging is enabled; off by default.
private static bool enableLogging = false;
Expand Down
92 changes: 92 additions & 0 deletions LLama/Native/SafeLlavaModelHandle.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using LLama;
using LLama.Exceptions;


namespace LLama.Native
{
/// <summary>
/// A reference to a set of llava model weights
/// </summary>
/// <summary>
/// A reference to a set of llava model weights
/// </summary>
public sealed class SafeLlavaModelHandle
    : SafeLLamaHandleBase
{
    // `protected` is meaningless on a sealed class, so `internal` alone suffices
    // (the original `internal protected` triggers a compiler warning).
    internal SafeLlavaModelHandle(IntPtr handle)
        : base(handle, true)
    {
    }

    /// <inheritdoc />
    protected override bool ReleaseHandle()
    {
        NativeApi.clip_free(DangerousGetHandle());
        SetHandle(IntPtr.Zero);
        return true;
    }

    /// <summary>
    /// Load a model from the given file path into memory
    /// </summary>
    /// <param name="modelPath">Path to the multi-modal projection (mmproj/CLIP) model file</param>
    /// <param name="verbosity">Native logging verbosity</param>
    /// <returns>A safe handle owning the loaded model</returns>
    /// <exception cref="RuntimeError">Thrown when the native loader returns a null pointer</exception>
    public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity)
    {
        var ctxContext = NativeApi.clip_model_load(modelPath, verbosity);
        if (ctxContext == IntPtr.Zero)
            throw new RuntimeError($"Failed to load LLaVa model {modelPath}.");

        return new SafeLlavaModelHandle(ctxContext);
    }

    /// <summary>
    /// Build an image embedding from a file and release it immediately.
    /// </summary>
    /// <param name="imagePath">Path to the image file</param>
    /// <param name="threads">Number of threads to use for the embedding</param>
    public void LoadImage(string imagePath, int threads)
    {
        unsafe
        {
            // FIX: the original discarded the returned native pointer, leaking the
            // embedding. Free it here since this method does not hand it to a caller.
            var embed = NativeApi.llava_image_embed_make_with_filename(this.handle, threads, imagePath);
            if (embed != null)
                NativeApi.llava_image_embed_free(embed);
        }
    }

    /// <summary>
    /// Embed the image from file in llama context
    /// </summary>
    /// <param name="ctxLlama">The llama context to write the image embedding into</param>
    /// <param name="image">Path to the image file</param>
    /// <param name="n_past">Set to the next context position after the image embed</param>
    /// <returns>True if the image was successfully embedded</returns>
    public bool EmbedImage(LLamaContext ctxLlama, string image, out int n_past)
    {
        unsafe
        {
            var ptrImageEmbed = NativeApi.llava_image_embed_make_with_filename(this.handle, (int)ctxLlama.Params.Threads, image);
            // Guard against a failed native decode rather than passing null to eval.
            if (ptrImageEmbed == null)
            {
                n_past = 0;
                return false;
            }
            bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ptrImageEmbed, (int)ctxLlama.Params.BatchSize, out n_past);
            NativeApi.llava_image_embed_free(ptrImageEmbed);
            return result;
        }
    }

    /// <summary>
    /// Embed the image from binary in llama context
    /// </summary>
    /// <param name="ctxLlama">The llama context to write the image embedding into</param>
    /// <param name="image">Raw image file bytes</param>
    /// <param name="n_past">Set to the next context position after the image embed</param>
    /// <returns>True if the image was successfully embedded</returns>
    public bool EmbedImage(LLamaContext ctxLlama, byte[] image, out int n_past)
    {
        unsafe
        {
            // FIX: pass the byte[] directly — the original's image.ToArray() made a
            // pointless full copy of the buffer.
            var ptrImageEmbed = NativeApi.llava_image_embed_make_with_bytes(this.handle, (int)ctxLlama.Params.Threads, image, image.Length);
            if (ptrImageEmbed == null)
            {
                n_past = 0;
                return false;
            }
            bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ptrImageEmbed, (int)ctxLlama.Params.BatchSize, out n_past);
            NativeApi.llava_image_embed_free(ptrImageEmbed);
            return result;
        }
    }
}
}