Skip to content

[pull] master from TheAlgorithms:master #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions Algorithms.Tests/Strings/Similarity/CosineSimilarityTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
using System;
using Algorithms.Strings.Similarity;
using NUnit.Framework;

namespace Algorithms.Tests.Strings.Similarity;

/// <summary>
/// Unit tests for <see cref="CosineSimilarity.Calculate"/>.
/// </summary>
[TestFixture]
public class CosineSimilarityTests
{
    // Absolute tolerance used for every floating-point comparison in this fixture.
    private const double Tolerance = 1e-6;

    [Test]
    public void Calculate_IdenticalStrings_ReturnsOne()
    {
        AssertSimilarity("test", "test", 1.0, "Identical strings should have a cosine similarity of 1.");
    }

    [Test]
    public void Calculate_CompletelyDifferentStrings_ReturnsZero()
    {
        AssertSimilarity("abc", "xyz", 0.0, "Completely different strings should have a cosine similarity of 0.");
    }

    [Test]
    public void Calculate_EmptyStrings_ReturnsZero()
    {
        AssertSimilarity(string.Empty, string.Empty, 0.0, "Empty strings should have a cosine similarity of 0.");
    }

    [Test]
    public void Calculate_OneEmptyString_ReturnsZero()
    {
        AssertSimilarity("test", string.Empty, 0.0, "Empty string should have a cosine similarity of 0.");
    }

    [Test]
    public void Calculate_SameCharactersDifferentCases_ReturnsOne()
    {
        AssertSimilarity("Test", "test", 1.0, "The method should be case-insensitive.");
    }

    [Test]
    public void Calculate_SpecialCharacters_ReturnsCorrectValue()
    {
        AssertSimilarity("hello!", "hello!", 1.0, "Strings with special characters should have a cosine similarity of 1.");
    }

    [Test]
    public void Calculate_DifferentLengthWithCommonCharacters_ReturnsCorrectValue()
    {
        // Hand-computed from character frequencies: the dot product is 10 and the
        // squared magnitudes are 7 ("hello") and 19 ("hello world").
        var expected = 10 / (Math.Sqrt(7) * Math.Sqrt(19));

        AssertSimilarity(
            "hello",
            "hello world",
            expected,
            "Strings with different lengths but some common characters should have the correct cosine similarity.");
    }

    [Test]
    public void Calculate_PartiallyMatchingStrings_ReturnsCorrectValue()
    {
        // The strings share 'n', 'h' and 't' (dot product 3); each has five distinct
        // characters, so the product of the magnitudes is 5.
        var expected = 3.0 / 5.0;

        AssertSimilarity(
            "night",
            "nacht",
            expected,
            "Partially matching strings should have the correct cosine similarity.");
    }

    // Runs CosineSimilarity.Calculate on the two inputs and checks the result
    // against the expected value within the fixture tolerance.
    private static void AssertSimilarity(string left, string right, double expected, string message)
    {
        var similarity = CosineSimilarity.Calculate(left, right);

        Assert.That(similarity, Is.EqualTo(expected).Within(Tolerance), message);
    }
}
116 changes: 116 additions & 0 deletions Algorithms.Tests/Strings/Similarity/DamerauLevenshteinDistanceTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
using Algorithms.Strings.Similarity;
using NUnit.Framework;

namespace Algorithms.Tests.Strings.Similarity;

/// <summary>
/// Unit tests for <see cref="DamerauLevenshteinDistance.Calculate"/>.
/// </summary>
[TestFixture]
public class DamerauLevenshteinDistanceTests
{
    [Test]
    public void Calculate_IdenticalStrings_ReturnsZero()
    {
        AssertDistance("test", "test", 0, "Identical strings should have a Damerau-Levenshtein distance of 0.");
    }

    [Test]
    public void Calculate_CompletelyDifferentStrings_ReturnsLengthOfLongestString()
    {
        AssertDistance(
            "abc",
            "xyz",
            3,
            "Completely different strings should have a Damerau-Levenshtein distance equal to the length of the longest string.");
    }

    [Test]
    public void Calculate_OneEmptyString_ReturnsLengthOfOtherString()
    {
        AssertDistance(
            "test",
            string.Empty,
            4,
            "One empty string should have a Damerau-Levenshtein distance equal to the length of the other string.");
    }

    [Test]
    public void Calculate_BothEmptyStrings_ReturnsZero()
    {
        AssertDistance(string.Empty, string.Empty, 0, "Both empty strings should have a Damerau-Levenshtein distance of 0.");
    }

    [Test]
    public void Calculate_DifferentLengths_ReturnsCorrectValue()
    {
        AssertDistance(
            "short",
            "longer",
            6,
            "Strings of different lengths should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_SpecialCharacters_ReturnsCorrectValue()
    {
        // Only the trailing punctuation differs.
        AssertDistance(
            "hello!",
            "hello?",
            1,
            "Strings with special characters should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_DifferentCases_ReturnsCorrectValue()
    {
        // The expected distance of 1 pins a case-sensitive comparison ('H' vs 'h').
        AssertDistance(
            "Hello",
            "hello",
            1,
            "Strings with different cases should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_CommonPrefixes_ReturnsCorrectValue()
    {
        AssertDistance(
            "prefix",
            "pre",
            3,
            "Strings with common prefixes should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_CommonSuffixes_ReturnsCorrectValue()
    {
        AssertDistance(
            "suffix",
            "fix",
            3,
            "Strings with common suffixes should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_Transpositions_ReturnsCorrectValue()
    {
        // Swapping the adjacent 'b' and 'c' is expected to count as a single edit.
        AssertDistance(
            "abcd",
            "acbd",
            1,
            "Strings with transpositions should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_RepeatedCharacters_ReturnsCorrectValue()
    {
        AssertDistance(
            "aaa",
            "aaaaa",
            2,
            "Strings with repeated characters should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_UnicodeCharacters_ReturnsCorrectValue()
    {
        // The third and fourth characters differ between the two strings.
        AssertDistance(
            "こんにちは",
            "こんばんは",
            2,
            "Strings with Unicode characters should return the correct Damerau-Levenshtein distance.");
    }

    // Runs DamerauLevenshteinDistance.Calculate on the two inputs and checks the
    // result against the expected distance.
    private static void AssertDistance(string first, string second, int expected, string message)
    {
        var distance = DamerauLevenshteinDistance.Calculate(first, second);

        Assert.That(distance, Is.EqualTo(expected), message);
    }
}
136 changes: 136 additions & 0 deletions Algorithms/Strings/Similarity/CosineSimilarity.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
using System;
using System.Collections.Generic;

namespace Algorithms.Strings.Similarity;

/// <summary>
/// Computes the cosine similarity of two strings using their character-frequency vectors.
/// </summary>
public static class CosineSimilarity
{
    /// <summary>
    /// Calculates the Cosine Similarity between two strings.
    /// Each string is lower-cased (invariant culture) and turned into a vector that maps
    /// every <see cref="char"/> to the number of times it occurs; the result is the cosine
    /// of the angle between those two vectors.
    /// </summary>
    /// <param name="left">The first string.</param>
    /// <param name="right">The second string.</param>
    /// <returns>
    /// A double value between 0 and 1 that represents the similarity of the two strings.
    /// Returns 0 when either string is empty, because an empty string has a zero vector
    /// and the cosine is undefined for it.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="left"/> or <paramref name="right"/> is <c>null</c>.
    /// </exception>
    public static double Calculate(string left, string right)
    {
        if (left is null)
        {
            throw new ArgumentNullException(nameof(left));
        }

        if (right is null)
        {
            throw new ArgumentNullException(nameof(right));
        }

        // Step 1: Build the character-frequency vector of each (case-folded) string.
        var leftVector = GetVector(left.ToLowerInvariant());
        var rightVector = GetVector(right.ToLowerInvariant());

        // Step 2: Calculate the squared magnitude of each vector.
        var leftSquared = SquaredMagnitude(leftVector);
        var rightSquared = SquaredMagnitude(rightVector);

        // Step 3: A zero vector means the string was empty; define the similarity as 0
        // instead of dividing by zero.
        if (leftSquared <= 0 || rightSquared <= 0)
        {
            return 0.0;
        }

        // Step 4: Calculate the dot product over the characters both strings share.
        var dotProduct = DotProduct(leftVector, rightVector);

        // Step 5: Divide by the product of the magnitudes.
        // A single square root of the product is used rather than multiplying two rounded
        // square roots: the latter can land slightly below the true value and push the
        // quotient above 1 (e.g. for "abc" vs "abc"). The clamp guards against any
        // remaining rounding so callers can safely pass the result to Math.Acos.
        var similarity = dotProduct / Math.Sqrt(leftSquared * rightSquared);
        return Math.Min(1.0, similarity);
    }

    /// <summary>
    /// Builds the character-frequency vector of a string.
    /// </summary>
    /// <param name="text">The string to analyse.</param>
    /// <returns>A dictionary mapping each character to its number of occurrences.</returns>
    private static Dictionary<char, int> GetVector(string text)
    {
        var vector = new Dictionary<char, int>();

        foreach (var character in text)
        {
            // TryGetValue leaves frequency at 0 for a character seen for the first time.
            vector.TryGetValue(character, out var frequency);
            vector[character] = frequency + 1;
        }

        return vector;
    }

    /// <summary>
    /// Calculates the sum of the squared frequencies of a vector.
    /// </summary>
    /// <param name="vector">The character-frequency vector.</param>
    /// <returns>The squared magnitude of the vector.</returns>
    private static double SquaredMagnitude(Dictionary<char, int> vector)
    {
        var sum = 0.0;

        foreach (var frequency in vector.Values)
        {
            // Multiply as double: an int product overflows once a character occurs
            // more than 46340 times.
            sum += (double)frequency * frequency;
        }

        return sum;
    }

    /// <summary>
    /// Calculates the dot product between two vectors represented as dictionaries of character frequencies.
    /// Only characters present in both vectors contribute to the sum.
    /// </summary>
    /// <param name="leftVector">The vector of the left string.</param>
    /// <param name="rightVector">The vector of the right string.</param>
    /// <returns>The dot product of the two vectors.</returns>
    private static double DotProduct(Dictionary<char, int> leftVector, Dictionary<char, int> rightVector)
    {
        var dotProduct = 0.0;

        foreach (var entry in leftVector)
        {
            if (rightVector.TryGetValue(entry.Key, out var rightFrequency))
            {
                // Multiply as double to avoid int overflow for very frequent characters.
                dotProduct += (double)entry.Value * rightFrequency;
            }
        }

        return dotProduct;
    }
}
Loading