ChatRAG/Services/TextFilter.cs
2025-06-09 23:06:37 -03:00

34 lines
971 B
C#

using System.Globalization;
using System.Text;
namespace ChatRAG.Services
{
/// <summary>
/// Text normalization helpers: lower-casing and removal of diacritics (accents).
/// </summary>
public class TextFilter
{
    /// <summary>
    /// Lower-cases <paramref name="text"/> and then strips its diacritics.
    /// </summary>
    /// <param name="text">The text to normalize. Must not be <c>null</c>.</param>
    /// <returns>The lower-cased text with non-spacing marks removed, in normalization form C.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    public string ToLowerAndWithoutAccents(string text)
    {
        if (text is null)
        {
            throw new System.ArgumentNullException(nameof(text));
        }

        // Invariant casing keeps the result independent of the current thread culture.
        // With a culture-sensitive ToLower(), Turkish/Azeri cultures map 'I' to the
        // dotless 'ı' (U+0131), which has no decomposition and would survive filtering.
        return RemoveDiacritics(text.ToLowerInvariant());
    }

    /// <summary>
    /// Removes diacritics from <paramref name="text"/> by decomposing it (form D),
    /// dropping every character whose Unicode category is
    /// <see cref="UnicodeCategory.NonSpacingMark"/>, and recomposing the rest (form C).
    /// </summary>
    /// <param name="text">The text to strip. Must not be <c>null</c>.</param>
    /// <returns>The text without non-spacing marks, in normalization form C.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    public string RemoveDiacritics(string text)
    {
        if (text is null)
        {
            throw new System.ArgumentNullException(nameof(text));
        }

        // Canonical decomposition splits e.g. 'é' into 'e' + U+0301 so the mark can be dropped.
        string decomposed = text.Normalize(NormalizationForm.FormD);
        var builder = new StringBuilder(capacity: decomposed.Length);

        foreach (char c in decomposed)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
            {
                builder.Append(c);
            }
        }

        // Recompose whatever remains so callers get the canonical composed form back.
        return builder
            .ToString()
            .Normalize(NormalizationForm.FormC);
    }
}
}