34 lines
971 B
C#
34 lines
971 B
C#
using System.Globalization;
|
|
using System.Text;
|
|
|
|
namespace ChatRAG.Services
|
|
{
|
|
/// <summary>
/// Text normalization helpers: lower-casing and removal of diacritical marks.
/// </summary>
public class TextFilter
{
    /// <summary>
    /// Lower-cases <paramref name="text"/> and strips its diacritical marks.
    /// </summary>
    /// <param name="text">The text to normalize. Must not be null.</param>
    /// <returns>The lower-cased text with nonspacing marks removed, in normalization form C.</returns>
    public string ToLowerAndWithoutAccents(string text)
    {
        // Invariant casing keeps the result independent of the current thread culture
        // (e.g. under tr-TR, ToLower() maps 'I' to dotless 'ı', which would survive
        // diacritic removal and produce a different output for the same input).
        return RemoveDiacritics(text.ToLowerInvariant());
    }

    /// <summary>
    /// Removes every nonspacing mark (Unicode category Mn) from <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to process. Must not be null.</param>
    /// <returns>
    /// The text decomposed to form D, with nonspacing marks dropped, then recomposed to form C.
    /// </returns>
    public string RemoveDiacritics(string text)
    {
        // Form D splits precomposed letters into base letter + combining marks.
        string decomposed = text.Normalize(NormalizationForm.FormD);
        var builder = new StringBuilder(capacity: decomposed.Length);

        int index = 0;
        while (index < decomposed.Length)
        {
            // Classify by code point, not by UTF-16 code unit: the halves of a surrogate
            // pair each report category Surrogate, which would hide nonspacing marks
            // that live outside the Basic Multilingual Plane.
            int width = char.IsSurrogatePair(decomposed, index) ? 2 : 1;

            if (CharUnicodeInfo.GetUnicodeCategory(decomposed, index) != UnicodeCategory.NonSpacingMark)
            {
                builder.Append(decomposed, index, width);
            }

            index += width;
        }

        // Recompose whatever remains so callers get the canonical composed form back.
        return builder.ToString().Normalize(NormalizationForm.FormC);
    }
}
|
|
}
|