Improved IA currency detection and matching
Some checks failed
Run Integration Tests / test (push) Failing after 38s

This commit is contained in:
partisan 2025-06-29 09:41:00 +02:00
parent 7dbdcbe202
commit e2f1707723

View file

@ -129,8 +129,99 @@ func GetExchangeRate(from, to string) (float64, bool) {
// ParseCurrencyConversion detects and processes currency conversion queries
func ParseCurrencyConversion(query string) (float64, string, string, bool) {
// Match patterns like: "100 USD to EUR", "50 eur in gbp", "1000 ¥ to USD" (the amount must precede the currency)
re := regexp.MustCompile(`(?i)([\d,]+(?:\.\d+)?)\s*([$€£¥₩₹₽A-Za-z]{1,6})\s+(?:to|in|➞|→)\s+([$€£¥₩₹₽A-Za-z]{1,6})`)
// Main conversion phrases
conversionPhrases := []string{
// Universal/math
"➞", "→", "⇒", ">", "->", "=", "≈", "~", ":", "≡",
// English
"to", "in", "into", "as", "equals", "equal to", "equals to", "is", "becomes", "be", "makes", "converted to", "convert to", "convert into", "converted into",
"exchange for", "exchanged for", "value in", "as currency", "convert", "equivalent to", "same as", "is equal to", ">", "gives", "makes", "result is", "returns", "will be", "equals:", "is equivalent to", "≈", "~", ":",
// German (DE)
"auf", "in", "zu", "umrechnen in", "umrechnen zu", "als", "gleich", "ist", "ist gleich", "umwandeln in", "wird zu", "ergibt", "macht", "ist", "resultiert in", "gleichwertig mit",
// Spanish (ES)
"en", "a", "como", "igual a", "es", "es igual a", "es igual", "convertir a", "cambiar a", "valor en", "convierte en", "devuelve", "será", "equivale a", "es equivalente a",
// French (FR)
"vers", "en", "comme", "égal à", "est", "c'est", "convertir en", "changer en", "valeur en", "équivaut à", "sera", "fait", "rend", "est égal à", "équivalent à",
// Italian (IT)
"a", "in", "come", "uguale a", "è", "convertire in", "cambiare in", "valore in", "sarà", "fa", "equivale a", "è uguale a",
// Portuguese (PT/BR)
"para", "em", "como", "igual a", "é", "converter para", "trocar por", "valor em", "converte em", "vai ser", "faz", "equivale a", "é igual a", "é equivalente a",
// Dutch (NL)
"naar", "in", "als", "is gelijk aan", "is", "wordt", "omzetten naar", "waarde in", "gelijk aan", "is hetzelfde als",
// Czech (CS)
"na", "do", "jako", "rovná se", "je", "převést na", "výměna za", "hodnota v", "přepočet", "bude", "rovná", "je to", "je rovno", "je stejné jako",
// Slovak (SK)
"na", "do", "ako", "rovná sa", "je", "previesť na", "výměna za", "hodnota v", "prerátať", "bude", "rovná", "je to", "je rovné", "je rovnaké ako",
// Polish (PL)
"na", "w", "jako", "równa się", "jest", "przelicz na", "wymień na", "wartość w", "przelicza się na", "będzie", "to jest", "jest równy", "jest taki sam jak",
// Russian (RU)
"на", "в", "как", "равно", "есть", "конвертировать в", "обменять на", "значение в", "равняется", "будет", "это", "такое же как",
// Ukrainian (UA)
"на", "у", "як", "дорівнює", "є", "конвертувати у", "обміняти на", "значення в", "буде", "це", "таке саме як",
// Croatian / Serbian / Bosnian / Slovenian (HR/SR/BS/SL)
"na", "u", "za", "kao", "jednako", "je", "pretvori u", "zamijeniti za", "vrijednost u", "preračunaj u", "biti", "to je", "jednako kao", "je isto kao",
"v", "kot", "je enako", "pretvoriti v", "zamenjati za", "vrednost v", "je isto kao", "je enakovredno",
// Bulgarian (BG)
"на", "в", "като", "равно на", "е", "преобразувай в", "обмени на", "стойност в", "ще бъде", "това е", "равностойно на",
// Turkish (TR)
"için", "olarak", "eşittir", "bu", "dönüştür to", "değiştir to", "değer olarak", "olur", "eşit", "bu olur", "aynı olarak",
// Greek (EL)
"σε", "ως", "ίσον", "είναι", "μετατροπή σε", "ανταλλαγή με", "τιμή σε", "θα είναι", "αυτό είναι", "ισοδυναμεί με", "ίσο με",
// Chinese (Simplified and Traditional, ZH)
"到", "变为", "換成", "转换为", "等于", "等於", "是", "为", "結果是", "相等於", "等同於", "一樣",
// Japanese (JA)
"に", "として", "等しい", "は", "に変換", "に交換", "の値", "は", "結果は", "となる", "同じ", "等価", "等しく",
// Korean (KO)
"으로", "같이", "같다", "이다", "로 변환", "교환하다", "값", "이 된다", "와 같다", "같음", "동일하다",
// Arabic (AR)
"إلى", "الى", "في", "كـ", "يساوي", "هو", "تحويل إلى", "قيمة في", "يصبح", "يساوي نفس", "تعادل", "تساوي",
// Hebrew (HE)
"ל", "ב", "בתור", "שווה ל", "הוא", "המר ל", "ערך ב", "יהיה", "אותו הדבר כמו", "זהה ל",
// Romanian (RO)
"la", "în", "ca", "egal cu", "este", "converti la", "schimbă în", "valoare în", "va fi", "este egal cu",
// Hungarian (HU)
"ra", "re", "ba", "be", "mint", "egyenlő", "az", "átvált", "értéke", "lesz", "ugyanaz mint",
// Swedish (SE)
"till", "i", "som", "är", "är lika med", "omvandla till", "värde i", "blir", "är samma som",
// Danish (DK)
"til", "i", "som", "er", "er lig med", "konverter til", "værdi i", "bliver", "er det samme som",
// Norwegian (NO)
"til", "i", "som", "er", "er lik", "konverter til", "verdi i", "blir", "er det samme som",
// Finnish (FI)
"ksi", "in", "kuin", "on", "on yhtä kuin", "muunna", "arvo", "tulee olemaan", "sama kuin",
// Estonian (EE)
"ks", "sisse", "nagu", "on", "on võrdne", "teisendada", "väärtus", "saab olema", "sama mis",
// Latvian (LV)
"uz", "iekš", "kā", "ir", "ir vienāds ar", "konvertēt uz", "vērtība", "būs", "tāpat kā",
// Lithuanian (LT)
"į", "kaip", "yra", "yra lygus", "konvertuoti į", "vertė", "bus", "tas pats kaip",
// Persian (FA)
"به", "در", "مثل", "برابر با", "است", "تبدیل به", "ارزش در", "خواهد بود", "همانند",
// Hindi (HI)
"को", "में", "के रूप में", "बराबर", "है", "में बदलें", "मूल्य में", "होगा", "के समान",
// Thai (TH)
"ไปที่", "ใน", "เป็น", "เท่ากับ", "คือ", "แปลงเป็น", "ค่าใน", "จะเป็น", "เท่ากัน",
// Indonesian (ID)
"ke", "dalam", "sebagai", "sama dengan", "adalah", "konversi ke", "nilai dalam", "akan menjadi", "sama dengan",
// Vietnamese (VI)
"thành", "trong", "là", "bằng", "là", "chuyển đổi thành", "giá trị trong", "sẽ là", "tương đương với",
// Malay (MS)
"kepada", "dalam", "sebagai", "sama dengan", "ialah", "tukar ke", "nilai dalam", "akan jadi", "setara dengan",
// Filipino/Tagalog (TL)
"sa", "sa loob ng", "bilang", "katumbas ng", "ay", "i-convert sa", "halaga sa", "magiging", "pareho sa",
}
// Build the OR group for all currency conversion phrases to use in the regex pattern
var orGroup strings.Builder
for i, phrase := range conversionPhrases {
if i > 0 {
orGroup.WriteString("|")
}
// escape for regex with special symbols:
orGroup.WriteString(regexp.QuoteMeta(phrase))
}
regexPattern := fmt.Sprintf(`(?i)([\d,]+(?:\.\d+)?)\s*([^\d,]+?)\s+(?:%s)\s+([^\d,]+)`, orGroup.String())
re := regexp.MustCompile(regexPattern)
matches := re.FindStringSubmatch(query)
if len(matches) < 4 {
return 0, "", "", false
@ -145,35 +236,114 @@ func ParseCurrencyConversion(query string) (float64, string, string, bool) {
// Normalize currency symbols
currencyMap := map[string]string{
"$": "USD", "€": "EUR", "£": "GBP", "¥": "JPY", "₩": "KRW", "₹": "INR", "₽": "RUB",
"usd": "USD", "eur": "EUR", "gbp": "GBP", "jpy": "JPY", "krw": "KRW", "inr": "INR", "rub": "RUB",
"dollar": "USD", "euro": "EUR", "pound": "GBP", "yen": "JPY", "won": "KRW", "rupee": "INR", "ruble": "RUB",
// Major Global Currencies
"$": "USD", "usd": "USD", "dollar": "USD", "dollars": "USD", "buck": "USD", "bucks": "USD", "us dollar": "USD", "american dollar": "USD", "freedom units": "USD",
"€": "EUR", "eur": "EUR", "euro": "EUR", "euros": "EUR",
"£": "GBP", "gbp": "GBP", "pound": "GBP", "pounds": "GBP", "sterling": "GBP", "quid": "GBP", "pound sterling": "GBP",
"¥": "JPY", "jpy": "JPY", "yen": "JPY", "cn¥": "CNY", // Handle ¥ ambiguity with CN¥ for Chinese Yuan
"₩": "KRW", "krw": "KRW", "won": "KRW", "korean won": "KRW",
"₹": "INR", "inr": "INR", "rupee": "INR", "rupees": "INR", "indian rupee": "INR",
"₽": "RUB", "rub": "RUB", "ruble": "RUB", "rubles": "RUB", "russian ruble": "RUB",
// Americas
"c$": "CAD", "cad": "CAD", "canadian dollar": "CAD", "loonie": "CAD",
"a$": "AUD", "aud": "AUD", "australian dollar": "AUD", "aussie dollar": "AUD",
"nz$": "NZD", "nzd": "NZD", "new zealand dollar": "NZD", "kiwi": "NZD", "kiwi dollar": "NZD",
"r$": "BRL", "brl": "BRL", "real": "BRL", "reais": "BRL", "brazilian real": "BRL",
"mx$": "MXN", "mxn": "MXN", "mexican peso": "MXN", "mexican pesos": "MXN",
"col$": "COP", "cop": "COP", "colombian peso": "COP",
"s/": "PEN", "pen": "PEN", "sol": "PEN", "soles": "PEN", "peruvian sol": "PEN",
"clp$": "CLP", "clp": "CLP", "chilean peso": "CLP",
"arg$": "ARS", "ars": "ARS", "argentine peso": "ARS",
// Europe & CIS
"chf": "CHF", "fr": "CHF", "swiss franc": "CHF", "franc suisse": "CHF",
"sek": "SEK", "kr": "SEK", "swedish krona": "SEK", "swedish kronor": "SEK",
"nok": "NOK", "norwegian krone": "NOK", "norwegian kroner": "NOK",
"dkk": "DKK", "danish krone": "DKK", "danish kroner": "DKK",
"zł": "PLN", "pln": "PLN", "zloty": "PLN", "polish zloty": "PLN",
"tl": "TRY", "try": "TRY", "turkish lira": "TRY", "türk lirası": "TRY", "₺": "TRY",
"huf": "HUF", "ft": "HUF", "forint": "HUF", "hungarian forint": "HUF",
"czk": "CZK", "kč": "CZK", "czech koruna": "CZK",
"ron": "RON", "lei": "RON", "romanian leu": "RON",
"bgn": "BGN", "лв": "BGN", "bulgarian lev": "BGN",
"uah": "UAH", "₴": "UAH", "hryvnia": "UAH", "ukrainian hryvnia": "UAH",
"kzt": "KZT", "₸": "KZT", "tenge": "KZT", "kazakhstani tenge": "KZT",
// Asia/Pacific
"cny": "CNY", "rmb": "CNY", "yuan": "CNY", "renminbi": "CNY", "chinese yuan": "CNY",
"hk$": "HKD", "hkd": "HKD", "hong kong dollar": "HKD",
"s$": "SGD", "sgd": "SGD", "singapore dollar": "SGD",
"nt$": "TWD", "twd": "TWD", "taiwan dollar": "TWD", "new taiwan dollar": "TWD",
"฿": "THB", "thb": "THB", "baht": "THB", "thai baht": "THB",
"rp": "IDR", "idr": "IDR", "rupiah": "IDR", "indonesian rupiah": "IDR",
"₱": "PHP", "php": "PHP", "philippine peso": "PHP",
"rm": "MYR", "myr": "MYR", "ringgit": "MYR", "malaysian ringgit": "MYR",
"₫": "VND", "vnd": "VND", "dong": "VND", "vietnamese dong": "VND",
"₭": "LAK", "lak": "LAK", "kip": "LAK", "lao kip": "LAK",
"៛": "KHR", "khr": "KHR", "riel": "KHR", "cambodian riel": "KHR",
// Middle East & Africa
"₪": "ILS", "ils": "ILS", "shekel": "ILS", "new israeli shekel": "ILS",
"﷼": "SAR", "sr": "SAR", "sar": "SAR", "riyal": "SAR", "saudi riyal": "SAR",
"د.إ": "AED", "dh": "AED", "aed": "AED", "dirham": "AED", "uae dirham": "AED",
"egp": "EGP", "e£": "EGP", "egyptian pound": "EGP",
"zar": "ZAR", "r": "ZAR", "rand": "ZAR", "south african rand": "ZAR",
"₦": "NGN", "ngn": "NGN", "naira": "NGN", "nigerian naira": "NGN",
}
fromCurr := strings.ToUpper(matches[2])
if mapped, ok := currencyMap[fromCurr]; ok {
fromCurr = mapped
} else if len(fromCurr) > 3 {
// Try to match longer names
for k, v := range currencyMap {
if strings.EqualFold(k, fromCurr) {
fromCurr = v
break
// Improved normalization function
normalizeCurrency := func(input string) string {
clean := strings.TrimSpace(strings.ToLower(input))
clean = strings.Join(strings.Fields(clean), " ")
// Direct map
if mapped, ok := currencyMap[clean]; ok {
return mapped
}
// Fuzzy match: try progressively shorter suffixes of the phrase (dropping leading words one at a time)
words := strings.Fields(clean)
for i := 0; i < len(words); i++ {
sub := strings.Join(words[i:], " ")
if mapped, ok := currencyMap[sub]; ok {
return mapped
}
}
// Fuzzy match: try reducing phrase from the end
for i := len(words) - 1; i >= 0; i-- {
sub := strings.Join(words[:i], " ")
if mapped, ok := currencyMap[sub]; ok {
return mapped
}
}
// Handle a trailing single-byte currency symbol (e.g. "us$"); only the last byte is looked up, so multi-byte symbols such as "€" are not matched here
if len(clean) > 1 {
if symbol, ok := currencyMap[string(clean[len(clean)-1])]; ok {
return symbol
}
}
// Currency code fallback
if len(clean) == 3 {
upper := strings.ToUpper(clean)
exchangeCacheMutex.RLock()
defer exchangeCacheMutex.RUnlock()
if _, exists := exchangeRates[upper]; exists {
return upper
}
}
return strings.ToUpper(input)
}
toCurr := strings.ToUpper(matches[3])
if mapped, ok := currencyMap[toCurr]; ok {
toCurr = mapped
} else if len(toCurr) > 3 {
// Try to match longer names
for k, v := range currencyMap {
if strings.EqualFold(k, toCurr) {
toCurr = v
break
}
}
fromCurr := normalizeCurrency(matches[2])
toCurr := normalizeCurrency(matches[3])
// Validate currencies exist in exchange rates
exchangeCacheMutex.RLock()
defer exchangeCacheMutex.RUnlock()
if _, fromExists := exchangeRates[fromCurr]; !fromExists {
return 0, "", "", false
}
if _, toExists := exchangeRates[toCurr]; !toExists {
return 0, "", "", false
}
return amount, fromCurr, toCurr, true