From e2f17077232d7fa5d6c1214b8eb5d14492a7c246 Mon Sep 17 00:00:00 2001 From: partisan Date: Sun, 29 Jun 2025 09:41:00 +0200 Subject: [PATCH] Improved IA currency detection and matching --- ia-currency.go | 220 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 195 insertions(+), 25 deletions(-) diff --git a/ia-currency.go b/ia-currency.go index c1c93c1..2f1de37 100644 --- a/ia-currency.go +++ b/ia-currency.go @@ -129,8 +129,99 @@ func GetExchangeRate(from, to string) (float64, bool) { // ParseCurrencyConversion detects and processes currency conversion queries func ParseCurrencyConversion(query string) (float64, string, string, bool) { - // Match patterns like: "100 USD to EUR", "50 eur in gbp", "¥1000 to USD" - re := regexp.MustCompile(`(?i)([\d,]+(?:\.\d+)?)\s*([$€£¥₩₹₽A-Za-z]{1,6})\s+(?:to|in|➞|→)\s+([$€£¥₩₹₽A-Za-z]{1,6})`) + // Main conversion phrases + conversionPhrases := []string{ + // Universal/math + "➞", "→", "⇒", ">", "->", "=", "≈", "~", ":", "≡", + // English + "to", "in", "into", "as", "equals", "equal to", "equals to", "is", "becomes", "be", "makes", "converted to", "convert to", "convert into", "converted into", + "exchange for", "exchanged for", "value in", "as currency", "convert", "equivalent to", "same as", "is equal to", ">", "gives", "makes", "result is", "returns", "will be", "equals:", "is equivalent to", "≈", "~", ":", + // German (DE) + "auf", "in", "zu", "umrechnen in", "umrechnen zu", "als", "gleich", "ist", "ist gleich", "umwandeln in", "wird zu", "ergibt", "macht", "ist", "resultiert in", "gleichwertig mit", + // Spanish (ES) + "en", "a", "como", "igual a", "es", "es igual a", "es igual", "convertir a", "cambiar a", "valor en", "convierte en", "devuelve", "será", "equivale a", "es equivalente a", + // French (FR) + "vers", "en", "comme", "égal à", "est", "c'est", "convertir en", "changer en", "valeur en", "équivaut à", "sera", "fait", "rend", "est égal à", "équivalent à", + // Italian (IT) + "a", "in", "come", "uguale a", "è", "convertire in", "cambiare in", "valore in", "sarà", "fa", "equivale a", "è uguale a", + // Portuguese (PT/BR) + "para", "em", "como", "igual a", "é", "converter para", "trocar por", "valor em", "converte em", "vai ser", "faz", "equivale a", "é igual a", "é equivalente a", + // Dutch (NL) + "naar", "in", "als", "is gelijk aan", "is", "wordt", "omzetten naar", "waarde in", "gelijk aan", "is hetzelfde als", + // Czech (CS) + "na", "do", "jako", "rovná se", "je", "převést na", "výměna za", "hodnota v", "přepočet", "bude", "rovná", "je to", "je rovno", "je stejné jako", + // Slovak (SK) + "na", "do", "ako", "rovná sa", "je", "previesť na", "výměna za", "hodnota v", "prerátať", "bude", "rovná", "je to", "je rovné", "je rovnaké ako", + // Polish (PL) + "na", "w", "jako", "równa się", "jest", "przelicz na", "wymień na", "wartość w", "przelicza się na", "będzie", "to jest", "jest równy", "jest taki sam jak", + // Russian (RU) + "на", "в", "как", "равно", "есть", "конвертировать в", "обменять на", "значение в", "равняется", "будет", "это", "такое же как", + // Ukrainian (UA) + "на", "у", "як", "дорівнює", "є", "конвертувати у", "обміняти на", "значення в", "буде", "це", "таке саме як", + // Croatian / Serbian / Bosnian / Slovenian (HR/SR/BS/SL) + "na", "u", "za", "kao", "jednako", "je", "pretvori u", "zamijeniti za", "vrijednost u", "preračunaj u", "biti", "to je", "jednako kao", "je isto kao", + "v", "kot", "je enako", "pretvoriti v", "zamenjati za", "vrednost v", "je isto kao", "je enakovredno", + // Bulgarian (BG) + "на", "в", "като", "равно на", "е", "преобразувай в", "обмени на", "стойност в", "ще бъде", "това е", "равностойно на", + // Turkish (TR) + "için", "olarak", "eşittir", "bu", "dönüştür to", "değiştir to", "değer olarak", "olur", "eşit", "bu olur", "aynı olarak", + // Greek (EL) + "σε", "ως", "ίσον", "είναι", "μετατροπή σε", "ανταλλαγή με", "τιμή σε", "θα είναι", "αυτό είναι", "ισοδυναμεί με", "ίσο με", + // Chinese (Simplified and Traditional, ZH) + "到", "变为", "換成", "转换为", "等于", "等於", "是", "为", "結果是", "相等於", "等同於", "一樣", + // Japanese (JA) + "に", "として", "等しい", "は", "に変換", "に交換", "の値", "は", "結果は", "となる", "同じ", "等価", "等しく", + // Korean (KO) + "으로", "같이", "같다", "이다", "로 변환", "교환하다", "값", "이 된다", "와 같다", "같음", "동일하다", + // Arabic (AR) + "إلى", "الى", "في", "كـ", "يساوي", "هو", "تحويل إلى", "قيمة في", "يصبح", "يساوي نفس", "تعادل", "تساوي", + // Hebrew (HE) + "ל", "ב", "בתור", "שווה ל", "הוא", "המר ל", "ערך ב", "יהיה", "אותו הדבר כמו", "זהה ל", + // Romanian (RO) + "la", "în", "ca", "egal cu", "este", "converti la", "schimbă în", "valoare în", "va fi", "este egal cu", + // Hungarian (HU) + "ra", "re", "ba", "be", "mint", "egyenlő", "az", "átvált", "értéke", "lesz", "ugyanaz mint", + // Swedish (SE) + "till", "i", "som", "är", "är lika med", "omvandla till", "värde i", "blir", "är samma som", + // Danish (DK) + "til", "i", "som", "er", "er lig med", "konverter til", "værdi i", "bliver", "er det samme som", + // Norwegian (NO) + "til", "i", "som", "er", "er lik", "konverter til", "verdi i", "blir", "er det samme som", + // Finnish (FI) + "ksi", "in", "kuin", "on", "on yhtä kuin", "muunna", "arvo", "tulee olemaan", "sama kuin", + // Estonian (EE) + "ks", "sisse", "nagu", "on", "on võrdne", "teisendada", "väärtus", "saab olema", "sama mis", + // Latvian (LV) + "uz", "iekš", "kā", "ir", "ir vienāds ar", "konvertēt uz", "vērtība", "būs", "tāpat kā", + // Lithuanian (LT) + "į", "kaip", "yra", "yra lygus", "konvertuoti į", "vertė", "bus", "tas pats kaip", + // Persian (FA) + "به", "در", "مثل", "برابر با", "است", "تبدیل به", "ارزش در", "خواهد بود", "همانند", + // Hindi (HI) + "को", "में", "के रूप में", "बराबर", "है", "में बदलें", "मूल्य में", "होगा", "के समान", + // Thai (TH) + "ไปที่", "ใน", "เป็น", "เท่ากับ", "คือ", "แปลงเป็น", "ค่าใน", "จะเป็น", "เท่ากัน", + // Indonesian (ID) + "ke", "dalam", "sebagai", "sama dengan", "adalah", "konversi ke", "nilai dalam", "akan menjadi", "sama dengan", + // Vietnamese (VI) + "thành", "trong", "là", "bằng", "là", "chuyển đổi thành", "giá trị trong", "sẽ là", "tương đương với", + // Malay (MS) + "kepada", "dalam", "sebagai", "sama dengan", "ialah", "tukar ke", "nilai dalam", "akan jadi", "setara dengan", + // Filipino/Tagalog (TL) + "sa", "sa loob ng", "bilang", "katumbas ng", "ay", "i-convert sa", "halaga sa", "magiging", "pareho sa", + } + + // Build the OR group for all currency conversion phrases to use in the regex pattern + var orGroup strings.Builder + for i, phrase := range conversionPhrases { + if i > 0 { + orGroup.WriteString("|") + } + // escape for regex with special symbols: + orGroup.WriteString(regexp.QuoteMeta(phrase)) + } + regexPattern := fmt.Sprintf(`(?i)([\d,]+(?:\.\d+)?)\s*([^\d,]+?)\s+(?:%s)\s+([^\d,]+)`, orGroup.String()) + re := regexp.MustCompile(regexPattern) matches := re.FindStringSubmatch(query) if len(matches) < 4 { return 0, "", "", false @@ -145,35 +236,114 @@ func ParseCurrencyConversion(query string) (float64, string, string, bool) { // Normalize currency symbols currencyMap := map[string]string{ - "$": "USD", "€": "EUR", "£": "GBP", "¥": "JPY", "₩": "KRW", "₹": "INR", "₽": "RUB", - "usd": "USD", "eur": "EUR", "gbp": "GBP", "jpy": "JPY", "krw": "KRW", "inr": "INR", "rub": "RUB", - "dollar": "USD", "euro": "EUR", "pound": "GBP", "yen": "JPY", "won": "KRW", "rupee": "INR", "ruble": "RUB", + // Major Global Currencies + "$": "USD", "usd": "USD", "dollar": "USD", "dollars": "USD", "buck": "USD", "bucks": "USD", "us dollar": "USD", "american dollar": "USD", "freedom units": "USD", + "€": "EUR", "eur": "EUR", "euro": "EUR", "euros": "EUR", + "£": "GBP", "gbp": "GBP", "pound": "GBP", "pounds": "GBP", "sterling": "GBP", "quid": "GBP", "pound sterling": "GBP", + "¥": "JPY", "jpy": "JPY", "yen": "JPY", "cn¥": "CNY", // Handle ¥ ambiguity with CN¥ for Chinese Yuan + "₩": "KRW", "krw": "KRW", "won": "KRW", "korean won": "KRW", + "₹": "INR", "inr": "INR", "rupee": "INR", "rupees": "INR", "indian rupee": "INR", + "₽": "RUB", "rub": "RUB", "ruble": "RUB", "rubles": "RUB", "russian ruble": "RUB", + + // Americas + "c$": "CAD", "cad": "CAD", "canadian dollar": "CAD", "loonie": "CAD", + "a$": "AUD", "aud": "AUD", "australian dollar": "AUD", "aussie dollar": "AUD", + "nz$": "NZD", "nzd": "NZD", "new zealand dollar": "NZD", "kiwi": "NZD", "kiwi dollar": "NZD", + "r$": "BRL", "brl": "BRL", "real": "BRL", "reais": "BRL", "brazilian real": "BRL", + "mx$": "MXN", "mxn": "MXN", "mexican peso": "MXN", "mexican pesos": "MXN", + "col$": "COP", "cop": "COP", "colombian peso": "COP", + "s/": "PEN", "pen": "PEN", "sol": "PEN", "soles": "PEN", "peruvian sol": "PEN", + "clp$": "CLP", "clp": "CLP", "chilean peso": "CLP", + "arg$": "ARS", "ars": "ARS", "argentine peso": "ARS", + + // Europe & CIS + "chf": "CHF", "fr": "CHF", "swiss franc": "CHF", "franc suisse": "CHF", + "sek": "SEK", "kr": "SEK", "swedish krona": "SEK", "swedish kronor": "SEK", + "nok": "NOK", "norwegian krone": "NOK", "norwegian kroner": "NOK", + "dkk": "DKK", "danish krone": "DKK", "danish kroner": "DKK", + "zł": "PLN", "pln": "PLN", "zloty": "PLN", "polish zloty": "PLN", + "tl": "TRY", "try": "TRY", "turkish lira": "TRY", "türk lirası": "TRY", "₺": "TRY", + "huf": "HUF", "ft": "HUF", "forint": "HUF", "hungarian forint": "HUF", + "czk": "CZK", "kč": "CZK", "czech koruna": "CZK", + "ron": "RON", "lei": "RON", "romanian leu": "RON", + "bgn": "BGN", "лв": "BGN", "bulgarian lev": "BGN", + "uah": "UAH", "₴": "UAH", "hryvnia": "UAH", "ukrainian hryvnia": "UAH", + "kzt": "KZT", "₸": "KZT", "tenge": "KZT", "kazakhstani tenge": "KZT", + + // Asia/Pacific + "cny": "CNY", "rmb": "CNY", "yuan": "CNY", "renminbi": "CNY", "chinese yuan": "CNY", + "hk$": "HKD", "hkd": "HKD", "hong kong dollar": "HKD", + "s$": "SGD", "sgd": "SGD", "singapore dollar": "SGD", + "nt$": "TWD", "twd": "TWD", "taiwan dollar": "TWD", "new taiwan dollar": "TWD", + "฿": "THB", "thb": "THB", "baht": "THB", "thai baht": "THB", + "rp": "IDR", "idr": "IDR", "rupiah": "IDR", "indonesian rupiah": "IDR", + "₱": "PHP", "php": "PHP", "philippine peso": "PHP", + "rm": "MYR", "myr": "MYR", "ringgit": "MYR", "malaysian ringgit": "MYR", + "₫": "VND", "vnd": "VND", "dong": "VND", "vietnamese dong": "VND", + "₭": "LAK", "lak": "LAK", "kip": "LAK", "lao kip": "LAK", + "៛": "KHR", "khr": "KHR", "riel": "KHR", "cambodian riel": "KHR", + + // Middle East & Africa + "₪": "ILS", "ils": "ILS", "shekel": "ILS", "new israeli shekel": "ILS", + "﷼": "SAR", "sr": "SAR", "sar": "SAR", "riyal": "SAR", "saudi riyal": "SAR", + "د.إ": "AED", "dh": "AED", "aed": "AED", "dirham": "AED", "uae dirham": "AED", + "egp": "EGP", "e£": "EGP", "egyptian pound": "EGP", + "zar": "ZAR", "r": "ZAR", "rand": "ZAR", "south african rand": "ZAR", + "₦": "NGN", "ngn": "NGN", "naira": "NGN", "nigerian naira": "NGN", } - fromCurr := strings.ToUpper(matches[2]) - if mapped, ok := currencyMap[fromCurr]; ok { - fromCurr = mapped - } else if len(fromCurr) > 3 { - // Try to match longer names - for k, v := range currencyMap { - if strings.EqualFold(k, fromCurr) { - fromCurr = v - break + // Improved normalization function + normalizeCurrency := func(input string) string { + clean := strings.TrimSpace(strings.ToLower(input)) + clean = strings.Join(strings.Fields(clean), " ") + // Direct map + if mapped, ok := currencyMap[clean]; ok { + return mapped + } + // Fuzzy match: for last word + words := strings.Fields(clean) + for i := 0; i < len(words); i++ { + sub := strings.Join(words[i:], " ") + if mapped, ok := currencyMap[sub]; ok { + return mapped } } + // Fuzzy match: try reducing phrase from the end + for i := len(words) - 1; i >= 0; i-- { + sub := strings.Join(words[:i], " ") + if mapped, ok := currencyMap[sub]; ok { + return mapped + } + } + // Handle currency symbols at the end (e.g. "100usd") + if len(clean) > 1 { + if symbol, ok := currencyMap[string(clean[len(clean)-1])]; ok { + return symbol + } + } + // Currency code fallback + if len(clean) == 3 { + upper := strings.ToUpper(clean) + exchangeCacheMutex.RLock() + defer exchangeCacheMutex.RUnlock() + if _, exists := exchangeRates[upper]; exists { + return upper + } + } + return strings.ToUpper(input) } - toCurr := strings.ToUpper(matches[3]) - if mapped, ok := currencyMap[toCurr]; ok { - toCurr = mapped - } else if len(toCurr) > 3 { - // Try to match longer names - for k, v := range currencyMap { - if strings.EqualFold(k, toCurr) { - toCurr = v - break - } - } + fromCurr := normalizeCurrency(matches[2]) + toCurr := normalizeCurrency(matches[3]) + + // Validate currencies exist in exchange rates + exchangeCacheMutex.RLock() + defer exchangeCacheMutex.RUnlock() + if _, fromExists := exchangeRates[fromCurr]; !fromExists { + return 0, "", "", false + } + if _, toExists := exchangeRates[toCurr]; !toExists { + return 0, "", "", false } return amount, fromCurr, toCurr, true