/**
 * Reports whether `text` holds at least one code unit from the ranges this
 * module treats as Chinese: CJK ideographs (Extension A, Unified,
 * Compatibility) plus CJK Symbols and Punctuation and the
 * Halfwidth and Fullwidth Forms block.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when any character falls in those ranges.
 */
export function containsChinese(text) {
    const chinesePattern = /[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\u3000-\u303F\uFF00-\uFFEF]/;
    return chinesePattern.test(text);
}
/**
 * Reports whether `text` holds at least one Hangul character
 * (Hangul Syllables, Hangul Jamo, or Hangul Compatibility Jamo).
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when any character falls in those ranges.
 */
function containsKorean(text) {
    const hangulPattern = /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]/;
    return hangulPattern.test(text);
}
/**
 * Reports whether `text` holds at least one Hiragana, Katakana, or
 * CJK Unified Ideograph character.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when any character falls in those ranges.
 */
function containsJapanese(text) {
    const japanesePattern = /[\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]/;
    return japanesePattern.test(text);
}
/**
 * Reports whether `text` holds at least one character from the Arabic or
 * Arabic Supplement blocks.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when any character falls in those ranges.
 */
function containsArabic(text) {
    const arabicPattern = /[\u0600-\u06FF\u0750-\u077F]/;
    return arabicPattern.test(text);
}
/**
 * Reports whether `text` holds at least one character from the Cyrillic block.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when any character falls in that range.
 */
function containsCyrillic(text) {
    const cyrillicPattern = /[\u0400-\u04FF]/;
    return cyrillicPattern.test(text);
}
/**
 * Reports whether `text` holds at least one character from the Hebrew block
 * or the Hebrew presentation forms (U+FB1D–U+FB4F).
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when any character falls in those ranges.
 */
function containsHebrew(text) {
    const hebrewPattern = /[\u0590-\u05FF\uFB1D-\uFB4F]/;
    return hebrewPattern.test(text);
}
/**
 * Reports whether `text` holds at least one character from the Thai block.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when any character falls in that range.
 */
function containsThai(text) {
    const thaiPattern = /[\u0E00-\u0E7F]/;
    return thaiPattern.test(text);
}
/**
 * Reports whether `text` holds at least one character from the Devanagari block.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when any character falls in that range.
 */
function containsDevanagari(text) {
    const devanagariPattern = /[\u0900-\u097F]/;
    return devanagariPattern.test(text);
}


// Per-script patterns for the characters that splitText follows with a space.
// Keys are the script names returned by getScript.
//
// The CJK entries cover CJK Symbols and Punctuation (U+3000–U+303F) plus only
// the punctuation/symbol parts of Halfwidth and Fullwidth Forms. Full-width
// digits and letters (U+FF10–FF19, U+FF21–FF3A, U+FF41–FF5A) and half-width
// Katakana/Hangul (U+FF66–FFDC) are letters, not punctuation, so they are
// left out. Hangul Jamo (U+1160–U+11FF) are likewise letters and must not be
// listed for Korean, otherwise decomposed syllables get spaces inserted
// between their jamo.
const punctuationMap = {
    chineseText: /[\u3000-\u303F\uFF01-\uFF0F\uFF1A-\uFF20\uFF3B-\uFF40\uFF5B-\uFF65\uFFE0-\uFFEE]/,  // CJK + full-width punctuation
    japaneseText: /[\u3000-\u303F\u30FB\uFF01-\uFF0F\uFF1A-\uFF20\uFF3B-\uFF40\uFF5B-\uFF65\uFFE0-\uFFEE]/,  // Same, plus the Katakana middle dot
    koreanText: /[\u3000-\u303F\uFF01-\uFF0F\uFF1A-\uFF20\uFF3B-\uFF40\uFF5B-\uFF65\uFFE0-\uFFEE]/,  // CJK + full-width punctuation
    arabicText: /[\u060C\u061B\u061F]/,  // Arabic comma, semicolon, question mark
    cyrillicText: /[.,!?]/,  // Basic punctuation for Cyrillic
    hebrewText: /[.,!?]/,  // Basic punctuation for Hebrew
    thaiText: /[ฯ๏]/,  // Thai punctuation marks
    devanagariText: /[।॥]/,  // Devanagari danda and double danda
    other: /[.,!?]/  // Basic punctuation for other scripts
};

// Maps a character to the name of the first script whose detector accepts it.
// Detectors are tried in the order listed, so a character matched by more than
// one of them (e.g. CJK ideographs, which both the Chinese and Japanese
// detectors accept) is reported under the earlier entry. Falls back to 'other'.
const getScript = (char) => {
    const detectors = [
        [containsChinese, "chineseText"],
        [containsKorean, "koreanText"],
        [containsJapanese, "japaneseText"],
        [containsArabic, "arabicText"],
        [containsCyrillic, "cyrillicText"],
        [containsHebrew, "hebrewText"],
        [containsThai, "thaiText"],
        [containsDevanagari, "devanagariText"],
    ];

    const hit = detectors.find(([matches]) => matches(char));
    return hit ? hit[1] : 'other';
};

/**
 * Splits `text` into consecutive runs that share the same script, inserting a
 * space after each character that matches the punctuation pattern of the run
 * it belongs to.
 *
 * A new run starts whenever a character's script differs from the current
 * run's script, except for a few neutral characters (comma, space, `!`, `?`,
 * `.`, `:`) which always stay in the current run.
 *
 * Note: the space is appended unconditionally, so punctuation that is already
 * followed by a space ends up followed by two.
 *
 * @param {string} text - Text to split. `null`, `undefined` and `''` yield `[]`.
 * @returns {Array<{text: string, script: string}>} Runs in original order;
 *   `script` is one of the names returned by `getScript`.
 */
export const splitText = (text) => {
    // Nothing to split; this also avoids indexing into null/undefined below.
    if (text == null || text === '') {
        return [];
    }

    // Characters that never cause a switch to a different script run.
    const neutralChars = [',', ' ', '!', '?', '.', ':'];

    const parts = [];
    let currentPart = '';

    // The first run takes the script of the first character.
    let currentScript = getScript(text[0]);

    for (const char of Array.from(text)) {
        const script = getScript(char);

        // Script changed on a non-neutral character: close the current run.
        if (script !== currentScript && !neutralChars.includes(char)) {
            parts.push({ text: currentPart, script: currentScript });
            currentPart = '';
            currentScript = script;
        }

        currentPart += char;

        // Follow punctuation of the current run's script with a space.
        if (punctuationMap[currentScript] && punctuationMap[currentScript].test(char)) {
            currentPart += ' ';
        }
    }

    // Flush the trailing run.
    if (currentPart) {
        parts.push({ text: currentPart, script: currentScript });
    }

    return parts;
};
