
/**
 * Aligns a sentence-level tokenisation with the display lines of a text.
 *
 * The function works in three stages:
 *  1. Token clean-up: tokens whose `word` is a single space are dropped and
 *     punctuation / number fragments are glued onto the preceding token.
 *     Kept tokens are deep-copied through JSON, so the input is not mutated.
 *  2. Line splitting: every entry of `paragraph` is cut after each ',' and
 *     after each '!' (the delimiter stays on the left part). A piece longer
 *     than 20 characters is cut further into runs of five space-separated
 *     words.
 *  3. Assignment: tokens are joined with single spaces into a running text.
 *     While the current line contains that text the token receives the
 *     line's 1-based number; otherwise the line number advances, the token
 *     receives the new number and the running text is cleared.
 *
 * @param {string[]} paragraph - Source texts to split into lines (entries are
 *     expected to be strings: `split` and `includes` are called on them).
 * @param {Array<Object>} words - Tokens; `word` and `end` are the only
 *     properties read here, every other property is copied through as is.
 * @returns {Array<Object>} The merged token copies, each with a numeric
 *     `paragraph` property added.
 */
export const pauseInSentence = (paragraph, words) => {
    // Marks that are always attached to the token in front of them.
    const TRAILING_MARKS = [',', '!', "'", ';', '?', ':', '"', '+'];
    const LONG_PIECE_LENGTH = 20;
    const WORDS_PER_CHUNK = 5;

    // True when Number(value) is not NaN.
    const looksNumeric = (value) => !Number.isNaN(Number(value));

    // Splits `text` on `delimiter` and puts the delimiter back on every part except the last.
    const splitKeeping = (text, delimiter) => {
        const parts = text.split(delimiter);
        return parts.map((part, position) => (position === parts.length - 1 ? part : part + delimiter));
    };

    // Cuts a long piece into runs of WORDS_PER_CHUNK space-separated words; empty runs are skipped.
    const chunkByWords = (piece) => {
        const fragments = splitKeeping(piece, ' ');
        const chunks = [];
        for (let from = 0; from < fragments.length; from += WORDS_PER_CHUNK) {
            const chunk = fragments.slice(from, from + WORDS_PER_CHUNK).join('');
            if (chunk) {
                chunks.push(chunk);
            }
        }
        return chunks;
    };

    // ---- Stage 1: drop blanks, glue punctuation and number fragments ----
    const tokens = words.filter((token) => token.word !== ' ');
    const merged = [];
    for (const [position, token] of tokens.entries()) {
        // `previous` is the preceding raw token, `tail` the last merged token.
        const previous = tokens[position - 1];
        const previousLastChar = previous?.word[previous.word.length - 1];
        const tail = merged[merged.length - 1];

        // The branch order matters: the first matching rule wins.
        let extendEnd;
        if (TRAILING_MARKS.includes(token.word) && tail) {
            // Plain trailing mark: text only, the end value is left alone.
            extendEnd = false;
        } else if (tail?.word.endsWith("'")) {
            // Whatever follows an apostrophe belongs to the same token (e.g. I' + m).
            extendEnd = true;
        } else if (token.word === '.' && looksNumeric(previousLastChar) && tail) {
            // A dot right after a numeric character (e.g. 3 + .).
            extendEnd = true;
        } else if (previousLastChar === '+' || previousLastChar === ':') {
            // Anything following '+' or ':' is glued on, text only.
            extendEnd = false;
        } else if (looksNumeric(token.word[0]) && previous?.word === '.' && tail) {
            // Numeric start right after a lone dot (e.g. 3. + 5).
            extendEnd = true;
        } else if (token.word === '.' && tail) {
            // Any other dot: text only.
            extendEnd = false;
        } else {
            merged.push(JSON.parse(JSON.stringify(token)));
            continue;
        }

        tail.word += token.word;
        if (extendEnd) {
            tail.end = token.end;
        }
    }

    // ---- Stage 2: split the source texts into display lines ----
    // Cut after ',' first, then after '!', then by word count for long pieces.
    const lines = paragraph.flatMap((entry) =>
        splitKeeping(entry, ',').flatMap((clause) =>
            splitKeeping(clause, '!').flatMap((piece) =>
                (piece.length > LONG_PIECE_LENGTH ? chunkByWords(piece) : [piece]))));

    // ---- Stage 3: give every merged token the number of its line ----
    let runningText = '';
    let lineNumber = 1;
    for (const token of merged) {
        runningText += (runningText === '' ? '' : ' ') + token.word;
        if (!lines[lineNumber - 1]?.includes(runningText)) {
            // The text no longer fits the current line: move on and start over.
            lineNumber += 1;
            runningText = '';
        }
        token.paragraph = lineNumber;
    }

    return merged;
};

/**
 * Simple segmentation: every `count` consecutive words are joined into one
 * line, and every two lines form one paragraph.
 *
 * Each returned line is a shallow copy of the first word of its group, with
 * `word` replaced by the group's words joined with single spaces, `end` taken
 * from the last word of the group, and `paragraph` set to the 1-based
 * paragraph number. The input array and its word objects are left untouched,
 * so the function can safely be called again on the same input.
 *
 * @param {Array<Object>} words - Word objects; `word` and `end` are read,
 *     every other property of a group's first word is copied through.
 * @param {number} [count=3] - Number of words per line. A value for which
 *     `(index + 1) % count` is never 0 (for example 0) yields a single line.
 * @returns {Array<Object>} One object per line, in order.
 */
export const easyPauseInSentence = (words, count = 3) => {
    const lines = [];
    // The line currently being filled; null means the next word opens a new line.
    let openLine = null;

    words.forEach((word, index) => {
        if (openLine === null) {
            // Copy instead of reusing the caller's object so the input is not mutated.
            openLine = { ...word };
            lines.push(openLine);
        } else {
            openLine.word += ` ${word.word}`;
            // Track the end on every append so a trailing partial line is correct too.
            openLine.end = word.end;
        }

        if ((index + 1) % count === 0) {
            openLine = null;
        }
    });

    // Two consecutive lines share one paragraph number: 1, 1, 2, 2, 3, ...
    lines.forEach((line, index) => {
        line.paragraph = Math.floor(index / 2) + 1;
    });

    return lines;
};
