Substring text with HTML tags in Javascript

2023-03-05 17:48 问答作者：

Do you have solution to substring text with HTML tags in Javascript?

For example:

var str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.'

html开发者_运维问答_substr(str, 20)
// return Lorem ipsum <a href="#">dolor <strong>si</strong></a>

html_substr(str, 30)
// return Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, co

Taking into consideration that parsing html with regex is a bad idea, here is a solution that does just that :)

EDIT: Just to be clear: This is not a valid solution, it was meant as an exercise that made very lenient assumptions about the input string, and as such should be taken with a grain of salt. Read the link above and see why parsing html with regex can never be done.

function htmlSubstring(s, n) {
    var m, r = /<([^>\s]*)[^>]*>/g,
        stack = [],
        lasti = 0,
        result = '';

    //for each tag, while we don't have enough characters
    while ((m = r.exec(s)) && n) {
        //get the text substring between the last tag and this one
        var temp = s.substring(lasti, m.index).substr(0, n);
        //append to the result and count the number of characters added
        result += temp;
        n -= temp.length;
        lasti = r.lastIndex;

        if (n) {
            result += m[0];
            if (m[1].indexOf('/') === 0) {
                //if this is a closing tag, than pop the stack (does not account for bad html)
                stack.pop();
            } else if (m[1].lastIndexOf('/') !== m[1].length - 1) {
                //if this is not a self closing tag than push it in the stack
                stack.push(m[1]);
            }
        }
    }

    //add the remainder of the string, if needed (there are no more tags in here)
    result += s.substr(lasti, n);

    //fix the unclosed tags
    while (stack.length) {
        result += '</' + stack.pop() + '>';
    }

    return result;

}

Example: http://jsfiddle.net/danmana/5mNNU/

Note: patrick dw's solution may be safer regarding bad html, but I'm not sure how well it handles white spaces.

Usage:

var str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.';

var res1 = html_substr( str, 20 );
var res2 = html_substr( str, 30 );

alert( res1 ); // Lorem ipsum <a href="#">dolor <strong>si</strong></a>
alert( res2 ); // Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, co

Example: http://jsfiddle.net/2ULbK/4/

Function:

function html_substr( str, count ) {

    var div = document.createElement('div');
    div.innerHTML = str;

    walk( div, track );

    function track( el ) {
        if( count > 0 ) {
            var len = el.data.length;
            count -= len;
            if( count <= 0 ) {
                el.data = el.substringData( 0, el.data.length + count );
            }
        } else {
            el.data = '';
        }
    }

    function walk( el, fn ) {
        var node = el.firstChild;
        do {
            if( node.nodeType === 3 ) {
                fn(node);
                    //          Added this >>------------------------------------<<
            } else if( node.nodeType === 1 && node.childNodes && node.childNodes[0] ) {
                walk( node, fn );
            }
        } while( node = node.nextSibling );
    }
    return div.innerHTML;
}

it is solution for single tags

function subStrWithoutBreakingTags(str, start, length) {
    var countTags = 0;
    var returnString = "";
    var writeLetters = 0;
    while (!((writeLetters >= length) && (countTags == 0))) {
        var letter = str.charAt(start + writeLetters);
        if (letter == "<") {
            countTags++;
        }
        if (letter == ">") {
            countTags--;
        }
        returnString += letter;
        writeLetters++;
    }
    return returnString;
}

let str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.'
let plainText = htmlString.replace(/<[^>]+>/g, '');

Extract plain text with above given regular expression then use JS String based ".substr()" function for desired results

Use something similar to = str.replace(/<[^>]*>?/gi, '').substr(0, 20);
I've created an example at: http://fiddle.jshell.net/xpW9j/1/

Javascript has a sub-string method. It makes no difference if the string contains html.

see http://www.w3schools.com/jsref/jsref_substr.asp

继续阅读：javascript substring tags

Substring text with HTML tags in Javascript

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？

性激素六项检查的最佳时间是多久？多少钱？？

Easiest way to get words of one line from istream into a vector?

《梦在燃烧 (《三国演义》动画片主题曲)》MP3歌词-汤子星？

抽烟只抽炫赫门？