How do I get the font color from a piece of HTML source code?
I have a piece of HTML source like this:
<FONT color=#5a6571>Beverly Mitchell</FONT> <FONT color=#5a6571>Shawnee Smith</FONT> <FONT color=#5a6571>Glenn Plummer</FONT> <NOBR>more >></NOBR>
I tried to retrieve the "color" value, like this:
MSHTML::IHTMLDocument2Ptr htmDoc1 = NULL;
SAFEARRAY *psaStrings1 = SafeArrayCreateVector(VT_VARIANT, 0, 1);
CoCreateInstance(CLSID_HTMLDocument, NULL, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, (void**) &htmDoc1);
VARIANT *param1 = NULL;
HRESULT hr = SafeArrayAccessData(psaStrings1, 开发者_高级运维(LPVOID*)¶m1);
param1->vt = VT_BSTR;
param1->bstrVal = SysAllocString(varSrc1.bstrVal);
hr = SafeArrayUnaccessData(psaStrings1);
hr = htmDoc1->write(psaStrings1);
MSHTML::IHTMLElementPtr pElemBody1 = NULL;
MSHTML::IHTMLDOMNodePtr pHTMLBodyDOMNode1 =NULL;
hr = htmDoc1->get_body(&pElemBody1);
if(SUCCEEDED(hr))
{
hr = pElemBody1->QueryInterface(IID_IHTMLDOMNode,(void**)&pHTMLBodyDOMNode1);
if(SUCCEEDED(hr))
{
ProcessDomNodeSmartWrapper(pHTMLBodyDOMNode1, ProcTgtTagStrVec);
}
}
long lLength = 0;
MSHTML::IHTMLElementCollectionPtr pElemColl1 = NULL;
MSHTML::IHTMLElementPtr pChElem1 = NULL;
MSHTML::IHTMLStylePtr pStyle1 = NULL;
IDispatchPtr ppvdisp1 = NULL;
hr = htmDoc1->get_all(&pElemColl1);
hr = pElemColl1->get_length(&lLength);
for(long i = 0; i < lLength; i++)
{
_variant_t name(i);
_variant_t index(i);
ppvdisp1 = pElemColl1->item(name, index);
if(ppvdisp1 && SUCCEEDED(hr))
{
hr = ppvdisp1->QueryInterface(IID_IHTMLElement, (void **)&pChElem1);
if(pChElem1 && SUCCEEDED(hr))
{
BSTR bstrTagName = NULL;
pChElem1->get_tagName(&bstrTagName);
hr = pChElem1->get_style(&pStyle1);
if(pStyle1 && SUCCEEDED(hr))
{
_variant_t varFtCol;
hr = pStyle1->get_color(&varFtCol);
if(hr = S_OK && varFtCol)
{
hmStyles1[wstring(varFtCol.bstrVal)] = L"FontColor";
}
}
if(bstrTagName)
SysFreeString(bstrTagName);
} // if pStyle && SUCCEEDED(hr)
}//if ppvdisp && SUCCEEDED(hr)
}//for
But I can never get the "color" value: varFtCol.bstrVal
is a bad pointer when I debug the program. This is what varFtCol
shows in the debugger:
- varFtCol {???} _variant_t - tagVARIANT BSTR = 0x00000000 tagVARIANT vt 8 unsigned short - BSTR 0x00000000 wchar_t * CXX0030: Error: expression cannot be evaluated
#5a6571
is a hex color that represents the RGB value (90, 101, 113).
How can I get this color info?
You shouldn't be getting style on pChElem1 because the color is not part of style in your case. Color is part of Font element.
Instead you must call pChElem1->getAttribute("color" . . .)
This will return #5a6571
The following code is in MFC. But you can easily convert to regular Win32 if you are not using MFC.
// Converts a hexadecimal colour string of the form "RRGGBB" (optionally
// wrapped in '#' characters, e.g. "#5a6571") into a COLORREF.
// Any input that is not exactly six characters long after the '#' characters
// are stripped yields RGB(0,0,0).
COLORREF GetColorFromHexString( CString szColor )
{
    // Work on a copy with the '#' markers removed from both ends.
    CString hexDigits = szColor;
    hexDigits.TrimRight(_T('#'));
    hexDigits.TrimLeft(_T('#'));

    // Guard: only the plain RRGGBB layout is understood.
    if (hexDigits.GetLength() != 6)
        return RGB(0, 0, 0);

    // Parse each two-character pair separately as a base-16 number:
    // index 0 = red, 1 = green, 2 = blue.
    long channel[3] = { 0, 0, 0 };
    for (int i = 0; i < 3; ++i)
    {
        CString pairText = hexDigits.Mid(i * 2, 2);
        channel[i] = _tcstol(LPCTSTR(pairText), NULL, 16);
    }

    return RGB(channel[0], channel[1], channel[2]);
}
According to the MSDN documentation, IHTMLStyle::get_color may return either a BSTR or an integer value in the variant. Have you tried assigning varFtCol into an integer value and examining that result?
// Read the colour out of the variant as an integer by converting the
// _variant_t itself, rather than reading a member of the underlying union.
const int colorValue = static_cast<int>(varFtCol);
As a recommendation, when working with _variant_t, it is usually better to use the built-in conversion operators than to access the members of the union directly.
精彩评论