libxml2的xpath检索中文

Posted CppSkill

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了libxml2的xpath检索中文相关的知识,希望对你有一定的参考价值。

ZC: 当 XPath 字符串中包含中文时,xmlXPathEvalExpression(...) 返回 NULL,暂时不知道该怎么处理...

ZC: 下面是测试的一些代码/文件,留着以后再研究吧...

 

1、Qt5.3.2

2、XML 节点的属性中包含中文(XML 文件保存为 UTF-8 编码)

<?xml version="1.0" encoding="utf-8" ?>
<root>
    
    <newNode2>content changed</newNode2>
    <newNode3 newAttr="YES">newNode3 content</newNode3>
    <ceshi attribute="测试">测试一下</ceshi>
    <node2 attribute="no">NODE CONTENT</node2>
    
    <son>
        <grandson>This is a grandson node</grandson>
    <newGrandSon>new content</newGrandSon></son>
</root>

 

3、测试代码:

  ZC: 分别尝试了 UTF-8 编码的字符串和本地编码的字符串,都检索不到我要的节点...

#include <climits>
#include <cstdlib>
#include <cstring>

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
//#include <iconv.h>

#include <QDebug>
#include <QTextCodec>


// Constructs the main window: forwards `parent` to QMainWindow, allocates the
// Ui::MainWindow object and calls its setupUi() with this window.
MainWindow::MainWindow(QWidget *parent)
    : QMainWindow(parent)
    , ui(new Ui::MainWindow)
{
    this->ui->setupUi(this);
}

// Destroys the window and deletes the Ui::MainWindow object held in `ui`.
MainWindow::~MainWindow()
{
    delete this->ui;
}





// Converts `inlen` bytes starting at `inbuf` from `from_charset` to
// `to_charset` with iconv and stores the result in `outbuf`, whose capacity is
// `outlen` bytes.
//
// `outbuf` is zero-filled before the conversion and its last byte is never
// written, so a successful result is always NUL-terminated.
//
// Returns 0 on success and -1 on failure: a NULL argument, a negative `inlen`,
// `outlen` < 1, a charset pair iconv cannot open, invalid or truncated input,
// or converted output that does not fit. After a failure `outbuf` may contain
// a partial conversion.
int code_convert(const char* from_charset, const char* to_charset, char* inbuf,
               int inlen, char* outbuf, int outlen)
{
    if (from_charset == NULL || to_charset == NULL || inbuf == NULL ||
        outbuf == NULL || inlen < 0 || outlen < 1)
        return -1;

    // iconv_open() reports failure as (iconv_t)-1, never as a null handle.
    const iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)(-1))
        return -1;

    memset(outbuf, 0, static_cast<size_t>(outlen));

    // iconv() advances the pointers and decrements the counters through
    // size_t*, so real size_t variables are required; reinterpreting the
    // addresses of the int parameters corrupts memory where size_t is wider.
    char* in_pos = inbuf;
    char* out_pos = outbuf;
    size_t in_left = static_cast<size_t>(inlen);
    size_t out_left = static_cast<size_t>(outlen) - 1;  // keep the final NUL

    // Some iconv headers declare the input argument as `const char**`, others
    // as `char**`. This adapter converts to whichever one the header uses.
    struct InbufArg
    {
        char** p;
        operator char**() const { return p; }
        operator const char**() const { return const_cast<const char**>(p); }
    };
    const InbufArg in_arg = { &in_pos };

    bool ok = iconv(cd, in_arg, &in_left, &out_pos, &out_left) != (size_t)(-1);
    if (ok)
    {
        // A NULL input flushes any pending shift sequence of stateful encodings.
        ok = iconv(cd, NULL, NULL, &out_pos, &out_left) != (size_t)(-1);
    }

    // Close the descriptor on every path so a failed conversion does not leak it.
    iconv_close(cd);
    return ok ? 0 : -1;
}

// Converts a NUL-terminated UTF-8 string to GB2312 through code_convert().
//
// On success returns a malloc()-allocated, NUL-terminated string that the
// caller must release with free(). Returns NULL when `inbuf` is NULL, the
// input is too long for code_convert()'s int lengths, the allocation fails,
// or the conversion fails.
char* u2g(char *inbuf)
{
    if (inbuf == NULL)
        return NULL;

    const size_t nInLen = strlen(inbuf);
    // code_convert() takes int lengths; reject input whose buffer size would overflow.
    if (nInLen > static_cast<size_t>((INT_MAX - 2) / 2))
        return NULL;

    // GB2312 output is never longer than its UTF-8 input, so two bytes per
    // input byte is a generous bound; the extra two bytes keep room for the
    // terminator even when the input is empty.
    const int nOutLen = 2 * static_cast<int>(nInLen) + 2;
    char* szOut = static_cast<char*>(malloc(static_cast<size_t>(nOutLen)));
    if (szOut == NULL)
        return NULL;

    // Named arrays rather than string literals, so the call is valid whether
    // or not code_convert() declares its charset parameters const.
    char szFrom[] = "utf-8";
    char szTo[] = "gb2312";

    // Offer one byte less than was allocated and terminate explicitly, so the
    // result is a valid C string regardless of how full the buffer gets.
    if (code_convert(szFrom, szTo, inbuf, static_cast<int>(nInLen), szOut, nOutLen - 1) != 0)
    {
        free(szOut);
        return NULL;
    }
    szOut[nOutLen - 1] = '\0';
    return szOut;
}

// Converts a NUL-terminated GB2312 string to UTF-8 through code_convert().
//
// On success returns a malloc()-allocated, NUL-terminated string that the
// caller must release with free(). Returns NULL when `inbuf` is NULL, the
// input is too long for code_convert()'s int lengths, the allocation fails,
// or the conversion fails (for example because the input is not valid GB2312).
char* g2u(char *inbuf)
{
    if (inbuf == NULL)
        return NULL;

    const size_t nInLen = strlen(inbuf);
    // code_convert() takes int lengths; reject input whose buffer size would overflow.
    if (nInLen > static_cast<size_t>((INT_MAX - 2) / 2))
        return NULL;

    // A two-byte GB2312 character becomes at most three UTF-8 bytes, so two
    // bytes per input byte always suffices; the extra two bytes keep room for
    // the terminator even when the input is empty.
    const int nOutLen = 2 * static_cast<int>(nInLen) + 2;
    char* szOut = static_cast<char*>(malloc(static_cast<size_t>(nOutLen)));
    if (szOut == NULL)
        return NULL;

    // Named arrays rather than string literals, so the call is valid whether
    // or not code_convert() declares its charset parameters const.
    char szFrom[] = "gb2312";
    char szTo[] = "utf-8";

    // Offer one byte less than was allocated and terminate explicitly, so the
    // result is a valid C string regardless of how full the buffer gets.
    if (code_convert(szFrom, szTo, inbuf, static_cast<int>(nInLen), szOut, nOutLen - 1) != 0)
    {
        free(szOut);
        return NULL;
    }
    szOut[nOutLen - 1] = '\0';
    return szOut;
}

// Evaluates the XPath expression `szXpath` against `_pDoc`.
//
// Returns the evaluation result when it holds a non-empty node set; the caller
// must release it with xmlXPathFreeObject(). Returns NULL when the XPath
// context cannot be created, the evaluation fails, or the node set is empty.
xmlXPathObject* Get_NodeSet(xmlDoc* _pDoc, const xmlChar *szXpath)
{
    xmlXPathContextPtr xpathCtx = xmlXPathNewContext(_pDoc);
    if (!xpathCtx)
        return NULL;

    // The context is needed only for the evaluation itself, so release it
    // right away, before looking at the outcome.
    xmlXPathObjectPtr found = xmlXPathEvalExpression(szXpath, xpathCtx);
    xmlXPathFreeContext(xpathCtx);

    if (found != NULL && xmlXPathNodeSetIsEmpty(found->nodesetval))
    {
        // Nothing matched: drop the result and report it the same way as a failure.
        xmlXPathFreeObject(found);
        found = NULL;
    }

    return found;
}

void MainWindow::on_pbtnXPath_clicked()
{
    xmlDocPtr doc = NULL;             //定义解析文档指针
    xmlNodePtr curNode = NULL;         //定义结点指针(你需要它为了在各个结点间移动)

    char *szDocName = "F:/ZZ_Qt5/Qt532_vs2010/build-libxml2_zz-z-Debug/debug/ChangedXml.xml";

    doc = xmlReadFile(szDocName, "GB2312", XML_PARSE_RECOVER);  //解析文件
    //doc = xmlReadFile(szDocName, "UTF-8", XML_PARSE_RECOVER);

    if (NULL == doc)
    {
        qDebug() << "Document not parsed successfully.";
        return;
    }

    char* pcCeShi = "测试";
    QTextCodec *pCodec = QTextCodec::codecForName("GBK");
    QString strCeShi = pCodec->toUnicode(pcCeShi);

    //QString str = "/root/node2[@attribute=‘no‘]";
    QString str = "/root/node2[@attribute=‘"+strCeShi+"‘]";
    QByteArray ba = str.toUtf8();
    //QByteArray ba = str.toLocal8Bit();
    char pc[256] = {0};
    memcpy(&pc[0], ba.data(), ba.length());
    //pc[ba.length()] = ‘\0‘;

    char *p0 = "/root/node2[@attribute=‘测试‘]";
    char* p1 = g2u(p0);
    char pc1[256] = {0};
    memcpy(&pc1[0], p1, strlen(p1));

    //xmlChar *szXpath =BAD_CAST ("/root/node2[@attribute=‘no‘]");
    xmlChar *szXpath = BAD_CAST (p1);
    xmlXPathObjectPtr app_result = Get_NodeSet(doc, szXpath);  //查询并得到结果

    if (NULL == app_result)
    {
        qDebug() << "app_result is NULL";
        return;
    }
    xmlChar *szValue = NULL;
    if(app_result)
    {
        xmlNodeSetPtr nodeset = app_result->nodesetval;
        for (int i = 0; i < nodeset->nodeNr; i++)
        {
            curNode = nodeset->nodeTab[i];
            if(curNode != NULL)
            {
                szValue = xmlGetProp(curNode,BAD_CAST "attribute");
                if (szValue != NULL)
                {
                    qDebug() << "attribute = " << (char*)szValue;
                    xmlFree(szValue);
                }

                szValue = xmlNodeGetContent(curNode);
                if (szValue != NULL)
                {
                    qDebug() << "content = " << (char*)szValue;
                    xmlFree(szValue);
                }
            }
        }
        xmlXPathFreeObject (app_result);
    }
    xmlFreeDoc(doc);

    free(p1);
}

 

4、

5、

6、

 

以上是关于libxml2的xpath检索中文的主要内容,如果未能解决你的问题,请参考以下文章

[libxml2]_[XML处理]_[使用libxml2的xpath特性修改xml文件内容]

逐层检索和全局检索--xpath

Cannot open include file: 'libxml/xpath.h': No such file or directory

采用libxml2解析xml资源

在 xpath 中使用 [not] 检索与给定值不匹配的内容 (PLSQL)

xpath 通过ID和Class检索