需要解析一種類xml的數據文件,數據格式如下:
<head> //文件頭
<type>xtype</type>
<condition>
key1=value1
key2=value2
</condition>
<mea>
key3=value3
key4=value4
</mea>
<xxxx>//多個
...
</xxxx>
</head>
<data> //數據域,多個
phi rcs ang
1 2 3
2 3 4
</data>
<data>
phi rcs ang
3 4 5
4 5 6
</data>
該數據格式類似xml,我們需要解析的是head中的所有標簽,及標簽中的鍵值對(key=value),並將data域中的數據保存成浮點型數組。
采用類似xml的解析方式,遞歸進行解析
具體代碼如下:
#include <string>
#include <map>
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;
// Data model for one parsed tag ("label") of the XML-like file.
// All members start out empty, so no user-written constructor is needed.
struct stMyDataLabel {
    // Tag name without the surrounding start/end characters.
    std::string Label;
    // head section: tokens inside the tag that are not key=value pairs.
    // data section: tokens that do not parse as a number (the column header row).
    std::vector<std::string> Content;
    // data section: numeric tokens in reading order. This is a 2-D table stored
    // flat, row by row; the column count is Content.size().
    std::vector<float> Values;
    // head section: parsed key=value pairs.
    std::map<std::string, std::string> KeyVal;
    // Nested tags.
    std::vector<stMyDataLabel> SubItems;
};

// Recursive, whitespace-token based parser for the XML-like data file:
// one header tag followed by any number of data tags.
class MyDataParse {
public:
    std::vector<stMyDataLabel> m_Data;  // the data sections, in file order
    stMyDataLabel m_Header;             // the first (header) section
    char m_StartLabel;                  // character that opens a tag
    char m_EndLabel;                    // character that closes a tag

public:
    MyDataParse() : m_StartLabel('<'), m_EndLabel('>') {}

    // Discards every previously parsed result, including the labels, so the
    // object can be reused for another input.
    void Init() {
        m_Data.clear();
        m_Header = stMyDataLabel();
    }

    // Writes the parsed result to myout: the header label, each direct child of
    // the header (label, content tokens, key/value pairs), then every data
    // section with its values laid out in rows of Content.size() columns.
    void Print(std::ostream& myout) const {
        myout << m_Header.Label << '\n';
        for (const stMyDataLabel& sub : m_Header.SubItems) {
            myout << "\t" << sub.Label << "\n";
            for (const std::string& text : sub.Content)
                myout << "\t\t" << text << "\t";
            myout << '\n';
            for (const auto& kv : sub.KeyVal)
                myout << "\t\t" << kv.first << " = " << kv.second << "\t";
            myout << '\n';
        }
        for (const stMyDataLabel& block : m_Data) {
            myout << block.Label << '\n';
            for (const std::string& column : block.Content)
                myout << "\t" << column << "\t";
            myout << '\n';
            const std::vector<std::string>::size_type columns = block.Content.size();
            std::vector<std::string>::size_type inRow = 0;
            for (float value : block.Values) {
                myout << "\t" << value << "\t";
                if (++inRow >= columns) {
                    // Row break goes to the caller's stream, not to std::cout.
                    myout << '\n';
                    inRow = 0;
                }
            }
            myout << '\n';
        }
        myout << std::flush;
    }

    // Opens filename and parses it. Any previous result is discarded first.
    // Returns false when filename is null, the file cannot be opened, or the
    // content is malformed or truncated.
    bool Parse(const char* filename) {
        if (filename == NULL) return false;
        std::ifstream myin(filename, std::ios::in);
        if (!myin.is_open()) return false;
        return Parse(myin);
    }

    // Parses an already opened stream: the first tag becomes m_Header, every
    // following top-level tag is appended to m_Data. Any previous result is
    // discarded first. Returns false as soon as one section fails to parse;
    // sections parsed before the failure remain available.
    bool Parse(std::istream& myin) {
        Init();
        if (!ParseLabel(myin, m_Header)) return false;
        // Skip trailing whitespace before testing for more input, so a final
        // newline does not start a bogus extra data section.
        while ((myin >> std::ws).good()) {
            stMyDataLabel data;
            if (!ParseLabel(myin, data)) return false;
            m_Data.push_back(data);
        }
        return true;
    }

    // Recursively parses one tag and everything inside it, reading
    // whitespace-separated tokens from myin.
    //   label: receives the result. An empty label.Label means "opening tag not
    //          seen yet"; a non-empty one means we are inside that tag, so any
    //          further opening tag is a child.
    // Returns true when the matching closing tag was read, false on a
    // mismatched closing tag or when the input ends before the tag is closed.
    bool ParseLabel(std::istream& myin, stMyDataLabel& label) {
        typedef std::string::size_type Pos;
        std::string str;
        while (myin >> str) {
            const Pos sidx = str.find(m_StartLabel);
            const Pos eidx = str.find(m_EndLabel);
            const bool isTag = sidx != std::string::npos &&
                               eidx != std::string::npos && eidx > sidx;
            if (!isTag) {
                StoreToken(label, str);
                continue;
            }

            // Closing tag: it must close the label we are currently inside.
            // (sidx + 1 <= eidx < size, so the index is always valid.)
            if (str[sidx + 1] == '/')
                return str.substr(sidx + 2, eidx - sidx - 2) == label.Label;

            const std::string name = str.substr(sidx + 1, eidx - sidx - 1);
            const bool isSub = !label.Label.empty();

            // Detect the one-token form <name>text</name>.
            const Pos sidx2 = str.rfind(m_StartLabel);
            const Pos eidx2 = str.rfind(m_EndLabel);
            const bool hasInlineClose = eidx2 != eidx &&
                                        sidx2 + 1 < str.size() &&
                                        str[sidx2 + 1] == '/';
            if (hasInlineClose) {
                if (eidx2 < sidx2 ||
                    str.substr(sidx2 + 2, eidx2 - sidx2 - 2) != name)
                    return false;  // closing name does not match the opening one
                const std::string inner = str.substr(eidx + 1, sidx2 - eidx - 1);
                if (isSub) {
                    stMyDataLabel sub;
                    sub.Label = name;
                    sub.Content.push_back(inner);
                    label.SubItems.push_back(sub);
                    continue;
                }
                // A top-level tag opened and closed in one token is complete.
                label.Label = name;
                label.Content.clear();
                label.Content.push_back(inner);
                return true;
            }

            // Opening tag whose content follows in later tokens.
            if (!isSub) {
                label.Label = name;
                continue;
            }
            label.SubItems.push_back(stMyDataLabel());
            stMyDataLabel& child = label.SubItems.back();
            child.Label = name;
            if (!ParseLabel(myin, child)) return false;  // child is malformed
        }
        // Input ended (or the stream failed) before the closing tag was found.
        return false;
    }

private:
    // Files one non-tag token under label: inside a data tag, numbers go to
    // Values and everything else to Content; elsewhere key=value tokens go to
    // KeyVal and everything else to Content.
    static void StoreToken(stMyDataLabel& label, const std::string& token) {
        if (label.Label == "data" || label.Label == "DATA") {
            std::stringstream sin(token);
            float val;
            if (sin >> val)
                label.Values.push_back(val);
            else
                label.Content.push_back(token);
            return;
        }
        const std::string::size_type idx = token.find('=');
        if (idx != std::string::npos)
            label.KeyVal.insert(std::make_pair(token.substr(0, idx), token.substr(idx + 1)));
        else
            label.Content.push_back(token);
    }
};
測試(命令行方式)
MyDataParse parse;
parse.Init();
parse.Parse("test.txt");//數據文件
parse.Print(cout);