2013-09-14 15:34:16
用后缀数组求一个字符串中重复出现的最长的子串。
- 用C++中的string类可以很方便地进行操作,需将后缀数组保存在vector<string>,如下面代码中的string版本所示,但这样就会因为<string>有很大的开销;
- 直接用字符指针指向后缀字符串的首地址,可以节省很大的空间,如下面代码中的char *版本所示.
- 注意使用char *版本时,用qsort函数最后缀字符串数组排序,需要提供comp函数,该函数的写法如下:
1 int pStrcmp(const void *p,const void *q)2 {3 return ( strcmp( *(char **)p , *(char **)q ) );4 }
更多关于该函数的说明,详见博文。
代码(string版本):
1 #include2 #include 3 #include //用string类模板,必须包含该头文件 4 #include //用vector模板,必须包含该头文件 5 #include //用sort函数,必须包含该头文件 6 using namespace std; 7 8 //获取两个字符串的最长公共子串 9 size_t GetLCS(const string &str1,const string &str2) 10 { 11 size_t len1 = str1.length(); 12 size_t len2 = str2.length(); 13 14 size_t end = len1 < len2 ? len1 : len2; 15 16 size_t index = 0; 17 size_t commenLen = 0; 18 19 for (index = 0;index < end;++index) 20 { 21 if (str1.at(index) == str2.at(index)) 22 { 23 ++commenLen; 24 } 25 else 26 { 27 break; 28 } 29 } 30 31 return commenLen; 32 } 33 34 //获取字符串中重复出现且最长的子串 35 size_t FindLongeStringApearTwice(const string &srcStr,string &subStr) 36 { 37 vector vecStr; 38 39 size_t index; 40 size_t end = srcStr.length(); 41 42 string tmpStr; 43 size_t tmpLen = 0; 44 size_t maxLen = 0; 45 46 for ( index = 0;index < end;++index) //生成后缀数组 47 { 48 tmpStr = srcStr.substr(index,(end - 1 - index)); 49 vecStr.push_back(tmpStr); 50 //tmpStr.clear(); //此处的清楚是不必要的,可以删去 51 } 52 53 sort(vecStr.begin(),vecStr.end()); //对后缀字符串数组排序 54 55 vector ::iterator iter; 56 57 for (iter = vecStr.begin();(iter + 1) != vecStr.end();++iter) //求相邻string的LCS 58 { 59 tmpLen = GetLCS(*iter,*(iter + 1)); 60 61 if(tmpLen > maxLen) 62 { 63 maxLen = tmpLen; 64 subStr = (*iter).substr(0,maxLen); 65 } 66 } 67 68 return maxLen; 69 } 70 71 72 73 74 //测试FindLongeStringApearTwice 75 void TestDriver() 76 { 77 string strArray[] = { "0123456","yyabcdabjcabceg","abcbcbcabc","hello,li mei! hello,li lei!"}; 78 size_t arrayLength = 4; 79 80 string srcStr; 81 string subStr; 82 size_t maxLen = 0; 83 84 for (size_t index = 0;index < arrayLength;++index) 85 { 86 //srcStr.clear(); //此处的清楚是不必要的,可以删去 87 srcStr = strArray[index]; 88 maxLen = FindLongeStringApearTwice(srcStr,subStr); 89 90 cout<<"the source string is : "< <
1 #include2 #include 3 #include 4 #include 5 #include 6 using namespace std; 7 8 size_t GetLCS(const char *str1,const char *str2) 9 { 10 assert(str1 != NULL && str2 != NULL); 11 12 size_t len1 = strlen(str1); 13 size_t len2 = strlen(str2); 14 15 size_t end = len1 < len2 ? len1 : len2; 16 17 size_t index = 0; 18 size_t commenLen = 0; 19 20 for (index = 0;index < end;++index) 21 { 22 if ( *(str1 + index) == *(str2 + index) ) 23 { 24 ++commenLen; 25 } 26 else 27 { 28 break; 29 } 30 } 31 32 return commenLen; 33 } 34 35 typedef char * pCHAR; 36 37 int pStrcmp(const void *p,const void *q) 38 { 39 return ( strcmp( *(char **)p , *(char **)q ) ); 40 } 41 42 void DisplayString(const char *pStr) 43 { 44 size_t index = 0; 45 46 while (*(pStr + index)) 47 { 48 cout<<*(pStr + index); 49 ++index; 50 } 51 cout< < end;++index) 68 { 69 pSuffixArray[index] = (char *)pSrcStr + index; 70 //DisplayString(pSuffixArray[index]); 71 } 72 73 qsort(pSuffixArray,lenOfSrcStr,sizeof(pCHAR),pStrcmp); 74 /* 75 for ( index = 0;index < end;++index ) 76 { 77 DisplayString(pSuffixArray[index]); 78 }*/ 79 80 for (index = 0;index + 1 < end;++index) 81 { 82 tmpLen = GetLCS(pSuffixArray[index],pSuffixArray[index + 1]); 83 84 if(tmpLen > maxLen) 85 { 86 maxLen = tmpLen; 87 pSubStr = pSuffixArray[index]; 88 } 89 } 90 91 return maxLen; 92 } 93 94 95 void TestDriver() 96 { 97 pCHAR strArray[] = { "0123456","yyabcdabjcabceg","abcbcbcabc","hello,li mei! hello,li lei!"}; 98 size_t arrayLength = 4; 99 100 pCHAR srcStr;101 pCHAR subStr;102 size_t maxLen = 0;103 104 for (size_t index = 0;index < arrayLength;++index)105 {106 srcStr = strArray[index];107 maxLen = FindLongestStringApearTwice(srcStr,subStr);108 109 cout<<"the source string is : "< < < maxLen;++i)113 {114 cout<<*(subStr + i);115 }116 cout<
测试结果:
the source string is : 0123456the longest sub string is :the max length is : 0the source string is : yyabcdabjcabcegthe longest sub string is : abcthe max length is : 3the source string is : abcbcbcabcthe longest sub string is : bcbcthe max length is : 4the source string is : hello,li mei! hello,li lei!the longest sub string is : hello,lithe max length is : 9请按任意键继续. . .