- UID
- 2
- 精华
- 积分
- 7770
- 威望
- 点
- 宅币
- 个
- 贡献
- 次
- 宅之契约
- 份
- 最后登录
- 1970-1-1
- 在线时间
- 小时
|
楼主 |
发表于 2014-7-26 01:47:59
|
显示全部楼层
下面是这个问题的回溯解法,这种方式适合文本按长度排列的情况:
- // hashcalc.cpp : Defines the entry point for the console application.
- //
- #include <iostream>
- #include <fstream>
- #include <string>
- #include <vector>
- #include <stdlib.h>
- #include <time.h>
- using namespace std;
- typedef unsigned char ubyte;
- vector<string> apinamearray;//存储所有api字符串
- bool flag[0x10000];//hash表
- int index[0x10000];//用于存储已经占用hash表的api在apinamearray的索引
- bool valid(ubyte* coef,int curdepth)
- {
- memset(flag,0,sizeof(flag));
- vector<string>::const_iterator itor=apinamearray.begin();
- int indx=0;
- while(itor != apinamearray.end() && curdepth+1 >= (*itor).size())
- {
- int sum=0;
- //核心算法
- int size=(*itor).size();
- for(int i=0;i<size;i++)
- {
- sum += (*itor).at(i)*coef[i];
- sum &= 0xFFFF;
- }
- if(flag[sum])
- {
- string other=apinamearray.at(index[sum]);
- // printf("%s duplicate with %s\n",(*itor).c_str(),other.c_str());
- return false;
- }
- else
- {
- flag[sum]=true;
- index[sum]=indx;
- }
- itor++;
- indx++;
- }
- return true;
- }
- void showhash(ubyte* coef,int maxsize)
- {
- vector<string>::const_iterator itor=apinamearray.begin();
- while(itor != apinamearray.end())
- {
- int sum=0;
- //核心算法
- int size=(*itor).size();
- for(int i=0;i<size;i++)
- {
- sum += (*itor).at(i)*coef[i];
- sum &= 0xFFFF;
- }
- printf("%s->%d ",(*itor).c_str(),sum);
- itor++;
- }
- }
- int main(int argc, char* argv[])
- {
- int maxcoef=64;//参数界限
- string filename;//存储api字符串的txt
- int minsize=INT_MAX;//获取最短api长度
- int maxsize=0;//获取最长api长度
- //下面变量用于定位当前搜索进程
- int curmaxdepth=0;//搜索到的最大深度
- time_t begin=time(NULL);
- cout<<"input filename"<<endl;
- cin>>filename;
- ifstream file(filename.c_str());
- string temp;
- while(getline(file,temp))
- {
- if(temp.size() > maxsize)
- maxsize=temp.size();//寻找最长api以设置hash表达式系数数组长度
- if(temp.size() < minsize)
- minsize=temp.size();//寻找最短api长度
- apinamearray.push_back(temp);
- }
- ubyte* coef=new ubyte[maxsize+1];//使coef元素不会超过65535
- for(int i=0;i<maxsize;i++)
- {
- coef[i]=0;//初始化系数
- }
- int layer=0;
- while(layer >= 0)
- {
- if(curmaxdepth<layer)
- {
- curmaxdepth=layer;
- printf("max:%d:%d\n",layer,maxsize);
- }
- else if(layer < 2)
- {
- printf("%d:%d/%d\n",layer,coef[layer],maxcoef);
- }
- while(coef[layer]<maxcoef && !valid(coef,layer))
- {
- coef[layer]++;
- }
- if(coef[layer]<maxcoef)
- {
- if(layer == maxsize)
- {
- //显示当前系数矩阵
- for(i=0;i<maxsize;i++)
- {
- if(i%16 == 0)
- printf("\n");
- printf("%d ",coef[i]);
- showhash(coef,maxsize);
- }
- printf("\n");
- layer--;
- coef[layer]++;
- }
- else
- {
- layer++;
- coef[layer]=0;
- }
- }
- else
- {
- layer--;
- if(layer>=0)
- coef[layer]++;
- }
- }
- printf("\ntime used:%d\n",time(NULL)-begin);
-
- delete []coef;
-
- return 0;
- }
复制代码 |
|