运行效果,直接访问站点题库:
源代码:
// hdu-spider-sql.cpp : Defines the entry point for the console application. // /* Author:Jungle Wei Create Date:2013-1-18 version: V001R001C00 抓取HDOJ题目信息,写入MySQL数据库,修复bug */ #include "common.h" struct curl_slist *headerlist=NULL; #define DEBUG_OFF 0 #define DEBUG_ON 1 ULONG g_debug_switch = DEBUG_OFF; #define DEBUG (g_debug_switch == DEBUG_ON)?(1):(0) void set_debug_switch(ULONG ds) { g_debug_switch = ds; } //此函数有风险,当参数含%d%23之类的字符串时会RE void MSG_OUPUT_DBG(const char *fmt, ...) { va_list ap; char buffer[4096]; time_t timep = time(NULL); int l; struct tm *p; if (DEBUG_OFF == DEBUG) { return; } p = localtime(&timep); p->tm_year = p->tm_year + 1900; p->tm_mon = p->tm_mon + 1; printf("%04d-%02d-%02d %02d:%02d:%02d ",p->tm_year, p->tm_mon, p->tm_mday,p->tm_hour,p->tm_min,p->tm_sec); va_start(ap, fmt); l = vsprintf(buffer, fmt, ap); printf("%s\n", buffer); va_end(ap); } ULONG getLanguageNameByID(ULONG id, UCHAR *ucLanguageName) { if (id < 0 || id >= sizeof(gaucLanguageName)/MAX_LANG_SIZE) { return BOOL_FALSE; } strcpy((char *)ucLanguageName, (char *)gaucLanguageName[id]); return BOOL_TRUE; } ULONG getLanguageIDByName(UCHAR *ucLanguageName, ULONG *id) { USHORT usLoop = 0; for (usLoop = 0; usLoop <= sizeof(gaucLanguageName)/MAX_LANG_SIZE; ++usLoop) { if (strcmp((CHAR*)ucLanguageName, (CHAR*)gaucLanguageName[usLoop]) == 0) { *id = usLoop; return BOOL_TRUE; } } return BOOL_FALSE; } bool isSpace(char c) { if(c==' '||c=='\n'||c=='\t') { return true; } return false; } char dec2hexChar(short int n) { if ( 0 <= n && n <= 9 ) return char( short('0') + n ); else if ( 10 <= n && n <= 15 )return char( short('A') + n - 10 ); else return char(0); } short int hexChar2dec(char c) { if ( '0'<=c && c<='9' ) return short(c-'0'); else if ( 'a'<=c && c<='f' ) return ( short(c-'a') + 10 ); else if ( 'A'<=c && c<='F' ) return ( short(c-'A') + 10 ); else return -1; } string escapeURL(const string &URL) { string result = ""; for ( unsigned int i=0; i<URL.size(); i++ ) { char c = URL[i]; if ( ( '0'<=c && c<='9' ) || ( 'a'<=c && c<='z' ) || ( 'A'<=c && c<='Z' ) || c=='/' || c=='.' ) result += c; else { int j = (short int)c; if ( j < 0 ) j += 256; int i1, i0; i1 = j / 16; i0 = j - i1*16; result += '%'; result += dec2hexChar(i1); result += dec2hexChar(i0); } } return result; } string deescapeURL(const string &URL) { string result = ""; for ( unsigned int i=0; i<URL.size(); i++ ) { char c = URL[i]; if ( c != '%' ) result += c; else { char c1 = URL[++i]; char c0 = URL[++i]; int num = 0; num += hexChar2dec(c1) * 16 + hexChar2dec(c0); result += char(num); } } return result; } string getAllFromFile(char *filename) { string res=""; FILE * fp=fopen(filename,"r"); while (fgets(tmps,1000000,fp)) res+=tmps; fclose(fp); return res; } size_t process_data(void *buffer, size_t size, size_t nmemb, void *user_p) { FILE *fp = (FILE *)user_p; size_t return_size = fwrite(buffer, size, nmemb, fp); //cout << (char *)buffer << endl; return return_size; } ULONG login() { FILE * fp=fopen(tfilename,"w+"); CURL *curl; CURLcode res; curl_global_init(CURL_GLOBAL_ALL); curl = curl_easy_init(); MSG_OUPUT_DBG("Do login..."); if(curl) { curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data); curl_easy_setopt(curl, CURLOPT_COOKIEJAR, "hdu.cookie"); curl_easy_setopt(curl, CURLOPT_URL, "http://acm.hdu.edu.cn/userloginex.php?action=login"); string post=(string)"username="+username+"&userpass="+password+"&login=Sign+In"; curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post.c_str()); res = curl_easy_perform(curl); curl_easy_cleanup(curl); } curl_global_cleanup(); fclose(fp); if (res) return BOOL_FALSE; string ts=getAllFromFile(tfilename); if (ts.find("No such user or wrong password.")!=string::npos) { MSG_OUPUT_DBG("Login failed."); return BOOL_FALSE; } return BOOL_TRUE; } ULONG getSubmitError(char *filename, string &res) { string ts; res = ""; FILE * fp=fopen(filename,"r"); int begin_ = 0; int end_ = 0; while (fgets(tmps,1000000,fp)) { ts=tmps; if (ts.find("<form id=\"submit\" name=\"submit\"")!=string::npos) { while (fgets(tmps,1000000,fp)) { ts=tmps; begin_ = ts.find("<span>"); if (begin_!=string::npos) { //cout<<"Sorry! FOUND SUBMIT_INFO"<<endl; end_ = ts.find("</span>"); if (end_ !=string::npos) { begin_ += 6; res = ts.substr(begin_,end_ - begin_); //cout<<res<<endl; fclose(fp); return BOOL_TRUE; } while (fgets(tmps,1000000,fp)) { ts=tmps; end_ = ts.find("</span>"); if (end_ !=string::npos) { begin_ += 6; res = ts.substr(begin_,end_ - begin_); //cout<<res<<endl; fclose(fp); return BOOL_TRUE; } else { res=res+ts; } } break; } } break; } } fclose(fp); return BOOL_FALSE; } ULONG submit(string pid, string lang, string source) { CURL *curl; CURLcode res; FILE * fp=fopen(tfilename,"w+"); if (NULL == fp) { MSG_OUPUT_DBG("Open %s failed...", tfilename); } curl_global_init(CURL_GLOBAL_ALL); curl = curl_easy_init(); headerlist=NULL; static const char buf[] = "Expect:"; headerlist = curl_slist_append(headerlist, buf); MSG_OUPUT_DBG("Do submit..."); MSG_OUPUT_DBG("Problem:%s, Language:%s\nSources:\n%s\n", pid.c_str(), lang.c_str(), source.c_str()); if (source.length() <= 50) { for (int i =0;i <= 50 - source.length() + 50; i++) { source += " \r\n"; } } if(curl) { curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data); curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "hdu.cookie"); curl_easy_setopt(curl, CURLOPT_URL, "http://acm.hdu.edu.cn/submit.php?action=submit"); string post= (string)"check=0&problemid=" + pid + "&language=" + lang + "&usercode=" + escapeURL(source); curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post.c_str()); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerlist); res = curl_easy_perform(curl); curl_easy_cleanup(curl); } curl_slist_free_all (headerlist); curl_global_cleanup(); fclose(fp); if (res) { MSG_OUPUT_DBG("curl_easy_perform failed..."); return BOOL_FALSE; } string tss=getAllFromFile(tfilename); if (tss.find("Connect(0) to MySQL Server failed.")!=string::npos||tss.find("<b>One or more following ERROR(s) occurred.")!=string::npos||tss.find("<h2>The requested URL could not be retrieved</h2>")!=string::npos||tss.find("PHP: Maximum execution time of")!=string::npos) { MSG_OUPUT_DBG("One or more ERROR(s) occurred....."); return BOOL_FALSE; } MSG_OUPUT_DBG("Submit success..."); return BOOL_TRUE; } ULONG getResult(string s, string &res) { int pos=s.find("<font color="); if (-1 == pos) { return BOOL_FALSE; } while (s[pos]!='>') pos++; pos++; int st=pos; while (s[pos]!='<') pos++; res = s.substr(st,pos-st); return BOOL_TRUE; } ULONG getRunid(string s, string &res) { int pos=s.find("<td height=22px>"); if (-1 == pos) { return BOOL_FALSE; } while (s[pos]!='>') pos++; pos++; int st=pos; while (s[pos]!='<') pos++; res = s.substr(st,pos-st); return BOOL_TRUE; } string getCEinfo_brief(char *filename) { string res="",ts; FILE * fp=fopen(filename,"r"); while (fgets(tmps,1000000,fp)) { ts=tmps; if (ts.find("View Compilation Error")!=string::npos) { while (fgets(tmps,1000000,fp)) { ts=tmps; int pos = ts.find("<pre>"); if (pos !=string::npos) { res = ts.substr(pos + 5, ts.length() - pos - 5); while (fgets(tmps,1000000,fp)) { ts=tmps; if (ts.find("</pre>")!=string::npos) { MSG_OUPUT_DBG("FOUND CE_INFO"); break; } else { res=res+ts; } } break; } } break; } } fclose(fp); return res; } string getCEinfo(string runid) { FILE *fp = fopen(tfilename, "ab+"); CURL *curl; CURLcode res; curl_global_init(CURL_GLOBAL_ALL); curl = curl_easy_init(); if(curl) { curl_easy_setopt( curl, CURLOPT_VERBOSE, 0L ); curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "hdu.cookie"); string url=(string)"http://acm.hdu.edu.cn/viewerror.php?rid="+runid; //cout<<url; curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data); curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); res = curl_easy_perform(curl); curl_easy_cleanup(curl); } curl_global_cleanup(); fclose(fp); string info = getCEinfo_brief(tfilename); return info; } ULONG getUsedTime(string s, string &timeuse) { int pos=s.find("MS</td>"); if (-1 == pos) { return BOOL_FALSE; } int st=pos; while (s[pos]!='>') pos--; pos++; timeuse = s.substr(pos,st-pos); return BOOL_TRUE; } ULONG getUsedMem(string s, string &memuse) { int pos=s.find("K</td>"); if (-1 == pos) { return BOOL_FALSE; } int st=pos; while (s[pos]!='>') pos--; pos++; memuse = s.substr(pos,st-pos); return BOOL_TRUE; } string getLineFromFile(char *filename,int line) { string res=""; FILE * fp=fopen(filename,"r"); int cnt=0; while (fgets(tmps,10000000,fp)) { cnt++; res=tmps; if (res.find("<h1>Realtime Status</h1>")!=string::npos) { fgets(tmps,10000000,fp); res=res+tmps; fgets(tmps,10000000,fp); res=res+tmps; break; } } fclose(fp); return res; } ULONG getStatus(string username, string pid,string lang, string &runid, string &result,string& ce_info,string &tu,string &mu) { ULONG ulRet = BOOL_TRUE; tu=mu="0"; string ts; MSG_OUPUT_DBG("Do get status..."); CURL *curl; CURLcode res; curl_global_init(CURL_GLOBAL_ALL); curl = curl_easy_init(); if ( curl ) { FILE *fp = fopen(tfilename, "ab+"); curl_easy_setopt( curl, CURLOPT_VERBOSE, 0L ); curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "hdu.cookie"); char url[255] = {0}; sprintf(url, "http://acm.hdu.edu.cn/status.php?first=&pid=%s&user=%s&lang=&status=0", pid.c_str(), username.c_str()); //MSG_OUPUT_DBG(url); curl_easy_setopt( curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data); curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); res = curl_easy_perform( curl ); curl_easy_cleanup(curl); fclose(fp); } curl_global_cleanup(); ts = getLineFromFile(tfilename,77); if(BOOL_FALSE == getUsedTime(ts, tu)) { ++ulRet; MSG_OUPUT_DBG("getUsedTime failed."); } if(BOOL_FALSE == getUsedMem(ts, mu)) { ++ulRet; MSG_OUPUT_DBG("getUsedMem failed."); } if(BOOL_FALSE == getRunid(ts, runid)) { ++ulRet; MSG_OUPUT_DBG("getRunid failed."); } if(BOOL_FALSE == getResult(ts, result)) { ++ulRet; MSG_OUPUT_DBG("getResult failed."); } if (BOOL_TRUE != ulRet) { MSG_OUPUT_DBG("get record failed."); return BOOL_FALSE; } MSG_OUPUT_DBG("problem:%s, language:%s, verdict:%s, submissionID:%s, time:%s ms, memory:%s kb\r\n", pid.c_str(), lang.c_str(), result.c_str(), runid.c_str(), tu.c_str(), mu.c_str()); MSG_OUPUT_DBG("get status success..."); if (result.find("Compilation Error")!=string::npos) { //获取编译错误信息 string CE_Info = getCEinfo(runid); ce_info = CE_Info; //MSG_OUPUT_DBG(CE_Info.c_str()); } return BOOL_TRUE; } //////////////////////////////////////// //spider //////////////////////////////////////// #define PCRE_STATIC // 静态库编译选项 ULONG isNeed2HTML(ENUM_PROVLEM em) { switch (em) { case PROBLEM_TIME: case PROBLEM_MEMORY: case PROBLEM_TITLE: case PROBLEM_AUTHOR: return BOOL_FALSE; default: return BOOL_TRUE; } return BOOL_TRUE; } void InitMySqlConfig() { GetPrivateProfileString("MySQL","url","",Mysql_url,sizeof(Mysql_url),INI_filename); GetPrivateProfileString("MySQL","username","",Mysql_username,sizeof(Mysql_username),INI_filename); GetPrivateProfileString("MySQL","password","",Mysql_password,sizeof(Mysql_password),INI_filename); GetPrivateProfileString("MySQL","table","",Mysql_table,sizeof(Mysql_table),INI_filename); Mysql_port=GetPrivateProfileInt("MySQL","port",0,INI_filename); //cout<<"MySQL:"<<Mysql_url<<" "<<Mysql_username<<" "<<Mysql_password<<" "<<Mysql_table<<" "<<Mysql_port<<endl; } int InitMySQL() //初始化mysql,并设置字符集 { mysql=mysql_init((MYSQL*)0); if(mysql!=0 && !mysql_real_connect(mysql,Mysql_url, Mysql_username, Mysql_password, Mysql_table,Mysql_port,NULL,CLIENT_MULTI_STATEMENTS )){ MSG_OUPUT_DBG(mysql_error(mysql)); return 0; } strcpy(query,"SET CHARACTER SET gbk"); //设置编码 gbk int ret=mysql_real_query(mysql,query,(unsigned int)strlen(query)); if(ret){ MSG_OUPUT_DBG(mysql_error(mysql)); return 0; } return 1; } int StringToTimeEX(const string &strDateStr,time_t &timeData) { char *pBeginPos = (char*) strDateStr.c_str(); char *pPos = strstr(pBeginPos,"-"); if(pPos == NULL) { MSG_OUPUT_DBG("strDateStr[%s] err \n", strDateStr.c_str()); return -1; } int iYear = atoi(pBeginPos); int iMonth = atoi(pPos + 1); pPos = strstr(pPos + 1,"-"); if(pPos == NULL) { MSG_OUPUT_DBG("strDateStr[%s] err \n", strDateStr.c_str()); return -1; } int iDay = atoi(pPos + 1); int iHour=0; int iMin=0; int iSec=0; pPos = strstr(pPos + 1," "); //为了兼容有些没精确到时分秒的 if(pPos != NULL) { iHour=atoi(pPos + 1); pPos = strstr(pPos + 1,":"); if(pPos != NULL) { iMin=atoi(pPos + 1); pPos = strstr(pPos + 1,":"); if(pPos != NULL) { iSec=atoi(pPos + 1); } } } struct tm sourcedate; memset((void*)&sourcedate,0,sizeof(sourcedate)); sourcedate.tm_sec = iSec; sourcedate.tm_min = iMin; sourcedate.tm_hour = iHour; sourcedate.tm_mday = iDay; sourcedate.tm_mon = iMonth - 1; sourcedate.tm_year = iYear - 1900; timeData = mktime(&sourcedate); return 0; } int API_TimeToString(string &strDateStr,const time_t &timeData) { char chTmp[25]; memset(chTmp,0,sizeof(chTmp)); struct tm *p; p = localtime(&timeData); p->tm_year = p->tm_year + 1900; p->tm_mon = p->tm_mon + 1; sprintf(chTmp,"%04d-%02d-%02d %02d:%02d:%02d",p->tm_year, p->tm_mon, p->tm_mday,p->tm_hour,p->tm_min,p->tm_sec); strDateStr = chTmp; return 0; } string GetLocalTimeAsString(const char* format) { time_t t = time(NULL); struct tm *p; p = localtime(&t); char buf[1024]; strftime(buf, sizeof(buf), format, p); return buf; } string getCurrentTime() { time_t s_t; string time_string; time(&s_t); API_TimeToString(time_string,s_t); return time_string; } string& replace_all_distinct(string& str, const string& old_value, const string& new_value) { for(string::size_type pos(0); pos!=string::npos; pos+=new_value.length()) { if((pos=str.find(old_value,pos))!=string::npos) { str.replace(pos,old_value.length(),new_value); } else { break; } } return str; } void SQL_updateProblemInfo(string v_ojname, string v_pid) { string val_str=""; /* val_str = g_problem_string[0] + "," + g_problem_string[1]+ "," + "'" + g_problem_string[2] + "'" + "," + "'" + g_problem_string[3] + "'" + "," + "'" + g_problem_string[4] + "'" + "," + "'" + g_problem_string[5] + "'" + "," + "'" + g_problem_string[6] + "'" + "," + "'" + g_problem_string[7] + "'" + "," + "'" + g_problem_string[8] + "'" + "," + "'" + getCurrentTime() + "', 'N', 0,0,0,0,0,1, '" + v_ojname +"', " + v_pid + ""; */ MSG_OUPUT_DBG("In SQL_updateProblemInfo, (%s)", v_pid.c_str()); for(int i=0; i<PROBLEM_TAG_MAX; i++) { //char *end; //char *string_ = (char*)malloc(sizeof(char)*g_problem_string[i].length()+1); //strcpy(string_,g_problem_string[i].c_str()); /* end = string_; end += strlen(string_); //point sql tail //convert NUL(ASCII 0)、'\n'、'\r'、'\'’、'''、'"'和Control-Z and so on *end++ = '\''; end += mysql_real_escape_string(mysql, end, query, strlen(string_)); *end++ = '\"'; *end++ = ')'; cout<<string_<<endl; */ if (i == PROBLEM_TITLE) { replace_all_distinct(g_problem_string[i], "\"", " "); g_problem_string[i] = "HDU." + v_pid + " - " + g_problem_string[i]; } if (BOOL_TRUE == isNeed2HTML((ENUM_PROVLEM)i)) { replace_all_distinct(g_problem_string[i], "\"", """); replace_all_distinct(g_problem_string[i], "src=/data/images/", "src=http://acm.hdu.edu.cn/data/images/"); replace_all_distinct(g_problem_string[i], "src=../../data/images/", "src=http://acm.hdu.edu.cn/data/images/"); replace_all_distinct(g_problem_string[i], "\n", "<br>"); } //val_str += g_problem_string[i]; } val_str = g_problem_string[0] + "," + g_problem_string[1]+ "," + "\"" + g_problem_string[2] + "\"" + "," + "\"" + g_problem_string[3] + "\"" + "," + "\"" + g_problem_string[4] + "\"" + "," + "\"" + g_problem_string[5] + "\"" + "," + "\"" + g_problem_string[6] + "\"" + "," + "\"" + g_problem_string[7] + "\"" + "," + "\"" + g_problem_string[8] + "\"" + "," + "'" + getCurrentTime() + "', 'N', 0,0,0,0,0,0,1, '" + v_ojname +"', " + v_pid + ""; if (val_str.length() >= MAX_SIZE_BUF) { MSG_OUPUT_DBG("ERROR, too large size of buffer..."); return; } sprintf(query,"insert into problem(time_limit,memory_limit,title,description,input,output,sample_input,sample_output,author,create_date,defunct,spj,accepted,solved,submit,submit_user,contest_id,isvirtual,oj_name,oj_pid) values(%s);",val_str.c_str()); //MSG_OUPUT_DBG(query); int ret=mysql_real_query(mysql,query,(unsigned int)strlen(query)); if(ret) { //write_log(ERROR,mysql_error(mysql)); MSG_OUPUT_DBG(mysql_error(mysql)); return ; } MSG_OUPUT_DBG("End SQL_updateProblemInfo OK, (%s)", v_pid.c_str()); } ULONG checkStringExsit(char *filename, char *pattern) { pcre *re; const char *error; int erroffset; int ovector[OVECCOUNT]; int rc; string ts; FILE * fp=fopen(filename,"r"); while (fgets(tmps, MAX_SIZE_BUF, fp)) { ts +=tmps; } fclose(fp); //title re = pcre_compile(pattern, 0, &error, &erroffset, NULL); if (re == NULL) { //如果编译失败,返回错误信息 MSG_OUPUT_DBG("PCRE compilation failed at offset %d: %s\n", erroffset, error); return BOOL_FALSE; } rc = pcre_exec(re,NULL, ts.c_str(), strlen(ts.c_str()), 0, 0, ovector, OVECCOUNT); // 返回值:匹配成功返回非负数,没有匹配返回负数 if (rc < 0) { //如果没有匹配,返回错误信息 if (rc == PCRE_ERROR_NOMATCH) printf("Sorry, no match ...\n"); else MSG_OUPUT_DBG("Matching error %d\n", rc); pcre_free(re); return BOOL_FALSE; } pcre_free(re); return BOOL_TRUE; } ULONG getInfoByTag(char *src, char *pattern, ENUM_PROVLEM enProblem, char *res) { pcre *re; const char *error; int erroffset; int ovector[OVECCOUNT]; int rc, i; MSG_OUPUT_DBG("In getInfoByTag..."); //title re = pcre_compile(pattern, 0, &error, &erroffset, NULL); if (re == NULL) { //如果编译失败,返回错误信息 MSG_OUPUT_DBG("PCRE compilation failed at offset %d: %s\n", erroffset, error); return BOOL_FALSE; } rc = pcre_exec(re,NULL, src, strlen(src), 0, 0, ovector, OVECCOUNT); // 返回值:匹配成功返回非负数,没有匹配返回负数 if (rc < 0) { //如果没有匹配,返回错误信息 if (rc == PCRE_ERROR_NOMATCH) MSG_OUPUT_DBG("Sorry, no match ...\n"); else { MSG_OUPUT_DBG("Matching error %d\n", rc); g_problem_string[enProblem] = "Not Found"; } pcre_free(re); return BOOL_FALSE; } MSG_OUPUT_DBG("In getInfoByTag..."); i = (rc==0)?(0):(rc-1); printf("iiiiiiii=%d , rc=%d\n",i,rc); // for (i = 0; i < rc; i++) //分别取出捕获分组 $0整个正则公式 $1第一个() { char *substring_start = src + ovector[2*i]; int substring_length = ovector[2*i+1] - ovector[2*i]; MSG_OUPUT_DBG("In getInfoByTag 1 substring_length=%d...",substring_length); char *str_tmp = (char*)malloc(sizeof(char)*substring_length+100); // char str_tmp[MAX_SIZE_BUF] ={0}; MSG_OUPUT_DBG("In getInfoByTag 2..."); sprintf(str_tmp, "%.*s\n", substring_length, substring_start); MSG_OUPUT_DBG("In getInfoByTag 3..."); // printf("%s",str_tmp); //string string_ = str_tmp; MSG_OUPUT_DBG("In getInfoByTag 4...(length = %d)", strlen(str_tmp)); g_problem_string[enProblem].assign(str_tmp,strlen(str_tmp)); MSG_OUPUT_DBG("End getInfoByTag success..."); //MSG_OUPUT_DBG(pattern); //MSG_OUPUT_DBG(string_.c_str()); // free(substring_start); free(str_tmp); } pcre_free(re); return BOOL_TRUE; } int getProblemInfo_Brief(string pid) { ULONG ulRet = 0; int loop = 0; string res="",ts; FILE * fp=fopen(tfilename,"r"); while (fgets(tmps, MAX_SIZE_BUF, fp)) { ts +=tmps; } fclose(fp); char patternTime [] = "(\\d*) MS"; // 将要被编译的字符串形式的正则表达式 char patternMemory [] = "(\\d*) K"; // 将要被编译的字符串形式的正则表达式 char patternTitle [] = "<h1 style='color:#1A5CC8'>([\\s\\S]*?)</h1>"; // 将要被编译的字符串形式的正则表达式 char patternDescription [] = "Problem Description</div> <div class=panel_content>([\\s\\S]*?)</div><div class=panel_bottom> </div>"; // 将要被编译的字符串形式的正则表达式 char patternInput [] = "Input</div> <div class=panel_content>([\\s\\S]*?)</div><div class=panel_bottom> </div>"; // 将要被编译的字符串形式的正则表达式 char patternOutput [] = "Output</div> <div class=panel_content>([\\s\\S]*?)</div><div class=panel_bottom> </div>"; // 将要被编译的字符串形式的正则表达式 char patternSampleInput [] = "Sample Input</div><div class=panel_content><pre><div style=\"font-family:Courier New,Courier,monospace;\">([\\s\\S]*?)</div></pre></div><div class=panel_bottom> </div>"; // 将要被编译的字符串形式的正则表达式 char patternSampleOutput [] = "Sample Output</div><div class=panel_content><pre><div style=\"font-family:Courier New,Courier,monospace;\">([\\s\\S]*?)</div></pre></div><div class=panel_bottom> </div>"; // 将要被编译的字符串形式的正则表达式 char patternAuthor [] = "Author</div> <div class=panel_content>([\\s\\S]*?)</div><div class=panel_bottom> </div>"; // 将要被编译的字符串形式的正则表达式 //char patternTitle [] = "<h1 style='color:#1A5CC8'>([\\s\\S]*?)</h1>"; // 将要被编译的字符串形式的正则表达式 for (loop = 0; loop < PROBLEM_TAG_MAX; loop++) { g_problem_string[loop] = ""; } MSG_OUPUT_DBG("Start Problem %s ...", pid.c_str()); MSG_OUPUT_DBG("Time"); ulRet = getInfoByTag((char*)ts.c_str(), patternTime, PROBLEM_TIME ,NULL); if(ulRet == 0) { g_problem_string[0] = "1000"; } MSG_OUPUT_DBG("Memoty"); ulRet = getInfoByTag((char*)ts.c_str(), patternMemory, PROBLEM_MEMORY, NULL); if(ulRet == 0) { g_problem_string[1] = "65535"; } ulRet = 0; MSG_OUPUT_DBG("Title"); ulRet += getInfoByTag((char*)ts.c_str(), patternTitle, PROBLEM_TITLE,NULL); MSG_OUPUT_DBG("Description"); ulRet += getInfoByTag((char*)ts.c_str(), patternDescription, PROBLEM_DESCRIPTION, NULL); MSG_OUPUT_DBG("Input"); ulRet += getInfoByTag((char*)ts.c_str(), patternInput, PROBLEM_INPUT, NULL); MSG_OUPUT_DBG("Output"); ulRet += getInfoByTag((char*)ts.c_str(), patternOutput, PROBLEM_OUTPUT, NULL); MSG_OUPUT_DBG("Sample Input"); ulRet += getInfoByTag((char*)ts.c_str(), patternSampleInput, PROBLEM_SAMPLE_INPUT, NULL); MSG_OUPUT_DBG("Sample Output"); ulRet += getInfoByTag((char*)ts.c_str(), patternSampleOutput, PROBLEM_SAMPLE_OUTPUT, NULL); MSG_OUPUT_DBG("Author"); ulRet += getInfoByTag((char*)ts.c_str(), patternAuthor, PROBLEM_AUTHOR, NULL); if (ulRet != 0) { if (BOOL_TRUE == checkStringExsit(tfilename, "No such problem")) { MSG_OUPUT_DBG("No such problem %s", pid.c_str()); return 0; } } SQL_updateProblemInfo("HDU",pid); MSG_OUPUT_DBG("Get Problem %s OK.", pid.c_str()); return 0; } ULONG getProblemInfo(string pid) { CURL *curl; CURLcode res; curl = curl_easy_init(); if (access(tfilename, 0) == 0) { DeleteFile(tfilename); } if ( curl ) { FILE *fp = fopen(tfilename, "ab+"); curl_easy_setopt( curl, CURLOPT_VERBOSE, 0L ); curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "hdu.cookie"); char url[255] = {0}; sprintf(url, "http://acm.hdu.edu.cn/showproblem.php?pid=%s", pid.c_str()); //cout<<url; curl_easy_setopt( curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data); curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); res = curl_easy_perform( curl ); curl_easy_cleanup(curl); fclose(fp); } getProblemInfo_Brief(pid); return BOOL_TRUE; } ////////////////// // DLL exp ////////////////// extern "C" _declspec(dllexport)ULONG DLL_HDUDebugSwitch(ULONG st) { set_debug_switch(st); return BOOL_TRUE; } extern "C" _declspec(dllexport)ULONG DLL_HDU_SpiderInit(int pid) { if(InitMySQL()==0) //初始化mysql { printf("Init MySQL ERROR...\n"); return BOOL_FALSE; } if (access(tfilename, 0) == 0) { DeleteFile(tfilename); } return BOOL_TRUE; } extern "C" _declspec(dllexport)ULONG DLL_GetProblemInfoFromHDU(int pid) { char tmp[10]={0}; itoa(pid,tmp,10); string pid_s = tmp; if (BOOL_TRUE != getProblemInfo(pid_s)) { return BOOL_FALSE; } return BOOL_TRUE; } extern "C" _declspec(dllexport)ULONG DLL_HDULogin() { if (BOOL_TRUE != login()) { return BOOL_FALSE; } return BOOL_TRUE; } extern "C" _declspec(dllexport)ULONG DLL_HDUSubmit(int pid, int langid, string source) { char tmp[10]={0}; itoa(pid,tmp,10); string pid_s = tmp; char tmplang[10]={0}; itoa(langid,tmplang,10); string lang_string = tmplang; if (BOOL_TRUE != submit(pid_s, lang_string, source)) { return BOOL_FALSE; } return BOOL_TRUE; } extern "C" _declspec(dllexport)ULONG DLL_HDUGetStatus(string username, int pid, int langid, string &runid, string &result,string& ce_info,string &tu,string &mu) { char tmp[10]={0}; itoa(pid,tmp,10); string pid_s = tmp; //string runid,result,ce_info,tu, mu; char tmplang[10]={0}; itoa(langid,tmplang,10); string lang_string = tmplang; if (BOOL_TRUE != getStatus(username, pid_s, lang_string, runid, result, ce_info, tu, mu)) { MSG_OUPUT_DBG("DLL_HDUGetStatus getStatus error..."); return BOOL_FALSE; } else { MSG_OUPUT_DBG("DLL_HDUGetStatus getStatus success..."); } return BOOL_TRUE; } //////////////////////////////////////////////// ///////common.h头文件 ///////////////////////////////////////////////// #ifndef _COMMON_H_ #define _COMMON_H_ #include <io.h> #include <windows.h> #include <iostream> #include <cstdio> #include <string> #include <fstream> #include <stdio.h> #include "include\mysql.h" #include "curl\curl.h" #include "pcre.h" using namespace std; #pragma comment(lib,"ws2_32") #pragma comment(lib, "lib/curllib.lib") #pragma comment(lib, "lib/openldap.lib") #pragma comment(lib, "lib/ssleay32.lib") #pragma comment(lib, "lib/pcre.lib") #pragma comment(lib, "lib/libmysql.lib") #ifdef __cplusplus extern "C" { #endif #define MAX_SIZE_BUF 10000000 #define DEBUG_PRINT(X) X #define UCHAR unsigned char #define ULONG unsigned long #define CHAR char #define BOOL_TRUE 0 #define BOOL_FALSE 1 #define OVECCOUNT 30 /* should be a multiple of 3 */ #define MAX_LANG_SIZE 255 enum ENUM_PROVLEM { PROBLEM_TIME = 0, PROBLEM_MEMORY, PROBLEM_TITLE, PROBLEM_DESCRIPTION, PROBLEM_INPUT, PROBLEM_OUTPUT, PROBLEM_SAMPLE_INPUT, PROBLEM_SAMPLE_OUTPUT, PROBLEM_AUTHOR, PROBLEM_TAG_MAX }; string g_problem_string[PROBLEM_TAG_MAX]; MYSQL *mysql; //mysql连接 char query[MAX_SIZE_BUF]; //查询语句 const char INI_filename[]="data.ini"; char Mysql_url[255] = "localhost"; char Mysql_username[255] = "root"; char Mysql_password[255] = "password"; char Mysql_table[255] = "gdoj"; int Mysql_port = 3306; char Mysql_Character[255] = "gbk"; //编码 char tmps[MAX_SIZE_BUF]; char username[1000]="hdu_username"; char password[1000]="hdu_password"; char tfilename[1000]="tmpfile.txt"; char domain[255]="acm.hdu.edu.cn"; //acm.guet.edu.cn /* hdu language list */ UCHAR gaucLanguageName[][MAX_LANG_SIZE] = { "G++", "GCC", "C++", "C", "Pascal", "Java" }; //spider extern "C" _declspec(dllexport)ULONG DLL_HDUDebugSwitch(ULONG status); extern "C" _declspec(dllexport)ULONG DLL_GetProblemInfoFromHDU(int pid); extern "C" _declspec(dllexport)ULONG DLL_HDU_SpiderInit(int pid); //judger extern "C" _declspec(dllexport)ULONG DLL_HDULogin(); extern "C" _declspec(dllexport)ULONG DLL_HDUSubmit(int pid, int langid, string source); extern "C" _declspec(dllexport)ULONG DLL_HDUGetStatus(string username, int pid, int langid, string &runid, string &result,string& ce_info,string &tu,string &mu); #ifdef __cplusplus } /* extern "C" */ #endif #endif /* End of common.h */
他们网站的管理员知道你抓题么=。=