【GUET-Virtual Judge】——HDOJ Virtual Judge
ACSolo  post at 12 years ago 2.4k 1 0

运行效果,直接访问站点题库:

源代码:

// hdu-spider-sql.cpp : Defines the entry point for the console application.

//
/*
Author:Jungle Wei
Create Date:2013-1-18
version: V001R001C00
抓取HDOJ题目信息,写入MySQL数据库,修复bug

*/
#include "common.h"

/* Extra HTTP request headers handed to libcurl; built and released inside submit(). */
struct curl_slist *headerlist=NULL;

/* Values accepted by set_debug_switch(). */
#define DEBUG_OFF 0
#define DEBUG_ON 1

/* Current debug-output state; MSG_OUPUT_DBG prints only when this is DEBUG_ON. */
ULONG g_debug_switch = DEBUG_OFF;

/*
 * Evaluates to 1 when debug output is enabled and 0 otherwise.
 * The whole expansion is parenthesized so the macro keeps its meaning when it
 * appears inside a larger expression such as `DEBUG_OFF == DEBUG`.
 */
#define DEBUG ((g_debug_switch == DEBUG_ON)?(1):(0))

/*
 * Store the requested debug-output state in g_debug_switch.
 * ds: DEBUG_ON to enable MSG_OUPUT_DBG output, any other value to silence it.
 */
void set_debug_switch(ULONG ds)
{
    g_debug_switch = ds;
}

//此函数有风险,当参数含%d%23之类的字符串时会RE
void MSG_OUPUT_DBG(const char *fmt, ...)
{
va_list ap;
char buffer[4096];
time_t  timep = time(NULL);
int l;
struct tm *p;

if (DEBUG_OFF == DEBUG)
{
return;
}

    p = localtime(&timep);
    p->tm_year = p->tm_year + 1900;
    p->tm_mon = p->tm_mon + 1;
printf("%04d-%02d-%02d %02d:%02d:%02d ",p->tm_year, p->tm_mon, p->tm_mday,p->tm_hour,p->tm_min,p->tm_sec);
va_start(ap, fmt);
l = vsprintf(buffer, fmt, ap);

printf("%s\n", buffer);
va_end(ap);
}

/*
 * Copy the name of language number `id` from gaucLanguageName into
 * ucLanguageName.
 *
 * ucLanguageName must point to at least MAX_LANG_SIZE bytes, the size of one
 * table row; the copy itself is not bounded.
 *
 * Returns BOOL_TRUE on success, BOOL_FALSE when id is outside the table or
 * ucLanguageName is NULL.
 */
ULONG getLanguageNameByID(ULONG id, UCHAR *ucLanguageName)
{
    const ULONG languageCount = sizeof(gaucLanguageName) / sizeof(gaucLanguageName[0]);

    /* id is unsigned, so only the upper bound needs checking. */
    if (NULL == ucLanguageName || id >= languageCount)
    {
        return BOOL_FALSE;
    }

    strcpy((char *)ucLanguageName, (char *)gaucLanguageName[id]);
    return BOOL_TRUE;
}

/*
 * Look up ucLanguageName in gaucLanguageName (exact, case-sensitive match)
 * and store its index in *id.
 *
 * Returns BOOL_TRUE when the name is found, BOOL_FALSE when it is not in the
 * table or either pointer is NULL. *id is written only on success.
 */
ULONG getLanguageIDByName(UCHAR *ucLanguageName, ULONG *id)
{
    const ULONG languageCount = sizeof(gaucLanguageName) / sizeof(gaucLanguageName[0]);
    ULONG index;

    if (NULL == ucLanguageName || NULL == id)
    {
        return BOOL_FALSE;
    }

    /* Strictly less than: index == languageCount would read past the table. */
    for (index = 0; index < languageCount; ++index)
    {
        if (strcmp((CHAR*)ucLanguageName, (CHAR*)gaucLanguageName[index]) == 0)
        {
            *id = index;
            return BOOL_TRUE;
        }
    }
    return BOOL_FALSE;
}

/* Return true when c is a space, a newline or a tab; false for every other character. */
bool isSpace(char c)
{
    switch (c)
    {
    case ' ':
    case '\n':
    case '\t':
        return true;
    default:
        return false;
    }
}


/*
 * Convert a value in 0..15 to its upper-case hexadecimal digit.
 * Any value outside that range yields the NUL character.
 */
char dec2hexChar(short int n)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    if (n < 0 || n > 15)
    {
        return char(0);
    }
    return kHexDigits[n];
}
/*
 * Convert one hexadecimal digit (0-9, a-f, A-F) to its numeric value.
 * Returns -1 when c is not a hexadecimal digit.
 */
short int hexChar2dec(char c)
{
    short int value = -1;

    if (c >= '0' && c <= '9')
    {
        value = short(c - '0');
    }
    else if (c >= 'a' && c <= 'f')
    {
        value = short(c - 'a') + 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        value = short(c - 'A') + 10;
    }

    return value;
}

/*
 * Percent-encode URL.
 *
 * ASCII letters, digits, '/' and '.' are copied unchanged; every other byte
 * becomes '%' followed by its two upper-case hexadecimal digits.
 */
string escapeURL(const string &URL)
{
    string encoded;

    for (string::const_iterator it = URL.begin(); it != URL.end(); ++it)
    {
        const char c = *it;
        const bool isDigit = (c >= '0' && c <= '9');
        const bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

        if (isDigit || isLetter || c == '/' || c == '.')
        {
            encoded += c;
            continue;
        }

        /* Treat the byte as 0..255 regardless of whether char is signed. */
        const unsigned char byte = static_cast<unsigned char>(c);
        encoded += '%';
        encoded += dec2hexChar(static_cast<short int>(byte >> 4));
        encoded += dec2hexChar(static_cast<short int>(byte & 0x0F));
    }

    return encoded;
}

/*
 * Decode percent-encoded text: every "%XY" where X and Y are hexadecimal
 * digits is replaced by the byte with that value.
 *
 * A '%' that is not followed by two hexadecimal digits (truncated input such
 * as "abc%4", or text such as "100%") is copied through unchanged, so the
 * function never reads past the end of URL.
 */
string deescapeURL(const string &URL)
{
    const string::size_type length = URL.size();
    string result;

    for (string::size_type i = 0; i < length; ++i)
    {
        const char c = URL[i];

        if (c == '%' && i + 2 < length)
        {
            const short int high = hexChar2dec(URL[i + 1]);
            const short int low = hexChar2dec(URL[i + 2]);

            if (high >= 0 && low >= 0)
            {
                result += char(high * 16 + low);
                i += 2;   /* skip the two digits just consumed */
                continue;
            }
        }

        result += c;
    }

    return result;
}


/*
 * Read the whole text file `filename` and return its contents.
 *
 * Returns an empty string when the file cannot be opened. The file is read
 * through a local buffer in fixed-size pieces; appending the pieces gives
 * the same text as reading whole lines.
 */
string getAllFromFile(char *filename)
{
    string res="";
    char chunk[4096];
    FILE *fp;

    if (NULL == filename)
    {
        return res;
    }

    fp = fopen(filename, "r");
    if (NULL == fp)
    {
        return res;
    }

    while (fgets(chunk, (int)sizeof(chunk), fp))
    {
        res += chunk;
    }

    fclose(fp);
    return res;
}

/*
 * Write callback installed with CURLOPT_WRITEFUNCTION: stores the received
 * bytes in the FILE* supplied through CURLOPT_WRITEDATA (user_p).
 *
 * Returns fwrite's result, i.e. the number of complete `size`-byte items
 * written.
 */
size_t process_data(void *buffer, size_t size, size_t nmemb, void *user_p)
{
    FILE *sink = static_cast<FILE *>(user_p);

    return fwrite(buffer, size, nmemb, sink);
}


/*
 * Log in to HDOJ with the configured `username` / `password`.
 *
 * The response page is saved to `tfilename` and the session cookie to
 * "hdu.cookie", which the later requests read through CURLOPT_COOKIEFILE.
 *
 * Returns BOOL_TRUE on success. Returns BOOL_FALSE when the temporary file
 * cannot be opened, the transfer fails, or the response contains the
 * "No such user or wrong password." message.
 */
ULONG login()
{
    CURL *curl;
    /* Start from a failure code so a failed curl_easy_init() is reported as an error. */
    CURLcode res = CURLE_FAILED_INIT;
    FILE *fp = fopen(tfilename, "w+");

    if (NULL == fp)
    {
        MSG_OUPUT_DBG("Open %s failed...", tfilename);
        return BOOL_FALSE;
    }

    curl_global_init(CURL_GLOBAL_ALL);
    curl = curl_easy_init();

    MSG_OUPUT_DBG("Do login...");

    if (curl)
    {
        /* Percent-encode the credentials so characters such as '&' or '='
           cannot break the form body. */
        string post = (string)"username=" + escapeURL(username) +
                      "&userpass=" + escapeURL(password) + "&login=Sign+In";

        curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data);
        curl_easy_setopt(curl, CURLOPT_COOKIEJAR, "hdu.cookie");
        curl_easy_setopt(curl, CURLOPT_URL, "http://acm.hdu.edu.cn/userloginex.php?action=login");
        /* POSTFIELDS is not copied by libcurl; `post` stays alive until after perform. */
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post.c_str());
        res = curl_easy_perform(curl);

        curl_easy_cleanup(curl);
    }
    curl_global_cleanup();

    fclose(fp);

    if (res != CURLE_OK)
    {
        return BOOL_FALSE;
    }

    string ts = getAllFromFile(tfilename);
    if (ts.find("No such user or wrong password.") != string::npos)
    {
        MSG_OUPUT_DBG("Login failed.");
        return BOOL_FALSE;
    }
    return BOOL_TRUE;
}

/*
 * Extract the text of the first <span>...</span> that follows the submit
 * form (`<form id="submit" name="submit"`) in the saved page `filename`.
 *
 * The span may end on the line where it starts or on a later line; in the
 * second case the text of all lines in between is included.
 *
 * res receives the extracted text. Returns BOOL_TRUE when a complete span
 * was found. Returns BOOL_FALSE when the file cannot be opened, the form or
 * span is missing, or the span is never closed (res then holds whatever
 * text was collected so far).
 */
ULONG getSubmitError(char *filename, string &res)
{
    static const char kFormTag[] = "<form id=\"submit\" name=\"submit\"";
    static const char kOpenTag[] = "<span>";
    static const char kCloseTag[] = "</span>";

    bool inForm = false;
    bool inSpan = false;
    ULONG found = BOOL_FALSE;
    string line;
    FILE *fp;

    res = "";

    fp = fopen(filename, "r");
    if (NULL == fp)
    {
        return BOOL_FALSE;
    }

    while (fgets(tmps, 1000000, fp))
    {
        string::size_type from = 0;

        line = tmps;

        if (!inForm)
        {
            /* The span is looked for only on lines after the form tag line. */
            inForm = (line.find(kFormTag) != string::npos);
            continue;
        }

        if (!inSpan)
        {
            const string::size_type open = line.find(kOpenTag);
            if (open == string::npos)
            {
                continue;
            }
            inSpan = true;
            from = open + (sizeof(kOpenTag) - 1);
        }

        const string::size_type close = line.find(kCloseTag, from);
        if (close != string::npos)
        {
            res += line.substr(from, close - from);
            found = BOOL_TRUE;
            break;
        }

        /* Span continues on the next line: keep this line's share of the text. */
        res += line.substr(from);
    }

    fclose(fp);
    return found;
}

/*
 * Submit `source` for problem `pid` in language `lang` to HDOJ, using the
 * session stored in "hdu.cookie". The response page is saved to `tfilename`.
 *
 * Sources of at most 50 characters are padded with " \r\n" lines before
 * sending.
 *
 * Returns BOOL_TRUE when the request completed and the response contains
 * none of the known error messages; BOOL_FALSE otherwise, including when the
 * temporary file cannot be opened or libcurl cannot be initialised.
 */
ULONG submit(string pid, string lang, string source)
{
    CURL *curl;
    /* Start from a failure code so a failed curl_easy_init() is reported as an error. */
    CURLcode res = CURLE_FAILED_INIT;
    FILE *fp = fopen(tfilename, "w+");

    if (NULL == fp)
    {
        MSG_OUPUT_DBG("Open %s failed...", tfilename);
        return BOOL_FALSE;
    }

    curl_global_init(CURL_GLOBAL_ALL);
    curl = curl_easy_init();

    /* An empty "Expect:" header is sent with the POST. */
    static const char buf[] = "Expect:";
    headerlist = NULL;
    headerlist = curl_slist_append(headerlist, buf);

    MSG_OUPUT_DBG("Do submit...");
    MSG_OUPUT_DBG("Problem:%s, Language:%s\nSources:\n%s\n", pid.c_str(), lang.c_str(), source.c_str());

    if (source.length() <= 50)
    {
        /* Same stopping rule as `i <= 100 - length`, written so that the
           unsigned length can never wrap around. */
        for (string::size_type i = 0; i + source.length() <= 100; i++)
        {
            source += " \r\n";
        }
    }

    if (curl)
    {
        string post = (string)"check=0&problemid=" + pid + "&language=" + lang + "&usercode=" + escapeURL(source);

        curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data);
        curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "hdu.cookie");
        curl_easy_setopt(curl, CURLOPT_URL, "http://acm.hdu.edu.cn/submit.php?action=submit");
        /* POSTFIELDS is not copied by libcurl; `post` stays alive until after perform. */
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post.c_str());
        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerlist);

        res = curl_easy_perform(curl);
        curl_easy_cleanup(curl);
    }
    curl_slist_free_all(headerlist);
    headerlist = NULL;
    curl_global_cleanup();

    fclose(fp);

    if (res != CURLE_OK)
    {
        MSG_OUPUT_DBG("curl_easy_perform failed...");
        return BOOL_FALSE;
    }

    string tss = getAllFromFile(tfilename);
    if (tss.find("Connect(0) to MySQL Server failed.") != string::npos ||
        tss.find("<b>One or more following ERROR(s) occurred.") != string::npos ||
        tss.find("<h2>The requested URL could not be retrieved</h2>") != string::npos ||
        tss.find("PHP: Maximum execution time of") != string::npos)
    {
        MSG_OUPUT_DBG("One or more ERROR(s) occurred.....");
        return BOOL_FALSE;
    }

    MSG_OUPUT_DBG("Submit success...");
    return BOOL_TRUE;
}

/*
 * Extract the text inside the first `<font color=...>` element of s.
 *
 * res receives the text between the '>' that closes the opening tag and the
 * next '<'. Returns BOOL_TRUE on success; BOOL_FALSE (res untouched) when the
 * tag, its closing '>' or the following '<' is missing.
 */
ULONG getResult(string s, string &res)
{
    const string::size_type tag = s.find("<font color=");
    if (string::npos == tag)
    {
        return BOOL_FALSE;
    }

    const string::size_type tagEnd = s.find('>', tag);
    if (string::npos == tagEnd)
    {
        return BOOL_FALSE;
    }

    const string::size_type textStart = tagEnd + 1;
    const string::size_type textEnd = s.find('<', textStart);
    if (string::npos == textEnd)
    {
        return BOOL_FALSE;
    }

    res = s.substr(textStart, textEnd - textStart);
    return BOOL_TRUE;
}

/*
 * Extract the text of the first `<td height=22px>` cell of s.
 *
 * res receives the text between the tag and the next '<'. Returns BOOL_TRUE
 * on success; BOOL_FALSE (res untouched) when the tag or the following '<'
 * is missing.
 */
ULONG getRunid(string s, string &res) {
    static const char kCellTag[] = "<td height=22px>";

    const string::size_type tag = s.find(kCellTag);
    if (string::npos == tag)
    {
        return BOOL_FALSE;
    }

    /* The tag text ends with '>', so the cell content starts right after it. */
    const string::size_type textStart = tag + (sizeof(kCellTag) - 1);
    const string::size_type textEnd = s.find('<', textStart);
    if (string::npos == textEnd)
    {
        return BOOL_FALSE;
    }

    res = s.substr(textStart, textEnd - textStart);
    return BOOL_TRUE;
}


string getCEinfo_brief(char *filename)
{
string res="",ts;
    FILE * fp=fopen(filename,"r");
    while (fgets(tmps,1000000,fp))
    {
        ts=tmps;
        if (ts.find("View Compilation Error")!=string::npos)
        {
            while (fgets(tmps,1000000,fp))
{
                ts=tmps;
int pos = ts.find("<pre>");
                if (pos !=string::npos)
{
res = ts.substr(pos + 5, ts.length() - pos - 5);

while (fgets(tmps,1000000,fp))
{
ts=tmps;
if (ts.find("</pre>")!=string::npos)
{
MSG_OUPUT_DBG("FOUND CE_INFO");
break;
}
else
{
res=res+ts;
}
}
break;
}
}
            break;
        }
    }
    fclose(fp);
    return res;
}

string getCEinfo(string runid)
{
FILE *fp = fopen(tfilename, "ab+");
CURL *curl;
CURLcode res;

curl_global_init(CURL_GLOBAL_ALL);
    curl = curl_easy_init();
    if(curl)
    {
curl_easy_setopt( curl, CURLOPT_VERBOSE, 0L );
curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "hdu.cookie");
        string url=(string)"http://acm.hdu.edu.cn/viewerror.php?rid="+runid;
        //cout<<url;
        curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
        res = curl_easy_perform(curl);
        curl_easy_cleanup(curl);
    }

curl_global_cleanup();

    fclose(fp);
    string info = getCEinfo_brief(tfilename);
    return info;
}

/*
 * Extract the run time from a status row: the text between the last '>'
 * before the first "MS</td>" and that "MS</td>".
 *
 * timeuse receives the text. Returns BOOL_TRUE on success; BOOL_FALSE
 * (timeuse untouched) when "MS</td>" is absent or no '>' precedes it.
 */
ULONG getUsedTime(string s, string &timeuse)
{
    const string::size_type unit = s.find("MS</td>");
    if (string::npos == unit)
    {
        return BOOL_FALSE;
    }

    /* rfind stops at the start of the string instead of running off it. */
    const string::size_type tagEnd = s.rfind('>', unit);
    if (string::npos == tagEnd)
    {
        return BOOL_FALSE;
    }

    timeuse = s.substr(tagEnd + 1, unit - (tagEnd + 1));
    return BOOL_TRUE;
}

/*
 * Extract the memory usage from a status row: the text between the last '>'
 * before the first "K</td>" and that "K</td>".
 *
 * memuse receives the text. Returns BOOL_TRUE on success; BOOL_FALSE
 * (memuse untouched) when "K</td>" is absent or no '>' precedes it.
 */
ULONG getUsedMem(string s, string &memuse)
{
    const string::size_type unit = s.find("K</td>");
    if (string::npos == unit)
    {
        return BOOL_FALSE;
    }

    /* rfind stops at the start of the string instead of running off it. */
    const string::size_type tagEnd = s.rfind('>', unit);
    if (string::npos == tagEnd)
    {
        return BOOL_FALSE;
    }

    memuse = s.substr(tagEnd + 1, unit - (tagEnd + 1));
    return BOOL_TRUE;
}

/*
 * Return the first line of `filename` that contains
 * "<h1>Realtime Status</h1>" together with the (up to) two lines after it.
 *
 * When no line contains the heading, the last line of the file is returned;
 * when the file is empty or cannot be opened, an empty string is returned.
 *
 * The `line` parameter is not used.
 */
string getLineFromFile(char *filename,int line)
{
    string res="";
    FILE *fp = fopen(filename, "r");

    (void)line;

    if (NULL == fp)
    {
        return res;
    }

    while (fgets(tmps, 10000000, fp))
    {
        res = tmps;
        if (res.find("<h1>Realtime Status</h1>") != string::npos)
        {
            /* Append a following line only when one was actually read, so
               stale buffer content is never added at end of file. */
            for (int extra = 0; extra < 2 && fgets(tmps, 10000000, fp); ++extra)
            {
                res = res + tmps;
            }
            break;
        }
    }

    fclose(fp);
    return res;
}

/*
 * Fetch the HDOJ status page for (`username`, `pid`) and parse its status
 * row.
 *
 * Outputs: runid, result (verdict text), tu (time, text before "MS"),
 * mu (memory, text before "K"). tu and mu are reset to "0" first. When the
 * verdict contains "Compilation Error", ce_info receives the compiler
 * output. `lang` is used only in the debug message.
 *
 * Returns BOOL_TRUE when all four fields were parsed; BOOL_FALSE when any of
 * them is missing or the temporary file cannot be opened.
 *
 * NOTE(review): the page is appended ("ab+") to `tfilename` and the first
 * status block in that file is the one parsed; this presumably relies on the
 * file being reset between polls - confirm against the caller.
 */
ULONG getStatus(string username, string pid,string lang, string &runid, string &result,string& ce_info,string &tu,string &mu)
{
    bool parsedAll = true;
    string ts;
    CURL *curl;

    tu = mu = "0";

    MSG_OUPUT_DBG("Do get status...");

    curl_global_init(CURL_GLOBAL_ALL);
    curl = curl_easy_init();

    if (curl)
    {
        FILE *fp = fopen(tfilename, "ab+");
        if (NULL == fp)
        {
            MSG_OUPUT_DBG("Open %s failed...", tfilename);
            curl_easy_cleanup(curl);
            curl_global_cleanup();
            return BOOL_FALSE;
        }

        /* Built as a std::string so a long user name cannot overflow a fixed buffer. */
        const string url = "http://acm.hdu.edu.cn/status.php?first=&pid=" + pid +
                           "&user=" + username + "&lang=&status=0";

        curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
        curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "hdu.cookie");
        curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);

        if (curl_easy_perform(curl) != CURLE_OK)
        {
            MSG_OUPUT_DBG("curl_easy_perform failed...");
        }
        curl_easy_cleanup(curl);

        fclose(fp);
    }

    curl_global_cleanup();

    ts = getLineFromFile(tfilename,77);

    /* Every extractor runs even after an earlier one failed, so each missing
       field gets its own debug message. */
    if (BOOL_FALSE == getUsedTime(ts, tu))
    {
        parsedAll = false;
        MSG_OUPUT_DBG("getUsedTime failed.");
    }

    if (BOOL_FALSE == getUsedMem(ts, mu))
    {
        parsedAll = false;
        MSG_OUPUT_DBG("getUsedMem failed.");
    }

    if (BOOL_FALSE == getRunid(ts, runid))
    {
        parsedAll = false;
        MSG_OUPUT_DBG("getRunid failed.");
    }

    if (BOOL_FALSE == getResult(ts, result))
    {
        parsedAll = false;
        MSG_OUPUT_DBG("getResult failed.");
    }

    if (!parsedAll)
    {
        MSG_OUPUT_DBG("get record failed.");
        return BOOL_FALSE;
    }

    MSG_OUPUT_DBG("problem:%s, language:%s, verdict:%s, submissionID:%s, time:%s ms, memory:%s kb\r\n", pid.c_str(), lang.c_str(), result.c_str(), runid.c_str(), tu.c_str(), mu.c_str());

    MSG_OUPUT_DBG("get status success...");

    if (result.find("Compilation Error") != string::npos)
    {
        /* Fetch the compiler output for this submission. */
        ce_info = getCEinfo(runid);
    }

    return BOOL_TRUE;
}

////////////////////////////////////////
//spider
////////////////////////////////////////

// NOTE(review): "pcre.h" is already included through "common.h" at the top of
// this file, so a definition at this point presumably comes too late to
// affect that header - confirm whether it should move before the include.
#define PCRE_STATIC // static-library build option

/*
 * Tell whether problem field `em` goes through the HTML rewriting step.
 *
 * Returns BOOL_FALSE for the time limit, memory limit, title and author
 * fields, BOOL_TRUE for every other field.
 */
ULONG isNeed2HTML(ENUM_PROVLEM em)
{
    const bool plainField = (em == PROBLEM_TIME) ||
                            (em == PROBLEM_MEMORY) ||
                            (em == PROBLEM_TITLE) ||
                            (em == PROBLEM_AUTHOR);

    if (plainField)
    {
        return BOOL_FALSE;
    }
    return BOOL_TRUE;
}

void InitMySqlConfig()
{
GetPrivateProfileString("MySQL","url","",Mysql_url,sizeof(Mysql_url),INI_filename);
GetPrivateProfileString("MySQL","username","",Mysql_username,sizeof(Mysql_username),INI_filename);
GetPrivateProfileString("MySQL","password","",Mysql_password,sizeof(Mysql_password),INI_filename);
GetPrivateProfileString("MySQL","table","",Mysql_table,sizeof(Mysql_table),INI_filename);
Mysql_port=GetPrivateProfileInt("MySQL","port",0,INI_filename);
//cout<<"MySQL:"<<Mysql_url<<" "<<Mysql_username<<" "<<Mysql_password<<" "<<Mysql_table<<" "<<Mysql_port<<endl;
}

/*
 * Connect to MySQL with the Mysql_* settings and switch the connection to
 * the gbk character set.
 *
 * Returns 1 on success and 0 when the handle cannot be created, the
 * connection fails, or the SET CHARACTER SET statement fails.
 */
int InitMySQL()
{
    static const char kSetCharset[] = "SET CHARACTER SET gbk";

    mysql = mysql_init((MYSQL*)0);
    if (mysql == 0)
    {
        /* Without a handle nothing below can work. */
        MSG_OUPUT_DBG("mysql_init failed.");
        return 0;
    }

    if (!mysql_real_connect(mysql, Mysql_url, Mysql_username, Mysql_password, Mysql_table, Mysql_port, NULL, CLIENT_MULTI_STATEMENTS))
    {
        /* Server text goes through "%s": it may contain '%' characters. */
        MSG_OUPUT_DBG("%s", mysql_error(mysql));
        return 0;
    }

    if (mysql_real_query(mysql, kSetCharset, (unsigned int)strlen(kSetCharset)))
    {
        MSG_OUPUT_DBG("%s", mysql_error(mysql));
        return 0;
    }
    return 1;
}


/*
 * Parse "YYYY-MM-DD" optionally followed by " HH[:MM[:SS]]" and convert it
 * with mktime().
 *
 * timeData receives the result. Returns 0 on success and -1 (timeData
 * untouched) when either of the two '-' separators is missing. Missing
 * time-of-day parts count as 0.
 */
int StringToTimeEX(const string &strDateStr,time_t &timeData)
{
    const char *text = strDateStr.c_str();

    const char *cursor = strchr(text, '-');
    if (NULL == cursor)
    {
        MSG_OUPUT_DBG("strDateStr[%s] err \n", strDateStr.c_str());
        return -1;
    }
    const int year = atoi(text);
    const int month = atoi(cursor + 1);

    cursor = strchr(cursor + 1, '-');
    if (NULL == cursor)
    {
        MSG_OUPUT_DBG("strDateStr[%s] err \n", strDateStr.c_str());
        return -1;
    }
    const int day = atoi(cursor + 1);

    /* The time of day is optional, for inputs that carry only a date. */
    int hour = 0;
    int minute = 0;
    int second = 0;
    cursor = strchr(cursor + 1, ' ');
    if (NULL != cursor)
    {
        hour = atoi(cursor + 1);
        cursor = strchr(cursor + 1, ':');
    }
    if (NULL != cursor)
    {
        minute = atoi(cursor + 1);
        cursor = strchr(cursor + 1, ':');
    }
    if (NULL != cursor)
    {
        second = atoi(cursor + 1);
    }

    struct tm broken;
    memset((void*)&broken, 0, sizeof(broken));
    broken.tm_year = year - 1900;
    broken.tm_mon = month - 1;
    broken.tm_mday = day;
    broken.tm_hour = hour;
    broken.tm_min = minute;
    broken.tm_sec = second;

    timeData = mktime(&broken);
    return 0;
}
/*
 * Format timeData as local time, "YYYY-MM-DD HH:MM:SS".
 *
 * strDateStr receives the text. Returns 0 on success; returns -1 and clears
 * strDateStr when the time cannot be converted or formatted.
 */
int API_TimeToString(string &strDateStr,const time_t &timeData)
{
    char chTmp[25];
    const struct tm *local = localtime(&timeData);

    /* strftime is bounded by the buffer size and leaves localtime's shared
       struct untouched. */
    if (NULL == local ||
        0 == strftime(chTmp, sizeof(chTmp), "%Y-%m-%d %H:%M:%S", local))
    {
        strDateStr.clear();
        return -1;
    }

    strDateStr = chTmp;
    return 0;
}

/*
 * Format the current local time with the strftime pattern `format`.
 *
 * Returns the formatted text, or an empty string when format is NULL, the
 * current time cannot be converted, or the result does not fit in the
 * 1024-byte work buffer.
 */
string GetLocalTimeAsString(const char* format)
{
    char buf[1024];
    const time_t now = time(NULL);
    const struct tm *local = localtime(&now);

    if (NULL == format || NULL == local)
    {
        return "";
    }

    /* A return of 0 means nothing usable was written; the buffer contents
       are then not to be trusted. */
    if (0 == strftime(buf, sizeof(buf), format, local))
    {
        return "";
    }
    return buf;
}

/* Return the current local time as formatted by API_TimeToString. */
string getCurrentTime()
{
    string stamp;
    const time_t now = time(NULL);

    API_TimeToString(stamp, now);
    return stamp;
}

/*
 * Replace every occurrence of old_value in str with new_value, in place.
 *
 * The search resumes after each inserted replacement, so text introduced by
 * new_value is never matched again. An empty old_value leaves str unchanged
 * (it would otherwise match at every position without end).
 *
 * Returns a reference to str.
 */
string&  replace_all_distinct(string& str, const string& old_value, const string& new_value)
{
    if (old_value.empty())
    {
        return str;
    }

    string::size_type pos = str.find(old_value);
    while (pos != string::npos)
    {
        str.replace(pos, old_value.length(), new_value);
        pos = str.find(old_value, pos + new_value.length());
    }

    return str;
}

void SQL_updateProblemInfo(string v_ojname, string v_pid)
{
string val_str="";
/*
val_str = g_problem_string[0] + "," + g_problem_string[1]+ "," + "'" + g_problem_string[2] + "'" + "," + 
"'" + g_problem_string[3] + "'" + "," + "'" + g_problem_string[4] + "'" + "," + "'" + g_problem_string[5] + "'" + "," +
"'" + g_problem_string[6] + "'" + "," + "'" + g_problem_string[7] + "'" + "," + "'" + g_problem_string[8] + "'" + "," +
"'" + getCurrentTime() + "', 'N', 0,0,0,0,0,1, '" + v_ojname +"', " + v_pid + "";
    */

MSG_OUPUT_DBG("In SQL_updateProblemInfo, (%s)", v_pid.c_str());

for(int i=0; i<PROBLEM_TAG_MAX; i++)
{
//char *end;
//char *string_ = (char*)malloc(sizeof(char)*g_problem_string[i].length()+1);

//strcpy(string_,g_problem_string[i].c_str());
/*
end = string_;
end += strlen(string_);                //point sql tail
//convert NUL(ASCII 0)、'\n'、'\r'、'\'’、'''、'"'和Control-Z and so on
*end++ = '\'';
end += mysql_real_escape_string(mysql, end, query, strlen(string_));
*end++ = '\"';
   *end++ = ')';
cout<<string_<<endl;
*/

if (i == PROBLEM_TITLE)
{
replace_all_distinct(g_problem_string[i], "\"", " ");
g_problem_string[i] = "HDU." + v_pid + " - " + g_problem_string[i];
}

if (BOOL_TRUE == isNeed2HTML((ENUM_PROVLEM)i))
{
replace_all_distinct(g_problem_string[i], "\"", "&quot;");
replace_all_distinct(g_problem_string[i], "src=/data/images/", "src=http://acm.hdu.edu.cn/data/images/");
replace_all_distinct(g_problem_string[i], "src=../../data/images/", "src=http://acm.hdu.edu.cn/data/images/");
replace_all_distinct(g_problem_string[i], "\n", "<br>");
}
//val_str += g_problem_string[i];
}

val_str = g_problem_string[0] + "," + g_problem_string[1]+ "," + "\"" + g_problem_string[2] + "\"" + "," + 
"\"" + g_problem_string[3] + "\"" + "," + "\"" + g_problem_string[4] + "\"" + "," + "\"" + g_problem_string[5] + "\"" + "," +
"\"" + g_problem_string[6] + "\"" + "," + "\"" + g_problem_string[7] + "\"" + "," + "\"" + g_problem_string[8] + "\"" + "," +
"'" + getCurrentTime() + "', 'N', 0,0,0,0,0,0,1, '" + v_ojname +"', " + v_pid + "";

if (val_str.length() >= MAX_SIZE_BUF)
{
MSG_OUPUT_DBG("ERROR, too large size of buffer...");
return;
}

sprintf(query,"insert into problem(time_limit,memory_limit,title,description,input,output,sample_input,sample_output,author,create_date,defunct,spj,accepted,solved,submit,submit_user,contest_id,isvirtual,oj_name,oj_pid) values(%s);",val_str.c_str());

//MSG_OUPUT_DBG(query);

int ret=mysql_real_query(mysql,query,(unsigned int)strlen(query));  
if(ret)
{  
//write_log(ERROR,mysql_error(mysql));
MSG_OUPUT_DBG(mysql_error(mysql));
return ;
}

MSG_OUPUT_DBG("End SQL_updateProblemInfo OK, (%s)", v_pid.c_str());
}

/*
 * Tell whether the PCRE regular expression `pattern` matches anywhere in the
 * text file `filename`.
 *
 * Returns BOOL_TRUE on a match. Returns BOOL_FALSE when there is no match,
 * the file cannot be opened, the pattern does not compile, or matching fails.
 */
ULONG checkStringExsit(char *filename, char *pattern)
{
    pcre  *re;
    const char *error;
    int  erroffset;
    int  ovector[OVECCOUNT];
    int  rc;
    string ts;
    FILE *fp;

    if (NULL == filename || NULL == pattern)
    {
        return BOOL_FALSE;
    }

    fp = fopen(filename, "r");
    if (NULL == fp)
    {
        MSG_OUPUT_DBG("Open %s failed...", filename);
        return BOOL_FALSE;
    }
    while (fgets(tmps, MAX_SIZE_BUF, fp))
    {
        ts += tmps;
    }
    fclose(fp);

    re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
    if (re == NULL)
    {
        MSG_OUPUT_DBG("PCRE compilation failed at offset %d: %s\n", erroffset, error);
        return BOOL_FALSE;
    }

    /* pcre_exec returns a negative code when nothing matched or an error occurred. */
    rc = pcre_exec(re, NULL, ts.c_str(), (int)strlen(ts.c_str()), 0, 0, ovector, OVECCOUNT);
    pcre_free(re);

    if (rc < 0)
    {
        if (rc == PCRE_ERROR_NOMATCH)
        {
            /* Reported through the debug logger, as getInfoByTag does. */
            MSG_OUPUT_DBG("Sorry, no match ...\n");
        }
        else
        {
            MSG_OUPUT_DBG("Matching error %d\n", rc);
        }
        return BOOL_FALSE;
    }

    return BOOL_TRUE;
}

/*
 * Run the PCRE regular expression `pattern` over `src` and store the text of
 * the highest-numbered captured group, followed by '\n', in
 * g_problem_string[enProblem]. When the pattern has no capture group the
 * whole match is stored instead.
 *
 * Returns BOOL_TRUE on a match. Returns BOOL_FALSE when src or pattern is
 * NULL, the pattern does not compile, or nothing matches. On a matching
 * error other than "no match" the field is set to "Not Found".
 *
 * The `res` parameter is not used.
 */
ULONG getInfoByTag(char *src, char *pattern, ENUM_PROVLEM enProblem, char *res)
{
    pcre  *re;
    const char *error;
    int  erroffset;
    int  ovector[OVECCOUNT];
    int  rc;
    int  group;
    int  matchStart;
    int  matchEnd;

    (void)res;

    MSG_OUPUT_DBG("In getInfoByTag...");

    if (NULL == src || NULL == pattern)
    {
        return BOOL_FALSE;
    }

    re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
    if (re == NULL)
    {
        MSG_OUPUT_DBG("PCRE compilation failed at offset %d: %s\n", erroffset, error);
        return BOOL_FALSE;
    }

    /* pcre_exec returns a negative code when nothing matched or an error occurred. */
    rc = pcre_exec(re, NULL, src, (int)strlen(src), 0, 0, ovector, OVECCOUNT);
    pcre_free(re);

    if (rc < 0)
    {
        if (rc == PCRE_ERROR_NOMATCH)
        {
            MSG_OUPUT_DBG("Sorry, no match ...\n");
        }
        else
        {
            MSG_OUPUT_DBG("Matching error %d\n", rc);
            g_problem_string[enProblem] = "Not Found";
        }
        return BOOL_FALSE;
    }

    /* rc is one more than the highest group that was set; rc == 0 means the
       offset vector was too small, in which case group 0 is used. */
    group = (rc == 0) ? 0 : (rc - 1);
    matchStart = ovector[2 * group];
    matchEnd = ovector[2 * group + 1];

    MSG_OUPUT_DBG("In getInfoByTag group=%d, rc=%d, length=%d", group, rc, matchEnd - matchStart);

    /* Copy straight from src: no temporary allocation that could fail. */
    if (matchStart >= 0 && matchEnd >= matchStart)
    {
        g_problem_string[enProblem].assign(src + matchStart, (string::size_type)(matchEnd - matchStart));
    }
    else
    {
        g_problem_string[enProblem].clear();
    }
    g_problem_string[enProblem] += '\n';

    MSG_OUPUT_DBG("End getInfoByTag success...");

    return BOOL_TRUE;
}

int getProblemInfo_Brief(string pid)    
{    
ULONG ulRet = 0;
int loop = 0;
string res="",ts;
    FILE * fp=fopen(tfilename,"r");
    while (fgets(tmps, MAX_SIZE_BUF, fp))
    {
        ts +=tmps;
    }
    fclose(fp);

char  patternTime [] = "(\\d*) MS";  // 将要被编译的字符串形式的正则表达式    

char  patternMemory [] = "(\\d*) K";  // 将要被编译的字符串形式的正则表达式    

char  patternTitle [] = "<h1 style='color:#1A5CC8'>([\\s\\S]*?)</h1>";  // 将要被编译的字符串形式的正则表达式    
    
char  patternDescription [] = "Problem Description</div> <div class=panel_content>([\\s\\S]*?)</div><div class=panel_bottom>&nbsp;</div>";  // 将要被编译的字符串形式的正则表达式    
    
char  patternInput [] = "Input</div> <div class=panel_content>([\\s\\S]*?)</div><div class=panel_bottom>&nbsp;</div>";  // 将要被编译的字符串形式的正则表达式    
    
char  patternOutput [] = "Output</div> <div class=panel_content>([\\s\\S]*?)</div><div class=panel_bottom>&nbsp;</div>";  // 将要被编译的字符串形式的正则表达式    
    
char  patternSampleInput [] = "Sample Input</div><div class=panel_content><pre><div style=\"font-family:Courier New,Courier,monospace;\">([\\s\\S]*?)</div></pre></div><div class=panel_bottom>&nbsp;</div>";  // 将要被编译的字符串形式的正则表达式    
char  patternSampleOutput [] = "Sample Output</div><div class=panel_content><pre><div style=\"font-family:Courier New,Courier,monospace;\">([\\s\\S]*?)</div></pre></div><div class=panel_bottom>&nbsp;</div>";  // 将要被编译的字符串形式的正则表达式    
    
char  patternAuthor [] = "Author</div> <div class=panel_content>([\\s\\S]*?)</div><div class=panel_bottom>&nbsp;</div>";  // 将要被编译的字符串形式的正则表达式    
    
//char  patternTitle [] = "<h1 style='color:#1A5CC8'>([\\s\\S]*?)</h1>";  // 将要被编译的字符串形式的正则表达式    
    

for (loop = 0; loop < PROBLEM_TAG_MAX; loop++)
{
g_problem_string[loop] = "";
}

MSG_OUPUT_DBG("Start Problem %s ...", pid.c_str());

MSG_OUPUT_DBG("Time");
ulRet = getInfoByTag((char*)ts.c_str(), patternTime, PROBLEM_TIME ,NULL);
if(ulRet == 0)
{
g_problem_string[0] = "1000";
}
MSG_OUPUT_DBG("Memoty");
ulRet = getInfoByTag((char*)ts.c_str(), patternMemory, PROBLEM_MEMORY, NULL);
if(ulRet == 0)
{
g_problem_string[1] = "65535";
}

ulRet = 0;

MSG_OUPUT_DBG("Title");
ulRet += getInfoByTag((char*)ts.c_str(), patternTitle, PROBLEM_TITLE,NULL);

MSG_OUPUT_DBG("Description");
ulRet += getInfoByTag((char*)ts.c_str(), patternDescription, PROBLEM_DESCRIPTION, NULL);

MSG_OUPUT_DBG("Input");
ulRet += getInfoByTag((char*)ts.c_str(), patternInput, PROBLEM_INPUT, NULL);

MSG_OUPUT_DBG("Output");
ulRet += getInfoByTag((char*)ts.c_str(), patternOutput, PROBLEM_OUTPUT, NULL);

MSG_OUPUT_DBG("Sample Input");
ulRet += getInfoByTag((char*)ts.c_str(), patternSampleInput, PROBLEM_SAMPLE_INPUT, NULL);

MSG_OUPUT_DBG("Sample Output");
ulRet += getInfoByTag((char*)ts.c_str(), patternSampleOutput, PROBLEM_SAMPLE_OUTPUT, NULL);

MSG_OUPUT_DBG("Author");
ulRet += getInfoByTag((char*)ts.c_str(), patternAuthor, PROBLEM_AUTHOR, NULL);

if (ulRet != 0)
{
if (BOOL_TRUE == checkStringExsit(tfilename, "No such problem"))
{
MSG_OUPUT_DBG("No such problem %s", pid.c_str());

return 0;
}
}

SQL_updateProblemInfo("HDU",pid);

MSG_OUPUT_DBG("Get Problem %s OK.", pid.c_str());

    return 0;    
}    


/*
 * Download the HDOJ page of problem `pid` into `tfilename` (any previous
 * copy is deleted first) and hand it to getProblemInfo_Brief for parsing and
 * storage.
 *
 * Returns BOOL_TRUE once the page has been downloaded and processed;
 * BOOL_FALSE when libcurl cannot be initialised, the temporary file cannot
 * be opened, or the transfer fails.
 */
ULONG getProblemInfo(string pid)
{
    CURLcode res;
    FILE *fp;
    CURL *curl = curl_easy_init();

    if (access(tfilename, 0) == 0)
    {
        DeleteFile(tfilename);
    }

    if (!curl)
    {
        MSG_OUPUT_DBG("curl_easy_init failed...");
        return BOOL_FALSE;
    }

    fp = fopen(tfilename, "ab+");
    if (NULL == fp)
    {
        MSG_OUPUT_DBG("Open %s failed...", tfilename);
        curl_easy_cleanup(curl);
        return BOOL_FALSE;
    }

    /* Built as a std::string, so the URL length is not tied to a fixed buffer. */
    const string url = "http://acm.hdu.edu.cn/showproblem.php?pid=" + pid;

    curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
    curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "hdu.cookie");
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &process_data);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
    res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    fclose(fp);

    if (res != CURLE_OK)
    {
        MSG_OUPUT_DBG("curl_easy_perform failed...");
        return BOOL_FALSE;
    }

    getProblemInfo_Brief(pid);
    return BOOL_TRUE;
}


//////////////////
// DLL exp
//////////////////

/*
 * Exported entry point: set the debug-output state through set_debug_switch.
 * Always returns BOOL_TRUE.
 */
extern "C" _declspec(dllexport)ULONG DLL_HDUDebugSwitch(ULONG st)
{
    set_debug_switch(st);
    return BOOL_TRUE;
}

/*
 * Exported entry point: prepare the spider.
 *
 * Connects to MySQL through InitMySQL and deletes a leftover temporary file.
 * The `pid` parameter is not used.
 *
 * Returns BOOL_TRUE on success, BOOL_FALSE when the MySQL connection cannot
 * be set up.
 */
extern "C" _declspec(dllexport)ULONG DLL_HDU_SpiderInit(int pid)
{
    if (0 == InitMySQL())
    {
        printf("Init MySQL ERROR...\n");
        return BOOL_FALSE;
    }

    /* Start from a clean temporary file. */
    const bool staleFilePresent = (0 == access(tfilename, 0));
    if (staleFilePresent)
    {
        DeleteFile(tfilename);
    }

    return BOOL_TRUE;
}

/*
 * Exported entry point: fetch problem `pid` from HDOJ and store it in the
 * database.
 *
 * Returns BOOL_TRUE when getProblemInfo succeeds, BOOL_FALSE otherwise.
 */
extern "C" _declspec(dllexport)ULONG DLL_GetProblemInfoFromHDU(int pid)
{
    /* 16 bytes hold any 32-bit int in decimal, sign and terminator included. */
    char tmp[16]={0};
    sprintf(tmp, "%d", pid);
    string pid_s = tmp;

    if (BOOL_TRUE != getProblemInfo(pid_s))
    {
        return BOOL_FALSE;
    }

    return BOOL_TRUE;
}

/*
 * Exported entry point: log in to HDOJ.
 * Returns BOOL_TRUE when login() succeeds, BOOL_FALSE otherwise.
 */
extern "C" _declspec(dllexport)ULONG DLL_HDULogin()
{
    const bool loggedIn = (BOOL_TRUE == login());

    if (loggedIn)
    {
        return BOOL_TRUE;
    }
    return BOOL_FALSE;
}

/*
 * Exported entry point: submit `source` for problem `pid` in the language
 * with numeric id `langid`.
 *
 * Returns BOOL_TRUE when submit() succeeds, BOOL_FALSE otherwise.
 *
 * NOTE(review): a std::string is passed by value across the DLL boundary;
 * presumably caller and DLL are built with the same compiler and runtime -
 * confirm.
 */
extern "C" _declspec(dllexport)ULONG DLL_HDUSubmit(int pid, int langid, string source)
{
    /* 16 bytes hold any 32-bit int in decimal, sign and terminator included. */
    char tmp[16]={0};
    char tmplang[16]={0};

    sprintf(tmp, "%d", pid);
    sprintf(tmplang, "%d", langid);

    string pid_s = tmp;
    string lang_string = tmplang;

    if (BOOL_TRUE != submit(pid_s, lang_string, source))
    {
        return BOOL_FALSE;
    }
    return BOOL_TRUE;
}

/*
 * Exported entry point: query the judging status of `username`'s submission
 * for problem `pid`.
 *
 * runid, result, ce_info, tu and mu are filled in by getStatus; `langid` is
 * converted to text and passed along.
 *
 * Returns BOOL_TRUE when getStatus succeeds, BOOL_FALSE otherwise.
 */
extern "C" _declspec(dllexport)ULONG DLL_HDUGetStatus(string username, int pid, int langid, string &runid, string &result,string& ce_info,string &tu,string &mu)
{
    /* 16 bytes hold any 32-bit int in decimal, sign and terminator included. */
    char tmp[16]={0};
    char tmplang[16]={0};

    sprintf(tmp, "%d", pid);
    sprintf(tmplang, "%d", langid);

    string pid_s = tmp;
    string lang_string = tmplang;

    if (BOOL_TRUE != getStatus(username, pid_s, lang_string, runid, result, ce_info, tu, mu))
    {
        MSG_OUPUT_DBG("DLL_HDUGetStatus getStatus error...");
        return BOOL_FALSE;
    }

    MSG_OUPUT_DBG("DLL_HDUGetStatus getStatus success...");
    return BOOL_TRUE;
}

////////////////////////////////////////////////
///////common.h头文件                         
/////////////////////////////////////////////////

#ifndef _COMMON_H_
#define _COMMON_H_

#include <io.h>
#include <windows.h>
#include <iostream>
#include <cstdio>
#include <string>
#include <fstream>
#include <stdio.h>

#include "include\mysql.h"
#include "curl\curl.h"
#include "pcre.h"


using namespace std;

#pragma comment(lib,"ws2_32")
#pragma comment(lib, "lib/curllib.lib") 
#pragma comment(lib, "lib/openldap.lib") 
#pragma comment(lib, "lib/ssleay32.lib") 
#pragma comment(lib, "lib/pcre.lib") 
#pragma comment(lib, "lib/libmysql.lib")

#ifdef __cplusplus
extern "C" {
#endif

/* Size in bytes of the large global work buffers (`query`, `tmps`). */
#define MAX_SIZE_BUF 10000000

/* Expands to its argument unchanged. */
#define DEBUG_PRINT(X)   X

/* Short names for the basic character / integer types used in this project. */
#define UCHAR unsigned char
#define ULONG unsigned long
#define CHAR char

/* Status codes returned by most functions here. Note the values: success
   (BOOL_TRUE) is 0 and failure (BOOL_FALSE) is 1, so results must be compared
   against these names rather than tested as C truth values. */
#define BOOL_TRUE 0
#define BOOL_FALSE 1

/* Number of ints in the offset vector passed to pcre_exec. */
#define OVECCOUNT 30    /* should be a multiple of 3 */

/* Size in bytes of one row of the gaucLanguageName table. */
#define MAX_LANG_SIZE 255

/* Index of each problem field inside g_problem_string. */
enum ENUM_PROVLEM
{
PROBLEM_TIME = 0,
PROBLEM_MEMORY,
PROBLEM_TITLE,
PROBLEM_DESCRIPTION,
PROBLEM_INPUT,
PROBLEM_OUTPUT,
PROBLEM_SAMPLE_INPUT,
PROBLEM_SAMPLE_OUTPUT,
PROBLEM_AUTHOR,
PROBLEM_TAG_MAX   /* number of fields; not a field itself */
};
/* Fields of the problem currently being processed, indexed by ENUM_PROVLEM. */
/* NOTE(review): the objects below are defined (not just declared) in this
   header; that presumably works only while a single source file includes it. */
string g_problem_string[PROBLEM_TAG_MAX];

MYSQL *mysql;     // MySQL connection handle
char query[MAX_SIZE_BUF]; // buffer for SQL statement text
/* INI file read by InitMySqlConfig. */
const char INI_filename[]="data.ini";
/* Connection settings with their built-in defaults; Mysql_table is passed to
   mysql_real_connect as the database name. */
char Mysql_url[255] = "localhost";
char Mysql_username[255] = "root";
char Mysql_password[255] = "password";
char Mysql_table[255] = "gdoj";
int  Mysql_port = 3306;  
char Mysql_Character[255] = "gbk";  // character set

/* Shared line buffer used by the file-reading helpers. */
char tmps[MAX_SIZE_BUF];

/* HDOJ account used for login, and the temporary file that downloaded pages
   are written to. */
char username[1000]="hdu_username";
char password[1000]="hdu_password";
char tfilename[1000]="tmpfile.txt";
char domain[255]="acm.hdu.edu.cn";  //acm.guet.edu.cn

/* hdu language list */
/* A language's id is its row index in this table (see getLanguageNameByID
   and getLanguageIDByName); each row is MAX_LANG_SIZE bytes. */
UCHAR gaucLanguageName[][MAX_LANG_SIZE] = {
"G++",
"GCC",
"C++",
"C",
"Pascal",
"Java"
};

/* Functions exported from the DLL. */
//spider
/* Set the debug-output state. */
extern "C" _declspec(dllexport)ULONG DLL_HDUDebugSwitch(ULONG status);
/* Fetch problem `pid` from HDOJ and store it in the database. */
extern "C" _declspec(dllexport)ULONG DLL_GetProblemInfoFromHDU(int pid);
/* Connect to MySQL and remove a leftover temporary file. */
extern "C" _declspec(dllexport)ULONG DLL_HDU_SpiderInit(int pid);
//judger
/* Log in to HDOJ with the configured account. */
extern "C" _declspec(dllexport)ULONG DLL_HDULogin();
/* Submit `source` for problem `pid` in language `langid`. */
extern "C" _declspec(dllexport)ULONG DLL_HDUSubmit(int pid, int langid, string source);
/* Query the status of `username`'s submission for problem `pid`; the
   reference parameters receive the parsed fields. */
extern "C" _declspec(dllexport)ULONG DLL_HDUGetStatus(string username, int pid, int langid, string &runid, string &result,string& ce_info,string &tu,string &mu);


#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif /* End of common.h */


Comments ( 1 )

  • kjyixw  post at 12 years ago Vote: I like it   0   Vote: I do not like it

    他们网站的管理员知道你抓题么=。=