c++网页抓取_c++抓取网页内容源代码下载
2015-08-01 17:19:08 By: shinyuu
本DLL是基于libcurl编写的简易C++网页抓取类封装。它不适用于功能需求较多的大型项目,而适用于只需要使用GET、POST两种提交方式并保存cookies的小型项目。代码比较简单,有需要的朋友可以下载研究一下。
#include <vjspider_c.h>
#include <vjspider.h>
#include <vjspider_helper.h>

// Flat, handle-based wrapper around VJSpider. Every spider is identified by
// an int handle returned from vj_create_spider(); all per-spider state lives
// in the maps below, keyed by that handle.
//
// NOTE(review): identifiers containing a double underscore are reserved in
// C++. The names are kept unchanged here in case other translation units
// refer to them; consider renaming them project-wide.
map<int, VJSpider*> __vjspiders;     // handle -> owned spider instance
map<int, string> __cookies;          // handle -> backing storage for vj_get_cookie_filename()
map<int, get_data> __tmpgetdata;     // handle -> pending GET parameters
map<int, post_data> __tmppostdata;   // handle -> pending POST parameters
map<int, string> __getstring;        // handle -> backing storage for vj_get()
map<int, string> __poststring;       // handle -> backing storage for vj_post()

// Forwards to VJSpider::init_global_envi() and returns its result.
bool vj_init_global_envi()
{
    return VJSpider::init_global_envi();
}

// Forwards to VJSpider::release_global_envi().
void vj_release_global_envi()
{
    VJSpider::release_global_envi();
}

// Creates a new spider and returns its handle.
// The handle is drawn from VJRand() and re-drawn until it does not collide
// with a live spider.
int vj_create_spider()
{
    int idx = VJRand();
    while(__vjspiders.find(idx) != __vjspiders.end())
    {
        idx = VJRand();
    }

    // Allocate first: if construction throws, no map has been touched and no
    // half-registered handle is left behind.
    VJSpider* spider = new VJSpider();

    __vjspiders[idx] = spider;
    __cookies[idx] = "";
    __getstring[idx] = "";
    __poststring[idx] = "";
    __tmpgetdata[idx].clear();
    __tmppostdata[idx].clear();

    return idx;
}

// Destroys the spider behind `idx` and drops all state stored for it.
// Unknown handles are ignored. Any const char* previously returned for this
// handle (vj_get, vj_post, vj_get_cookie_filename) must not be used afterwards.
void vj_release_spider(int idx)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it != __vjspiders.end())
    {
        delete it->second;
        __vjspiders.erase(it);
    }

    // Erase (rather than reset to "") so that repeated create/release cycles
    // do not leave stale nodes in the maps, and so that releasing an unknown
    // handle does not insert new ones.
    __tmpgetdata.erase(idx);
    __tmppostdata.erase(idx);
    __cookies.erase(idx);
    __getstring.erase(idx);
    __poststring.erase(idx);
}

// Forwards to VJSpider::init_context() on the spider behind `idx`.
// Returns false if `idx` is not a live handle; otherwise returns whatever
// init_context() returns.
bool vj_init_context(int idx, bool auto_del_cookie, const char* cookie_file, bool use_cookie)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end())
    {
        return false;
    }

    return it->second->init_context(auto_del_cookie, cookie_file, use_cookie);
}

// Returns the spider's cookie file name as a C string, or "" if `idx` is not
// a live handle. The pointer refers to storage owned by this module and is
// overwritten by the next call for the same handle.
const char* vj_get_cookie_filename(int idx)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end())
    {
        // String literal has static storage; no map entry is created for
        // handles that do not exist.
        return "";
    }

    string& slot = __cookies[idx];
    slot = it->second->get_cookie_filename();
    return slot.c_str();
}

// Forwards to VJSpider::del_cookie_file(); does nothing for unknown handles.
void vj_del_cookie_file(int idx)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end())
    {
        return;
    }

    it->second->del_cookie_file();
}

// Appends one name/value pair to the pending GET data of `idx`.
// Returns false if `idx` is not a live handle.
// NOTE(review): `name` / `value` are passed straight to get_data_item();
// whether null pointers are acceptable depends on that type - confirm.
bool vj_push_get_data(int idx, const char* name, const char* value)
{
    if(__vjspiders.find(idx) == __vjspiders.end())
    {
        return false;
    }

    __tmpgetdata[idx].push_back(get_data_item(name, value));
    return true;
}

// Appends one name/value pair to the pending POST data of `idx`.
// Returns false if `idx` is not a live handle.
// NOTE(review): `name` / `value` are passed straight to post_data_item();
// whether null pointers are acceptable depends on that type - confirm.
bool vj_push_post_data(int idx, const char* name, const char* value)
{
    if(__vjspiders.find(idx) == __vjspiders.end())
    {
        return false;
    }

    __tmppostdata[idx].push_back(post_data_item(name, value));
    return true;
}

// Clears the pending GET data of `idx`, if any is stored.
void vj_clear_get_data(int idx)
{
    map<int, get_data>::iterator it = __tmpgetdata.find(idx);
    if(it != __tmpgetdata.end())
    {
        it->second.clear();
    }
}

// Clears the pending POST data of `idx`, if any is stored.
void vj_clear_post_data(int idx)
{
    map<int, post_data>::iterator it = __tmppostdata.find(idx);
    if(it != __tmppostdata.end())
    {
        it->second.clear();
    }
}

// Calls VJSpider::get() with `url`, the pending GET data of `idx`, `code`,
// and `cookie` converted to VJ_USE_COOKIE_TYPE, and returns the result as a
// C string. Returns "" if `idx` is not a live handle.
// The pointer refers to storage owned by this module and is overwritten by
// the next vj_get() on the same handle. The pending GET data is not cleared
// here; use vj_clear_get_data().
const char* vj_get(int idx, const char* url, unsigned int code, int cookie)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end())
    {
        return "";
    }

    string& slot = __getstring[idx];
    slot = it->second->get(url, __tmpgetdata[idx], code, static_cast<VJ_USE_COOKIE_TYPE>(cookie));
    return slot.c_str();
}

// Calls VJSpider::post() with `url`, the pending POST data of `idx`, `code`,
// and `cookie` converted to VJ_USE_COOKIE_TYPE, and returns the result as a
// C string. Returns "" if `idx` is not a live handle.
// The pointer refers to storage owned by this module and is overwritten by
// the next vj_post() on the same handle. The pending POST data is not cleared
// here; use vj_clear_post_data().
const char* vj_post(int idx, const char* url, unsigned int code, int cookie)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end())
    {
        return "";
    }

    string& slot = __poststring[idx];
    slot = it->second->post(url, __tmppostdata[idx], code, static_cast<VJ_USE_COOKIE_TYPE>(cookie));
    return slot.c_str();
}
若资源对你有帮助、扫描下方的二维码、关注DD博客微信公众号(ddblogs)吧
最后贴上Demo的源代码,希望对大家有用,有兴趣的朋友可以下载看看。
源代码下载链接: http://dwtedx.com/download.html?bdkey=s/1bn8qswR 密码: p9v4
若资源对你有帮助、浏览后有很大收获、不妨小额打赏我一下、你的鼓励是维持我不断写博客最大动力
想获取DD博客最新代码、你可以扫描下方的二维码、关注DD博客微信公众号(ddblogs)
或者你也可以关注我的新浪微博、了解DD博客的最新动态:DD博客官方微博(dwtedx的微博)
如对资源有任何疑问或觉得仍然有很大的改善空间、可以对该博文进行评论、希望不吝赐教
为保证及时回复、可以使用博客留言板给我留言: DD博客留言板(dwtedx的留言板)
感谢你的访问、祝你生活愉快、工作顺心、欢迎常来逛逛