C語言實現抓取網頁原始碼
阿新 • • 發佈:2019-01-30
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>

/* TCP port the HTTP request is sent to. */
int port = 80;

/*
 * Print the official name, aliases and addresses stored in a hostent
 * record returned by gethostbyname().
 */
static void print_host_info(const struct hostent *hptr)
{
    /* Large enough for the text form of either an IPv4 or an IPv6 address. */
    char str[INET6_ADDRSTRLEN];
    const char *text;
    char **pptr;

    printf("official hostname:%s\n", hptr->h_name);
    for (pptr = hptr->h_aliases; *pptr != NULL; pptr++) {
        printf(" alias:%s\n", *pptr);
    }

    switch (hptr->h_addrtype) {
    case AF_INET:
    case AF_INET6:
        for (pptr = hptr->h_addr_list; *pptr != NULL; pptr++) {
            text = inet_ntop(hptr->h_addrtype, *pptr, str, sizeof(str));
            printf(" address:%s\n", text != NULL ? text : "?");
        }
        if (hptr->h_addr_list[0] != NULL) {
            text = inet_ntop(hptr->h_addrtype, hptr->h_addr_list[0],
                             str, sizeof(str));
            printf(" first address: %s\n", text != NULL ? text : "?");
        }
        break;
    default:
        printf("unknown address type\n");
        break;
    }
}

/*
 * Send exactly len bytes, retrying after short writes.
 * Returns 0 on success, -1 if send() reports an error.
 */
static int send_all(int sock, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t sent = send(sock, data, len, 0);
        if (sent < 0) {
            return -1;
        }
        data += sent;
        len -= (size_t)sent;
    }
    return 0;
}

/*
 * Consume the response status line and headers one byte at a time, stopping
 * right after the first empty line so the next recv() starts at the body.
 * Returns 0 when the empty line was seen, -1 if the connection failed,
 * timed out or was closed before that.
 */
static int skip_response_headers(int sock)
{
    int chars = 0; /* non-CR characters seen on the current line */
    char ch;

    for (;;) {
        ssize_t got = recv(sock, &ch, 1, 0);
        if (got <= 0) {
            /* 0 means the peer closed the connection; never reuse a stale byte. */
            return -1;
        }
        if (ch == '\r') {
            continue;
        }
        if (ch != '\n') {
            chars++;
            continue;
        }
        if (chars == 0) {
            return 0;
        }
        chars = 0;
    }
}

/*
 * Fetch "/" from the host named in argv[1] over plain HTTP and write the
 * response body to stdout.
 *
 * Exit status: 0 on success, -1 on wrong usage, 1 on any resolver or
 * socket failure.
 */
int main(int argc, char **argv)
{
    char buffer[512];
    char message[512];
    struct sockaddr_in pin;
    struct hostent *hptr;
    /* Receive timeout: 1 second, 0 microseconds. */
    struct timeval timeout = {1, 0};
    int isock;
    int len;
    int status = 1;

    if (argc != 2) {
        printf("%s url\n", argc > 0 ? argv[0] : "prog");
        return -1;
    }

    hptr = gethostbyname(argv[1]);
    if (hptr == NULL) {
        printf("gethostbyname is fail\n");
        return 1;
    }
    print_host_info(hptr);

    /* The socket below is IPv4-only, so insist on a usable IPv4 address. */
    if (hptr->h_addrtype != AF_INET
        || hptr->h_length != (int)sizeof(pin.sin_addr)
        || hptr->h_addr_list[0] == NULL) {
        printf("no IPv4 address for host\n");
        return 1;
    }

    memset(&pin, 0, sizeof(pin));
    pin.sin_family = AF_INET;
    pin.sin_port = htons(port);
    memcpy(&pin.sin_addr, hptr->h_addr_list[0], sizeof(pin.sin_addr));

    /*
     * Bounded formatting: the host name comes from the command line and must
     * not overflow the request buffer. "Connection: close" makes the server
     * end the stream after the response, so end-of-body is seen as EOF
     * instead of relying on the receive timeout.
     */
    len = snprintf(message, sizeof(message),
                   "GET / HTTP/1.1\r\n"
                   "Host: %s\r\n"
                   "Accept: */*\r\n"
                   "User-Agent: Mozilla/4.0(compatible)\r\n"
                   "Connection: close\r\n"
                   "\r\n",
                   argv[1]);
    if (len < 0 || (size_t)len >= sizeof(message)) {
        printf("host name is too long\n");
        return 1;
    }

    isock = socket(AF_INET, SOCK_STREAM, 0);
    if (isock == -1) {
        printf("Error opening socket!\n");
        return 1;
    }

    /* The timeout is only a safety net against a stalled peer; not fatal. */
    if (setsockopt(isock, SOL_SOCKET, SO_RCVTIMEO,
                   &timeout, sizeof(timeout)) == -1) {
        printf("Error setting receive timeout\n");
    }

    if (connect(isock, (const struct sockaddr *)&pin, sizeof(pin)) == -1) {
        printf("Error connecting to socket\n");
        goto out;
    }

    if (send_all(isock, message, (size_t)len) == -1) {
        printf("Error in send\n");
        goto out;
    }

    if (skip_response_headers(isock) == -1) {
        printf("Error in recv\n");
        goto out;
    }

    /* Copy the body verbatim; fwrite keeps embedded NUL bytes intact. */
    for (;;) {
        ssize_t got = recv(isock, buffer, sizeof(buffer), 0);
        if (got <= 0) {
            break;
        }
        fwrite(buffer, 1, (size_t)got, stdout);
    }
    status = 0;

out:
    close(isock);
    return status;
}