#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#include <alloca.h>

#define MAXLINE 1024

/*
 * Resolve a host/service pair into a list of socket addresses.
 *
 * hostname   - host name or numeric address string handed to getaddrinfo()
 * service    - service name or decimal port string handed to getaddrinfo()
 * family     - address family hint (e.g. AF_UNSPEC, AF_INET)
 * sockettype - socket type hint (e.g. SOCK_STREAM)
 *
 * Returns the head of the addrinfo list on success; the caller releases it
 * with freeaddrinfo().  Returns NULL when getaddrinfo() fails; the
 * getaddrinfo() error code itself is not propagated.
 */
struct addrinfo *host_serv(const char *hostname,const char *service,int family,int sockettype)
{
  struct addrinfo hints,*res = NULL;

  /* Clear every hint field so the members not set below are "unspecified". */
  memset(&hints,0,sizeof(hints));
  hints.ai_flags = AI_CANONNAME;   /* ask for the canonical host name too */
  hints.ai_family = family;
  hints.ai_socktype = sockettype;

  if(getaddrinfo(hostname,service,&hints,&res) != 0)
   return NULL;

  return res;
}

/*
 * Open a TCP connection to host:serv.
 *
 * host - host name or numeric address string
 * serv - service name or decimal port string
 *
 * Every address returned by host_serv() is tried in order until one
 * connect() succeeds.
 *
 * Returns the connected socket descriptor.  When the name cannot be resolved
 * or no address accepts the connection, a message is printed and the process
 * terminates with exit(-1); the function does not return in that case.
 */
int tcp_connect(const char* host,const char *serv)
{
 int sockfd = -1;
 struct addrinfo *res,*ressave;

 ressave = host_serv(host,serv,AF_UNSPEC,SOCK_STREAM);
 if(ressave == NULL)
 {
  /* NOTE(review): host_serv() reports failure only as NULL, so errno may not
     describe the resolver error -- confirm before relying on this text. */
  printf("tcp_connect error for %s,%s:%s\n",host,serv,strerror(errno));
  exit(-1);
 }

 /* The for-step advances res on every path, including the "continue" taken
    when socket() fails, so an unusable address can never stall the loop. */
 for(res = ressave; res != NULL; res = res->ai_next)
 {
  sockfd = socket(res->ai_family,res->ai_socktype,res->ai_protocol);
  if(sockfd < 0)
   continue;

  if(connect(sockfd,res->ai_addr,res->ai_addrlen) == 0)
   break;                 /* connected: keep this descriptor */

  close(sockfd);          /* this address failed; try the next one */
  sockfd = -1;
 }

 if(res == NULL)          /* ran off the end of the list: nothing connected */
 {
  freeaddrinfo(ressave);
  printf("tcp_connect error for %s,%s",host,serv);
  exit(-1);
 }

 freeaddrinfo(ressave);
 return sockfd;
}

/*
 * Write all len bytes of buf to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 if write() fails or makes no progress.
 */
static int write_fully(int fd,const char *buf,size_t len)
{
 while(len > 0)
 {
  ssize_t done = write(fd,buf,len);
  if(done < 0)
  {
   if(errno == EINTR)
    continue;
   return -1;
  }
  if(done == 0)
   return -1;
  buf += done;
  len -= (size_t)done;
 }
 return 0;
}

/*
 * Fetch a document over HTTP/1.0 from port 80 of host and copy the raw
 * response (headers and body) to the descriptor fd.
 *
 * host - server to connect to
 * file - path relative to the server root, without the leading '/';
 *        NULL requests "/"
 * fd   - descriptor that receives the response bytes
 *
 * The request text is also echoed to standard output before it is sent.
 *
 * Returns 0 on success, -1 when the request does not fit the request buffer
 * or when reading from the socket / writing to either descriptor fails.
 * Connection failures are handled inside tcp_connect().
 */
int read_html(const char *host,const char *file,int fd)
{
 char recvline[256];
 char line[256];
 int sockfd,m;
 int status = 0;
 ssize_t n;

 if(file != NULL)
  m=snprintf(line,sizeof(line),"GET /%s HTTP/1.0\r\n\r\n",file);
 else                   //if there is no file specified, the server root is requested.
  m=snprintf(line,sizeof(line),"GET / HTTP/1.0\r\n\r\n");

 /* snprintf() returns the length the full text would have had; sending that
    many bytes from a truncated buffer would read past the end of line. */
 if(m < 0 || (size_t)m >= sizeof(line))
 {
  fprintf(stderr,"read_html error: request path is too long\n");
  return -1;
 }

 sockfd = tcp_connect(host,"80");
 printf("%s\n",line);

 if(write_fully(sockfd,line,(size_t)m) != 0)
  status = -1;

 while(status == 0)
 {
  n = read(sockfd,recvline,sizeof(recvline));
  if(n == 0)              /* peer closed the connection: response complete */
   break;
  if(n < 0)
  {
   if(errno == EINTR)
    continue;
   status = -1;
   break;
  }
  /* Forward exactly the bytes received, not the whole buffer. */
  if(write_fully(fd,recvline,(size_t)n) != 0)
   status = -1;
 }

 close(sockfd);
 return status;
}

/*
 * Print the canonical name and dotted-quad address of every IPv4 (AF_INET)
 * entry in an addrinfo list.  Entries of other families are skipped.
 *
 * test - head of the list to print; a NULL list only prints a notice
 *
 * Always returns NULL; the pointer return type carries no information.
 */
void *printaddr_ip(struct addrinfo *test)
{
 if(test == NULL)
 {
  printf("the input parameter addrinfo is NULL");
  return NULL;
 }

 /* Advance in the for-step so that skipped entries cannot stall the walk. */
 for(; test != NULL; test = test -> ai_next)
 {
  if(test -> ai_family != AF_INET)
   continue;

  struct sockaddr_in *sin = (struct sockaddr_in *) test -> ai_addr;
  /* ai_canonname may be a null pointer, which must not be passed to %s. */
  const char *name = (test -> ai_canonname != NULL) ? test -> ai_canonname : "(null)";
  printf("canonname:%s  IP:%s\n",name,inet_ntoa( sin -> sin_addr));
 }
 return NULL;
}

/*
 * Return the number of characters in str before its terminating '\0'.
 *
 * str - NUL-terminated string; a NULL pointer is treated as an empty string
 *
 * The result is narrowed to int, so it is only meaningful for strings
 * shorter than INT_MAX characters.
 */
int getlength(const char* str)
{
 if(str == NULL)
  return 0;
 return (int)strlen(str);
}

/*
 * Split a URL-like string into its host part and its path part.
 *
 * hostfile - input such as "http://host/dir/page.html" or "host/page.html";
 *            a leading "http://" or "ftp://" is skipped
 * result   - receives the host (everything before the first '/')
 * file     - receives the text after the first '/', or an empty string when
 *            there is no '/'; may be NULL if the path is not wanted
 *
 * Returns result, or NULL when hostfile or result is NULL.
 *
 * Both outputs are filled with strcpy(), so the caller must supply buffers
 * of at least strlen(hostfile) + 1 bytes each.
 */
char *gethost(const char* hostfile,char *result,char *file)
{
 static const char *const protocol[]={"http://","ftp://"};  //example protocol prefix
 size_t i;
 char *end;

 if( result == NULL || hostfile == NULL)
  return NULL;

 for(i=0;i< sizeof(protocol)/sizeof(protocol[0]);i++)
 {
  /* Compare the prefix's own length; sizeof on the array element would give
     the size of a pointer, not of the string. */
  size_t len = strlen(protocol[i]);
  if(strncmp(hostfile,protocol[i],len) == 0)
  {
   hostfile += len;       //skip the matched protocol prefix
   break;
  }
 }

 strcpy(result,hostfile);
 end = strchr(result,'/');
 if( end != NULL)        //found the host/file separator
 {
  if( file != NULL)
   strcpy(file,end+1);
  *end = '\0';           //cut result down to the host part
 }
 else if( file != NULL)
  file[0]='\0';

 return result;
}
/*
 * Entry point: gethtml htmlpath
 *
 * Splits the single argument into host and file with gethost(), then copies
 * the HTTP response for that file to standard output via read_html().
 *
 * Returns 0 on success and -1 on a usage error, an over-long argument, or a
 * failure reported by gethost()/read_html().
 */
int main(int argc,char *argv[])
{
 char result[MAXLINE],file[MAXLINE];

 if(argc != 2 )
 {
  printf("usage: ./gethtml htmlpath\n");
  return -1;
 }

 /* gethost() fills result and file with strcpy(), so the argument has to fit
    in MAXLINE bytes including its terminator. */
 if(strlen(argv[1]) >= MAXLINE)
 {
  fprintf(stderr,"gethtml: htmlpath is too long (at most %d characters)\n",MAXLINE-1);
  return -1;
 }

 if(gethost(argv[1],result,file) == NULL)
  return -1;

 if(read_html(result,file,STDOUT_FILENO) != 0)
  return -1;

 return 0;
}
转载请注明来源:Leoncom-《unix socket取得网页html源文件》
Trackback

no comments until now

Add your comment now