Need help regarding HTTP parsing

Hi..

I've got a program that can connects to a remote server and displays some garbage value and closes the connection.

The code goes like this

#include <stdio.h>
#include <netdb.h>
#include <netinet/in.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>


int main(int argc,char argv[])
{
int connectres,bytes,i=0,sock,len;
struct sockaddr_in connto;

struct hostent *server;
char source[2048];
char buffer[256];
char *outmsg="GET  / HTTP/1.1";
sock=socket(AF_INET,SOCK_STREAM,0);
connto.sin_family=AF_INET;
connto.sin_port=htons(80);
connto.sin_addr.s_addr=inet_addr("74.125.67.100");

if(connectres=connect(sock,(struct sockaddr*)&connto,sizeof(struct sockaddr))==-1)
{
perror("unable to connect");
return -3;
}

printf("connection successful \n");
len=strlen(outmsg);
bytes=send(sock,outmsg,len,0);
printf("bytes sent are %d \n",bytes);

bzero(buffer,256);

do
{
i=recv(sock,buffer,sizeof(buffer),0);
printf("still recieving data \n");
strcat(source,buffer);
printf("%s",source);
bzero(buffer,256);
}while(i!=0);

//closing socket
printf("closing socket \n");

close(sock);
}

can someone pls help me out...
I've really got no clue as to wat to do????

recv() doesn't add null terminators to anything, you can't just use strcpy on raw binary data.

Thanks for your advice.

I came up with the following program

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>

#define MAXMESS 9999

int main(int argc, char** argv)
{
        struct sockaddr_in servaddr;
        struct hostent *hp;
        int sock_id,i=0;
        char message[MAXMESS];
        char msglen;
        char request[] = "GET /about.html HTTP/1.1\r\n               Host:www.google.com\r\n\r\n";



        //Get a socket
        if((sock_id = socket(AF_INET, SOCK_STREAM, 0)) == -1)
        {
                fprintf(stderr,"Couldn't get a socket.\n");
                exit(EXIT_FAILURE);
        }
        else
        {
                fprintf(stderr,"Got a socket.\n");
        }

        //book uses bzero which my man pages say is deprecated
        //the man page said to use memset instead. :-)
        memset(&servaddr,'\0',sizeof(servaddr));

        //get address for google.com
        if((hp = gethostbyname("www.google.com")) == NULL)
        {
             fprintf(stderr,"Couldn't get an address.\n");
             exit(EXIT_FAILURE);
}
        else
       {
        fprintf(stderr,"Got an address.\n");
        }

        //bcopy is deprecated also, using memcpy instead
        memcpy((char *)&servaddr.sin_addr.s_addr, (char *)hp->h_addr, hp->h_length);

        //fill int port number and type
        servaddr.sin_port = htons(80);
        servaddr.sin_family = AF_INET;

        //make the connection
        if(connect(sock_id, (struct sockaddr *)&servaddr, sizeof(servaddr)) != 0)
        {
          fprintf(stderr, "Connection error.\n");
        }
        else
        {
           fprintf(stderr,"Got a connection!!!\n");
        }

        //NOW THE HTTP PART!!!

        //send the request
         read(sock_id,request,strlen(request));

       //read the response
      while(i!=0)
     {
        write(sock_id,message,9999);
        printf("%s",message);
        }
   return 0;
   }

but i am still not able to display the source code of the webpage.Can someone tell me wat i'm doing wrong.

You are using 'read' to send and 'write' to receive, this seems backwards.

And you still have not added any null terminators! printf will not know how long the string is! And the receiving loop will loop infinitely since you never change i. Actually, the receive loop will never start since i starts at zero, but would loop infinitely if it wasn't.

ssize_t len=recv(sock_id, message, MAXMESS-1);
  if(len <= 0) break;
  message[len]='\0'; // Null terminator!  printf needs one!  It says where the string ends!
  printf("%s\n", message);

Thanks for the reply.I eventually came up with this program as a result of a few modifications

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>

#define MAXBUFF 99999

int main(int argc, char** argv)
{
         struct sockaddr_in servaddr;
         struct hostent *hp;
         int sock_id,i=1;
         int len;
         char buffer[MAXBUFF],message[9999];
         char msglen;
         char request[] = "GET /en/index.html HTTP/1.1\r\nHost:bandvalley.com\r\nUser-Agent:HTMLGET 

1.1\r\n\r\n";

                                       //Get a socket

        if((sock_id = socket(AF_INET, SOCK_STREAM, 0)) == -1)
           {
                fprintf(stderr,"Couldn't get a socket.\n");
                exit(EXIT_FAILURE);
           }

        else
        {
               fprintf(stderr,"Got a socket.\n");
        }

                                       //book uses bzero which my man pages say is deprecated
                                       //the man page said to use memset instead. :-)
        memset(&servaddr,'\0',sizeof(servaddr));

                                      //get address for bandvalley.com
        if((hp = gethostbyname("174.36.228.144")) == NULL)
        {
            fprintf(stderr,"Couldn't get an address.\n");
            exit(EXIT_FAILURE);
        }
        else
        {
           fprintf(stderr,"Got an address.\n");
        }

                                       //bcopy is deprecated also, using memcpy instead
        memcpy((char *)&servaddr.sin_addr.s_addr, (char *)hp->h_addr, hp->h_length);

                                        //fill int port number and type
        servaddr.sin_port = htons(80);
        servaddr.sin_family = AF_INET;

                                        //make the connection
        if(connect(sock_id, (struct sockaddr *)&servaddr, sizeof(servaddr)) != 0)
        {
          fprintf(stderr, "Connection error.\n");
        }
        else
        {
           fprintf(stderr,"Got a connection!!!\n");
        }

                                          //NOW THE HTTP PART!!!

                                          //send the request

      send(sock_id,request,strlen(request),0);



     //read the response
     if(i==1)
     {
      do
      {
      len=recv(sock_id,buffer,MAXBUFF-1,0);
      buffer[len]='\0';
      printf("%s",buffer);
      }while(i=1);
     }
     else
    {
      return 0;
      }
    }

but it still doesnt get the last two lines of code.I know my mistake also.
i's value never changes.But I dont know the exact size of the page source so how do I correct this error.

Any advices on this???

You don't have to know the exact size of the page. You just keep receiving data until recv() returns less than or equal to zero.

I set the null terminator after checking its return value since buffer[-1]='\0' writes out of array bounds. This may cause a segmentation fault, or at the very least, mess up other variables.