/* -*- linux-c -*- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <unistd.h>

#define min(a,b) ( ( (a) > (b)) ? (b) : (a))
#define max(a,b) ( ( (a) < (b)) ? (b) : (a))

int parse_url (char *, char *, int *, char *);
int make_outfile (char *, char *, int);
void mkdirp (char *);
int decode_header (int, void *);


int main (int argc, char *argv[])
{
	char url[1024];
	char hostname[64];
	int port;
	char page[1024];
	char request[512];
	struct hostent *hostinfo;
	struct sockaddr_in caddr;
	int cfd, ofd;
	FILE *ofile;
	char buffer[131072];
	int length;
  
	if (argc > 1) {
		freopen (argv[1], "r", stdin);
	}

	while (fgets (url, 1024, stdin)) {
		if (strchr (url, '\n')) strchr (url, '\n')[0] = 0;

		if (parse_url (url, hostname, &port, page)) {
			fprintf (stderr, "bad url: %s\r\n", url);
			continue;
		}

		ofd = make_outfile (hostname, page, 1);
		if (ofd == -1) {
			perror (url);
			continue;
		}

		cfd = socket (AF_INET, SOCK_STREAM, 0);
		if (cfd == -1) {
			perror (url);
			continue;
		};

		if (! (inet_aton (hostname, (struct in_addr *)&caddr.sin_addr))) {
			if (! (hostinfo = gethostbyname (hostname))) {
#ifdef herror
				herror (url);
#else
				fprintf (stderr, "gethostbyname error: %s\n", url);
#endif
				continue;
			}
			caddr.sin_addr.s_addr = ((unsigned long int *)hostinfo->h_addr)[0];
		}
    
		caddr.sin_port = htons (port);
		caddr.sin_family = AF_INET;
		if (connect (cfd, (struct sockaddr *)&caddr, sizeof (caddr))) {
			perror (url);
			close (ofd);
			close (cfd);
			continue;
		}
		printf ("Downloading: %s\r\n", url);

		ofile = fdopen (ofd, "w");
		if (!ofile) {
			close (ofd);
			close (cfd);
			continue;
		}

		sprintf (request, "GET %s HTTP/1.1\r\n"
			 "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, "
			 "application/vnd.ms-excel, application/msword, "
			 "application/vnd.ms-powerpoint, */*\r\n"
			 "Accept-Language: en-gb\r\n"
			 "User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows 95)\r\n"
			 "Host: %s:%d\r\n"
			 "Connection: close\r\n"
			 "\r\n",
			 page, hostname, port);

		write (cfd, request, strlen (request));

		if (decode_header (cfd, NULL)) {
			close (ofd);
			close (cfd);
			continue;
		}
		
		while ( (length = read (cfd, buffer, 131072))) {
			write (ofd, buffer, length);
		}

		close (cfd);
		close (ofd);
	}
}


/*
 * parse_url ()
 * Takes:
 * url      - Source URL to be parsed
 * hostname - Buffer for hostname to be put into
 * port     - Pointer to an int for the port number to be put into
 * page     - For the page part of the URL
 *
 * Returns:
 * -1       - Failure
 * 0        - Success
 */
int parse_url (char *url, char *hostname, int *port, char *page)
{
	char *idx;
	char *tmp;
	char portstr[8];

	// only http supported for the time being
	if (strncasecmp ("http://", url, 7)) return -1;

	// get the hostname
	idx = url + 7;
	tmp = strchr (idx, ':');
	if (tmp) {
		*tmp = 0;
		strcpy (hostname, idx);
		idx = tmp + 1;

		// now get the port
		if (! (tmp = strchr (idx, '/'))) return -1;
		*tmp = 0;
		*port = atol (idx);
		idx = tmp;
	} else {
		if (! (tmp = strchr (idx, '/'))) return -1;
		*tmp = 0;
		strcpy (hostname, idx);
		idx = tmp;
		
		// default port
		*port = 80;
	}

	// now do the page
	*idx = '/';
	strcpy (page, idx);
	
	return 0;
}


/*
 * make_outfile()
 * Takes:
 * hostname - The host directory to store files in. NULL
 *            for no host directory.
 * page     - The page to be downloaded
 * mode     - The mode to use when creating the output file
 *            1 - Create a directory for each directory in
 *                the URL
 *            2 - The filename is location of the page with
 *                the '/' chars turned into '_' chars.
 *            3 - Use just the target name for the filename.
 *
 * Returns:
 * -1       - Failed
 * fd       - A file descriptor for the output file
 */
int make_outfile (char *hostname, char *page, int mode)
{
	char buffer[1024];
	char *tmp;
	int fd;

	// if the hostname directory is needed prefix the path
	if (hostname) {
		sprintf (buffer, "%s%s", hostname, page);
		mkdirp (hostname);
	} else {
		strcpy (buffer, page + 1);
	}

	// if the page requested is directory (ie ending in '/')
	// give it a name
	if (buffer[strlen (buffer) - 1] == '/') strcat (buffer, "index.html");

	switch (mode) {
	case 1:
		tmp = strrchr (buffer, '/');
		if (tmp) *tmp = 0;
		mkdirp (buffer);
		if (tmp) *tmp = '/';
		break;

	case 2:
		if (hostname) {
			tmp = buffer + strlen (hostname) + 1;
		} else {
			tmp = buffer;
		}

		while ( (tmp = strchr (tmp, '/'))) {
			*tmp = '_';
		}
		break;

	case 3:
		if ( (tmp = strrchr (buffer, '/'))) {
			strcpy (buffer, tmp);
			break;
		}
		break;

	default:
		fprintf (stderr, "mode %d not implemented: %s, %s\r\n", hostname, page);
		return -1;
	}

	fd = open (buffer, O_CREAT | O_TRUNC | O_WRONLY, 0644);
	return fd;
}


/*
 * mkdirp()
 * Takes:
 * path     - The full path to create
 */
void mkdirp (char *path)
{
	char *tmp;
	struct stat fib;

	tmp = strchr (path, '/');
	if (tmp) {
		*tmp = 0;
		if (stat (path, &fib)) {
			mkdir (path, 0755);
		}
		chdir (path);
		*tmp = '/';
		mkdirp (tmp + 1);
		chdir ("..");
	} else {
		mkdir (path, 0755);
	}
}


/*
 * decode_header()
 * Takes:
 * fd       - The file descriptor of the open connection
 * res1     - A pointer to a yet to be defined structure
 *
 * Returns:
 * -1       - Failed
 * 0        - Success
 */
int decode_header (int fd, void *res1)
{
	char buffer[256];
	int idx = 0;

	while (read (fd, &buffer[idx], 1)) {
		if (buffer[idx] == '\r') {
			if (read (fd, &buffer[++idx], 1)) {
				if (buffer[idx] != '\n') return -1;

				buffer[--idx] = 0;
				if (strlen (buffer) == 0) return 0;
				
                                /*
				if (strncasecmp ("Content-Length: ", buffer, 16)) {
					idx = 0;
					continue;
				}

				*clen = atol (&buffer[16]);
				*/
				idx = 0;
				continue;
			} else {
				return -1;
			}
		}
		idx++;
	}

	return -1;
}
