224 lines
5.9 KiB
C
224 lines
5.9 KiB
C
#include "tmbstr.h"
|
|
|
|
#include "httpio.h"
|
|
|
|
int
|
|
makeConnection ( HTTPInputSource *pHttp )
|
|
{
|
|
struct sockaddr_in sock;
|
|
struct hostent *pHost;
|
|
|
|
/* Get internet address of the host. */
|
|
if (!(pHost = gethostbyname ( pHttp->pHostName )))
|
|
{
|
|
return -1;
|
|
}
|
|
/* Copy the address of the host to socket description. */
|
|
memcpy (&sock.sin_addr, pHost->h_addr, pHost->h_length);
|
|
|
|
/* Set port and protocol */
|
|
sock.sin_family = AF_INET;
|
|
sock.sin_port = htons( pHttp->nPort );
|
|
|
|
/* Make an internet socket, stream type. */
|
|
if ((pHttp->s = socket (AF_INET, SOCK_STREAM, 0)) == -1)
|
|
return -1;
|
|
|
|
/* Connect the socket to the remote host. */
|
|
if (connect (pHttp->s, (struct sockaddr *) &sock, sizeof( sock )))
|
|
{
|
|
if (errno == ECONNREFUSED)
|
|
return ECONNREFUSED;
|
|
else
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
 * Split a URL of the form [http://]host[:port][/resource] into the fields
 * of pHttp.
 *
 * On success returns 0 and sets:
 *   pHttp->pHostName  - newly allocated copy of the host part
 *   pHttp->nPort      - the explicit port, or 80 when none is given
 *   pHttp->pResource  - newly allocated copy of everything after the
 *                       leading slash(es) that follow host[:port]
 *
 * Returns -1 when the URL contains "://" but does not start with
 * "http://" (compared case-insensitively), when the host part is empty,
 * or when a ':' is not followed by a decimal port in the range 1..65535.
 * The URL is validated completely before anything is allocated, so on
 * failure pHttp is left untouched and nothing is leaked.
 */
int parseURL( HTTPInputSource *pHttp, tmbstr url )
{
    int i, j = 0;
    unsigned long port = 80;   /* default HTTP port */
    ctmbstr pStr;

    pStr = tmbsubstr( url, "://" );

    /* If protocol is there, but not http, bail out, else assume http. */
    if (NULL != pStr)
    {
        if (tmbstrncasecmp( url, "http://", 7 ))
            return -1;
        /* Host starts right after the "://" separator. */
        j = (int)(pStr - url) + 3;
    }

    /* The host runs up to the first ':' or '/', or the end of the string. */
    for (i = j; url[i] && url[i] != ':' && url[i] != '/'; i++) {}
    if (i == j)
        return -1;

    if (url[i] == ':')
    {
        /* A colon delimits the hostname; a port number must follow it.
           Remember where the host ended before walking over the port. */
        int k = i + 1;

        /* ctype functions need an unsigned char value, not a plain char. */
        if (!isdigit( (unsigned char) url[k] ))
            return -1;

        port = 0;
        for (; url[k] && url[k] != '/'; k++)
        {
            if (!isdigit( (unsigned char) url[k] ))
                return -1;
            port = 10 * port + (unsigned long)(url[k] - '0');
            /* Reject as soon as the value leaves the valid TCP port range;
               this also keeps the accumulator from overflowing. */
            if (port > 65535UL)
                return -1;
        }
        if (port == 0)
            return -1;

        /* Get the hostname. */
        pHttp->pHostName = tmbstrndup( &url[j], i - j );
        i = k;
    }
    else
    {
        /* Get the hostname; the default port applies. */
        pHttp->pHostName = tmbstrndup( &url[j], i - j );
    }

    pHttp->nPort = port;

    /* skip past the delimiting slash (we'll add it later ) */
    while (url[i] == '/')
        i++;
    pHttp->pResource = tmbstrdup( url + i );
    return 0;
}
|
|
|
|
|
|
/*
 * Refill in->buffer with the next chunk of data from the socket.
 *
 * When in->s is a positive descriptor, performs one recv() of at most
 * sizeof(in->buffer) bytes, resets in->nextBytePos to 0 and, if the
 * buffer was not filled completely, writes a '\0' after the last byte
 * received.  When there is no socket, only in->nBufSize is cleared.
 *
 * Returns the number of bytes now available in the buffer, which is also
 * stored in in->nBufSize.  Both a closed connection and a recv() error
 * are reported as 0, so a failed read can never be mistaken for a
 * (possibly enormous) amount of buffered data.
 */
int fillBuffer( HTTPInputSource *in )
{
    int received = 0;

    if (0 < in->s)
    {
        received = (int) recv( in->s, in->buffer, sizeof( in->buffer ), 0 );

        /* recv() signals an error with a negative value; treat it like
           end of stream rather than storing it as a buffer size. */
        if (received < 0)
            received = 0;

        in->nextBytePos = 0;
        if ((size_t) received < sizeof( in->buffer ))
            in->buffer[received] = '\0';
    }

    in->nBufSize = received;
    return received;
}
|
|
|
|
|
|
/*
 * Initialise `in` as a Tidy input source for the document at pUrl.
 *
 * Installs the HTTPGetByte / HTTPUngetByte / HTTPIsEOF callbacks, parses
 * the URL, connects to the host, sends an HTTP/1.0 GET request and then
 * consumes the response header so that the next byte read is the first
 * non-whitespace byte after the blank line ending the header (as far as
 * it is contained in the current buffer).
 *
 * Returns:
 *   -1            the URL could not be parsed, the request buffer could
 *                 not be allocated, or the request could not be sent
 *   non-zero      the value returned by makeConnection() when connecting
 *                 failed
 *   otherwise     the result of the last fillBuffer() call made while
 *                 skipping the header: a positive byte count while data
 *                 is arriving, 0 if nothing (more) could be read
 *
 * NOTE(review): the (uint) cast stores the pointer in tis.sourceData
 * through an integer type; confirm that this cannot truncate on
 * platforms where pointers are wider than uint.
 */
int openURL( HTTPInputSource *in, tmbstr pUrl )
{
    int rc = -1;
    int blanks = 0;
    int sendFailed;
    char ch, lastCh = '\0';
    char *getCmd;
    size_t limit;
#ifdef WIN32
    WSADATA wsaData;

    /* 514 == 0x0202 */
    rc = WSAStartup( 514, &wsaData );
#endif

    in->tis.getByte = (TidyGetByteFunc) HTTPGetByte;
    in->tis.ungetByte = (TidyUngetByteFunc) HTTPUngetByte;
    in->tis.eof = (TidyEOFFunc) HTTPIsEOF;
    in->tis.sourceData = (uint) in;
    in->nextBytePos = in->nextUnGotBytePos = in->nBufSize = 0;

    /* Mark the socket as "not open" so that a failure before socket()
       is reached never leaves a stale descriptor behind for closeURL(). */
    in->s = -1;

    /* Without a valid host and resource there is nothing to connect to. */
    if (0 != parseURL( in, pUrl ))
        return -1;

    rc = makeConnection( in );
    if (0 != rc)
        return rc;

    /* The fixed part of the request is 37 characters plus the
       terminator, so 48 spare bytes are enough. */
    getCmd = MemAlloc( 48 + strlen( in->pResource ));
    if (!getCmd)
        return -1;
    sprintf( getCmd, "GET /%s HTTP/1.0\r\nAccept: text/html\r\n\r\n", in->pResource );
    sendFailed = (send( in->s, getCmd, strlen( getCmd ), 0 ) < 0);
    MemFree( getCmd );
    if (sendFailed)
        return -1;

    /* skip past the header information */
    while ( in->nextBytePos >= in->nBufSize
            && 0 < (rc = fillBuffer( in )))
    {
        /* The blank line was already seen at the very end of the
           previous buffer; the fresh buffer is all body. */
        if (1 < blanks)
            break;

        /* Never look beyond the bytes actually received, nor beyond
           the buffer itself. */
        limit = (size_t) in->nBufSize;
        if (limit > sizeof( in->buffer ))
            limit = sizeof( in->buffer );

        for (; in->nextBytePos < limit; in->nextBytePos++ )
        {
            ch = in->buffer[ in->nextBytePos ];
            if (ch == '\r' || ch == '\n')
            {
                if (ch == lastCh)
                {
                    /* Two carriage returns or two newlines in a row,
                       that's good enough */
                    blanks++;
                }
                if (lastCh == '\r' || lastCh == '\n')
                {
                    blanks++;
                }
            }
            else
                blanks = 0;
            lastCh = ch;

            if (1 < blanks)
            {
                /* end of header, scan to first non-white and return */
                while (in->nextBytePos < limit
                       && isspace( (unsigned char) in->buffer[ in->nextBytePos ] ))
                    in->nextBytePos++;
                break;
            }
        }
    }
    return rc;
}
|
|
|
|
|
|
/*
 * Shut down an HTTP input source.
 *
 * Closes the socket when source->s holds a positive descriptor, then
 * resets source->s to -1 and clears tis.sourceData.  On WIN32 builds
 * WSACleanup() is called as well.
 */
void closeURL( HTTPInputSource *source )
{
    if (source->s > 0)
    {
        closesocket( source->s );
    }

    /* Leave the structure in a recognisably "closed" state. */
    source->tis.sourceData = 0;
    source->s = -1;

#ifdef WIN32
    WSACleanup();
#endif
}
|
|
|
|
|
|
/*
 * Return the next byte of the stream, or EndOfStream when no more data
 * is available.
 *
 * Bytes pushed back with HTTPUngetByte() are returned first, most
 * recently pushed byte first.  Otherwise the byte comes from the receive
 * buffer, which is refilled through fillBuffer() once it has been used
 * up.
 *
 * Bytes are returned as values 0..255.  Converting through unsigned char
 * keeps bytes >= 0x80 from being sign-extended into negative numbers on
 * platforms where plain char is signed, where they could otherwise be
 * confused with the end-of-stream marker.
 */
int HTTPGetByte( HTTPInputSource *source )
{
    if (source->nextUnGotBytePos)
        return (unsigned char) source->unGetBuffer[ --source->nextUnGotBytePos ];

    if (0 != source->nBufSize && source->nextBytePos >= source->nBufSize)
    {
        /* The current buffer is used up, fetch the next chunk. */
        fillBuffer( source );
    }

    /* Nothing buffered (never was, or the refill produced no data). */
    if (0 == source->nBufSize || source->nextBytePos >= source->nBufSize)
        return EndOfStream;

    return (unsigned char) source->buffer[ source->nextBytePos++ ];
}
|
|
|
|
/*
 * Push byteValue back onto the input source so that it is handed out
 * again before any further buffered data.
 *
 * At most 16 bytes can be pending at a time; when that many are already
 * stored the call is ignored and the byte is dropped.
 */
void HTTPUngetByte( HTTPInputSource *source, uint byteValue )
{
    /* Only you can prevent buffer overflows */
    if (source->nextUnGotBytePos >= 16)
        return;

    source->unGetBuffer[ source->nextUnGotBytePos ] = (char) byteValue;
    source->nextUnGotBytePos++;
}
|
|
|
|
/*
 * Report whether the input source has been read to its end.
 *
 * Returns no while pushed-back bytes are pending or unread bytes remain
 * in the receive buffer.  If a non-empty buffer has been consumed
 * completely, one refill is attempted through fillBuffer() before
 * deciding.  Returns yes when nothing is left to read.
 */
Bool HTTPIsEOF( HTTPInputSource *source )
{
    /* Pending ungot bytes always count as remaining input. */
    if (source->nextUnGotBytePos)
    {
        return no;
    }

    /* The existing buffer has been used up: try to get another one. */
    if (source->nBufSize != 0
        && !(source->nextBytePos < source->nBufSize))
    {
        fillBuffer( source );
    }

    /* Unread data in the buffer means we are not done; otherwise the
       buffer is empty and the last receive produced nothing. */
    return (source->nextBytePos < source->nBufSize) ? no : yes;
}
|
|
|