A user recently reported on the microsoft.public.inetserver.iis NNTP newsgroup seeing MSNBot causing lots of 406s while indexing static files (like HTML) on his IIS web server. Alas, there is no built in feature in IIS to take a snapshot of a request which match certain criteria (in this case, a request whose User-Agent indicates MSNBot, and we want all the request headers, especially the Accept: header). So, it motivated me to whip together the following ISAPI Filter to do the job.
I've been having this problem with the MSNbot for several months.
Most requests that it sneds end up with IIS sending back a 406 error.
I've read that a 406 is usually caused by the request headers missing an accept-type header.
Is there anything I can do on IIS v5 (Win2K) to correct this problem with these 406 errors? I think it's really hurting my MSN search engine placement
Unfortunately, there is no built-in functionality in IIS that can take a snapshot of the request given some criteria. However, IIS extensibility allows this functionality to be added, as the following ISAPI Filter illustrates.
After installing this filter (either globally to apply to all requests [restart IIS required] or per-site [no restart required]), this filter basically:
In addition to the "usual" sorts of ISAPI Filter behavior, this filter also illustrates a correct way to do something as common as logging while accounting for one of the potential breaking changes in ISAPI Filter behavior from IIS5 to IIS6: ISAPI Filters are no longer singletons on the system.
As always, an ISAPI Filter must log data to a single shared resource (log file) in a thread-safe manner. However, an ISAPI Filter on IIS6 must also be aware that in IIS6 Worker Process Isolation Mode, multiple w3wp.exe can simultaneously run the ISAPI Filter DLL - thus, any process-wide synchronization mechanisms need to be scaled across processes.
Enjoy.
//David
#include <windows.h> #include <httpfilt.h> #include <stdio.h> #define DEFAULT_BUFFER_SIZE 1024 #define MAX_BUFFER_SIZE 4096 #define LOG_FILENAME "C:\\inetpub\\wwwroot\\LogFile" HANDLE g_hLogFile = INVALID_HANDLE_VALUE; CRITICAL_SECTION g_LogFileLock; DWORD OnPreprocHeaders( HTTP_FILTER_CONTEXT * pfc, HTTP_FILTER_PREPROC_HEADERS * pPPH ); BOOL WINAPI GetFilterVersion( HTTP_FILTER_VERSION * pVer ) /*++ Purpose: Required entry point for ISAPI filters. This function is called when the server initially loads this DLL. Arguments: pVer - Points to the filter version info structure Returns: TRUE on successful initialization FALSE on initialization failure --*/ { CHAR szLogFile[ MAX_PATH ]; // // Initialize the logging critical section // InitializeCriticalSection( &g_LogFileLock ); // // TODO: Choose a location where IIS process identity can write to // sprintf( szLogFile, "%s.%d.log", LOG_FILENAME, GetCurrentProcessId() ); // // Initialize the logfile name // g_hLogFile = CreateFile( szLogFile, GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ); if ( g_hLogFile == INVALID_HANDLE_VALUE ) { // // Failed to open log file due to GetLastError(). Bail. // return FALSE; } pVer->dwFilterVersion = HTTP_FILTER_REVISION; lstrcpyn( pVer->lpszFilterDesc, "ISAPI Filter to log request headers based on User-Agent", SF_MAX_FILTER_DESC_LEN ); pVer->dwFlags = SF_NOTIFY_ORDER_HIGH | SF_NOTIFY_PREPROC_HEADERS ; return TRUE; } DWORD WINAPI HttpFilterProc( IN HTTP_FILTER_CONTEXT * pfc, DWORD dwNotificationType, LPVOID pvNotification ) /*++ Purpose: Required filter notification entry point. This function is called whenever one of the events (as registered in GetFilterVersion) occurs. Arguments: pfc - A pointer to the filter context for this notification NotificationType - The type of notification pvNotification - A pointer to the notification data Returns: One of the following valid filter return codes: - SF_STATUS_REQ_FINISHED - SF_STATUS_REQ_FINISHED_KEEP_CONN - SF_STATUS_REQ_NEXT_NOTIFICATION - SF_STATUS_REQ_HANDLED_NOTIFICATION - SF_STATUS_REQ_ERROR - SF_STATUS_REQ_READ_NEXT --*/ { switch ( dwNotificationType ) { case SF_NOTIFY_PREPROC_HEADERS: return OnPreprocHeaders( pfc, (HTTP_FILTER_PREPROC_HEADERS *) pvNotification ); } return SF_STATUS_REQ_NEXT_NOTIFICATION; } DWORD OnPreprocHeaders( HTTP_FILTER_CONTEXT * pfc, HTTP_FILTER_PREPROC_HEADERS * pPPH ) { DWORD dwRet = SF_STATUS_REQ_NEXT_NOTIFICATION; BOOL fRet = FALSE; CHAR pBuf[ DEFAULT_BUFFER_SIZE ]; CHAR pBuf2[ DEFAULT_BUFFER_SIZE ]; CHAR * pszBuf = pBuf; CHAR * pszBuf2 = pBuf2; DWORD cbBuf = DEFAULT_BUFFER_SIZE; DWORD cbBuf2 = DEFAULT_BUFFER_SIZE; CHAR szUserAgent[] = "User-Agent:"; CHAR szUrl[] = "url"; CHAR szMSNBot[] = "msnbot/"; CHAR cbMSNBot = 7; SetLastError( NO_ERROR ); if ( pfc == NULL || pPPH == NULL ) { SetLastError( ERROR_INVALID_PARAMETER ); goto Finished; } fRet = pPPH->GetHeader( pfc, szUserAgent, pszBuf, &cbBuf ); if ( fRet == FALSE ) { if ( GetLastError() == ERROR_INVALID_INDEX ) { // // User-Agent was not given // Since we are looking for a particular user-agent, // not finding on is ok. // OutputDebugString( "Nothing to do.\n" ); SetLastError( NO_ERROR ); goto Finished; } else if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER && cbBuf < MAX_BUFFER_SIZE ) { pszBuf = new CHAR[ cbBuf ]; if ( pszBuf == NULL ) { SetLastError( ERROR_NOT_ENOUGH_MEMORY ); goto Finished; } fRet = pPPH->GetHeader( pfc, szUserAgent, pszBuf, &cbBuf ); if ( fRet == FALSE ) { goto Finished; } OutputDebugString( "Found User-Agent header.\n" ); } else { goto Finished; } } // // At this point, pszBuf points to the value of User-Agent header. // Figure out what to do. // OutputDebugString( pszBuf ); OutputDebugString( "\n" ); // // If the User-Agent matches the MSNBot, log request headers // if ( _strnicmp( pszBuf, szMSNBot, cbMSNBot ) == 0 ) { fRet = pfc->GetServerVariable( pfc, "ALL_RAW", pszBuf2, &cbBuf2 ); if ( fRet == FALSE ) { if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER && cbBuf2 < MAX_BUFFER_SIZE ) { pszBuf2 = new CHAR[ cbBuf2 ]; if ( pszBuf2 == NULL ) { SetLastError( ERROR_NOT_ENOUGH_MEMORY ); goto Finished; } fRet = pfc->GetServerVariable( pfc, "ALL_RAW", pszBuf2, &cbBuf2 ); if ( fRet == FALSE ) { goto Finished; } OutputDebugString( "Read ALL_RAW\n" ); } else { goto Finished; } } // // At this point, pszBuf2 points to the raw request headers // But first, retrieve the URL of this request by reusing pszBuf. // Then append it to disk, thread-safe. // if ( pszBuf != pBuf ) { delete pszBuf; pszBuf = pBuf; } fRet = pPPH->GetHeader( pfc, szUrl, pszBuf, &cbBuf ); if ( fRet == FALSE ) { if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER && cbBuf < MAX_BUFFER_SIZE ) { pszBuf = new CHAR[ cbBuf ]; if ( pszBuf == NULL ) { SetLastError( ERROR_NOT_ENOUGH_MEMORY ); goto Finished; } fRet = pPPH->GetHeader( pfc, szUrl, pszBuf, &cbBuf ); if ( fRet == FALSE ) { goto Finished; } OutputDebugString( "Found URL.\n" ); } else { goto Finished; } } // // At this point, pszBuf points to the raw URL... // OutputDebugString( "URL = " ); OutputDebugString( pszBuf ); OutputDebugString( "\n" ); OutputDebugString( "ALL_RAW =\n" ); OutputDebugString( pszBuf2 ); // // Write the URL and raw headers to disk // if ( g_hLogFile != INVALID_HANDLE_VALUE ) { EnterCriticalSection( &g_LogFileLock ); SetFilePointer( g_hLogFile, 0, NULL, FILE_END ); cbBuf--; fRet = WriteFile( g_hLogFile, pszBuf, cbBuf, &cbBuf, NULL ); cbBuf = 1; fRet = WriteFile( g_hLogFile, "\n", cbBuf, &cbBuf, NULL ); cbBuf2--; fRet = WriteFile( g_hLogFile, pszBuf2, cbBuf2, &cbBuf2, NULL ); LeaveCriticalSection( &g_LogFileLock ); } } SetLastError( NO_ERROR ); Finished: if ( pszBuf != pBuf ) { delete pszBuf; } if ( GetLastError() != NO_ERROR ) { OutputDebugString( "Error!\n" ); dwRet = SF_STATUS_REQ_ERROR; } return dwRet; } BOOL WINAPI TerminateFilter( DWORD dwFlags ) /*++ Optional filter entry point. This function is called by the server before this DLL is unloaded. Arguments: dwFlags - No flags have been defined at this time Returns: Always returns TRUE; --*/ { if ( g_hLogFile != INVALID_HANDLE_VALUE ) { CloseHandle( g_hLogFile ); g_hLogFile = INVALID_HANDLE_VALUE; } DeleteCriticalSection( &g_LogFileLock ); return TRUE; }