December, 2011

  • The Old New Thing

    2011 year-end link clearance

    • 13 Comments

    Another round of the semi-annual link clearance.

    And, as always, the obligatory plug for my column in TechNet Magazine:

  • The Old New Thing

    Using the MNS_DRAGDROP style: Menu rearrangement

    • 5 Comments

    In order to do drag-drop rearrangement of menus, you need four things, most of which we already know how to do.

    1. Dragging an item out of a menu. Check.
    2. Dropping an item into a menu. Check.
    3. Connecting the drag with the drop.
    4. Rearranging menu items in response to the operation.

    Let's do step 4 first, just to mix things up. And since this is just a demonstration rather than production code, I'm only going to support string menu items of up to 255 characters in length.

    // Moves a string menu item within hmenu: a copy of the item at position
    // uPosFrom is inserted by position at uPosInsertAfter, and then the
    // original is deleted.
    // Demo-quality: only the item's text (read into a 256-TCHAR buffer) and
    // its ID are carried over.
    // Returns the result of the final DeleteMenu, or FALSE if the text could
    // not be read or the copy could not be inserted.
    BOOL MoveMenuItem(HMENU hmenu, UINT uPosFrom, UINT uPosInsertAfter)
    {
     TCHAR szText[256];
     if (!GetMenuString(hmenu, uPosFrom, szText, 256, MF_BYPOSITION)) {
      return FALSE;
     }

     // Insert the copy before deleting anything, so that a failed
     // insertion never loses the menu item.
     const UINT uId = GetMenuItemID(hmenu, uPosFrom);
     if (!InsertMenu(hmenu, uPosInsertAfter, MF_BYPOSITION, uId, szText)) {
      return FALSE;
     }

     // If the original sat past the insertion point, the new copy has
     // shifted it by one position.
     const UINT uPosOriginal =
         (uPosFrom > uPosInsertAfter) ? uPosFrom + 1 : uPosFrom;
     return DeleteMenu(hmenu, uPosOriginal, MF_BYPOSITION);
    }
    

    One thing you might not have noticed is that I inserted the copy before deleting the original. That way, we don't get stuck in the situation where we deleted the original, then the reinsertion fails, and now we've lost the menu item. (We can still get stuck if the deletion of the original fails, but the hope is that that is much more unlikely than the failure of an insertion.)

    Okay, the next part is connecting the drag with the drop. To do that, I'll need some helper COM objects. But first, I'm going to introduce something that I should have introduced earlier: Objects that do nothing! (Just like our scratch program, they start out doing nothing, and then we'll modify them to do something.)

    // dummy data object
    // A data object that contains no data: every request for a format is
    // refused and advise connections are not supported. It exists so that
    // derived classes have a do-nothing IDataObject to start from.
    // The object starts with a reference count of 1 and deletes itself when
    // the count reaches zero.
    class CEmptyDataObject : public IDataObject
    {
    public:
      CEmptyDataObject() : m_cRef(1) { }
      virtual ~CEmptyDataObject() { }

      // *** IUnknown ***

      // Supports IUnknown and IDataObject only; any other interface yields
      // E_NOINTERFACE with *ppvObj set to NULL.
      STDMETHODIMP QueryInterface(REFIID riid, void **ppvObj)
      {
        if (riid == IID_IUnknown || riid == IID_IDataObject) {
          // Single inheritance: both interfaces share one pointer value.
          *ppvObj = static_cast<IDataObject*>(this);
          AddRef();
          return S_OK;
        }
        *ppvObj = NULL;
        return E_NOINTERFACE;
      }

      STDMETHODIMP_(ULONG) AddRef()
      {
        m_cRef++;
        return m_cRef;
      }

      STDMETHODIMP_(ULONG) Release()
      {
        if (--m_cRef == 0) {
          delete this;
          return 0;
        }
        return m_cRef;
      }

      // *** IDataObject ***

      // No formats are available; the output medium is zeroed.
      STDMETHODIMP GetData(FORMATETC *pfe, STGMEDIUM *pmed)
      {
        ZeroMemory(pmed, sizeof(STGMEDIUM));
        return DV_E_FORMATETC;
      }

      STDMETHODIMP GetDataHere(FORMATETC *pfe, STGMEDIUM *pmed)
      {
        return E_NOTIMPL;
      }

      STDMETHODIMP QueryGetData(FORMATETC *pfe)
      {
        return DV_E_FORMATETC;
      }

      // Reports the input format back unchanged except for a NULL target
      // device.
      STDMETHODIMP GetCanonicalFormatEtc(FORMATETC *pfeIn,
                                         FORMATETC *pfeOut)
      {
        *pfeOut = *pfeIn;
        pfeOut->ptd = NULL;
        return DATA_S_SAMEFORMATETC;
      }

      STDMETHODIMP SetData(FORMATETC *pfe, STGMEDIUM *pmed,
                           BOOL fRelease)
      {
        return E_NOTIMPL;
      }

      STDMETHODIMP EnumFormatEtc(DWORD dwDirection,
                                 LPENUMFORMATETC *ppefe)
      {
        *ppefe = NULL;
        return E_NOTIMPL;
      }

      // Advise connections are not supported.
      STDMETHODIMP DAdvise(FORMATETC *pfe, DWORD grfAdv,
                           IAdviseSink *pAdvSink, DWORD *pdwConnection)
      {
        *pdwConnection = 0;
        return OLE_E_ADVISENOTSUPPORTED;
      }

      STDMETHODIMP DUnadvise(DWORD dwConnection)
      {
        return OLE_E_ADVISENOTSUPPORTED;
      }

      STDMETHODIMP EnumDAdvise(LPENUMSTATDATA *ppefe)
      {
        *ppefe = NULL;
        return OLE_E_ADVISENOTSUPPORTED;
      }

    private:
      ULONG m_cRef;   // COM reference count
    };
    

    The CEmpty­Data­Object is simply a data object that contains no data. And here's an equally uninteresting CEmpty­Drop­Target:

    // A drop target that accepts nothing: every IDropTarget method reports
    // E_NOTIMPL, and those with an effect output set it to DROPEFFECT_NONE.
    // It exists so that derived classes have a do-nothing IDropTarget to
    // start from. The object starts with a reference count of 1 and deletes
    // itself when the count reaches zero.
    class CEmptyDropTarget : public IDropTarget
    {
    public:
      CEmptyDropTarget() : m_cRef(1) { }
      virtual ~CEmptyDropTarget() { }

      // *** IUnknown ***

      // Supports IUnknown and IDropTarget only; any other interface yields
      // E_NOINTERFACE with *ppvObj set to NULL.
      STDMETHODIMP QueryInterface(REFIID riid, void **ppvObj)
      {
        if (riid == IID_IUnknown || riid == IID_IDropTarget) {
          // Single inheritance: both interfaces share one pointer value.
          *ppvObj = static_cast<IDropTarget*>(this);
          AddRef();
          return S_OK;
        }
        *ppvObj = NULL;
        return E_NOINTERFACE;
      }

      STDMETHODIMP_(ULONG) AddRef()
      {
        m_cRef++;
        return m_cRef;
      }

      STDMETHODIMP_(ULONG) Release()
      {
        if (--m_cRef == 0) {
          delete this;
          return 0;
        }
        return m_cRef;
      }

      // *** IDropTarget ***

      STDMETHODIMP DragEnter(IDataObject *pdto, DWORD grfKeyState,
                             POINTL pt, DWORD *pdwEffect)
      {
        *pdwEffect = DROPEFFECT_NONE;
        return E_NOTIMPL;
      }

      STDMETHODIMP DragOver(DWORD grfKeyState, POINTL pt, DWORD *pdwEffect)
      {
        *pdwEffect = DROPEFFECT_NONE;
        return E_NOTIMPL;
      }

      STDMETHODIMP DragLeave()
      {
        return E_NOTIMPL;
      }

      STDMETHODIMP Drop(IDataObject *pdto, DWORD grfKeyState,
                        POINTL pt, DWORD *pdwEffect)
      {
        *pdwEffect = DROPEFFECT_NONE;
        return E_NOTIMPL;
      }

    private:
      ULONG m_cRef;   // COM reference count
    };
    

    Okay, now back to item 3: Connecting the drag with the drop. Your initial reaction might be to create a new clipboard format called, say, Dragged­Menu­Item which takes the form of a TYMED_HGLOBAL consisting of a struct like

    // First-attempt payload for a "dragged menu item" clipboard format.
    // HMENU is pointer-sized, so this layout differs between 32-bit and
    // 64-bit processes and cannot safely be passed from one to the other.
    struct MENUANDITEM
    {
     HMENU hmenu;   // menu the item belongs to
     UINT uItem;    // the item being dragged
    };
    

    But once you do that, you already have a problem: What happens if this item is dragged out of a 32-bit process and dropped into a 64-bit process? The size of HMENU is different between the two processes, so the 32-bit and 64-bit MENU­AND­ITEM structures are not compatible. This is an example of how you need to be aware of inter-process communications scenarios when developing persistence formats. In this case, we are passing a pointer-sized object between processes. Although most people think of a persistence format as a file format, here's a case where a persistence format takes the form of an in-memory storage format.

    You might decide to solve this problem by tweaking the structure to accommodate 32-bit and 64-bit Windows:

    struct MENUANDITEM
    {
     __int64 i64Menu;
     UINT uItem;
    
     void SetMenu(HMENU hmenu) { i64Menu = (INT_PTR)hmenu; }
     HMENU GetMenu() const { return (HMENU)(INT_PTR)i64Menu; }
    };
    

    But there's an easier way out: Since we only want to support drag/drop menu editing from within the same menu (we don't care about dragging an item from one menu to another menu), the drag source and drop target reside in the same process, so all we need to do is verify the data object's identity, and if it's our data object, we can consult side data to determine what is being dragged.

    Okay, so let's start with a fresh scratch program, and paste in the following:

    Okay, enough shopping. Now to teach our drop target how to recognize that the data object being dropped on it is our own:

    // Data object used while a menu item is being dragged. It exposes no
    // data formats (that behavior comes from CEmptyDataObject); it merely
    // remembers which menu and which item position the drag started from.
    class CMenuDataObject : public CEmptyDataObject
    {
    public:
      const HMENU m_hmenu;  // menu the item is being dragged out of
      const UINT m_uPos;    // position of the item being dragged

      CMenuDataObject(HMENU hmenu, UINT uPos)
        : m_hmenu(hmenu)
        , m_uPos(uPos)
      {
      }
    };
    
    // The data object of the menu drag currently in progress, or NULL when
    // no menu drag is active. Drop targets compare incoming data objects
    // against this pointer to recognize our own drag.
    CMenuDataObject *g_pdtoDrag;
    

    Our special data object when dragging a menu item merely carries around the menu and item so we can find it later. The magical bit is that we also keep track of the object being dragged. (Exercise: Since this is a demo program, the object is just a global variable. What is the correct way of keeping track of g_pdtoDrag?)

    Now we get to teach our drop target to recognize CMenu­Data­Object and only CMenu­Data­Object:

    // Drop target for one position in a menu. It accepts a drop only when
    // the object being dragged is our own CMenuDataObject and it came from
    // the same menu; everything else is refused.
    class CMenuDropTarget : public CEmptyDropTarget
    {
    private:
      // Value of m_uPosDrag when nothing acceptable is being dragged.
      static const UINT uPosNone = 0xFFFFFFFF;

    public:
      CMenuDropTarget(HMENU hmenu, UINT uPos)
        : m_hmenu(hmenu), m_uPos(uPos), m_uPosDrag(uPosNone) { }

      // Forget any drag that was previously recognized.
      void Reset() { m_uPosDrag = uPosNone; }

      // *** IDropTarget ***
      STDMETHODIMP DragEnter(IDataObject *pdto, DWORD grfKeyState,
                             POINTL pt, DWORD *pdwEffect);
      STDMETHODIMP DragOver(DWORD grfKeyState, POINTL pt, DWORD *pdwEffect);
      STDMETHODIMP DragLeave();
      STDMETHODIMP Drop(IDataObject *pdto, DWORD grfKeyState,
                        POINTL pt, DWORD *pdwEffect);

    private:
      HMENU m_hmenu;   // menu being dropped on
      UINT m_uPos;     // menu item being dropped on
      UINT m_uPosDrag; // menu item being dragged, if from the same menu
                       // else uPosNone
    };
    
    // IDropTarget::DragEnter. Determines whether the object entering the
    // target is our own menu drag (g_pdtoDrag) from the same menu. If so,
    // remembers the dragged item's position in m_uPosDrag; otherwise leaves
    // it at uPosNone. The effect feedback itself is produced by DragOver.
    HRESULT CMenuDropTarget::DragEnter(
        IDataObject *pdto, DWORD grfKeyState, POINTL pt, DWORD *pdwEffect)
    {
      Reset();

      // Query for the canonical IUnknown so that the identity comparison
      // below sees through any wrapper around the data object. After the
      // Release the pointer is used only for comparison and is never
      // dereferenced.
      IUnknown *punk = NULL;
      if (SUCCEEDED(pdto->QueryInterface(IID_PPV_ARGS(&punk))) && punk) {
        punk->Release();
      } else {
        // Do not trust whatever a failed QueryInterface left behind.
        punk = NULL;
      }

      // Requiring a non-NULL punk guarantees that g_pdtoDrag is non-NULL
      // before it is dereferenced (no menu drag in progress means
      // g_pdtoDrag == NULL).
      if (punk != NULL && punk == g_pdtoDrag &&
          g_pdtoDrag->m_hmenu == m_hmenu) {
        m_uPosDrag = g_pdtoDrag->m_uPos;
      } else {
        m_uPosDrag = uPosNone;
      }
      return DragOver(grfKeyState, pt, pdwEffect);
    }
    

    The job of CMenu­Drop­Target::­Drag­Enter is to determine whether the item being dragged is a menu item from the same menu. We detect that the object being dragged is g_pdtoDrag by first querying for the canonical unknown, to remove any layers of wrapping COM may have placed around the object. We compare this against g_pdtoDrag, which is a bit of a cheat; more properly we should call g_pdtoDrag->Query­Interface to get the canonical unknown for g_pdtoDrag, but we can cheat because we know that CMenu­Data­Object is singly-derived from IUnknown and that it does not support aggregation (and therefore it is its own canonical unknown). (Exercise: Why is it okay to use punk after releasing it?)

    Anyway, if the item is confirmed to be our item after all, then we copy the menu item position so we can move it on the drop.

    // IDropTarget::DragOver. Gives drop feedback: when a drag from our own
    // menu has been recognized, the caller's allowed effects are narrowed
    // to DROPEFFECT_MOVE; otherwise the drop is refused.
    HRESULT CMenuDropTarget::DragOver(
        DWORD grfKeyState, POINTL pt, DWORD *pdwEffect)
    {
      const bool fOurDrag = (m_uPosDrag != uPosNone);
      *pdwEffect = fOurDrag ? (*pdwEffect & DROPEFFECT_MOVE)
                            : DROPEFFECT_NONE;
      return S_OK;
    }
    
    // IDropTarget::DragLeave. The drag has left the target, so forget the
    // item that was being dragged over it.
    HRESULT CMenuDropTarget::DragLeave()
    {
      m_uPosDrag = uPosNone;
      return S_OK;
    }
    

    The Drag­Over and Drag­Leave methods are largely uninteresting. Drag­Over just gives appropriate feedback, and Drag­Leave forgets about the data object that is no longer being dragged over us. The real excitement is in the Drop method.

    // IDropTarget::Drop. Re-validates the dragged object through DragEnter
    // and, if a move is permitted, moves the dragged menu item to this
    // target's position. On return *pdwEffect is DROPEFFECT_MOVE only if
    // the item was actually moved.
    HRESULT CMenuDropTarget::Drop(
        IDataObject *pdto, DWORD grfKeyState, POINTL pt, DWORD *pdwEffect)
    {
      // DragEnter recomputes m_uPosDrag and the permitted effect.
      DragEnter(pdto, grfKeyState, pt, pdwEffect);
      if (*pdwEffect & DROPEFFECT_MOVE) {
        if (!MoveMenuItem(m_hmenu, m_uPosDrag, m_uPos)) {
          // Do not report a move that did not happen.
          *pdwEffect = DROPEFFECT_NONE;
        }
      }
      // The drag is over; do not keep a stale item position around.
      Reset();
      return S_OK;
    }
    

    When the drop happens, we move the menu item. Kind of anticlimactic, isn't it.

    Okay, at this point the WM_MENU­DRAG and WM_MENU­GET­OBJECT handlers are old hat:

    // WM_MENUDRAG handler. Starts an OLE move-only drag for an item of the
    // window's first popup menu, publishing the data object in g_pdtoDrag
    // for the duration of the drag. Always returns MND_CONTINUE.
    LRESULT OnMenuDrag(HWND hwnd, UINT uPos, HMENU hmenu)
    {
     // Only one menu drag at a time, and only from the first popup.
     if (g_pdtoDrag != NULL) {
      return MND_CONTINUE;
     }
     if (hmenu != GetSubMenu(GetMenu(hwnd), 0)) {
      return MND_CONTINUE;
     }

     g_pdtoDrag = new(std::nothrow) CMenuDataObject(hmenu, uPos);
     if (g_pdtoDrag == NULL) {
      return MND_CONTINUE;
     }

     IDropSource *pDropSource = new(std::nothrow) CDropSource();
     if (pDropSource != NULL) {
      DWORD dwEffect;
      DoDragDrop(g_pdtoDrag, pDropSource, DROPEFFECT_MOVE, &dwEffect);
      pDropSource->Release();
     }

     // The drag is finished (or never started): retire the data object.
     g_pdtoDrag->Release();
     g_pdtoDrag = NULL;
     return MND_CONTINUE;
    }
    
    // WM_MENUGETOBJECT handler. Hands out a CMenuDropTarget only for the
    // window's first popup menu and only when the mouse is in a gap between
    // items (MNGOF_TOPGAP or MNGOF_BOTTOMGAP). Returns MNGO_NOERROR when the
    // requested interface was obtained, MNGO_NOINTERFACE otherwise.
    // NOTE(review): pmgoi->uPos is passed through unchanged for both the
    // top-gap and bottom-gap cases; confirm that is the intended insertion
    // position for bottom-gap drops.
    LRESULT OnMenuGetObject(HWND hwnd, MENUGETOBJECTINFO *pmgoi)
    {
     if (pmgoi->hmenu != GetSubMenu(GetMenu(hwnd), 0)) {
      return MNGO_NOINTERFACE;
     }
     if (!(pmgoi->dwFlags & (MNGOF_BOTTOMGAP | MNGOF_TOPGAP))) {
      return MNGO_NOINTERFACE;
     }

     IDropTarget *pTarget = new(std::nothrow)
         CMenuDropTarget(pmgoi->hmenu, pmgoi->uPos);
     if (pTarget == NULL) {
      return MNGO_NOINTERFACE;
     }

     // riid is carried in the structure as an untyped pointer to an IID.
     const HRESULT hr = pTarget->QueryInterface(
         *static_cast<IID*>(pmgoi->riid), &pmgoi->pvObj);
     pTarget->Release();
     return SUCCEEDED(hr) ? MNGO_NOERROR : MNGO_NOINTERFACE;
    }
    
        HANDLE_MSG(hwnd, WM_MENUDRAG, OnMenuDrag);
        HANDLE_MSG(hwnd, WM_MENUGETOBJECT, OnMenuGetObject);
    
    // and change CoInitialize and CoUninitialize
    // to OleInitialize and OleUninitialize, respectively
    

    There is a tricky part in On­Menu­Get­Object, namely that we only return a drop target if the mouse is between items, because it is only when you are between items that you are actually rearranging.

    And there you have it, some menu drag/drop stuff. It was a lot of typing (mostly for those dummy objects), but not a lot of work.

  • The Old New Thing

    Using the MNS_DRAGDROP style: Dropping in

    • 8 Comments

    Last time, we looked at using the MNS_DRAG­DROP style for dragging items out of a menu. Today, we'll look at dropping them in.

    Take the program from last time and make the following additions. First, let's add a second item to the menu.

    // resource header file
    // IDM_MAIN names the menu resource; IDC_CLOCK and IDC_WMP are the
    // command IDs of the "Clock" and "WMP" menu items.
    #define IDM_MAIN 1
    #define IDC_CLOCK 100
    #define IDC_WMP 101
    
    // resource file
    // Main menu: a single "Test" popup containing the Clock and WMP items.
    IDM_MAIN MENU PRELOAD
    BEGIN
        POPUP "&Test"
        BEGIN
            MENUITEM "&Clock", IDC_CLOCK
            MENUITEM "&WMP", IDC_WMP
        END
    END
    
    // scratch.cpp
    // Maps an item of the window's first popup menu to the shell object of
    // a hard-coded file and returns the requested interface on it.
    // *ppvOut is always initialized to NULL. Returns E_NOTIMPL for any other
    // menu or for an unrecognized item; otherwise returns whatever
    // GetUIObjectOfFile returns.
    HRESULT GetMenuObject(HWND hwnd, HMENU hmenu, UINT uPos,
                          REFIID riid, void **ppvOut)
    {
     *ppvOut = NULL;
     if (hmenu != GetSubMenu(GetMenu(hwnd), 0)) {
      return E_NOTIMPL;
     }

     // Pick the file that stands behind the menu item.
     PCWSTR pszFile = NULL;
     switch (GetMenuItemID(hmenu, uPos)) {
     case IDC_CLOCK:
      pszFile = L"C:\\Windows\\clock.avi";
      break;
     case IDC_WMP:
      pszFile = L"C:\\Program Files"
                L"\\Windows Media Player\\wmplayer.exe";
      break;
     }
     if (pszFile == NULL) {
      return E_NOTIMPL;
     }
     return GetUIObjectOfFile(hwnd, pszFile, riid, ppvOut);
    }
    

    Yes, I hard-coded another path. This is a demo, not production code.

    Anyway, it's time to hook up the WM_MENU­GET­OBJECT message:

    // Message cracker for WM_MENUGETOBJECT, for use with HANDLE_MSG:
    // calls fn(hwnd, MENUGETOBJECTINFO*), taking the structure pointer
    // from lParam. wParam is not used.
    #define HANDLE_WM_MENUGETOBJECT(hwnd, wParam, lParam, fn) \
     (fn)((hwnd), (MENUGETOBJECTINFO*)(lParam))
    
    // WM_MENUGETOBJECT handler. When the mouse is on a menu item (not in a
    // gap between items), asks GetMenuObject for the requested interface on
    // the object behind that item and stores it in pmgoi->pvObj.
    // Returns MNGO_NOERROR on success, MNGO_NOINTERFACE otherwise.
    LRESULT OnMenuGetObject(HWND hwnd, MENUGETOBJECTINFO *pmgoi)
    {
     // Gap notifications mean "between items"; those are not handled here.
     if (pmgoi->dwFlags & (MNGOF_BOTTOMGAP | MNGOF_TOPGAP)) {
      return MNGO_NOINTERFACE;
     }

     // riid is carried in the structure as an untyped pointer to an IID.
     const HRESULT hr = GetMenuObject(hwnd, pmgoi->hmenu, pmgoi->uPos,
                                      *static_cast<IID*>(pmgoi->riid),
                                      &pmgoi->pvObj);
     return SUCCEEDED(hr) ? MNGO_NOERROR : MNGO_NOINTERFACE;
    }
        HANDLE_MSG(hwnd, WM_MENUGETOBJECT, OnMenuGetObject);
    

    To handle the WM_MENU­GET­OBJECT message, you convert the hmenu, uPos pair into a COM object, requesting the interface provided by the riid member, and putting the result into the pvObj member. (Exercise: Why is the riid member typed as void * rather than REFIID?)

    When the user tries to drop on a menu item, we just give them the corresponding object in the shell namespace. Notice that I filter out the GAP messages, since they indicate that the user is trying to drop between items rather than on them.

    Run this program, open the Test menu, and drag the Clock menu item onto the WMP menu item. If all goes well (assuming you changed the path for clock.avi to some other AVI file), the AVI file will be opened by Windows Media Player, since that's the behavior of Windows Media Player when you drop an AVI file on it.

    So that's menu drag/drop. It's really not all that exciting. Of course, what people tend to be most interested in is not generic drag/drop for menus but menu customization via drag/drop. That's not something that MNS_DRAG­DROP gives you directly; that's something you need to build yourself out of the building blocks provided.

    We'll snap some blocks together next time.

  • The Old New Thing

    Using the MNS_DRAGDROP style: Dragging out

    • 8 Comments

    Windows 2000 introduced the MNS_DRAG­DROP menu style, which permits drag/drop operations in a menu. Nobody uses this style, probably because it's totally undiscoverable by the end-user. But I'll write a sample program anyway.

    Mind you, I knew nothing about the MNS_DRAG­DROP menu style until I started writing this entry. But I simply read the documentation, which says that if you set this style, you will receive WM_MENU­DRAG and WM_MENU­GET­OBJECT messages. The WM_MENU­DRAG message is sent when the user drags a menu item, so let's go with that first. The documentation says that you get information about the item that was dragged, and then you return a code that specifies whether you want the menu to remain up or whether you want it torn down.

    Simple enough. Let's do it.

    Start with the scratch program, add the function Get­UI­Object­Of­File and the class CDrop­Source, and change the calls to Co­Initialize and Co­Uninitialize into Ole­Initialize and Ole­Uninitialize, respectively. Next, define the menu we're going to play with:

    // resource header file
    // IDM_MAIN names the menu resource; IDC_CLOCK is the command ID of the
    // "Clock" menu item.
    #define IDM_MAIN 1
    #define IDC_CLOCK 100
    
    // resource file
    // Main menu: a single "Test" popup containing the Clock item.
    IDM_MAIN MENU PRELOAD
    BEGIN
        POPUP "&Test"
        BEGIN
            MENUITEM "&Clock", IDC_CLOCK
        END
    END
    

    Now we can add some new code to our scratch program. First, we add a menu to our window and enable drag/drop on it:

    // WM_CREATE handler: turns on the MNS_DRAGDROP style for the window's
    // menu. Returns the result of SetMenuInfo.
    BOOL
    OnCreate(HWND hwnd, LPCREATESTRUCT lpcs)
    {
     MENUINFO menuInfo = { 0 };
     menuInfo.cbSize = sizeof(menuInfo);
     menuInfo.fMask = MIM_STYLE;       // only the style member is being set
     menuInfo.dwStyle = MNS_DRAGDROP;
     return SetMenuInfo(GetMenu(hwnd), &menuInfo);
    }
    
    // InitApp
     // wc.lpszMenuName = NULL;
     wc.lpszMenuName = MAKEINTRESOURCE(IDM_MAIN);
    

    For both dragging and dropping, we need a way to obtain the COM object associated with a menu item, so I'll put them in this common helper function:

    // Maps an item of the window's first popup menu to the shell object of
    // a hard-coded file and returns the requested interface on it.
    // *ppvOut is always initialized to NULL. Returns E_NOTIMPL for any other
    // menu or for an unrecognized item; otherwise returns whatever
    // GetUIObjectOfFile returns.
    HRESULT GetMenuObject(HWND hwnd, HMENU hmenu, UINT uPos,
                          REFIID riid, void **ppvOut)
    {
     *ppvOut = NULL;
     if (hmenu != GetSubMenu(GetMenu(hwnd), 0)) {
      return E_NOTIMPL;
     }

     // Clock is the only item that has an object behind it.
     if (GetMenuItemID(hmenu, uPos) != IDC_CLOCK) {
      return E_NOTIMPL;
     }
     return GetUIObjectOfFile(hwnd, L"C:\\Windows\\clock.avi",
                              riid, ppvOut);
    }
    

    If the menu is our "Test" popup menu, then we know how to map the menu items to COM objects. For now, we have only one item, namely Clock, which corresponds to the C:\Windows\clock.avi¹ file.

    Now we can hook up a handler to the WM_MENU­DRAG message:

    // Message cracker for WM_MENUDRAG, for use with HANDLE_MSG:
    // calls fn(hwnd, uPos, hmenu), taking the item position from wParam
    // and the menu handle from lParam.
    #define HANDLE_WM_MENUDRAG(hwnd, wParam, lParam, fn) \
     (fn)((hwnd), (UINT)(wParam), (HMENU)(lParam))
    
    // WM_MENUDRAG handler. Obtains the data object behind the dragged menu
    // item and runs an OLE drag/drop (copy or link) with it.
    // Returns MND_ENDMENU if the object was dropped somewhere, otherwise
    // MND_CONTINUE so that the menu stays up.
    LRESULT OnMenuDrag(HWND hwnd, UINT uPos, HMENU hmenu)
    {
     IDataObject *pdto;
     if (FAILED(GetMenuObject(hwnd, hmenu, uPos,
                              IID_PPV_ARGS(&pdto)))) {
      return MND_CONTINUE;
     }

     LRESULT lres = MND_CONTINUE;
     IDropSource *pDropSource = new(std::nothrow) CDropSource();
     if (pDropSource != NULL) {
      DWORD dwEffect;
      const HRESULT hrDrag = DoDragDrop(pdto, pDropSource,
                                        DROPEFFECT_COPY | DROPEFFECT_LINK,
                                        &dwEffect);
      if (hrDrag == DRAGDROP_S_DROP) {
       lres = MND_ENDMENU;
      }
      pDropSource->Release();
     }
     pdto->Release();
     return lres;
    }
    

    This function is where the magic happens, but it's really not all that magical. We get the data object for the menu item being dragged and tell OLE to do a drag/drop operation with it. Just to make things interesting, I'll say that the menu should be dismissed if the user dropped the object somewhere; otherwise, the menu remains on the screen.

    Finally, we hook up the message handler to our window procedure:

    HANDLE_MSG(hwnd, WM_MENUDRAG, OnMenuDrag);
    

    And there you have it. A program that calls up a menu with drag enabled. If you drag the item labeled Clock, then the drag/drop operation proceeds as if you were dragging the clock.avi file.

    Next time, we'll look at the drop half of drag and drop.

    Footnote

    ¹ I hard-coded the clock.avi file for old time's sake. Yes, I know the file is no longer included with Windows. That'll teach people to use hard-coded paths!

  • The Old New Thing

    Introducing the for-if anti-pattern

    • 40 Comments

    Over the years, I've seen a bunch of coding anti-patterns. I figured maybe I'll share a few.

    Today, I'll introduce what I'm calling the for-if anti-pattern, also known as "We'll sell you the whole seat, but you'll only need the edge." This is a special case of the for-case anti-pattern, where all but one of the cases is null.

    // Anti-pattern (kept as written): the loop visits all 100 values of i,
    // but the body only ever does anything when i is 42.
    for (int i = 0; i < 100; i++) {
      if (i == 42) { do_something(i); }
    }
    

    This can naturally be simplified to

    // The same call, made directly without the loop.
    do_something(42);
    

    The for-if anti-pattern arises in many forms. For example:

    // Anti-pattern (kept as written): enumerates every file in the current
    // directory just to find the one named desktop.ini (case-insensitive).
    foreach (string filename in Directory.GetFiles("."))
    {
        if (filename.Equals("desktop.ini", StringComparison.OrdinalIgnoreCase))
        {
            return new StreamReader(filename);
        }
    }
    

    This enumerates all the files in a directory looking for a specific one, and if it's found, it returns a stream on it. The slightly-less-crazy version would be

    // Asks for the one file directly instead of enumerating the directory.
    // The file can still disappear between the Exists check and the open.
    if (File.Exists("desktop.ini"))
    {
        return new StreamReader("desktop.ini");
    }
    

    Note that both versions of the code fragment have the same race condition: If the file desktop.ini initially exists but gets deleted before you get around to creating a new Stream­Reader, you will get a File­Not­Found­Exception.

    One final example:

    // Anti-pattern (kept as written): walks every key of the hashtable
    // looking for the one key that is about to be looked up anyway.
    // NOTE: o is typed as object, so == here is a reference comparison,
    // not a string-value comparison.
    foreach (object o in hashtable.Keys)
    {
        if (o == "target") return hashtable["target"];
    }
    

    Also known as

    // The same lookup, performed directly by key.
    return hashtable["target"];
    

    I bet these people hate going to the library to get a book by title, because it takes so darn long: They go up to the librarian and say, "Please give me all the books you have," and then they fill up their cart with thousands of books, then sit in the corner saying, "Nope, the title of this book is wrong. Nope, not that one either. Still the wrong title. How about this book? Nope, not this one either. Man, this is taking forever..."

  • The Old New Thing

    Celebrating the end of the gluttony season, but the effects linger

    • 11 Comments

    The Washington State Ferry system has reduced the rated carrying capacity of its fleet because people have gotten fatter: The average weight of an adult passenger has been officially revised from 160 pounds to 185 pounds. (That's from 11 stone 6 to 13 stone 3 in the UK, or from 73kg to 84kg for the rest of the world.)

    This has happened before: In 1999, the rated capacity of Washington State ferries dropped when the previous method for determining seating density was abandoned due to passengers' big butts. (I recall that The Seattle Times printed a ruler next to the article so that readers could assess their own situation.)

    In other news: Many hospitals, such as our own University of Washington Medical Center, have installed ceiling lifts in all their rooms due to the high number of obese patients.

    (The gluttony season begins on Hallowe'en and ends on Christmas, as officially declared by me.)

  • The Old New Thing

    Why is the file size reported incorrectly for files that are still being written to?

    • 28 Comments

    The shell team often gets questions like these from customers:

    Attached please find a sample program which continuously writes data to a file. If you open the folder containing the file in Explorer, you can see that the file size is reported as zero. Even manually refreshing the Explorer window does not update the file size. Even the dir command shows the file size as zero. On the other hand, calling Get­File­Size reports the correct file size. If I close the file handle, then Explorer and the dir command both report the correct file size. We can observe this behavior on Windows Server 2008 R2, but on Windows Server 2003, the file sizes are updated in both Explorer and dir. Can anybody explain what is happening?
    We have observed that Windows gives the wrong file size for files being written. We have a log file that our service writes to, and we like to monitor the size of the file by watching it in Explorer, but the file size always reports as zero. Even the dir command reports the file size as zero. Only when we stop the service does the log file size get reported correctly. How can we get the file size reported properly?
    We have a program that generates a large number of files in the current directory. When we view the directory in Explorer, we can watch the files as they are generated, but the file size of the last file is always reported as zero. Why is that?

    Note that this is not even a shell issue. It's a file system issue, as evidenced by the fact that a dir command exhibits the same behavior.

    Back in the days of FAT, all the file metadata was stored in the directory entry.

    The designers of NTFS had to decide where to store their metadata. If they chose to do things the UNIX way, the directory entry would just be a name and a reference to the file metadata (known in UNIX-land as an inode). The problem with this approach is that every directory listing would require seeking all over the disk to collect the metadata to report for each file. This would have made NTFS slower than FAT at listing the contents of a directory, a rather embarrassing situation.

    Okay, so some nonzero amount of metadata needs to go into the directory entry. But NTFS supports hard links, which complicates matters since a file with multiple hard links has multiple directory entries. If the directory entries disagree, who's to say which one is right? One way out would be to try very hard to keep all the directory entries in sync and to make the chkdsk program arbitrarily choose one of the directory entries as the "correct" one in case a conflict is discovered. But this also means that if a file has a thousand hard links, then changing the file size would entail updating a thousand directory entries.

    That's where the NTFS folks decided to draw the line.

    In NTFS, file system metadata is a property not of the directory entry but rather of the file, with some of the metadata replicated into the directory entry as a tweak to improve directory enumeration performance. Functions like Find­First­File report the directory entry, and by putting the metadata that FAT users were accustomed to getting "for free", they could avoid being slower than FAT for directory listings. The directory-enumeration functions report the last-updated metadata, which may not correspond to the actual metadata if the directory entry is stale.

    The next question is where and how often this metadata replication is done; in other words, how stale is this data allowed to get? To avoid having to update a potentially unbounded number of directory entries each time a file's metadata changed, the NTFS folks decided that the replication would be performed only from the file into the directory entry that was used to open the file. This means that if a file has a thousand hard links, a change to the file size would be reflected in the directory entry that was used to open the file, but the other 999 directory entries would contain stale data.

    As for how often, the answer is a little more complicated. Starting in Windows Vista (and its corresponding Windows Server version which I don't know but I'm sure you can look up, and by "you" I mean "Yuhong Bao"), the NTFS file system performs this courtesy replication when the last handle to a file object is closed. Earlier versions of NTFS replicated the data while the file was open whenever the cache was flushed, which meant that it happened every so often according to an unpredictable schedule. The result of this change is that the directory entry now gets updated less frequently, and therefore the last-updated file size is more out-of-date than it already was.

    Note that even with the old behavior, the file size was still out of date (albeit not as out of date as it is now), so any correctly-written program already had to accept the possibility that the actual file size differs from the size reported by Find­First­File. The change to suppress the "bonus courtesy updates" was made for performance reasons. Obviously, updating the directory entries results in additional I/O (and forces a disk head seek), so it's an expensive operation for relatively little benefit.

    If you really need the actual file size right now, you can do what the first customer did and call Get­File­Size. That function operates on the actual file and not on the directory entry, so it gets the real information and not the shadow copy. Mind you, if the file is being continuously written-to, then the value you get is already wrong the moment you receive it.

    Why doesn't Explorer do the Get­File­Size thing when it enumerates the contents of a directory so it always reports the accurate file size? Well, for one thing, it would be kind of presumptuous of Explorer to second-guess the file system. "Oh, gosh, maybe the file system is lying to me. Let me go and verify this information via a slower alternate mechanism." Now you've created this environment of distrust. Why stop there? Why not also verify file contents? "Okay, I read the first byte of the file and it returned 0x42, but I'm not so sure the file system isn't trying to trick me, so after reading that byte, I will open the volume in raw mode, traverse the file system data structures, and find the first byte of the file myself, and if it isn't 0x42, then somebody's gonna have some explaining to do!" If the file system wants to lie to us, then let the file system lie to us.

    All this verification takes an operation that could be done in 2 + N/500 I/O operations and slows it down to 2 + N/500 + 3N operations. And you've reintroduced all the disk seeking that all the work was intended to avoid! (And if this is being done over the network, you can definitely feel a 1500× slowdown.) Congratulations, you made NTFS slower than FAT. I hope you're satisfied now.

    If you were paying close attention, you'd have noticed that I wrote that the information is propagated into the directory when the last handle to the file object is closed. If you call Create­File twice on the same file, that creates two file objects which refer to the same underlying file. You can therefore trigger the update of the directory entry from another program by simply opening the file and then closing it.

    // Opens the named file with no access rights and full sharing, then
    // immediately closes the handle. Nothing is read or written; the point
    // is the open/close itself. A file that cannot be opened is silently
    // ignored.
    void UpdateFileDirectoryEntry(__in PCWSTR pszFileName)
    {
        const DWORD dwNoAccess = 0;     // don't require any access at all
        const DWORD dwShareAll =
            FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;

        HANDLE hTemp = CreateFileW(pszFileName, dwNoAccess, dwShareAll,
                                   NULL,            // lpSecurityAttributes
                                   OPEN_EXISTING,
                                   0,               // dwFlagsAndAttributes
                                   NULL);           // hTemplateFile
        if (hTemp == INVALID_HANDLE_VALUE) {
            return;
        }
        CloseHandle(hTemp);
    }
    

    You can even trigger the update from the program itself. You might call a function like this every so often from the program generating the output file:

    // Reopens the file behind hFile as a second handle and immediately
    // closes that second handle. The caller's hFile is left open. If the
    // reopen fails, nothing happens.
    void UpdateFileDirectoryEntry(__in HANDLE hFile)
    {
        // Share everything so the reopen does not conflict with existing handles.
        const DWORD dwShareAll =
            FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;

        HANDLE hSecond = ReOpenFile(hFile,
                                    0,           // dwDesiredAccess: no access needed
                                    dwShareAll,  // dwShareMode
                                    0);          // dwFlags
        if (hSecond == INVALID_HANDLE_VALUE) {
            return;                              // reopen failed; nothing to close
        }

        CloseHandle(hSecond);
    }
    

    If you want to update all file directory entries (rather than a specific one), you can build the loop yourself:

    // functions ProcessOneName and EnumerateAllNames
    // incorporated by reference.
    
    // Hands pszFileName and UpdateFileDirectoryEntry to EnumerateAllNames.
    // NOTE(review): EnumerateAllNames is not shown here; presumably it
    // invokes the callback once per enumerated name -- confirm against its
    // definition. UpdateFileDirectoryEntry is overloaded (PCWSTR and HANDLE),
    // so the overload used is determined by EnumerateAllNames's callback
    // parameter type.
    void UpdateAllFileDirectoryEntries(__in PCWSTR pszFileName)
    {
        EnumerateAllNames(pszFileName, UpdateFileDirectoryEntry);
    }
    

    Armed with this information, you can now give a fuller explanation of why Read­Directory­ChangesW does not report changes to a file until the handle is closed. (And why it's not a bug in Read­Directory­ChangesW.)

    Bonus chatter: Mind you, the file system could expose a flag to a Find­First­File-like function that means "Accuracy is more important than performance; return data that is as up-to-date as possible." The NTFS folks tell me that implementing such a flag wouldn't be all that hard. The real question is whether anybody would bother to use it. (If not, then it's a bunch of work for no benefit.)

    Bonus puzzle: A customer observed that whether the file size in the directory entry was being updated while the file was being written depended on what directory the file was created in. Come up with a possible explanation for this observation.

    Bonus reading:

  • The Old New Thing

    How do I get the full path for the target of a shortcut file?

    • 16 Comments

    A customer was having trouble obtaining information from a shortcut file. "Here is a sample program that tries to print the target of a shortcut file, but it only gets the file name without a directory. How do I get the full path?"

     IShellLink *psl;
     ... code that loads the IShellLink omitted ...
     TCHAR szPath[MAX_PATH];
     WIN32_FIND_DATA wfd;
     hr = psl->GetPath(szPath, MAX_PATH, &wfd, SLGP_UNCPATH);
     if (SUCCEEDED(hr)) {
      _tprintf(TEXT("Got path: %s\n"), wfd.cFileName);
     }
    

    Recall that the WIN32_FIND_DATA structure contains only a file name in the cFile­Name member. It doesn't have any path information. The WIN32_FIND_DATA structure was originally created for the Find­First­File function, and you already know the directory you are searching in because you passed it to Find­First­File.

    But we're not using the WIN32_FIND_DATA structure in conjunction with Find­First­File, so where do I get the directory from?

    In the customer's excitement over the WIN32_FIND_DATA structure, they forgot about that other parameter: szPath.

     if (SUCCEEDED(hr)) {
      _tprintf(TEXT("Got path: %s\n"), szPath);
     }
    
    The answer was sitting right there in front of them, like an overlooked Christmas present.

    (Don't forget, the target of the shortcut might not be a file, in which case the call to Get­Path will return S_FALSE.)

  • The Old New Thing

    How do I determine programmatically whether a particular language is LTR or RTL?

    • 17 Comments

    Given an LCID, how does one determine whether the language lays out left-to-right or right-to-left? One suggestion was simply to hard-code the list of known right-to-left languages, and if the language isn't on the list, then assume that it is left-to-right. This technique is clearly fragile, because Windows adds support for new languages not infrequently, and if one of those is a right-to-left language, then your table is now out of date. And besides, there are languages whose layout is neither left-to-right nor right-to-left. For example, Chinese and Japanese traditionally lay out top-to-bottom.

    To obtain the text layout direction programmatically, call Get­Locale­Info­Ex with LOCALE_IREADINGLAYOUT.

    And just hope that nobody ever hands you a boustrophedonic language.

  • The Old New Thing

    Deftly solving compatibility problems by withholding information

    • 21 Comments

    One of the continuing compatibility problems that plagued Direct3D was the way it reported texture formats. Historically, the way an application checked which texture formats were available was by calling Enum­Texture­Formats and passing a callback function which is called once for each supported format. The application's callback made some sort of decision based on the information it received. The problem was that any time a new format was added, a bunch of programs ended up not working. Either the new format confused them, or the change in the order of the formats violated some assumption. For example, they may have assumed that if a video card supports R8G8B8 format, then it will always be the first one in the list.

    For a time, the compatibility strategy was to try to detect what flavor of Direct3D the application was designed for and manipulate the list of supported formats in a way to keep that application happy, say by ordering the formats in a particular way or suppressing some formats from the list.

    In Direct3D 8, a new direction was taken: Ask, don't tell.

    Instead of the application being told what formats are available, the application asks, "Do you support format X?" and Direct3D answers "Yes" or "No." This solves both types of problems: Applications never saw a format they didn't expect, because if they didn't expect it, they would never ask for it in the first place. And applications always saw the supported formats in the order they requested, because the application chose what order they asked for them.

    The main casualties of the new design were diagnostic programs which listed technical details of your video card. They no longer were able to get a list of all supported formats; instead, they had to have a table of all the formats and ask for them one at a time.

    There was one company that objected to this new design because they wanted their program to support all texture formats, even the ones that didn't exist at the time the program was written. This is just another variation of Sure, we do that. "Oh look, this video card supports pixel format 826. I'm going to use it! Just one question, though. What's pixel format 826?"

    They broke their problem into two parts and were asking for help with the first part, unaware that even if they managed to solve that part, they were stuck with the impossible second part!

Page 1 of 3 (25 items) 123