Last time I blogged about an O(n log n) solution to finding the longest duplicated substring in a given piece of text; I have since found an O(n) algorithm, which I linked to in the comments.

But my blog post used an O(n2) algorithm to read the text from STDIN! It looked something like this:

while (!done) {
grab 2 KB of text
allocate a new buffer which is 2 KB bigger
copy the old text and the new text together into the new buffer
free the old buffer
}

There are two better algorithms:

while (!done) {
grab an amount of text equal to the amount we've grabbed so far
allocate a new buffer which is twice as large as the last buffer
copy the old text and the new text together into the new buffer
free the old buffer
}

And:

while (!done) {
grab 2 KB of text
add this to the end of a linked list of text chunks
}

allocate a buffer whose size is the total size of all the chunks added together
walk the linked list and copy the text of each chunk into the buffer

Both "better" algorithms are O(n) but the latter wastes less space.

Here's the improved code:

struct Chunk {
WCHAR text[1024];
Chunk *next;

Chunk() : next(nullptr) { text[0] = L'\0'; }
};

class DeleteChunksOnExit {
public:
DeleteChunksOnExit() : m_p(nullptr) {}
~DeleteChunksOnExit() {
Chunk *here = m_p;
while (here) {
Chunk *next = here->next;
delete here;
here = next;
}
}
void Set(Chunk *p) { m_p = p; }

private:
Chunk *m_p;
};

...

Chunk *tail = nullptr;

DeleteChunksOnExit dcoe;

size_t total_length = 0;

bool done = false;
while (!done) {
Chunk *buffer = new Chunk();
if (nullptr == buffer) {
LOG(L"Could not allocate memory for buffer");
return nullptr;
}

// this runs on the first pass only
tail = buffer;
dcoe.Set(buffer);
} else {
tail->next = buffer;
tail = buffer;
}

if (fgetws(buffer->text, ARRAYSIZE(buffer->text), stdin)) {
total_length += wcslen(buffer->text);
} else if (feof(stdin)) {
done = true;
} else {
return nullptr;
}
}

// gather all the allocations into a single string
size_t size = total_length + 1;
WCHAR *text = new WCHAR[size];
if (nullptr == text) {
LOG(L"Could not allocate memory for text");
return nullptr;
}
DeleteArrayOnExit<WCHAR> deleteText(text);

WCHAR *temp = text;
for (Chunk *here = head; here; here = here->next) {
if (wcscpy_s(temp, size, here->text)) {
LOG(L"wcscpy_s returned an error");
return nullptr;
}

size_t len = wcslen(here->text);
temp += len;
size -= len;
}

deleteText.Cancel();
return text;
}