Improve this page Quickly fork, edit online, and submit a pull request for this page. Requires a signed-in GitHub account. This works well for small changes. If you'd like to make larger changes you may want to consider using local clone. Page wiki View or edit the community-maintained wiki page associated with this page.

Word Count

This program is the D version of the classic wc (wordcount) C program. It serves to demonstrate how to read files, slice arrays, and simple symbol table management with associative arrays. It has two main code paths, one for smaller files that will fit in memory, and one using buffered file I/O for files that are larger.

import std.stdio;
import std.stream;

int main (string[] args)
{
    int w_total;
    int l_total;
    ulong c_total;
    int[string] dictionary;

    writeln("   lines   words   bytes file");
    foreach (arg; args[1 .. args.length])
    {
        int w_cnt, l_cnt;
        bool inword;

        auto c_cnt = std.file.getSize(arg);
        if (c_cnt < 10_000_000)
        {
            size_t wstart;
            auto input = cast(string)std.file.read(arg);

            foreach (j, c; input)
            {
                if (c == '\n')
                ++l_cnt;
                if (c >= '0' && c <= '9')
                {
                }
                else if (c >= 'a' && c <= 'z' ||
                    c >= 'A' && c <= 'Z')
                {
                    if (!inword)
                    {
                        wstart = j;
                        inword = true;
                        ++w_cnt;
                    }
                }
                else if (inword)
                {
                    auto word = input[wstart .. j];

                    dictionary[word]++;
                    inword = false;
                }
            }
            if (inword)
            {
                auto w = input[wstart .. input.length];
                dictionary[w]++;
            }
        }
        else
        {
            auto f = new BufferedFile(arg);
            string buf;

            while (!f.eof())
            {
                char c;

                f.read(c);
                if (c == '\n')
                ++l_cnt;
                if (c >= '0' && c <= '9')
                {
                    if (inword)
                    buf ~= c;
                }
                else if (c >= 'a' && c <= 'z' ||
                    c >= 'A' && c <= 'Z')
                {
                    if (!inword)
                    {
                        buf.length = 0;
                        buf ~= c;
                        inword = 1;
                        ++w_cnt;
                    }
                    else
                        buf ~= c;
                }
                else if (inword)
                {
                    if (++dictionary[buf] == 1)
                        buf = null;
                    inword = 0;
                }
            }
            if (inword)
            {
                dictionary[buf]++;
            }
        }
        writefln("%8s%8s%8s %s\n", l_cnt, w_cnt, c_cnt, arg);
        l_total += l_cnt;
        w_total += w_cnt;
        c_total += c_cnt;
    }

    if (args.length > 2)
    {
        writefln("--------------------------------------\n%8s%8s%8s total",
        l_total, w_total, c_total);
    }

    writeln("--------------------------------------");

    foreach (word1; dictionary.keys.sort)
    {
        writefln("%3s %s", dictionary[word1], word1);
    }
    return 0;
}
Forums | Comments | Search | Downloads | Home