Report a bug
If you spot a problem with this page, click here to create a Bugzilla issue.
Improve this page
Quickly fork, edit online, and submit a pull request for this page. Requires a signed-in GitHub account. This works well for small changes. If you'd like to make larger changes you may want to consider using a local clone.


Implements the lexical analyzer, which converts source code into lexical tokens.

Specification: Lexical


Source: lexer.d

struct CompileEnv;
Values to use for various magic identifiers
uint versionNumber;
const(char)[] date;
const(char)[] time;
const(char)[] vendor;
const(char)[] timestamp;
bool previewIn;
`in` means `ref scope const`, accepts rvalues
bool ddocOutput;
collect embedded documentation comments
bool masm;
use MASM inline asm syntax
IdentifierCharLookup cCharLookupTable;
C identifier table (set to the lexer by the C parser)
IdentifierCharLookup dCharLookupTable;
D identifier table
class Lexer;
fprintf(stderr, "Lexer.unittest %d\n", __LINE__);

ErrorSink errorSink = new ErrorSinkStderr;

void test(T)(string sequence, T expected, bool Ccompile = false)
    auto p = cast(const(char)*)sequence.ptr;
    dchar c2;
    Lexer lexer = new Lexer(errorSink);
    assert(expected == lexer.escapeSequence(Loc.initial, p, Ccompile, c2));
    assert(p == sequence.ptr + sequence.length);

test(`'`, '\'');
test(`"`, '"');
test(`?`, '?');
test(`\`, '\\');
test(`0`, '\0');
test(`a`, '\a');
test(`b`, '\b');
test(`f`, '\f');
test(`n`, '\n');
test(`r`, '\r');
test(`t`, '\t');
test(`v`, '\v');

test(`x00`, 0x00);
test(`xff`, 0xff);
test(`xFF`, 0xff);
test(`xa7`, 0xa7);
test(`x3c`, 0x3c);
test(`xe2`, 0xe2);

test(`1`, '\1');
test(`42`, '\42');
test(`357`, '\357');

test(`u1234`, '\u1234');
test(`uf0e4`, '\uf0e4');

test(`U0001f603`, '\U0001f603');

test(`"`, '"');
test(`&lt;`, '<');
test(`&gt;`, '>');
IdentifierCharLookup charLookup;
Character table for identifiers
bool Ccompile;
true if compiling ImportC
ubyte boolsize;
size of a C Bool, default 1
ubyte shortsize;
size of a C short, default 2
ubyte intsize;
size of a C int, default 4
ubyte longsize;
size of C long, 4 or 8
ubyte long_longsize;
size of a C long long, default 8
ubyte long_doublesize;
size of C long double, 8 or D real.sizeof
ubyte wchar_tsize;
size of C wchar_t, 2 or 4
ErrorSink eSink;
send error messages through this interface
CompileEnv compileEnv;
nothrow scope this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset, bool doDocComment, bool commentToken, ErrorSink errorSink, const CompileEnv* compileEnv);
Creates a Lexer for the source code base[begoffset..endoffset+1]. The last character, base[endoffset], must be null (0) or EOF (0x1A).
const(char)* filename used for error messages
const(char)* base source code, must be terminated by a null (0) or EOF (0x1A) character
size_t begoffset starting offset into base[]
size_t endoffset the last offset to read into base[]
bool doDocComment handle documentation comments
bool commentToken comments become TOK.comment's
ErrorSink errorSink where error messages go, must not be null
CompileEnv* compileEnv version, vendor, date, time, etc.
nothrow this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset, bool doDocComment, bool commentToken, bool whitespaceToken, ErrorSink errorSink, const CompileEnv* compileEnv = null);
Alternative entry point for DMDLIB, adds whitespaceToken
nothrow scope @safe this(ErrorSink errorSink);
Used for unittests for a mock Lexer
final nothrow void resetDefineLines(const(char)[] slice);
Reset lexer to lex #define's
final nothrow void nextDefineLine();
Set up for next #define line. p should be at start of next line.
final const pure nothrow @nogc @property @safe bool empty();
Range interface
pure nothrow @safe Token* allocateToken();
Returns: a newly allocated Token.
final nothrow TOK peekNext();
Look ahead at next token's value.
final nothrow TOK peekNext2();
Look 2 tokens ahead at value.
final nothrow void scan(Token* t);
Turn next token in buffer into a token.
Token* t the token to set the resulting Token to
final nothrow Token* peekPastParen(Token* tk);
tk is on the opening (. Look ahead and return token that is past the closing ).
final nothrow TOK hexStringConstant(Token* t);
Lex hex strings: x"0A ae 34FE BD"
nothrow bool parseSpecialTokenSequence();
Parse special token sequence:
Returns: true if the special token sequence was handled
final nothrow void poundLine(ref Token tok, bool linemarker);
Parse line/file preprocessor directive: #line linnum [filespec] Allow __LINE__ for linnum, and __FILE__ for filespec. Accept linemarker format:

linnum [filespec] {flags}

There can be zero or more flags, which are one of the digits 1..4, and must be in ascending order. The flags are ignored.
Token tok token we're on, which is linnum of linemarker
bool linemarker true if line marker format and lexer is on linnum
final nothrow void skipToNextLine(OutBuffer* defines = null);
Scan forward to start of next line.
OutBuffer* defines send characters to defines
static pure nothrow const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph);
Combine two document comments into one, separated by an extra newline if newParagraph is true.
nothrow void printRestOfTokens();
Print the tokens from the current token to the end, while not advancing the parser forward. Useful for debugging.