Token State

A tokenizer can track whether it is outside or inside a word.

token_state.c

#include <stdio.h>

/* Scanner states: the scan is either between words or within one. */
enum TokenState {
    TOKEN_OUTSIDE, /* nothing seen yet, or the last character was a space */
    TOKEN_INSIDE   /* the last character belonged to a word */
};

/**
 * Count the words in a NUL-terminated string.
 *
 * A word is a maximal run of characters other than the space character ' '.
 * Only ' ' separates words; tabs and newlines count as word characters.
 *
 * @param text  String to scan; may be NULL.
 * @return Number of words found; 0 when text is NULL or contains no words.
 */
int countWords(const char *text) {
    /* A missing string has no words; avoid dereferencing a null pointer. */
    if (!text) {
        return 0;
    }

    enum TokenState state = TOKEN_OUTSIDE;
    int count = 0;

    /* Walk with a pointer so no int index can overflow on long input. */
    for (const char *p = text; *p != '\0'; p++) {
        if (*p == ' ') {
            state = TOKEN_OUTSIDE;
        } else if (state == TOKEN_OUTSIDE) {
            /* Count a word once, on the outside -> inside transition. */
            count++;
            state = TOKEN_INSIDE;
        }
    }

    return count;
}

/*
 * Demo driver: selects one of two sample sentences, counts its words with
 * countWords(), and prints the selector together with the count.
 */
int main(void) {
    /* Non-zero selects the three-word sample, zero the two-word sample.
     * NOTE(review): the initializer was blank in the listing (a compile
     * error); 1 is used to exercise the longer sample - confirm the
     * intended value. */
    int useLong = 1;
    const char *text = useLong ? "red blue green" : "red blue";
    int words = countWords(text);

    printf("useLong=%d words=%d\n", useLong, words);
    return 0;
}

#include <stdio.h>

/* Scanner states: the scan is either between words or within one. */
enum TokenState {
    TOKEN_OUTSIDE, /* nothing seen yet, or the last character was a space */
    TOKEN_INSIDE   /* the last character belonged to a word */
};

/**
 * Count the words in a NUL-terminated string.
 *
 * A word is a maximal run of characters other than the space character ' '.
 * Only ' ' separates words; tabs and newlines count as word characters.
 *
 * @param text  String to scan; may be NULL.
 * @return Number of words found; 0 when text is NULL or contains no words.
 */
int countWords(const char *text) {
    /* A missing string has no words; avoid dereferencing a null pointer. */
    if (!text) {
        return 0;
    }

    enum TokenState state = TOKEN_OUTSIDE;
    int count = 0;

    /* Walk with a pointer so no int index can overflow on long input. */
    for (const char *p = text; *p != '\0'; p++) {
        if (*p == ' ') {
            state = TOKEN_OUTSIDE;
        } else if (state == TOKEN_OUTSIDE) {
            /* Count a word once, on the outside -> inside transition. */
            count++;
            state = TOKEN_INSIDE;
        }
    }

    return count;
}

/*
 * Demo driver: selects one of two sample sentences, counts its words with
 * countWords(), and prints the selector together with the count.
 */
int main(void) {
    /* Non-zero selects the three-word sample, zero the two-word sample.
     * NOTE(review): the initializer was blank in the listing (a compile
     * error); 1 is used to exercise the longer sample - confirm the
     * intended value. */
    int useLong = 1;
    const char *text = useLong ? "red blue green" : "red blue";
    int words = countWords(text);

    printf("useLong=%d words=%d\n", useLong, words);
    return 0;
}

tokenizer: The state changes when the scan crosses between spaces and word characters.

count: The count increases only when the machine enters a word.