Ich lege meine Lexer und ich habe Probleme beim Drucken und oder richtig Zahlen zu ihrem Wert zuweisen. Hier ist meine Ausgabe, sollte es integer: "10"
sein, ich glaube, das Problem ist irgendwo um lexer_num
oder lexer_flt
: (einschließlich mehr Informationen wie angefordert.) Ich habe nicht oder weiß wirklich, was ein Hex-Dump ist, so glaube ich nicht ich kann das bieten. Um Ihre Frage zu beantworten, warum v
in lexer_num
ist eine size_t ist, weil sie keine Notwendigkeit für negative Zahlen seit negativ ist, behandelt im Parser nicht der Lexer.Seltsame Zahlen in Lexer
Type: "40" {
Line: "1"
Pos: "0"
Num: "2591542"
Real: "0.000000"
Stri: ""
}
Code:
#define _CRT_SECURE_NO_WARNINGS
#define DEBUG 0
#include "lexer.h"
#include "error.h"
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <ctype.h>
#include <assert.h>
typedef struct lextoken_t {
const char* str;
token_t type;
} lextoken_t;
static const lextoken_t keywords[] = {
// types
{ "int", _int },
{ "double", _dbl },
{ "void", _void },
{ "char", _char },
{ "string", _str },
{ "bool", _bool },
{ "const", _const },
{ "struct", _struct }
};
/* token_new: creates and returns a new token ptr.
** -lexer: a ptr to the lexer.
** -type: the token type.
*/
token_t* token_new(lexer_t* lexer, tk_type type) {
token_t* token = malloc(sizeof(token_t));
token->line = lexer->line;
token->pos = lexer->pos;
token->type = type;
token->integer = 0;
token->flt = 0;
token->string = NULL;
return token;
}
static void token_print(token_t* token) {
if (token == NULL)
printf("Null token");
printf("Type: \"%i\" { \n", token->type);
printf("\tLine: \"%i\"\n", token->line);
printf("\tPos: \"%i\"\n", token->pos);
printf("\tNum: \"%i\"\n", token->integer);
printf("\tReal: \"%f\"\n", token->flt);
printf("\tStri: \"%s\"\n}\n\n", token->string);
}
/* lexer_look: look at the source (ahead) places infront of the lexer->ptr.
** -lexer: a ptr to the lexer to look ahead in.
** -ahead: how far ahead of the ptr to look.
*/
static char lexer_look(lexer_t* lexer, size_t ahead) {
if (lexer->len < lexer->ptr + ahead) {
error_new(lexer->errors, 0, 0, "The lexer tried to index %d out of bounds %d", lexer->ptr + ahead, lexer->len);
return;
}
return lexer->src[lexer->ptr + ahead];
}
static size_t can_adv(lexer_t* lexer, size_t steps) {
if (lexer->ptr + steps <= lexer->len)
return 1;
else
return 0;
}
/* lexer_adv: moves the lexer->ptr (steps) places.
** -lexer: a ptr to the lexer to look ahead in.
** -steps: how far to advance the ptr.
*/
static char lexer_adv(lexer_t* lexer, size_t steps) {
if (!can_adv(lexer, steps))
error_new(lexer->errors, 0, 0, "The lexer tried to move ptr past bounds %d with value of %d", lexer->len, lexer->ptr + steps);
lexer->ptr += steps;
return lexer->src[lexer->ptr];
}
static void new_line(lexer_t* lexer) {
lexer->line = 0;
lexer->pos = 0;
}
static void lexer_nested(lexer_t* lexer) {
lexer_adv(lexer, 2);
char c = lexer_look(lexer, 0);
size_t depth = 1;
while (depth > 0) {
if (!can_adv(lexer, 1))
error_new(lexer->errors, lexer->line, lexer->pos, "Unterminated block comment.");
else if (c == '*' && lexer_look(lexer, 1) == '#') {
lexer_adv(lexer, 2);
depth--;
} else if (c == '#' && lexer_look(lexer, 1) == '*') {
lexer_adv(lexer, 2);
depth++;
} else
c = lexer_adv(lexer, 1);
}
}
static void lexer_comment(lexer_t* lexer) {
if (lexer_look(lexer, 1) == '*')
lexer_nested(lexer);
else {
char c;
while (((c = lexer_look(lexer, 0)) != '\n') && can_adv(lexer, 1))
lexer_adv(lexer, 1);
new_line(lexer);
}
}
static token_t* lexer_str(lexer_t* lexer) {
size_t str_len = 0;
while (true) {
if (!can_adv(lexer, 1)) {
error_new(lexer->errors, lexer->len, lexer->pos, "Unterminated string.");
return NULL;
}
else if (lexer_look(lexer, 1) == '\"') {
lexer_adv(lexer, 2);
break;
}
else {
lexer_adv(lexer, 1);
str_len++;
}
}
char* string = malloc(str_len + 1);
for (size_t idx = 0; idx < str_len; idx++)
string[idx] = lexer->src[lexer->ptr - str_len + idx];
string[str_len] = '\0';
token_t* token = token_new(lexer, _str);
token->string = string;
return token;
}
static token_t* lexer_float(lexer_t* lexer, token_t* token, size_t v) {
size_t places = 0;
double d = v;
if (!isdigit(lexer_look(lexer, 1))) { return token; }
while (lexer->len > 0) {
char c = lexer_look(lexer, 1);
if (isdigit(c)) {
lexer_adv(lexer, 1);
d = (d * 10) + (c - '0');
places++;
} else
break;
}
token->flt = d/(places * 10);
token->string = "";
return token;
}
static token_t* lexer_num(lexer_t* lexer) {
token_t* token = token_new(lexer, _int);
size_t v = 0;
while (can_adv(lexer, 1)) {
char c = lexer_look(lexer, 0);
if (isdigit(c)) {
v = (v * 10) + (c - '0');
lexer_adv(lexer, 1);
} else if (c == '.') {
lexer_adv(lexer, 1);
return lexer_float(lexer, token, v);
} else {
break;
}
}
token->integer = v;
token->string = "";
return token;
}
static token_t* lexer_ident(lexer_t* lexer) {
token_t* token = token_new(lexer, _ident);
size_t id_len = 0;
while (can_adv(lexer, 1)) {
if (!isalpha(lexer_look(lexer, 0)))
break;
lexer_adv(lexer, 1);
id_len++;
}
char* ident = malloc(id_len + 1);
for (size_t idx = 0; idx < id_len; idx++)
ident[idx] = lexer->src[lexer->ptr - id_len + idx];
ident[id_len] = '\0';
token->string = ident;
return token;
}
static token_t* next_token(lexer_t* lexer) {
token_t* token = NULL;
while (token == NULL && can_adv(lexer, 1)) {
const int c = lexer_look(lexer, 0);
if (DEBUG)
printf("Current character: \"%c\", Length: %d, Pointer: %d \n", lexer_look(lexer, 0), lexer->len, lexer->ptr);
switch (c) {
case '=':
if (lexer_look(lexer, 1) == '=') {
token = token_new(lexer, _eqto);
lexer_adv(lexer, 2);
token->string = "==";
} else {
token = token_new(lexer, _assign);
token->string = "=";
lexer_adv(lexer, 1);
}
break;
case '+':
if (lexer_look(lexer, 1) == '=') {
token = token_new(lexer, _addeql);
lexer_adv(lexer, 2);
token->string = "+=";
} else {
token = token_new(lexer, _add);
token->string = "+";
lexer_adv(lexer, 1);
}
break;
case '-':
if (lexer_look(lexer, 1) == '=') {
token = token_new(lexer, _subeql);
lexer_adv(lexer, 2);
token->string = "-=";
} else {
token = token_new(lexer, _sub);
token->string = "-";
lexer_adv(lexer, 1);
}
break;
case '*':
if (lexer_look(lexer, 1) == '=') {
token = token_new(lexer, _muleql);
lexer_adv(lexer, 2);
token->string = "*=";
} else {
token = token_new(lexer, _mul);
token->string = "*";
lexer_adv(lexer, 1);
}
break;
case '/':
if (lexer_look(lexer, 1) == '=') {
token = token_new(lexer, _diveql);
lexer_adv(lexer, 2);
token->string = "/=";
} else {
token = token_new(lexer, _div);
token->string = "/";
lexer_adv(lexer, 1);
}
break;
case '<':
if (lexer_look(lexer, 1) == '<') {
token = token_new(lexer, _nteq);
lexer_adv(lexer, 2);
token->string = "<=";
} else {
token = token_new(lexer, _bang);
token->string = "<";
lexer_adv(lexer, 1);
}
break;
case '>':
if (lexer_look(lexer, 1) == '<') {
token = token_new(lexer, _nteq);
lexer_adv(lexer, 2);
token->string = ">=";
}
else {
token = token_new(lexer, _bang);
token->string = ">";
lexer_adv(lexer, 1);
}
break;
case '&':
if (lexer_look(lexer, 1) == '&') {
token = token_new(lexer, _and);
lexer_adv(lexer, 2);
token->string = "&&";
} else {
token = token_new(lexer, _notype);
lexer_adv(lexer, 1);
}
break;
case '|':
if (lexer_look(lexer, 1) == '|') {
token = token_new(lexer, _or);
lexer_adv(lexer, 2);
token->string = "||";
}
else {
token = token_new(lexer, _notype);
lexer_adv(lexer, 1);
}
break;
case '%':
token = token_new(lexer, _mod);
token->string = "%";
lexer_adv(lexer, 1);
break;
case '^':
token = token_new(lexer, _mod);
token->string = "^";
lexer_adv(lexer, 1);
break;
case '!':
if (lexer_look(lexer, 1) == '=') {
token = token_new(lexer, _nteq);
lexer_adv(lexer, 2);
token->string = "!=";
}
else {
token = token_new(lexer, _bang);
token->string = "!";
lexer_adv(lexer, 1);
}
break;
case '\"':
token = lexer_str(lexer);
break;
case '#':
lexer_comment(lexer);
break;
case '(':
token = token_new(lexer, _lpara);
token->string = "(";
lexer_adv(lexer, 1);
break;
case ')':
token = token_new(lexer, _rpara);
token->string = ")";
lexer_adv(lexer, 1);
break;
case '{':
token = token_new(lexer, _lcurl);
token->string = "{";
lexer_adv(lexer, 1);
break;
case '}':
token = token_new(lexer, _rcurl);
token->string = "}";
lexer_adv(lexer, 1);
break;
case '[':
token = token_new(lexer, _lbrac);
token->string = "[";
lexer_adv(lexer, 1);
break;
case ']':
token = token_new(lexer, _rbrac);
token->string = "]";
lexer_adv(lexer, 1);
break;
case ';':
token = token_new(lexer, _terml);
token->string = ";";
lexer_adv(lexer, 1);
break;
default:
if (isalpha(c) || c == '_')
token = lexer_ident(lexer);
else if (isdigit(c) || c == '.') {
token = lexer_num(lexer);
} else if (isspace(c))
lexer_adv(lexer, 1);
else
token = token_new(lexer, _eof);
break;
}
}
return token;
}
void lexer_print(lexer_t* lexer) {
size_t line = lexer->line;
size_t pos = lexer->pos;
size_t ptr = lexer->ptr;
token_t* token = next_token(lexer);
while (token != NULL && token->type != _eof) {
token_print(token);
token = next_token(lexer);
}
lexer->ptr = ptr;
lexer->pos = pos;
}
lexer_t* lexer_open(const char* file_name) {
FILE* file_ptr = fopen(file_name, "rb");
lexer_t* lexer = malloc(sizeof(lexer_t));
lexer->errors = errorlist_new();
lexer->line = 1;
lexer->pos = 0;
lexer->ptr = 0;
if (file_ptr == NULL) {
error_new(lexer->errors, 0, 0, "Couldent open file \"%s\".\n", file_name);
fclose(file_ptr);
free(lexer);
}
if (fseek(file_ptr, 0, SEEK_END) != 0) {
fclose(file_ptr);
return NULL;
}
lexer->len = ftell(file_ptr);
if (lexer->len == -1) {
error_new(lexer->errors, 0, 0, "Unable to get the size of file \"%s\".\n", file_name);
fclose(file_ptr);
free(lexer);
}
fseek(file_ptr, 0, SEEK_SET);
lexer->src = malloc(lexer->len);
size_t r = fread(lexer->src, lexer->len, 1, file_ptr);
fclose(file_ptr);
return lexer;
}
void lexer_close(lexer_t* lexer) {
if (lexer->src != NULL)
free(lexer->src);
free(lexer);
}
Kopf
#ifndef LEXER_H
#define LEXER_H
#include "error.h"
#include <stdio.h>
#include <stdbool.h>
#include <malloc.h>
#include <assert.h>
typedef enum tk_type {
// primitives
_notype,
_str,
_gen_num,
_ident,
_type,
// symbols
_rbrac,
_lbrac,
_rpara,
_lpara,
_rcurl,
_lcurl,
_terml,
_assign,
_bang,
_add,
_addeql,
_sub,
_subeql,
_div,
_diveql,
_mul,
_muleql,
_exp,
_mod,
// comparison operators
_lt,
_lteq,
_gt,
_gteq,
_eqto,
_nteq,
_and,
_or,
// keywords
_while,
_for,
_if,
_else,
_match,
_case,
_return,
_break,
_int,
_float,
_enum,
_true,
_false,
_import,
_struct,
_mac,
_dbl,
_void,
_char,
_bool,
_const,
// abstract
_block,
_eof
} tk_type;
typedef struct token_t {
tk_type type;
size_t line;
size_t pos;
union {
char* string;
double flt;
size_t integer;
};
} token_t;
typedef struct lexer_t {
size_t line;
size_t pos;
size_t ptr;
size_t len;
char* src;
errorlist_t* errors;
} lexer_t;
void lexer_print(lexer_t* lexer);
#endif
Eingang
int main() {
int var = 10 + 2;
}
Informationen ist etwas spärlich. Was ist Ihre Eingabe (inkl. Hexdump, nur für den Fall), wie ist 'token_t' definiert und wie drucken Sie die Ausgabe? Der Fehler kann auch in 'can_adv()', 'lexer_look()', 'lexer_adv()' und 'token_new()' versteckt werden. Auch: Wie ist 'lexer_t' definiert und was ist am Anfang der Funktion' lexer_num() '? Und warum ist der Datentyp von 'v' ein' size_t'? – deamentiaemundi
Überprüfen Sie, wie Sie ein MCVE ([MCVE]) erstellen, und aktualisieren Sie die Frage, um eine zu erstellen. Es besteht eine gute Chance, dass das Problem nicht in dem Code liegt, den Sie gerade anzeigen (und daher in dem nicht gezeigten Code enthalten ist). Da wir den Code jedoch nicht ausführen können, können wir uns nicht sicher sein. –
Ich kann Ihnen versichern, dass der Fehler in diesem Code ist, aber ich werde darüber nachsehen. Danke – Hedron