From 9a7c1d0dd4e9a4297b829fc7cc8508aad9f9332b Mon Sep 17 00:00:00 2001
From: Nathan Lebrun
Date: Fri, 17 Jan 2025 08:29:12 +0100
Subject: [PATCH] tokenizer v2

---
 tests/parse.c                       |  36 ++-------
 tests/tokenizer/copy_token_string.c |  93 +++++++++++++++++++++++
 tests/tokenizer/goto_next_token.c   |  25 ++++++
 tests/tokenizer/tokenizer.c         | 114 ++++++----------------------
 tests/tokenizer/tokenizer.h         |   7 +-
 tests/tokenizer/tokenizer_utils.c   |   4 +-
 6 files changed, 158 insertions(+), 121 deletions(-)
 create mode 100644 tests/tokenizer/copy_token_string.c
 create mode 100644 tests/tokenizer/goto_next_token.c

diff --git a/tests/parse.c b/tests/parse.c
index b501f1b..52c3b56 100644
--- a/tests/parse.c
+++ b/tests/parse.c
@@ -12,7 +12,12 @@
 
 #include "tokenizer/tokenizer.h"
 
-/*void truncate_after_exit_word(char **lst)*/
+/*str_equal(char *original)*/
+/*{*/
+/**/
+/*}*/
+
+/*void truncate_after_exit_word(char *str)*/
 /*{*/
 /*	int i;*/
 /*	int depth;*/
@@ -23,31 +28,7 @@
 /*	truncate_mode = FALSE;*/
 /*	while (lst[i])*/
 /*	{*/
-/*		if (truncate_mode)*/
-/*		{*/
-/*			free(lst[i]);*/
-/*			lst[i] = NULL;*/
-/*		}*/
-/*		else*/
-/*		{*/
-/*			if (lst[i][0] == '(')*/
-/*				depth += 1;*/
-/*			if (lst[i][ft_strlen(lst[i]) - 1] == ')')*/
-/*				depth -= 1;*/
-/*			if (!ft_strncmp(lst[i], "exit", 4) && depth == 0)*/
-/*				truncate_mode = TRUE;*/
-/*		}*/
-/*		i++;*/
-/*	}*/
-/*}*/
-
-/*void print_tab(char **lst)*/
-/*{*/
-/*	int i = 0;*/
-/*	while (lst[i])*/
-/*	{*/
-/*		printf("%s\n", lst[i]);*/
-/*		i++;*/
+/**/
 /*	}*/
 /*}*/
 
@@ -68,8 +49,7 @@ int main (int ac, char **av)
 
 	if (str)
 	{
-		/*truncate_after_exit_word(lst);*/
-		/*free_tab(lst);*/
+		/*truncate_after_exit_word(str);*/
 		lst = tokenize(str);
 		print_linked_list(lst);
 	}
diff --git a/tests/tokenizer/copy_token_string.c b/tests/tokenizer/copy_token_string.c
new file mode 100644
index 0000000..274c9f1
--- /dev/null
+++ b/tests/tokenizer/copy_token_string.c
@@ -0,0 +1,93 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   copy_token_string.c                                :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun                                   +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/16 12:48:50 by nalebrun          #+#    #+#             */
+/*   Updated: 2025/01/16 12:48:50 by nalebrun         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "tokenizer.h"
+
+int is_meta_token(char c)
+{
+	if (c == '&' || c == '|'
+		|| c == '(' || c == ')'
+		|| c == '>' || c == '<')
+		return (1);
+	return (0);
+}
+
+int skip_meta_token(char *str)
+{
+	int i;
+
+	i = 0;
+	if ((str[i] == '&')
+		|| (str[i] == '|' && str[i + 1] && str[i + 1] == '|')
+		|| (str[i] == '<' && str[i + 1] && str[i + 1] == '<')
+		|| (str[i] == '>' && str[i + 1] && str[i + 1] == '>'))
+		i = 2;
+	else
+		i = 1;
+	return (i);
+}
+
+int go_to_next_meta_token(char *str)
+{
+	int i;
+
+	i = 0;
+	while (str[i]
+		&& str[i] != '&'
+		&& str[i] != '|'
+		&& str[i] != '('
+		&& str[i] != ')'
+		&& str[i] != '<'
+		&& str[i] != '>')
+		i++;
+	return (i);
+}
+
+static int go_to_second_next_space_block(char *str)
+{
+	int i;
+
+	i = 0;
+	while (str[i] && str[i] == ' ')
+		i++;
+	while (str[i] && str[i] != ' ')
+		i++;
+	return (i);
+}
+
+char *copy_token_string(char *str, char last_token)
+{
+	char *out;
+	char *trimmed_out;
+	int i;
+	int j;
+
+	i = 0;
+	while (str[i] && str[i] == ' ')
+		i++;
+	if (is_meta_token(str[i]))
+		i += skip_meta_token(&str[i]);
+	else if (last_token != '<' && last_token != '>')
+		i += go_to_next_meta_token(&str[i]);
+	else
+		i += go_to_second_next_space_block(&str[i]);
+	out = malloc(i + 1);
+	if (!out)
+		return (NULL);
+	j = -1;
+	while (++j < i)
+		out[j] = str[j];
+	out[j] = 0;
+	trimmed_out = ft_strtrim(out, " ");
+	free(out);
+	return (trimmed_out);
+}
diff --git a/tests/tokenizer/goto_next_token.c b/tests/tokenizer/goto_next_token.c
new file mode 100644
index 0000000..7af3aff
--- /dev/null
+++ b/tests/tokenizer/goto_next_token.c
@@ -0,0 +1,25 @@
+#include "tokenizer.h"
+
+int goto_next_token(char *str)
+{
+	int i;
+	static char last_token = '#';
+
+	i = 0;
+	if (is_meta_token(str[i]))
+		i += skip_meta_token(&str[i]);
+	else if (last_token != '<' && last_token != '>')
+		i += go_to_next_meta_token(&str[i]);
+	else
+	{
+		while (str[i] == ' ')
+			i++;
+		while (str[i] && str[i] != '&' && str[i] != '|'
+			&& str[i] != '(' && str[i] != ')'
+			&& str[i] != '<' && str[i] != '>'
+			&& str[i] != ' ')
+			i++;
+	}
+	last_token = str[0];
+	return (i);
+}
diff --git a/tests/tokenizer/tokenizer.c b/tests/tokenizer/tokenizer.c
index 1016f33..611fecd 100644
--- a/tests/tokenizer/tokenizer.c
+++ b/tests/tokenizer/tokenizer.c
@@ -12,81 +12,21 @@
 
 #include "tokenizer.h"
 
-int important_token(char c)
+static t_node *create_head(int *depth, char *str, char *last_token)
 {
-	if (c == '(' || c == ')'
-		|| c == '|' || c == '&')
-		return (1);
-	return (0);
-}
+	t_node *head;
+	char *token;
 
-char *copy_token_string(char *start)
-{
-	char *out;
-	int i;
-	int j;
-
-	i = 0;
-	while (start[i] && start[i] == ' ')
-		i++;
-	if (start[i] == '&')
-		i = 2;
-	else if (start[i] == '|' && start[i + 1] && start[i + 1] == '|')
-		i = 2;
-	else if (start[i] == '|' && start[i + 1] && start[i + 1] != '|')
-		i = 1;
-	else if (start[i] == '(')
-		i = 1;
-	else if (start[i] == ')')
-		i = 1;
-	else
-	{
-		while (start [i]
-			&& start[i] != '&'
-			&& start[i] != '|'
-			&& start[i] != '('
-			&& start[i] != ')')
-			i++;
-	}
-	out = malloc(i + 1);
-	if (!out)
+	ajust_depth(depth, str[0]);
+	token = copy_token_string(&str[0], *last_token);
+	if (!token || !(*token))
 		return (NULL);
-	j = -1;
-	while (++j < i)
-		out[j] = start[j];
-	out[j] = 0;
-	return (out);
-}
-
-int goto_next_token(char *str)
-{
-	int i;
-
-	i = 0;
-	if (str[0] == '(' || str[0] == ')')
-	{
-		if (str[1] && str[1] == ' ')
-			return (2);
-		else
-			return (1);
-	}
-	else if (str[0] == '&' && str[1] && str[1] == '&')
-		return (2);
-	else if (str[0] == '|' && str[1] && str[1] == '|')
-		return (2);
-	else if (str[0] == '|' && str[1] && str[1] != '|' && str[1] && str[1] == ' ')
-		return (2);
-	else if (str[0] == '|' && str[1] && str[1] != '|' && str[1] != ' ')
-		return (1);
-	else
-	{
-		while (str[i] && str[i] != '&' && str[i] != '|'
-			&& str[i] != '(' && str[i] != ')')
-			i++;
-		if (str[i] == ' ')
-			i++;
-	}
-	return (i);
+	head = create_node(token, get_priority(token), *depth);
+	if (!head)
+		return (free(token), NULL);
+	*last_token = str[0];
+	free(token);
+	return (head);
 }
 
 t_node *tokenize(char *str)
@@ -93,33 +33,27 @@ t_node *tokenize(char *str)
 {
 	int i;
 	int depth;
 	char *token;
-	char *trimed_token;
 	t_node *head;
+	char last_token;
 
-	i = 0;
 	depth = 0;
-	ajust_depth(&depth, str[i]);
-	token = copy_token_string(&str[i]);
-	if (!token)
-		return (NULL);
-	trimed_token = ft_strtrim(token, " ");
-	head = create_node(trimed_token, get_priority(trimed_token), depth);
-	free(token);
-	free(trimed_token);
-	i += goto_next_token(&str[i]);
-	while(str[i])
+	last_token = ' ';
+	head = create_head(&depth, str, &last_token);
+	i = goto_next_token(&str[0]);
+	while (str[i])
 	{
 		ajust_depth(&depth, str[i]);
-		token = copy_token_string(&str[i]);
+		token = copy_token_string(&str[i], last_token);
 		if (!token)
 			return (NULL);
-		trimed_token = ft_strtrim(token, " ");
-		if (trimed_token[0] != 0)
-			add_node_back(head, depth, trimed_token);
-		free(token);
-		free(trimed_token);
+		if (token[0] != 0)
+			add_node_back(head, depth, token);
+		while (str[i] == ' ')
+			i++;
+		last_token = str[i];
 		i += goto_next_token(&str[i]);
+		free(token);
 	}
 	return (head);
 }
diff --git a/tests/tokenizer/tokenizer.h b/tests/tokenizer/tokenizer.h
index b72d2fe..e8f9924 100644
--- a/tests/tokenizer/tokenizer.h
+++ b/tests/tokenizer/tokenizer.h
@@ -25,8 +25,13 @@ typedef struct s_node
 
 t_node *tokenize(char *str);
 t_node *create_node(char *token, int priority, int depth);
-void add_node_back(t_node* head, int depth, char *token);
+void add_node_back(t_node *head, int depth, char *token);
 void ajust_depth(int *depth, char c);
 int get_priority(char *token);
+char *copy_token_string(char *start, char last_token);
+int goto_next_token(char *str);
+int skip_meta_token(char *str);
+int is_meta_token(char c);
+int go_to_next_meta_token(char *str);
 
 #endif
diff --git a/tests/tokenizer/tokenizer_utils.c b/tests/tokenizer/tokenizer_utils.c
index 59a0533..7ff085b 100644
--- a/tests/tokenizer/tokenizer_utils.c
+++ b/tests/tokenizer/tokenizer_utils.c
@@ -14,7 +14,7 @@
 
 t_node *create_node(char *token, int priority, int depth)
 {
-	t_node *node;
+	t_node	*node;
 
 	node = malloc(sizeof(t_node));
 	if (!node)
@@ -41,7 +41,7 @@ int get_priority(char *token)
 	return (priority);
 }
 
-void add_node_back(t_node* head, int depth, char *token)
+void add_node_back(t_node *head, int depth, char *token)
 {
 	while (head->next != NULL)
 		head = head->next;
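
Reviewer note, not part of the patch: below is a minimal driver sketching what the reworked tokenizer is expected to produce when testing this change locally. The file name demo.c and the sample command line are invented for illustration; the t_node fields token, priority, depth and next are assumptions inferred from the create_node() and add_node_back() signatures above, and nothing here frees the list since no free helper appears in this diff.

/* demo.c: illustrative sketch only, not part of the patch. */
#include <stdio.h>
#include "tokenizer.h"

int main(void)
{
	t_node	*lst;

	/* '<' and '>' are meta tokens; because the tokenizer now remembers
	** the previous token, the single word after a redirection ("infile")
	** is cut as its own token, while "echo ok" and "wc -l" each stay one
	** command token (words only split at the next meta token). */
	lst = tokenize("(cat < infile && echo ok) | wc -l");
	while (lst)
	{
		printf("[%s] depth=%d priority=%d\n",
			lst->token, lst->depth, lst->priority);
		lst = lst->next;
	}
	return (0);
}

Tracing the patched rules by hand, this should print the tokens (, cat, <, infile, &&, echo ok, ), |, wc -l in that order; the v1 tokenizer split only on & | ( ) and had no notion of a redirection target.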