tokenizer v2

Nathan Lebrun
2025-01-17 08:29:12 +01:00
parent 687dd72dba
commit 9a7c1d0dd4
6 changed files with 158 additions and 121 deletions

View File

@@ -12,7 +12,12 @@
#include "tokenizer/tokenizer.h" #include "tokenizer/tokenizer.h"
/*void truncate_after_exit_word(char **lst)*/ /*str_equal(char *original)*/
/*{*/
/**/
/*}*/
/*void truncate_after_exit_word(char *str)*/
/*{*/ /*{*/
/* int i;*/ /* int i;*/
/* int depth;*/ /* int depth;*/
@@ -23,31 +28,7 @@
 /* truncate_mode = FALSE;*/
 /* while (lst[i])*/
 /* {*/
-/* if (truncate_mode)*/
-/* {*/
-/* free(lst[i]);*/
-/* lst[i] = NULL;*/
-/* }*/
-/* else*/
-/* {*/
-/* if (lst[i][0] == '(')*/
-/* depth += 1;*/
-/* if (lst[i][ft_strlen(lst[i]) - 1] == ')')*/
-/* depth -= 1;*/
-/* if (!ft_strncmp(lst[i], "exit", 4) && depth == 0)*/
-/* truncate_mode = TRUE;*/
-/* }*/
-/* i++;*/
-/* }*/
-/*}*/
-/*void print_tab(char **lst)*/
-/*{*/
-/* int i = 0;*/
-/* while (lst[i])*/
-/* {*/
-/* printf("%s\n", lst[i]);*/
-/* i++;*/
+/**/
 /* }*/
 /*}*/
@@ -68,8 +49,7 @@ int main (int ac, char **av)
 	if (str)
 	{
-		/*truncate_after_exit_word(lst);*/
-		/*free_tab(lst);*/
+		/*truncate_after_exit_word(str);*/
 		lst = tokenize(str);
 		print_linked_list(lst);
 	}

View File

@@ -0,0 +1,93 @@
/* ************************************************************************** */
/*                                                                            */
/*                                                        :::      ::::::::   */
/*   copy_token_string.c                                :+:      :+:    :+:   */
/*                                                    +:+ +:+         +:+     */
/*   By: nalebrun <nalebrun@student.s19.be>         +#+  +:+       +#+        */
/*                                                +#+#+#+#+#+   +#+           */
/*   Created: 2025/01/16 12:48:50 by nalebrun          #+#    #+#             */
/*   Updated: 2025/01/16 12:48:50 by nalebrun         ###   ########.fr       */
/*                                                                            */
/* ************************************************************************** */

#include "tokenizer.h"

int	is_meta_token(char c)
{
	if (c == '&' || c == '|'
		|| c == '(' || c == ')'
		|| c == '>' || c == '<')
		return (1);
	return (0);
}

int	skip_meta_token(char *str)
{
	int	i;

	i = 0;
	if ((str[i] == '&')
		|| (str[i] == '|' && str[i + 1] && str[i + 1] == '|')
		|| (str[i] == '<' && str[i + 1] && str[i + 1] == '<')
		|| (str[i] == '>' && str[i + 1] && str[i + 1] == '>'))
		i = 2;
	else
		i = 1;
	return (i);
}

int	go_to_next_meta_token(char *str)
{
	int	i;

	i = 0;
	while (str[i]
		&& str[i] != '&'
		&& str[i] != '|'
		&& str[i] != '('
		&& str[i] != ')'
		&& str[i] != '<'
		&& str[i] != '>')
		i++;
	return (i);
}

static int	go_to_second_next_space_block(char *str)
{
	int	i;

	i = 0;
	while (str[i] && str[i] == ' ')
		i++;
	while (str[i] && str[i] != ' ')
		i++;
	return (i);
}

char	*copy_token_string(char *str, char last_token)
{
	char	*out;
	char	*trimed_out;
	int		i;
	int		j;

	i = 0;
	while (str[i] && str[i] == ' ')
		i++;
	if (is_meta_token(str[i]))
		i += skip_meta_token(&str[i]);
	else if (last_token != '<' && last_token != '>')
		i += go_to_next_meta_token(&str[i]);
	else
		i += go_to_second_next_space_block(&str[i]);
	out = malloc(i + 1);
	if (!out)
		return (NULL);
	j = -1;
	while (++j < i)
		out[j] = str[j];
	out[j] = 0;
	trimed_out = ft_strtrim(out, " ");
	free(out);
	return (trimed_out);
}
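
A throwaway driver makes the new extraction rules easy to check by hand. The sketch below is not part of the commit; it assumes tokenizer.h declares copy_token_string() and that libft's ft_strtrim() is linked, as elsewhere in the project. The '#' argument is just any character other than '<' or '>', meaning "no redirection pending".

/* demo_copy_token.c -- hypothetical test driver, not part of this commit. */
#include <stdio.h>
#include <stdlib.h>
#include "tokenizer.h"

int	main(void)
{
	char	*token;

	/* last_token != '<'/'>' : the token runs to the next meta character */
	token = copy_token_string("echo hello && (ls | wc -l)", '#');
	if (token)
		printf("[%s]\n", token);	/* expected: [echo hello] */
	free(token);
	return (0);
}

With last_token set to '<' instead, the call takes the go_to_second_next_space_block() branch and stops after the first space-delimited word, so a redirection target like "infile" becomes its own token rather than running on to the next operator.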

View File

@@ -0,0 +1,25 @@
#include "tokenizer.h"
int goto_next_token(char *str)
{
int i;
static char last_token = '#';
i = 0;
if (is_meta_token(str[i]))
i += skip_meta_token(&str[i]);
else if (last_token != '<' && last_token != '>')
i += go_to_next_meta_token(&str[i]);
else
{
while (str[i] == ' ')
i++;
while (str[i] && str[i] != '&' && str[i] != '|'
&& str[i] != '(' && str[i] != ')'
&& str[i] != '<' && str[i] != '>'
&& str[i] != ' ')
i++;
}
last_token = str[0];
return (i);
}
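
Because last_token is static, the function keeps state between calls within a single left-to-right pass, mirroring the rule in copy_token_string(): after '<' or '>' the cursor advances one space-delimited word, otherwise it runs to the next meta character. A hypothetical walk, assuming the helpers from copy_token_string.c above:

/* demo_walk.c -- hypothetical sketch, not part of this commit. */
#include <stdio.h>
#include "tokenizer.h"

int	main(void)
{
	char	*line;
	int		i;

	line = "cat < infile | wc -l";
	i = 0;
	while (line[i])
	{
		/* prints the unconsumed tail at each boundary:
		** "cat < infile...", "< infile...", " infile | ...", " | wc -l", ... */
		printf("col %2d: \"%s\"\n", i, &line[i]);
		i += goto_next_token(&line[i]);
	}
	return (0);
}

One consequence worth noting: the static survives the end of the line, so a second pass over a new string starts from whatever last_token the previous one left behind.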

View File

@@ -12,81 +12,21 @@
#include "tokenizer.h" #include "tokenizer.h"
int important_token(char c) static t_node *create_head(int *depth, char *str, char *last_token)
{ {
if (c == '(' || c == ')' t_node *head;
|| c == '|' || c == '&') char *token;
return (1);
return (0);
}
char *copy_token_string(char *start) ajust_depth(depth, str[0]);
{ token = copy_token_string(&str[0], *last_token);
char *out; if (!(*token))
int i;
int j;
i = 0;
while (start[i] && start[i] == ' ')
i++;
if (start[i] == '&')
i = 2;
else if (start[i] == '|' && start[i + 1] && start[i + 1] == '|')
i = 2;
else if (start[i] == '|' && start[i + 1] && start[i + 1] != '|')
i = 1;
else if (start[i] == '(')
i = 1;
else if (start[i] == ')')
i = 1;
else
{
while (start [i]
&& start[i] != '&'
&& start[i] != '|'
&& start[i] != '('
&& start[i] != ')')
i++;
}
out = malloc(i + 1);
if (!out)
return (NULL); return (NULL);
j = -1; head = create_node(token, get_priority(token), *depth);
while (++j < i) if (!head)
out[j] = start[j]; return (NULL);
out[j] = 0; *last_token = str[0];
return (out); free(token);
} return (head);
int goto_next_token(char *str)
{
int i;
i = 0;
if (str[0] == '(' || str[0] == ')')
{
if (str[1] && str[1] == ' ')
return (2);
else
return (1);
}
else if (str[0] == '&' && str[1] && str[1] == '&')
return (2);
else if (str[0] == '|' && str[1] && str[1] == '|')
return (2);
else if (str[0] == '|' && str[1] && str[1] != '|' && str[1] && str[1] == ' ')
return (2);
else if (str[0] == '|' && str[1] && str[1] != '|' && str[1] != ' ')
return (1);
else
{
while (str[i] && str[i] != '&' && str[i] != '|'
&& str[i] != '(' && str[i] != ')')
i++;
if (str[i] == ' ')
i++;
}
return (i);
} }
t_node *tokenize(char *str) t_node *tokenize(char *str)
@@ -94,32 +34,26 @@ t_node *tokenize(char *str)
 	int		i;
 	int		depth;
 	char	*token;
-	char	*trimed_token;
 	t_node	*head;
+	char	last_token;

-	i = 0;
 	depth = 0;
-	ajust_depth(&depth, str[i]);
-	token = copy_token_string(&str[i]);
-	if (!token)
-		return (NULL);
-	trimed_token = ft_strtrim(token, " ");
-	head = create_node(trimed_token, get_priority(trimed_token), depth);
-	free(token);
-	free(trimed_token);
-	i += goto_next_token(&str[i]);
-	while(str[i])
+	last_token = ' ';
+	head = create_head(&depth, str, &last_token);
+	i = goto_next_token(&str[0]);
+	while (str[i])
 	{
 		ajust_depth(&depth, str[i]);
-		token = copy_token_string(&str[i]);
+		token = copy_token_string(&str[i], last_token);
 		if (!token)
 			return (NULL);
-		trimed_token = ft_strtrim(token, " ");
-		if (trimed_token[0] != 0)
-			add_node_back(head, depth, trimed_token);
-		free(token);
-		free(trimed_token);
+		if (token[0] != 0)
+			add_node_back(head, depth, token);
+		while (str[i] == ' ')
+			i++;
+		last_token = str[i];
 		i += goto_next_token(&str[i]);
+		free(token);
 	}
 	return (head);
 }
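
Net effect of the rewrite: create_head() emits the first node (copy_token_string() now returns an already-trimmed token, so the inline ft_strtrim bookkeeping disappears), and the loop threads last_token through so redirection targets get their own nodes. A hypothetical end-to-end check, assuming print_linked_list() is visible as in the project's test main():

/* demo_tokenize.c -- hypothetical sketch, not part of this commit. */
#include "tokenizer.h"

int	main(void)
{
	t_node	*lst;

	lst = tokenize("(ls | cat) && echo done");
	if (lst)
		print_linked_list(lst);
	/* expected tokens, in order: "(", "ls", "|", "cat", ")", "&&",
	** "echo done", presumably with depth raised inside the parentheses */
	return (0);
}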

View File

@@ -25,8 +25,13 @@ typedef struct s_node
 t_node	*tokenize(char *str);
 t_node	*create_node(char *token, int priority, int depth);
-void	add_node_back(t_node* head, int depth, char *token);
+void	add_node_back(t_node *head, int depth, char *token);
 void	ajust_depth(int *depth, char c);
 int		get_priority(char *token);
+char	*copy_token_string(char *start, char last_token);
+int		goto_next_token(char *str);
+int		skip_meta_token(char *str);
+int		is_meta_token(char c);
+int		go_to_next_meta_token(char *str);

 #endif

View File

@@ -41,7 +41,7 @@ int get_priority(char *token)
 	return (priority);
 }

-void	add_node_back(t_node* head, int depth, char *token)
+void	add_node_back(t_node *head, int depth, char *token)
 {
 	while (head->next != NULL)
 		head = head->next;