tokenisation
This commit is contained in:
@@ -1,33 +1,45 @@
|
||||
#include "../includes/minishell.h"
|
||||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* parse.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/01/15 08:23:41 by nalebrun #+# #+# */
|
||||
/* Updated: 2025/01/15 08:23:41 by nalebrun ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "tokenizer/tokenizer.h"

/*
** Frees and NULLs every token of lst that follows a top-level
** (depth == 0) "exit" token, so nothing after `exit` is kept.
**
** lst: NULL-terminated array of token strings (e.g. from ft_split);
**      entries after the exit word are freed and set to NULL in place.
*/
void	truncate_after_exit_word(char **lst)
{
	int	i;
	int	depth;
	int	truncate_mode;

	i = 0;
	depth = 0;
	truncate_mode = FALSE;
	while (lst[i])
	{
		if (truncate_mode)
		{
			free(lst[i]);
			lst[i] = NULL;
		}
		else
		{
			if (lst[i][0] == '(')
				depth += 1;
			if (lst[i][0] != '\0' && lst[i][ft_strlen(lst[i]) - 1] == ')')
				depth -= 1;
			if (!ft_strncmp(lst[i], "exit", 5) && depth == 0)
				truncate_mode = TRUE;
		}
		i++;
	}
}
|
||||
/*void truncate_after_exit_word(char **lst)*/
|
||||
/*{*/
|
||||
/* int i;*/
|
||||
/* int depth;*/
|
||||
/* int truncate_mode;*/
|
||||
/**/
|
||||
/* i = 0;*/
|
||||
/* depth = 0;*/
|
||||
/* truncate_mode = FALSE;*/
|
||||
/* while (lst[i])*/
|
||||
/* {*/
|
||||
/* if (truncate_mode)*/
|
||||
/* {*/
|
||||
/* free(lst[i]);*/
|
||||
/* lst[i] = NULL;*/
|
||||
/* }*/
|
||||
/* else*/
|
||||
/* {*/
|
||||
/* if (lst[i][0] == '(')*/
|
||||
/* depth += 1;*/
|
||||
/* if (lst[i][ft_strlen(lst[i]) - 1] == ')')*/
|
||||
/* depth -= 1;*/
|
||||
/* if (!ft_strncmp(lst[i], "exit", 4) && depth == 0)*/
|
||||
/* truncate_mode = TRUE;*/
|
||||
/* }*/
|
||||
/* i++;*/
|
||||
/* }*/
|
||||
/*}*/
|
||||
|
||||
/*void print_tab(char **lst)*/
|
||||
/*{*/
|
||||
@@ -39,19 +51,26 @@ void truncate_after_exit_word(char **lst)
|
||||
/* }*/
|
||||
/*}*/
|
||||
|
||||
/*
** Debug helper: dumps every node of the token list to stdout,
** one line per node (priority, depth, token text).
*/
void	print_linked_list(t_node *head)
{
	t_node	*node;

	node = head;
	while (node)
	{
		printf("Node - Priority: %d, Depth: %d, TOKEN: |%s|\n",
			node->priority, node->depth, node->token);
		node = node->next;
	}
}
|
||||
|
||||
int main (int ac, char **av)
|
||||
{
|
||||
(void)ac;
|
||||
|
||||
char *str = av[1];
|
||||
char **lst;
|
||||
char *str = ft_strtrim(av[1], " ");
|
||||
t_node *lst;
|
||||
|
||||
if (str)
|
||||
{
|
||||
// replace by a custom split that also the token alone and under the form of a linked list
|
||||
lst = ft_split(str, ' ');
|
||||
truncate_after_exit_word(lst);
|
||||
print_tab(lst);
|
||||
free_tab(lst);
|
||||
/*truncate_after_exit_word(lst);*/
|
||||
/*free_tab(lst);*/
|
||||
lst = tokenize(str);
|
||||
print_linked_list(lst);
|
||||
}
|
||||
}
|
||||
|
||||
125
tests/tokenizer/tokenizer.c
Normal file
125
tests/tokenizer/tokenizer.c
Normal file
@@ -0,0 +1,125 @@
|
||||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* tokenizer.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/01/15 13:27:57 by nalebrun #+# #+# */
|
||||
/* Updated: 2025/01/15 13:27:57 by nalebrun ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "tokenizer.h"
|
||||
|
||||
/*
** Returns 1 when c is one of the shell operator characters the
** tokenizer treats specially ('(', ')', '|', '&'), 0 otherwise.
*/
int	important_token(char c)
{
	switch (c)
	{
		case '(':
		case ')':
		case '|':
		case '&':
			return (1);
		default:
			return (0);
	}
}
|
||||
|
||||
/*
** Returns a heap-allocated copy of the first token found in start,
** including any leading spaces (callers trim them afterwards).
**
** Operator tokens are "&&", "&", "||", "|", "(" and ")"; anything
** else is a word token that runs until the next operator or '\0'
** (spaces inside a word token are kept).
**
** Returns NULL on allocation failure. Caller owns and frees the copy.
**
** Fix: operator lengths are now added to the space-skip offset
** (i += n) instead of overwriting it (i = n); previously an operator
** preceded by spaces copied only the spaces and the token was lost.
*/
char	*copy_token_string(char *start)
{
	char	*out;
	int		i;
	int		j;

	i = 0;
	while (start[i] && start[i] == ' ')
		i++;
	if (start[i] == '&' && start[i + 1] == '&')
		i += 2;
	else if (start[i] == '&')
		i += 1;
	else if (start[i] == '|' && start[i + 1] == '|')
		i += 2;
	else if (start[i] == '|' || start[i] == '(' || start[i] == ')')
		i += 1;
	else
	{
		while (start[i] && start[i] != '&' && start[i] != '|'
			&& start[i] != '(' && start[i] != ')')
			i++;
	}
	out = malloc(i + 1);
	if (!out)
		return (NULL);
	j = -1;
	while (++j < i)
		out[j] = start[j];
	out[j] = 0;
	return (out);
}
|
||||
|
||||
/*
** Returns how many characters of str the current token (plus at most
** one trailing space after an operator) occupies, i.e. the offset of
** the next token.
**
** Fix: a lone '&' or a trailing '|' used to fall into the word loop,
** which stopped immediately and returned 0 — an infinite loop in
** tokenize(). Every operator now advances by at least 1.
** Also removed the dead `if (str[i] == ' ') i++;` after the word
** loop: that loop only stops on '\0' or an operator, never a space.
*/
int	goto_next_token(char *str)
{
	int	i;

	if (str[0] == '(' || str[0] == ')')
	{
		if (str[1] == ' ')
			return (2);
		return (1);
	}
	if (str[0] == '&')
	{
		if (str[1] == '&')
			return (2);
		return (1);
	}
	if (str[0] == '|')
	{
		if (str[1] == '|' || str[1] == ' ')
			return (2);
		return (1);
	}
	i = 0;
	while (str[i] && str[i] != '&' && str[i] != '|'
		&& str[i] != '(' && str[i] != ')')
		i++;
	return (i);
}
|
||||
|
||||
/*
** Splits str into a linked list of tokens (operators and words),
** each carrying its operator priority and parenthesis depth.
**
** Returns the list head, or NULL when str is empty / nothing but
** spaces / the first allocation fails. On a mid-list allocation
** failure the partial list built so far is returned instead of being
** leaked (the old code returned NULL and leaked every node).
**
** Fixes vs. original: the duplicated "first node" code is folded into
** the loop; empty (all-space) tokens are skipped for the head node
** too; a NULL ft_strtrim result is no longer passed to create_node;
** `token` is freed before every exit path; a non-advancing
** goto_next_token() can no longer loop forever.
*/
t_node	*tokenize(char *str)
{
	int		i;
	int		depth;
	int		step;
	char	*token;
	char	*trimed;
	t_node	*head;

	i = 0;
	depth = 0;
	head = NULL;
	while (str[i])
	{
		ajust_depth(&depth, str[i]);
		token = copy_token_string(&str[i]);
		if (!token)
			return (head);
		trimed = ft_strtrim(token, " ");
		free(token);
		if (trimed && trimed[0] != 0)
		{
			if (head == NULL)
				head = create_node(trimed, get_priority(trimed), depth);
			else
				add_node_back(head, depth, trimed);
		}
		free(trimed);
		step = goto_next_token(&str[i]);
		if (step <= 0)
			break ;
		i += step;
	}
	return (head);
}
|
||||
32
tests/tokenizer/tokenizer.h
Normal file
32
tests/tokenizer/tokenizer.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* parser.h :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/01/15 13:30:12 by nalebrun #+# #+# */
|
||||
/* Updated: 2025/01/15 13:30:12 by nalebrun ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#ifndef TOKENIZER_H
|
||||
# define TOKENIZER_H
|
||||
|
||||
# include "../includes/minishell.h"
|
||||
|
||||
/*
** One tokenized shell element, linked in input order by tokenize().
*/
typedef struct s_node
{
	struct s_node	*next;		/* next token in the list, NULL at tail */
	char			*token;		/* heap-owned copy of the token text */
	int				priority;	/* 2 for "&&"/"||", 1 for "|", 0 otherwise */
	int				depth;		/* parenthesis nesting depth at this token */
}	t_node;
|
||||
|
||||
t_node *tokenize(char *str);
|
||||
t_node *create_node(char *token, int priority, int depth);
|
||||
void add_node_back(t_node* head, int depth, char *token);
|
||||
void ajust_depth(int *depth, char c);
|
||||
int get_priority(char *token);
|
||||
|
||||
#endif
|
||||
57
tests/tokenizer/tokenizer_utils.c
Normal file
57
tests/tokenizer/tokenizer_utils.c
Normal file
@@ -0,0 +1,57 @@
|
||||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* tokenizer_utils.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/01/15 13:38:49 by nalebrun #+# #+# */
|
||||
/* Updated: 2025/01/15 13:38:49 by nalebrun ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "tokenizer.h"
|
||||
|
||||
t_node *create_node(char *token, int priority, int depth)
|
||||
{
|
||||
t_node *node;
|
||||
|
||||
node = malloc(sizeof(t_node));
|
||||
if (!node)
|
||||
return (NULL);
|
||||
node->token = ft_strdup(token);
|
||||
node->priority = priority;
|
||||
node->depth = depth;
|
||||
node->next = NULL;
|
||||
return (node);
|
||||
}
|
||||
|
||||
/*
** Maps a token string to its operator priority:
** 2 for "&&" and "||", 1 for "|", 0 for everything else.
*/
int	get_priority(char *token)
{
	if (token[0] == '&' && token[1] == '&')
		return (2);
	if (token[0] == '|' && token[1] == '|')
		return (2);
	if (token[0] == '|')
		return (1);
	return (0);
}
|
||||
|
||||
/*
** Appends a new node (built from token and depth) at the tail of the
** list starting at head. head must be non-NULL.
*/
void	add_node_back(t_node *head, int depth, char *token)
{
	t_node	*tail;

	tail = head;
	while (tail->next)
		tail = tail->next;
	tail->next = create_node(token, get_priority(token), depth);
}
|
||||
|
||||
/*
** Updates *depth for one character: '(' opens a nesting level,
** ')' closes one; any other character leaves depth untouched.
*/
void	ajust_depth(int *depth, char c)
{
	if (c == '(')
		++(*depth);
	else if (c == ')')
		--(*depth);
}
|
||||
Reference in New Issue
Block a user