#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "halium.h"

#include <assert.h>
#include <stdio.h>

#define error(x)\
fprintf(stderr, "File %s, line %i: %s", __FILE__, __LINE__, x);
#define errorf(f, x)\
fprintf(stderr, "File %s, line %i: " # f, __FILE__, __LINE__, x);

/* --- Prototypes ---------------------------------------------------------- */

static inline int is_word_character(char);
char* lowercase(char*);
char** make_word_list(const char*);
void free_word_list(char**);
static int word_list_length(char**);
static int node_sort(const node_t*, const node_t*);
int word_find(const char*, const node_t*);
model_t* make_model(int);
void free_model(model_t*);
static node_t* make_node(void);
static void free_node(node_t*);
static void initialize_context(model_t*);
void learn(model_t*, char**);
static void update_model(model_t*, char*);

/* --- Definitions --------------------------------------------------------- */

/* True if c is considered part of word */
int is_word_character(char c)
{
	return strchr(" \n\t\",.;:?!&()[]{}+=/", c) == NULL;
}

/* Convert a string to lowercase */
char* lowercase(char* s)
{
	char* p;
	for (p = s; *p != '\0'; p++) *p = tolower(*p);
	return s;
}

/* Break a string into a list of words and non-words */
char** make_word_list(const char* s)
{
	char** list;				/* List to return */
	char** list_p;				/* Next element */
	int list_len;				/* Maximum entries in list */
	const char* b = s;			/* Start of word or non-word */
	int word_flag;				/* Now forming a word? */
	int i;					/* Generic iterator */

	if (*s == '\0') {
		list = malloc(sizeof(char*));
		if (list == NULL) {
			error("Unable to allocate word list\n");
			return NULL;
		}
		list[0] = NULL;
		return list;
	}

	word_flag = is_word_character(*s);

	/* Guess one word and one non-word per six characters */
	list_len = strlen(s) / 3;
	if (list_len < 2) list_len = 2;
	list_p = list = malloc(sizeof(char*) * list_len);
	if (list == NULL) {
		error("Unable to allocate word list\n");
		return NULL;
	}
	for (i = 0; i < list_len; i++) list[i] = NULL;

	while (*s != '\0') {
		/* Check for boundary */
		s++;
		if (*s != '\0' && is_word_character(*s) == word_flag) continue;
		
		/* Add to word list */
		*list_p = malloc(s - b + 1);
		if (*list_p == NULL) {
			free_word_list(list);
			error("Unable to allocate new word for list\n");
			return NULL;
		}
		memcpy(*list_p, b, s - b); (*list_p)[s - b] = '\0';

		/* Advance to next slot in list */
		list_p++;
		if (list_p == &list[list_len]) {
			/* Allocate space for more entries */
			int len2 = list_len + 6;
			char** list2 = realloc(list, sizeof(char*) * len2);
			if (list2 == NULL) {
				free_word_list(list);
				error("Unable to expand word list\n");
				return NULL;
			}
			list = list2;
			list_p = list + list_len;
			for (i = list_len; i < len2; i++) list[i] = NULL;
			list_len = len2;
		}

		/* Prepare to find next word or non-word */
		word_flag = !word_flag;
		b = s;
	}

	return list;
}

/* Deallocate a word list and all words in it */
void free_word_list(char** l)
{
	char** l_p;
	if (l == NULL) return;
	for (l_p = l; *l_p != NULL; l_p++) free(*l_p);
	free(l);
}

/* Return the number of entries in word list l */
int word_list_length(char** l)
{
	int i = 0;
	for (; *l != NULL; l++, i++);
	return i;
}

/* Sort nodes alphabetically by word */
int node_sort(const node_t* a, const node_t* b)
{
	return strcmp(a->word, b->word);
}

/* Find a node with a given word */
int word_find(const char* a, const node_t* b)
{
	return strcmp(a, b->word);
}

/* Allocate and return an empty model */
model_t* make_model(int order)
{
	int i;
	model_t* m = malloc(sizeof(model_t));

	m->order = order;
	m->forward = newtree234((cmpfn234)node_sort);
	m->backward = newtree234((cmpfn234)node_sort);
	m->context = malloc(sizeof(tree234*) * (order + 2));
	m->dictionary = newtree234((cmpfn234)strcmp);
	initialize_context(m);

	return m;
}

/* Free a model and all its related data structures */
void free_model(model_t* m)
{
	freetree234(m->forward);
	freetree234(m->backward);
	free(m->context);
	freetree234(m->dictionary);
}

/* Allocate and return an empty node */
node_t* make_node(void)
{
	node_t* n = malloc(sizeof(node_t));

	if (n == NULL) {
		error("Unable to allocate node\n");
		return NULL;
	}
	n->tree = newtree234((cmpfn234)node_sort);
	if (n->tree == NULL) {
		free(n);
		error("Unable to allocate tree for node\n");
		return NULL;
	}
	n->word = NULL;
	n->usage = 0;

	return n;
}

/* Free a node and its related data structures */
void free_node(node_t* n)
{
	int i;
	node_t* p;

	/* Recursively free subtrees */
	if (n->tree != NULL) {
		for (i = 0; (p = index234(n->tree, i)) != NULL; i++)
			free_node(p);
		freetree234(n->tree);
	}
	
	/* Free this node */
	free(n);
}

/* Reset a model's context */
void initialize_context(model_t* m)
{
	int i;
	for (i = 0; i < m->order + 1; i++) m->context[i] = NULL;
}

/* Add word list l to model m */
void learn(model_t* m, char** l)
{
	int i;
	char* entry;
	int length = word_list_length(l);

	/* Ignore short input */
	if (length <= m->order) return;

	/* Train model in forward direction */
	initialize_context(m);
	m->context[0] = m->forward;
	for (i = 0; i < length; i++) {
		entry = find234(m->dictionary, l[i], NULL);
		if (entry == NULL) {
			/* Add to dictionary */
			char* new_entry = strdup(l[i]);
			if (new_entry == NULL) {
				error("Unable to allocate new word\n");
				return;
			}
			entry = add234(m->dictionary, new_entry);
			if (entry == NULL) {
				free(new_entry);
				error("Unable to add word to dictionary\n");
				return;
			}
		}
		update_model(m, entry);
	}

	/* Train model in backward direction */
	initialize_context(m);
	m->context[0] = m->backward;
	for (i = length - 1; i >= 0; i--) {
		entry = find234(m->dictionary, l[i], NULL);
		update_model(m, entry);
	}
}

/* Update model m with word w */
void update_model(model_t* m, char* w)
{
	int i;
	for (i = m->order + 1; i > 0; i--) {
		node_t* node;
		if (m->context[i - 1] == NULL) continue;

		/* Find w in the list */
		node = find234(m->context[i - 1], w, (cmpfn234)word_find);
		if (node == NULL) {
			/* Add w to the list */
			node = make_node();
			if (node == NULL) {
				error("Unable to add node to model\n");
				return;
			}
			node->word = w;
			node->usage = 1;
			add234(m->context[i - 1], node);
		} else if (node->usage < INT_MAX) {
			node->usage++;
		}

		/* Add to the next context */
		m->context[i] = node->tree;
	}
}

