Tensorflow model for predicting dice game decisions

For my first ML project I have modeled a dice game called Ten Thousand, or Farkle, depending on who you ask, as a vastly over-engineered solution to a computer player. You can find the complete game (With perfectly good compluter player comprised of about 15 lines of logic) here.

In way of brief explanation of the game, 1’s and 5′ are always valid, scoring dice. Other numbers must be involved in 1) three or more of a kind, 2) a straight or, 3) three pairs to be scoring dice. I would like my model to predict which dice should be kept for a given roll. Thus far it is excellent at figuring out that 1’s and 5’s are keepers, but I am unable to improve on this with any amount of monkeying around thus far.

I’m looking for any advice on how to improve predictions to include scoring dice other than 1’s and 5’s. I’ve tried increasing the proportion of those situations in the training set, increasing and decreasing the complexity of the model both in structure and with various regularization methods, and even using convolution layers.

Specifically, are the RMSProp optimizer and sigmoid-cross-entropy loss appropriate here?

Setting things up.

import tensorflow as tf import numpy as np import pandas as pd  from collections import Counter from itertools import combinations_with_replacement as combos from itertools import permutations as perms  import matplotlib.pyplot as plt from tensorflow.keras import layers, Model from tensorflow.data import Dataset   tf.enable_eager_execution() tfe = tf.contrib.eager 

I get my data by just making it, ensuring to make plenty of examples of special scoring situations.

def make_some_features(numbers, clip):     features = set()     combinations = (combo for combo in combos(numbers, 6))     for i, comb in enumerate(combinations):         if i % clip == 0:  # Keeping size reasonable             for perm in perms(comb):                 features.add(perm)     return features  # I've looked through these and there are an expected proportion of three-of-a-kind or better examples. features = make_some_features(list(range(1, 7)), 3)  # Make some three pairs features. special_features = set() for _ in range(1000):     half = [np.random.randint(1, 6) for _ in range(3)]     half += half     for perm in perms(half):         special_features.add(perm)  # We can only do so much with straights. for perm in perms([1, 2, 3, 4, 5, 6]):     special_features.add(perm)  all_features = [np.array(feature) for feature in special_features] all_features += [np.array(feature) for feature in features] all_labels = [choose_dice(feature) for feature in special_features] all_labels += [choose_dice(feature) for feature in features] 

I put it all in a pandas dataframe for easy scrambling and partition off training, validation and test sets.

def create_dataset(features, labels):     dice = pd.Series(features)     labels = pd.Series(labels)     dataset = pd.DataFrame({'dice': dice,                             'labels': labels})     return dataset   all_dice = create_dataset(all_features, all_labels) all_dice = all_dice.reindex(np.random.permutation(all_dice.index))  train_dice = all_dice.head(10000) val_dice = train_dice.tail(5000) test_dice = all_dice.tail(1936) 

I one_hot encode the features and resize the label tensor.

def pre_process_features(dice: pd.DataFrame) -> list:     rolls = []     for roll in dice['dice']:         roll = np.array(roll)         roll -= 1         roll = tf.one_hot(roll, depth=6, axis=-1)         rolls.append(roll)     return rolls   def pre_process_labels(dice: pd.DataFrame) -> list:     labels = [tf.reshape(tf.convert_to_tensor([label]), (6, 1)) for label in dice['labels']]     return labels 

Model, optimizer, loss and gradient functions.

model = tf.keras.Sequential([     layers.Dense(6, activation=tf.nn.relu, input_shape=(6, 6),                  kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),     layers.Dense(64, activation=tf.nn.relu,                  kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),     layers.Dense(128, activation=tf.nn.relu,                  kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),     # layers.Dense(256, activation=tf.nn.relu,     #              kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),     layers.Dense(32, activation=tf.nn.relu,                  kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),     layers.Dense(1)])  optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate) global_step = tf.train.get_or_create_global_step()  def loss(model, features, labels):     logits = model(features)     if logits.shape == (1, 6, 1):         logits = tf.squeeze(logits, [0])     standard_loss = tf.losses.sigmoid_cross_entropy(logits=logits, multi_class_labels=labels)     return standard_loss   def grad(model, features, labels):     with tf.GradientTape() as tape:         loss_value = loss(model, features, labels)     return loss_value, tape.gradient(loss_value, model.trainable_variables) 

Training loop.

train_loss = [] train_accuracy = [] val_loss = [] val_accuracy = []  val_features, val_labels = iter(val_features), iter(val_labels) val_feature, val_label = next(val_features), next(val_labels) for epoch in range(num_epochs):     epoch_loss_ave = tfe.metrics.Mean('loss')     epoch_val_loss_average = tfe.metrics.Mean('loss')     epoch_accuracy = tfe.metrics.Accuracy('acc')     epoch_val_accuracy = tfe.metrics.Accuracy('acc')      for feature, label in zip(train_features, train_labels):         feature = tf.convert_to_tensor(feature.numpy().reshape(1, 6, 6))          loss_value, grads = grad(model, feature, label)         optimizer.apply_gradients(zip(grads, model.variables), global_step)         epoch_loss_ave(loss_value)          guessed_label = decode_label(model(feature))         epoch_accuracy(guessed_label, decode_label(label))          val_loss_value = loss(model, val_feature, val_label)         epoch_val_loss_average(val_loss_value)          val_guess_label = decode_label(model(val_feature))         epoch_val_accuracy(val_guess_label, decode_label(val_label))      train_loss.append(epoch_loss_ave.result())     train_accuracy.append(epoch_accuracy.result())      val_loss.append(epoch_val_loss_average.result())     val_accuracy.append((epoch_val_accuracy.result()))      if epoch % 20 == 0:         print(f'Epoch {epoch} Loss: {epoch_loss_ave.result()} Accuracy: {epoch_accuracy.result()}')         print(f'Validation loss: {epoch_val_loss_average.result()} Accuracy: {epoch_val_accuracy.result()}') 

Testing and few predictions for random game inputs.

test_results = [] test_accuracy = tfe.metrics.Accuracy('acc')  for feature, label in zip(test_features, test_labels):      guessed_label = decode_label(model(feature))     test_accuracy(guessed_label, decode_label(label)) print(f'Test accuracy: {test_accuracy.result()}')  for _ in range(25):     roll = np.array([np.random.randint(0, 5) for _ in range(6)])     turn = tf.one_hot(roll, depth=6, dtype=np.int32)     roll += 1     answer = choose_dice(roll)     print(f'Roll: {roll}')     print(f'Dice expected to be kept:  {answer}')     turn = tf.convert_to_tensor(turn.numpy().reshape((1, 6, 6)), dtype=tf.float32)     predictions = model.predict(turn)     tf.nn.softmax(predictions)     predicted_label = []     for prediction in predictions[0]:         if prediction[0] > 0.:             predicted_label.append(1.)         else:             predicted_label.append(0.)     print(f'Dice predicted to be kept: {predicted_label}') 

The complete code can be found here.