experimenting with archs

Z80coder · Z80coder · commit 3bcb6d9ebb5f · 2023-01-21T17:24:06.000Z
diff --git a/neurallogic/harden_layer.py b/neurallogic/harden_layer.py
@@ -18,6 +18,7 @@ def straight_through_harden_element(x):
 def hard_harden_layer(x):
     return x
 
+#TODO: can we harden arbitrary tensors?
 #TODO: is this correct?
 def symbolic_harden_layer(x):
     return x
diff --git a/tests/test_hard_majority.py b/tests/test_hard_majority.py
@@ -163,3 +163,4 @@ def test_majority_layer():
         symbolic_output = symbolic_generation.symbolic_expression(jaxpr, harden.harden(input))
         assert jax.numpy.array_equal(symbolic_output, harden.harden(expected))
 
+# TODO: test training the hard majority layer
diff --git a/tests/test_mnist.py b/tests/test_mnist.py
@@ -3,16 +3,21 @@
 import ml_collections
 import numpy as np
 import optax
+import pytest
 import tensorflow as tf
 import tensorflow_datasets as tfds
 from flax import linen as nn
 from flax.metrics import tensorboard
 from flax.training import train_state
+from jax.config import config
 from matplotlib import pyplot as plt
 from tqdm import tqdm
 
-from neurallogic import (hard_not, hard_or, harden, harden_layer,
-                         neural_logic_net)
+from neurallogic import (hard_and, hard_majority, hard_not, hard_or, harden,
+                         harden_layer, neural_logic_net, real_encoder)
+
+# Uncomment to debug NaNs
+#config.update("jax_debug_nans", True)
 
 """
 MNIST test.
@@ -21,16 +26,39 @@
 The data is loaded using tensorflow_datasets.
 """
 
-
+# TODO: experiment in ipython notebook with different values for these
+"""
 def nln(type, x, width):
-    x = hard_or.or_layer(type)(width, nn.initializers.uniform(1.0))(x)
+    #x = x.reshape((-1, 1))
+    #re = real_encoder.real_encoder_layer(type)(3)
+    #x = jax.vmap(re, 0)(x)
+    #x = x.ravel()
+    #x = hard_or.or_layer(type)(width, nn.initializers.uniform(1.0), dtype=jax.numpy.float16)(x)
+    x = hard_or.or_layer(type)(width, nn.initializers.uniform(1.0), dtype=jax.numpy.float16)(x)
     x = hard_not.not_layer(type)(10)(x)
     x = x.ravel()  # flatten the outputs of the not layer
     # harden the outputs of the not layer
     x = harden_layer.harden_layer(type)(x)
     x = x.reshape((10, width))  # reshape to 10 ports, 100 bits each
     x = x.sum(-1)  # sum the 100 bits in each port
     return x
+"""
+
+def nln(type, x, width):
+    majority_size = 3
+    n = int(width / majority_size)
+    not_size = 200
+    num_classes = 10
+
+    x = hard_or.or_layer(type)(width, nn.initializers.uniform(1.0), dtype=jax.numpy.float16)(x) # width number of or neurons
+    x = x.reshape((n, majority_size)) # reshape to (n, majority_size)
+    x = hard_majority.majority_layer(type)(x) # reduce to (n,)
+    x = hard_not.not_layer(type)(not_size, dtype=jax.numpy.float16)(x) # (not_size, n)
+    x = x.ravel()  # flatten the outputs of the not layer (not_size * n,)
+    x = harden_layer.harden_layer(type)(x) # (not_size * n,)
+    x = x.reshape((num_classes, int(not_size * n / num_classes))) # reshape to num_classes ports (num_classes, (not_size * n) / num_classes)
+    x = x.sum(-1)  # sum the bits in each port
+    return x
 
 
 def batch_nln(type, x, width):
@@ -107,6 +135,8 @@ def get_datasets():
     test_ds = tfds.as_numpy(ds_builder.as_dataset(split="test", batch_size=-1))
     train_ds["image"] = jnp.float32(train_ds["image"]) / 255.0
     test_ds["image"] = jnp.float32(test_ds["image"]) / 255.0
+    # TODO: we don't need to do this even when we don't use the real encoder
+    # Use grayscale information
     # Convert the floating point values in [0,1] to binary values in {0,1}
     train_ds["image"] = jnp.round(train_ds["image"])
     test_ds["image"] = jnp.round(test_ds["image"])
@@ -191,12 +221,14 @@ def get_config():
     # config for CNN
     config.learning_rate = 0.01
     # config for NLN
-    config.learning_rate = 0.1
+    #config.learning_rate = 0.1
+    config.learning_rate = 0.01
 
     # Always commit with num_epochs = 1 for short test time
     config.momentum = 0.9
     config.batch_size = 128
-    config.num_epochs = 2
+    #config.num_epochs = 2
+    config.num_epochs = 1000
     return config
 
 
@@ -260,7 +292,7 @@ def check_symbolic(nets, datasets, trained_state):
         symbolic_output = symbolic.apply({"params": symbolic_weights}, symbolic_input)
         print("symbolic_output", symbolic_output[0][:10000])
 
-
+@pytest.mark.skip(reason="temporarily off")
 def test_mnist():
     # Make sure tf does not allocate gpu memory.
     tf.config.experimental.set_visible_devices([], "GPU")
@@ -270,7 +302,7 @@ def test_mnist():
 
     # Define the model.
     # soft = CNN()
-    width = 10
+    width = 1599
     soft, _, _ = neural_logic_net.net(lambda type, x: batch_nln(type, x, width))
 
     # Get the MNIST dataset.
@@ -279,11 +311,13 @@ def test_mnist():
     train_ds["image"] = jnp.reshape(train_ds["image"], (train_ds["image"].shape[0], -1))
     test_ds["image"] = jnp.reshape(test_ds["image"], (test_ds["image"].shape[0], -1))
 
+    print(soft.tabulate(jax.random.PRNGKey(0), train_ds["image"][0:1]))
+
     # Train and evaluate the model.
     trained_state = train_and_evaluate(
         soft, (train_ds, test_ds), config=config, workdir="./mnist_metrics"
     )
 
     # Check symbolic net
-    _, hard, symbolic = neural_logic_net.net(lambda type, x: nln(type, x, width))
-    check_symbolic((soft, hard, symbolic), (train_ds, test_ds), trained_state)
+    #_, hard, symbolic = neural_logic_net.net(lambda type, x: nln(type, x, width))
+    #check_symbolic((soft, hard, symbolic), (train_ds, test_ds), trained_state)

Original file line number	Diff line number	Diff line change
`@@ -163,3 +163,4 @@ def test_majority_layer():`
`163`	`163`	`symbolic_output = symbolic_generation.symbolic_expression(jaxpr, harden.harden(input))`
`164`	`164`	`assert jax.numpy.array_equal(symbolic_output, harden.harden(expected))`
`165`	`165`
	`166`	`+# TODO: test training the hard majority layer`