Covariance matrices estimated during EM can become ill-conditioned (nearly singular), which makes inverting them numerically unstable. A standard remedy is to add a small constant to the diagonal:
Code
# Example of regularizing a covariance matrix
import numpy as np

cov = np.array([[0.1, 0.09], [0.09, 0.1]])  # Nearly singular matrix
print(f"Original condition number: {np.linalg.cond(cov):.1f}")

# Add small constant to diagonal
epsilon = 1e-5
cov_reg = cov + np.eye(2) * epsilon
print(f"Regularized condition number: {np.linalg.cond(cov_reg):.1f}")
Original condition number: 19.0
Regularized condition number: 19.0
With epsilon = 1e-5 the change is invisible at one decimal place; a larger value such as 1e-2 would lower the condition number to about 10, at the cost of slightly biasing the estimate.
2 Working with HDF5 Files
2.1 Introduction to HDF5
HDF5 (Hierarchical Data Format version 5) provides an efficient way to store and access structured data. It supports storage of multiple arrays within a single file with fast random access.
2.1.1 Basic File Operations
Understanding HDF5 file structure helps when working with stored data:
Code
import numpy as np
import h5py

def demonstrate_hdf5_basics():
    """Show basic HDF5 file operations"""
    # Create sample data
    data1 = np.random.rand(5, 10)
    data2 = np.random.randint(0, 2, size=(3, 5))  # Binary data

    # Write to HDF5 file
    with h5py.File('example.h5', 'w') as f:
        # Create datasets with different names
        f.create_dataset('float_array', data=data1)
        f.create_dataset('binary_array', data=data2)
        # Add metadata as attributes
        f['float_array'].attrs['description'] = 'Random float values'
        f['binary_array'].attrs['description'] = 'Random binary values'

    # Read from HDF5 file
    with h5py.File('example.h5', 'r') as f:
        # List all datasets
        print("Datasets in file:", list(f.keys()))
        # Access data
        float_data = f['float_array'][:]
        binary_data = f['binary_array'][:]
        # Read attributes
        print("Float array description:", f['float_array'].attrs['description'])
        # Print shapes
        print("Float array shape:", float_data.shape)
        print("Binary array shape:", binary_data.shape)

demonstrate_hdf5_basics()
HDF5’s key advantage is efficient access to selected portions of data:
Code
def demonstrate_random_access():
    """Show efficient random access to HDF5 data"""
    # Create a larger dataset
    large_data = np.random.rand(1000, 50)

    # Write to file
    with h5py.File('large_example.h5', 'w') as f:
        f.create_dataset('large_array', data=large_data)

    # Access specific elements
    with h5py.File('large_example.h5', 'r') as f:
        dataset = f['large_array']

        # Get specific indices
        indices = [5, 120, 342, 867]
        selected_rows = dataset[indices]
        print(f"Shape of full dataset: {dataset.shape}")
        print(f"Shape of selected rows: {selected_rows.shape}")

        # Get specific region
        region = dataset[200:205, 10:15]
        print(f"Shape of region: {region.shape}")

demonstrate_random_access()
Shape of full dataset: (1000, 50)
Shape of selected rows: (4, 50)
Shape of region: (5, 5)
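Random access is fastest when the file's on-disk layout matches the access pattern. HDF5 supports chunked (and optionally compressed) storage, which the examples above do not use. Below is a minimal sketch of creating a chunked, compressed dataset; the file name and chunk shape are illustrative choices, not from the examples above:
Code
import numpy as np
import h5py

# Minimal sketch: chunked, compressed storage (illustrative parameters)
data = np.random.rand(1000, 50)

with h5py.File('chunked_example.h5', 'w') as f:
    # Each chunk holds 100 rows; gzip compression trades CPU time for disk space
    f.create_dataset('chunked_array', data=data,
                     chunks=(100, 50), compression='gzip')

with h5py.File('chunked_example.h5', 'r') as f:
    dset = f['chunked_array']
    print("Chunk shape:", dset.chunks)
    print("Compression:", dset.compression)
    # Reading a row range only decompresses the chunks it touches
    print("Rows 200:205 shape:", dset[200:205].shape)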
3 Feed-Forward Neural Networks for MNIST
3.1 MNIST Dataset Structure
The MNIST dataset contains 70,000 grayscale images of handwritten digits, each 28×28 pixels (60,000 for training and 10,000 for testing). Understanding how the data is organized is essential:
Code
import numpy as np
import matplotlib.pyplot as plt

# Create sample MNIST digit for visualization
def create_sample_digit():
    # Generate a simplified "3" digit
    digit = np.zeros((28, 28))
    # Top curve
    for j in range(10, 18):
        digit[5, j] = 1.0
    # Right side of top curve
    for i in range(5, 14):
        digit[i, 18] = 1.0
    # Middle line
    for j in range(10, 18):
        digit[14, j] = 1.0
    # Right side of bottom curve
    for i in range(14, 22):
        digit[i, 18] = 1.0
    # Bottom curve
    for j in range(10, 18):
        digit[22, j] = 1.0
    return digit

# Visualize the digit
digit = create_sample_digit()
plt.figure(figsize=(5, 5))
plt.imshow(digit, cmap='gray')
plt.axis('off')
plt.title('Sample Digit (3)')
plt.show()

# Show vector representation
flattened = digit.flatten()
print(f"Image shape: {digit.shape}, Flattened length: {len(flattened)}")
Example MNIST digit visualization
Image shape: (28, 28), Flattened length: 784
3.2 Neural Network Architecture
A Multi-Layer Perceptron (MLP) is organized as a sequence of layers, each performing a linear transformation followed by a non-linear activation:
Code
def visualize_network_architecture():
    # Create figure
    fig, ax = plt.subplots(figsize=(8, 4))

    # Layer sizes (simplified)
    layers = [
        {"name": "Input", "size": 784, "color": "lightskyblue", "width": 0.8, "height": 2.8},
        {"name": "Hidden 1", "size": 200, "color": "lightgreen", "width": 0.8, "height": 2.4},
        {"name": "Hidden 2", "size": 100, "color": "lightgreen", "width": 0.8, "height": 2.0},
        {"name": "Output", "size": 10, "color": "salmon", "width": 0.8, "height": 1.6}
    ]

    # Position layers horizontally
    spacing = 2.0
    for i, layer in enumerate(layers):
        # Draw layer box
        x = i * spacing
        y = (3 - layer["height"]) / 2  # Center vertically
        rect = plt.Rectangle((x, y), layer["width"], layer["height"],
                             facecolor=layer["color"], edgecolor="gray", alpha=0.7)
        ax.add_patch(rect)

        # Add layer name and size
        ax.text(x + layer["width"]/2, y - 0.3,
                f"{layer['name']}\n({layer['size']} neurons)",
                ha='center', va='center', fontsize=9)

        # Add nodes (just a few representative ones)
        if layer["size"] <= 10:
            # Show all nodes if few
            for j in range(layer["size"]):
                node_y = y + 0.2 + j * (layer["height"] - 0.4) / max(1, layer["size"] - 1)
                ax.plot(x + layer["width"]/2, node_y, 'o', markersize=6,
                        color='white', markeredgecolor='gray')
        else:
            # Show just 3 nodes with ellipsis
            for j in range(3):
                pos = [0.1, 0.5, 0.9]  # Relative positions
                node_y = y + 0.2 + pos[j] * (layer["height"] - 0.4)
                ax.plot(x + layer["width"]/2, node_y, 'o', markersize=6,
                        color='white', markeredgecolor='gray')
            ax.text(x + layer["width"]/2, y + layer["height"]/2, "...", fontsize=14)

        # Draw connections to next layer
        if i < len(layers) - 1:
            # Connection label (activation function)
            label = "ReLU" if i < len(layers) - 2 else "Softmax"
            ax.text(x + spacing/2 + layer["width"]/2, 3.3, label, ha='center', fontsize=9)

            # Connection arrows
            next_x = (i + 1) * spacing
            for src_y in [y + 0.3, y + layer["height"]/2, y + layer["height"] - 0.3]:
                for dst_y in [layers[i+1]["height"]/2 + (3 - layers[i+1]["height"])/2]:
                    ax.arrow(x + layer["width"], src_y,
                             next_x - (x + layer["width"]), dst_y - src_y,
                             head_width=0.05, head_length=0.1,
                             fc='gray', ec='gray', alpha=0.3)

    ax.set_xlim(-0.5, (len(layers) - 1) * spacing + 1.5)
    ax.set_ylim(-0.8, 3.8)
    ax.set_axis_off()
    ax.set_title("Multi-Layer Perceptron for MNIST", fontsize=12)
    plt.tight_layout()
    plt.show()

visualize_network_architecture()
MLP architecture for MNIST classification
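The figure above only shows the shapes involved; the forward pass itself is a chain of affine maps and activations. Below is a minimal NumPy sketch of one forward pass through this 784–200–100–10 architecture, using randomly initialized placeholder weights rather than a trained model:
Code
import numpy as np

def relu(z):
    return np.maximum(0, z)

def softmax(z):
    # Subtract the max for numerical stability
    e = np.exp(z - np.max(z))
    return e / e.sum()

rng = np.random.default_rng(0)
sizes = [784, 200, 100, 10]  # Matches the figure

# Placeholder weights and biases (a trained network would load these)
Ws = [rng.normal(0, 0.01, size=(m, n)) for n, m in zip(sizes[:-1], sizes[1:])]
bs = [np.zeros(m) for m in sizes[1:]]

x = rng.random(784)  # One flattened 28x28 image
h = x
for W, b in zip(Ws[:-1], bs[:-1]):
    h = relu(W @ h + b)                    # Hidden layers use ReLU
probs = softmax(Ws[-1] @ h + bs[-1])       # Output layer uses softmax

print("Output probabilities sum to:", probs.sum())
print("Predicted class:", int(np.argmax(probs)))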
3.3 ReLU Activation Function
The Rectified Linear Unit (ReLU) introduces non-linearity by setting negative values to zero while passing positive values through unchanged: ReLU(x) = max(0, x).
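A minimal sketch of ReLU applied elementwise (the sample values and plot range are chosen for illustration):
Code
import numpy as np
import matplotlib.pyplot as plt

def relu(z):
    """Elementwise max(0, z)."""
    return np.maximum(0, z)

# Negative inputs map to 0, positive inputs pass through
z = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print("Input:", z)
print("ReLU: ", relu(z))

# Plot the function over [-3, 3]
x = np.linspace(-3, 3, 100)
plt.figure(figsize=(5, 3))
plt.plot(x, relu(x))
plt.title("ReLU(x) = max(0, x)")
plt.xlabel("x")
plt.ylabel("ReLU(x)")
plt.grid(True)
plt.show()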
When processing multiple images at once, a single matrix multiplication over the whole batch can replace a Python loop over individual images:
Code
import json
import numpy as np

def demonstrate_batch_processing():
    # Create simplified batch of 5 images with 4 pixels each
    batch_size = 5
    image_size = 4

    # Random batch of images
    np.random.seed(0)
    images = np.random.rand(batch_size, image_size)

    # Simple model weights (4 inputs, 3 outputs)
    W = np.random.randn(3, 4) * 0.1
    b = np.zeros((3, 1))

    print("Batch of images shape:", images.shape)
    print("Weights shape:", W.shape)
    print("Bias shape:", b.shape)

    # Process one by one
    results_individual = []
    for i in range(batch_size):
        # Get single image and reshape to column vector
        img = images[i].reshape(-1, 1)
        # Forward pass
        z = W @ img + b
        # Simplified output (no activation for demo)
        results_individual.append(z.flatten())

    # Process as batch
    # Transpose images to have shape (4, 5)
    images_t = images.T
    # Compute W @ images_t to get shape (3, 5)
    z_batch = W @ images_t + b
    # Each column is the result for one image
    results_batch = z_batch.T

    print("\nResults match:", np.allclose(results_individual, results_batch))

    # Demonstrate JSON formatting for output
    sample_output = {
        "index": 42,
        "activations": [0.1, 0.7, 0.05, 0.02, 0.03, 0.05, 0.02, 0.01, 0.01, 0.01],
        "classification": 1  # Index of maximum activation
    }
    print("\nSample JSON output format:")
    print(json.dumps(sample_output, indent=2))

demonstrate_batch_processing()
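To see why batching matters in practice, here is a rough timing comparison of the two approaches above (a sketch; the batch size and layer width are scaled up so the difference is measurable, and exact numbers depend on hardware). The batched version is typically much faster because NumPy dispatches one optimized matrix multiplication instead of many small ones:
Code
import time
import numpy as np

np.random.seed(0)
images = np.random.rand(10000, 784)   # Larger batch to make timing visible
W = np.random.randn(10, 784) * 0.1
b = np.zeros((10, 1))

# One image at a time
start = time.perf_counter()
individual = [(W @ images[i].reshape(-1, 1) + b).flatten()
              for i in range(len(images))]
t_loop = time.perf_counter() - start

# Whole batch at once
start = time.perf_counter()
batch = (W @ images.T + b).T
t_batch = time.perf_counter() - start

print(f"Loop:  {t_loop:.4f} s")
print(f"Batch: {t_batch:.4f} s")
print("Results match:", np.allclose(individual, batch))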