1+ '''
2+ This code is changed based on the Latent-deeponet
3+ available at https://github.com/katiana22/latent-deeponet
4+ '''
5+
6+ import torch
7+ import torch .nn as nn
8+ import numpy as np
9+ import argparse
10+ from sklearn .model_selection import train_test_split
11+ from sklearn .decomposition import PCA
12+ from sklearn .metrics import mean_squared_error
13+ import matplotlib .pyplot as plt
14+ import os
15+ import random
16+ #device = torch.device('cpu')
17+
18+
19+ ## Parser
20+ parser = argparse .ArgumentParser (description = 'Running autoencoder models.' )
21+ parser .add_argument (
22+ '--method' ,
23+ default = 'MLAE' , # Determine the autoencoder method
24+ help = 'vanilla-AE | MLAE | CAE | WAE' )
25+ parser .add_argument (
26+ '--latent_dim' ,
27+ type = int ,
28+ default = 64 , # Determine latent dimensionality
29+ help = 'latent dimensionality (default: 64)' )
30+ parser .add_argument (
31+ '--n_samples' ,
32+ type = int ,
33+ default = 300 , # Determine number of generated samples
34+ help = 'number of generated samples (default: 800)' )
35+ parser .add_argument (
36+ '--n_epochs' ,
37+ type = int ,
38+ default = 300 , # Determine number of epochs
39+ help = 'number of epochs (default: 800)' )
40+ parser .add_argument (
41+ '--bs' ,
42+ type = int ,
43+ default = 16 , # Determine batch size
44+ help = 'batch size (default: 128)' )
45+ parser .add_argument (
46+ '--ood' ,
47+ type = int ,
48+ default = 0 ,
49+ help = 'generate results for OOD data' )
50+ parser .add_argument (
51+ '--noise' ,
52+ type = int ,
53+ default = 0 ,
54+ help = 'generate results for noisy data' )
55+
56+ args , unknown = parser .parse_known_args ()
57+
58+ #### Fix random see (for reproducibility of results)
59+ seed_value = random .randint (1 ,1000 )
60+ os .environ ['PYTHONHASHSEED' ]= str (seed_value )
61+ random .seed (seed_value )
62+ np .random .seed (seed_value )
63+
64+ #### Load data
65+ nx , ny , nt = 256 , 256 , 72 # size of the longitude, latitude and time
66+ n = 300 # number of generated samples
67+
68+ file = np .load ('Data/shallow-water-256x256x72_1.npz' )
69+ inputs_1 , outputs_1 = file ['inputs' ], np .array ((file ['outputs' ]))
70+
71+ file = np .load ('Data/shallow-water-256x256x72_2.npz' )
72+ inputs_2 , outputs_2 = file ['inputs' ], np .array ((file ['outputs' ]))
73+
74+ inputs = np .concatenate ((inputs_1 , inputs_2 ), axis = 0 ).reshape (n , nx * ny ) # Concatenate input data
75+ outputs = np .concatenate ((outputs_1 , outputs_2 ), axis = 0 ) # Concatenate output data
76+
77+ outputs = outputs [:,:nt ,:,:]
78+
79+ outputs_re = outputs .reshape (n * nt , nx * ny ) # Reshape the data to the desired shape
80+
81+ n_samples = inputs .shape [0 ] # Number of samples
82+ num_train = 230 # Number of training samples
83+ num_test = n - num_train # Number of testing samples
84+ x_y_data = outputs_re # only outputs
85+
86+ def NormalizeData (data ):
87+ return (data - np .min (data )) / (np .max (data ) - np .min (data )) # Normalize the data
88+
89+ # Normalize datasets
90+ x_norm = NormalizeData (inputs ) # Normalize te input data
91+ x_y_data_norm = NormalizeData (x_y_data ).astype ("float32" ) # Normalize te output data
92+ y_norm = x_y_data_norm
93+
94+ # Perform PCA for input data
95+ pca = PCA (n_components = args .latent_dim ) # Create an instance of the PCA class with desired number of components
96+ x_red = pca .fit_transform (x_norm ) # Fit the PCA model to the normalized input data and transform the data into the reduced dimensional space
97+ if not os .path .exists ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/PCA/' ):
98+ os .makedirs ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/PCA/' )
99+
100+ PCA_dir = 'results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/PCA/'
101+ #plt.show()
102+ #print('All data shape:', x_norm.shape, y_norm.shape)
103+
104+ # Split to train/eval autoencoder
105+ n_train = nt * num_train
106+ n_test = nt * num_test
107+ x_y_train = x_y_data_norm [:n_train ,:]
108+ x_y_test = x_y_data_norm [n_train :,:]
109+ # x_y_train, x_y_test = train_test_split(x_y_data_norm, test_size=70/300, random_state=42)
110+ # n_train, n_test, n_all = x_y_train.shape[0], x_y_test.shape[0], x_y_data.shape[0] # for autoencoder training
111+
112+ # Reshaping the train and test sets
113+ x_y_train = torch .tensor (x_y_train .reshape (n_train , nx , ny ))
114+ r = torch .randperm (n_train )
115+ x_y_train = x_y_train [r ]
116+ x_y_test = torch .tensor (x_y_test .reshape (n_test , nx , ny ))
117+
118+ #### Create directories for results
119+ if not os .path .exists ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/plots/' ):
120+ os .makedirs ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/plots/' )
121+ if not os .path .exists ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/errors_AE/' ):
122+ os .makedirs ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/errors_AE/' )
123+ if not os .path .exists ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/data/' ):
124+ os .makedirs ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/data/' )
125+ if not os .path .exists ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/class_AE/' ):
126+ os .makedirs ('results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/class_AE/' )
127+
128+ class_AE_dir = 'results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/class_AE/'
129+
130+ #### Run autoencoders
131+ print ('Seed number:' , seed_value )
132+ print ('Autoencoder training in progress...' )
133+
134+
135+ ## Multi-layer perception (MLP)/Fully-connected autoencoder
136+ class MLAE (nn .Module ):
137+ def __init__ (self , latent_dim ):
138+ super (MLAE , self ).__init__ ()
139+ self .latent_dim = latent_dim
140+ # Define the encoder network as a sequential module
141+ self .encoder = nn .Sequential (
142+ nn .Flatten (),
143+ nn .Linear (nx * ny , 256 ),
144+ nn .ReLU (),
145+ nn .Linear (256 , 169 ),
146+ nn .ReLU (),
147+ nn .Linear (169 , 121 ),
148+ nn .ReLU (),
149+ nn .Linear (121 , latent_dim ),
150+ nn .ReLU ()
151+ )
152+ # Define the decoder network as a sequential module
153+ self .decoder = nn .Sequential (
154+ nn .Linear (latent_dim , 121 ),
155+ nn .ReLU (),
156+ nn .Linear (121 , 169 ),
157+ nn .ReLU (),
158+ nn .Linear (169 , 256 ),
159+ nn .ReLU (),
160+ nn .Linear (256 , nx * ny ),
161+ nn .Sigmoid ()
162+ )
163+
164+ def forward (self , x ):
165+ encoded = self .encoder (x ) # Perform encoder
166+ decoded = self .decoder (encoded ) # Perform decoder
167+ decoded = decoded .view (- 1 , nx , ny ) # Reshape to the desired shape
168+ return decoded
169+
170+ # control which parameters are frozen / free for optimization
171+ def free_params (module : nn .Module ):
172+ for p in module .parameters ():
173+ p .requires_grad = True
174+
175+ def frozen_params (module : nn .Module ):
176+ for p in module .parameters ():
177+ p .requires_grad = False
178+
179+ #### Autoencoder
180+
181+ autoencoder = MLAE (args .latent_dim )
182+
183+ optimizer = torch .optim .Adam (autoencoder .parameters ()) # Construct an optimizer
184+ criterion = nn .MSELoss () # Define MSE loss
185+
186+
187+
188+ # Train model
189+ epochs = args .n_epochs # epochs for training
190+ batch_size = args .bs # batch size
191+
192+ loss_history = []
193+ val_loss_history = []
194+
195+ for epoch in range (epochs ):
196+ running_loss = 0.0 # Initialize variables to store training loss
197+ for i in range (0 , len (x_y_train ), batch_size ): # Iterate over the training data batches
198+ inputs_sbatch = torch .Tensor (x_y_train [i :i + batch_size ]) # batch
199+
200+ optimizer .zero_grad () # Clear the gradients of the optimizer
201+
202+ outputs_sbatch = autoencoder (inputs_sbatch ) # Obtain output by autoencoder
203+ loss = criterion (outputs_sbatch , inputs_sbatch ) # Calculate the MSE loss
204+
205+ loss .backward () # Backward pass: compute gradients of the loss
206+ optimizer .step () # Update the model parameters based on the computed gradients
207+
208+ running_loss += loss .item ()
209+
210+ loss_history .append (running_loss ) # Loss history
211+
212+ # Evaluate on validation set
213+ val_loss = 0.0
214+ with torch .no_grad ():
215+ for i in range (0 , len (x_y_test ), batch_size ):
216+ inputs_sbatch = torch .Tensor (x_y_test [i :i + batch_size ])
217+ outputs_sbatch = autoencoder (inputs_sbatch )
218+ val_loss += criterion (outputs_sbatch , inputs_sbatch ).item ()
219+ val_loss_history .append (val_loss )
220+
221+ print (f"Epoch [{ epoch + 1 } /{ epochs } ], Loss: { running_loss :.4f} , Val Loss: { val_loss :.4f} " )
222+
223+ print ('Autoencoder training is completed.' )
224+
225+ # Compute L2 error on test data
226+ decoded_data = autoencoder (torch .Tensor (x_y_test )).reshape (n_test , nx * ny ).detach ().numpy () # Calculate the decoded data
227+ reference_data = x_y_test .reshape (n_test , nx * ny ).detach ().numpy () # Reshape the target to the desired shape
228+
229+ # L2 error
230+ errors = np .abs (decoded_data - reference_data ) # Calculate the error between the output and target labels
231+ l2_rel_err = np .linalg .norm (errors , axis = 0 )/ np .linalg .norm (reference_data , axis = 0 ) # Calculate relative L2 error
232+ l2 = np .mean (l2_rel_err ) # Calculate the mean value of the relative L2 error
233+ print ('Autoencoder relative L2 error: {}\n ' .format (round (l2 ,4 )))
234+
235+ # Mean squared error (MSE)
236+ mse = mean_squared_error (reference_data , decoded_data ) # Calculate the MSE error
237+ print ('Mean squared error (MSE): {}\n ' .format (round (mse , 4 )))
238+
239+ errordir = 'results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/errors_AE/'
240+ np .savetxt (
241+ errordir + 'error_' + str (args .method ) + '_seed_' + str (seed_value ) + '.txt' ,
242+ np .expand_dims (np .array (mse ), axis = 0 ),
243+ fmt = '%e'
244+ )
245+
246+
247+ #### Save all data
248+ # Pass ALL data through the encoder and then split
249+ x_y_red = autoencoder .encoder (torch .Tensor (x_y_data_norm .reshape (n * nt , nx , ny ))).detach ().numpy () # encoder
250+ norm_min = np .min (x_y_data ) # Calculate the minimum value of the output
251+ norm_max = np .max (x_y_data ) # Calculate the maximum value of the output
252+ # Train-test
253+ # Split x and y data
254+ y_red = x_y_red .reshape (n ,nt * args .latent_dim ) # Reshape the reduced output to the desired shape
255+
256+ # Split to train/test
257+ x_train_red , x_test_red = x_red [:num_train ], x_red [num_train :] # Split the reduced input to training input and testing input
258+ y_train_red , y_test_red = y_red [:num_train ].reshape (num_train , nt , args .latent_dim ), y_red [num_train :].reshape (num_test , nt , args .latent_dim ) # Split the reduced output to training output and testing output
259+ x_train , x_test = inputs [:num_train ], inputs [num_train :] # Split the input to training input and testing input
260+ y_train , y_test = outputs [:num_train ], outputs [num_train :] # Split the output to training output and testing output
261+
262+
263+ # Save reduced data (for DeepONet)
264+ data_dir = 'results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/data/'
265+
266+ np .savez (data_dir + 'data_d_{}.npz' .format (args .latent_dim ), latent_dim = args .latent_dim , x_train = x_train , x_test = x_test ,
267+ y_train = y_train , y_test = y_test ,
268+ x_train_red = x_train_red , x_test_red = x_test_red ,
269+ y_train_red = y_train_red , y_test_red = y_test_red ,norm_max = norm_max , norm_min = norm_min )
270+
271+ # Save autoencoder class (to use decoder later)
272+ class_AE_dir = 'results/d_' + str (args .latent_dim ) + '/' + str (args .method ) + '/class_AE/'
273+ torch .save (autoencoder ,class_AE_dir + 'class_AE_{}' .format (args .latent_dim ))
0 commit comments