- class light_pfp_autogen.config.ActiveLearningConfig(*, task_name: str, init_dataset: Sequence[Union[Path, str]], pfp_model_version: str, pfp_calc_mode: str, train_config: TrainConfig = None, training_time: float = 0.5, work_dir: Union[str, Path] = None, sample_config: SampleConfig = None)#
-
Bases:
BaseModel
The configuration for active learning
- task_name#
-
The name of the task. The model name will be {task_name}_N
where N is the iteration index. Default is "active_learning".
- Type
-
str
- init_dataset#
-
The initial dataset for the first iteration.
- Type
-
Sequence[Union[Path, str]]
- pfp_model_version#
-
The version of the PFP model for data augmentation.
- Type
-
str
- pfp_calc_mode#
-
The calculation mode of the PFP model for data augmentation.
- Type
-
str
- train_config#
-
The configuration for training. Default is TrainConfig().
- Type
-
TrainConfig
- training_time#
-
The time required for model updates during each active learning iteration
in hours. The total number of epochs for model training is adjusted according
to the size of the current dataset while keeping the training time roughly
around the training_time hours. Defaults to 0.5.
- Type
-
float
- work_dir#
-
The working directory to store training data, etc, during the iterations.
Defaults to ./autogen_workdir.
- Type
-
Union[str, Path]
- sample_config#
-
The configuration for selecting new training data. Default is SampleConfig().
- Type
-
SampleConfig
- classmethod from_dict(config_dict: Dict) ActiveLearningConfig #
- classmethod from_json(config_file: str) ActiveLearningConfig #
- init_dataset: Sequence[Union[Path, str]]#
- pfp_calc_mode: str#
- pfp_model_version: str#
- sample_config: SampleConfig#
- task_name: str#
- to_json(config_file: str) None #
- train_config: TrainConfig#
- training_time: float#
- classmethod validate_init_dataset(value: Sequence[Path]) Sequence[Path] #
- classmethod validate_training_time(value: float) float #
- work_dir: Union[str, Path]#
- class light_pfp_autogen.config.CLIConfig(*, task_name: str, init_dataset: Sequence[Union[Path, str]], pfp_model_version: str, pfp_calc_mode: str, train_config: TrainConfig = None, training_time: float = 0.5, work_dir: Union[str, Path] = None, sample_config: SampleConfig = None, md_script: str, num_iteration: int = 10)#
-
Bases:
ActiveLearningConfig
The configuration for active learning CLI
- md_script#
-
The path to the MD script.
- Type
-
str
- num_iteration#
-
The number of iterations for active learning. Default is 10.
- Type
-
int
- classmethod check_md_script(value: str) str #
- classmethod check_num_iteration(value: int) int #
- md_script: str#
- num_iteration: int#
- class light_pfp_autogen.config.CommonConfig(*, total_epoch: int = 0, batch_size: int = 128, reload_model: str = "", lr_scheduling: str = 'linear_warmup', max_energy: float = 1e+38, max_forces: float = 1e+38)#
-
Bases:
BaseModel
The common configuration for training
- total_epoch#
-
Number of epochs the entire multistage training process will run for.
Divided in a 0.25:0.5:0.25 ratio between stage1/stage2/calibration stages.
Default is 0.
- Type
-
int
- batch_size#
-
Number of training structures used for one time of parameter update.
Default is 128.
- Type
-
int
- reload_model#
-
If a model ID is provided, the training job will be started from the given model.
If empty, the parameters of the model will be randomly initialized.
Default is "".
- Type
-
str
- lr_scheduling#
-
The learning rate scheduler. Support “step”, “linear_warmup” and “None”.
Default is "linear_warmup".
- Type
-
str
- max_energy#
-
The max energy per atom allowed in the training dataset. The structure will be
skipped if the energy is larger than this value. Default is 1e38.
- Type
-
float
- max_forces#
-
The max forces allowed in the training dataset. The structure will be skipped if
the force acting on one atom is larger than this value. Default is 1e38.
- Type
-
float
- batch_size: int#
- lr_scheduling: str#
- max_energy: float#
- max_forces: float#
- reload_model: str#
- total_epoch: int#
- class light_pfp_autogen.config.MTPConfig(*, model_version: str = 'v1.0', pretrained_model: str = '', rc: float = 6.0, levmax: int = 8, moment_init_cost: int = 2, moment_mu_cost: int = 1, moment_nu_cost: int = 1)#
-
Bases:
BaseModel
The configuration for MTP model
- model_version#
-
The version of the MTP model. Default is "v1.0".
- Type
-
str
- pretrained_model#
-
Use a pre-trained model. If empty, the pre-trained model is not used. Default is "".
- Type
-
str
- rc#
-
The cutoff distance to define the local environment of one atom. Default is 6.0.
- Type
-
float
- levmax#
-
The argument to control the complexity of the moment descriptor. Default is 8.
- Type
-
int
- moment_init_cost#
-
The argument to control the complexity of the moment descriptor. Default is 2.
- Type
-
int
- moment_mu_cost#
-
The argument to control the complexity of the moment descriptor. Default is 1.
- Type
-
int
- moment_nu_cost#
-
The argument to control the complexity of the moment descriptor. Default is 1.
- Type
-
int
- levmax: int#
- model_version: str#
- moment_init_cost: int#
- moment_mu_cost: int#
- moment_nu_cost: int#
- pretrained_model: str#
- rc: float#
- class light_pfp_autogen.config.SampleConfig(*, dE_min_coef: Optional[float] = None, dE_min: Optional[float] = None, dE_max_coef: Optional[float] = None, dE_max: Optional[float] = None, dF_min_coef: Optional[float] = None, dF_min: Optional[float] = None, dF_max_coef: Optional[float] = None, dF_max: Optional[float] = None, dS_min_coef: Optional[float] = None, dS_min: Optional[float] = None, dS_max_coef: Optional[float] = None, dS_max: Optional[float] = None, pfp_fallback_samples: int = 5)#
-
Bases:
BaseModel
The criteria for selecting new training data in active learning
- dE_min_coef#
-
Minimum threshold for energy error, as a multiple of MAE.
- Type
-
float, Optional
- dE_min#
-
Minimum threshold for energy error, absolute value (eV/atom).
- Type
-
float, Optional
- dE_max_coef#
-
Maximum threshold for energy error, as a multiple of MAE.
- Type
-
float, Optional
- dE_max#
-
Maximum threshold for energy error, absolute value (eV/atom).
- Type
-
float, Optional
- dF_min_coef#
-
Minimum threshold for forces error, as a multiple of MAE.
- Type
-
float, Optional
- dF_min#
-
Minimum threshold for forces error, absolute value (eV/Å).
- Type
-
float, Optional
- dF_max_coef#
-
Maximum threshold for forces error, as a multiple of MAE.
- Type
-
float, Optional
- dF_max#
-
Maximum threshold for forces error, absolute value (eV/Å).
- Type
-
float, Optional
- dS_min_coef#
-
Minimum threshold for stress error, as a multiple of MAE.
- Type
-
float, Optional
- dS_min#
-
Minimum threshold for stress error, absolute value (GPa).
- Type
-
float, Optional
- dS_max_coef#
-
Maximum threshold for stress error, as a multiple of MAE.
- Type
-
float, Optional
- dS_max#
-
Maximum threshold for stress error, absolute value (GPa).
- Type
-
float, Optional
- pfp_fallback_samples#
-
Number of additional training structures to collect after switching to PFP calculator.
If 0, the PFP-based fallback mechanism is disabled. Defaults to 5.
- Type
-
int, Optional
- dE_max: Optional[float]#
- dE_max_coef: Optional[float]#
- dE_min: Optional[float]#
- dE_min_coef: Optional[float]#
- dF_max: Optional[float]#
- dF_max_coef: Optional[float]#
- dF_min: Optional[float]#
- dF_min_coef: Optional[float]#
- dS_max: Optional[float]#
- dS_max_coef: Optional[float]#
- dS_min: Optional[float]#
- dS_min_coef: Optional[float]#
- get_sample_range(energy_mae: float, forces_mae: float, stress_mae: float) SampleRange #
-
Get the sample range based on the MAE of energy/forces/stress
- pfp_fallback_samples: int#
- validate_at_least_one_threshold() SampleConfig #
- classmethod validate_pfp_fallback_samples(value: int) int #
- class light_pfp_autogen.config.TrainConfig(*, common_config: CommonConfig = None, mtp_config: MTPConfig = None)#
-
Bases:
BaseModel
The configuration for training
- common_config#
-
The common configuration for training.
- Type
-
CommonConfig
- common_config: CommonConfig#
- classmethod from_dict(config_dict: Dict) TrainConfig #
-
Get the configuration from a dictionary
:param config_dict: The configuration dictionary
:type config_dict: Dict
- Returns
-
The configuration object
- Return type
-
TrainConfig
- classmethod from_json(config_file: str) TrainConfig #
-
Get the configuration from a json file
:param config_file: The path to the json file
:type config_file: str
- Returns
-
The configuration object
- Return type
-
TrainConfig
- to_dict() Dict #
- to_json(config_file: str) None #