Commit 85f73997 authored by zalashub

update script to train fusion model for 1800 words

parent 83e4d5b0
#!/bin/bash
#SBATCH --time 4320 # time in minutes to reserve - 10080 is 1 week, 8640 is 6 days, 4320 is 3 days
#SBATCH --cpus-per-task 4 # number of cpu cores
#SBATCH --mem 50G # memory pool for all cores
#SBATCH --gres gpu:2 # number of gpus
#SBATCH -o train-1800w-02.log # log output
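#
# Usage sketch (the script's own filename isn't fixed anywhere here, so the name below is a placeholder):
#   sbatch train-fusion-1800w.sh   # submit the job to SLURM
#   squeue -u "$USER"              # check it is queued/running
#   tail -f train-1800w-02.log    # follow the log named above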
# Initialise conda environment.
eval "$(conda shell.bash hook)"
conda activate story
# Run the job to train the model - the second of the two seq2seq models, i.e. the fusion model.
srun -l fairseq-train data-bin/scifi-stories -a fconv_self_att_wp --lr 0.25 --optimizer nag \
--clip-norm 0.1 --max-source-positions 2048 --max-target-positions 2048 --max-tokens 2100 --max-epoch 35 \
--lr-scheduler reduce_lr_on_plateau --decoder-attention True --encoder-attention False \
--criterion label_smoothed_cross_entropy --weight-decay .0000001 --label-smoothing 0 \
--source-lang wp_source --target-lang wp_target --gated-attention True --self-attention True --project-input True \
--pretrained True --pretrained-checkpoint checkpoints/checkpoint_best.pt --save-dir fusion_checkpoints
# ---- NOTE ----- #
##### Because I'm using target text that is 1800 words long, --max-target-positions needs to be increased
##### from 1024 to 2048 (did this for --max-source-positions too).
##### I've also increased --max-tokens to 2100 from 1500 for the same reason.
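##### A quick way to sanity-check those limits (a sketch - it assumes the raw, whitespace-tokenised target
##### split is a file named train.wp_target; adjust the name to match the preprocessing setup):
#####   awk '{ print NF }' train.wp_target | sort -n | tail -1   # token count of the longest target
##### Sequences longer than --max-target-positions are rejected at training time unless
##### --skip-invalid-size-inputs-valid-test is passed.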
# Train a fusion model:
# add the arguments: --pretrained True --pretrained-checkpoint path/to/checkpoint
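# Once the fusion model has trained, generation could look like this (a sketch following the fairseq
# stories example; the batch size, sampling settings and checkpoint paths are assumptions, not values
# taken from this repo):
#   fairseq-generate data-bin/scifi-stories --path fusion_checkpoints/checkpoint_best.pt \
#     --batch-size 32 --beam 1 --sampling --sampling-topk 10 --temperature 0.8 \
#     --model-overrides "{'pretrained_checkpoint':'checkpoints/checkpoint_best.pt'}"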