diff --git a/README.md b/README.md
index 17fb2f1f70..ff0bdffb4c 100644
--- a/README.md
+++ b/README.md
@@ -186,7 +186,7 @@ python run_squad.py \
   --init_checkpoint $BERT_PYTORCH_DIR/pytorch_model.bin \
   --do_train \
   --do_predict \
-  --do_lower_case
+  --do_lower_case \
   --train_file $SQUAD_DIR/train-v1.1.json \
   --predict_file $SQUAD_DIR/dev-v1.1.json \
   --train_batch_size 12 \
@@ -217,5 +217,21 @@ To get these results that we used a combination of:
 
 Here are the full list of hyper-parameters we used for this run:
 ```bash
-python ./run_squad.py --vocab_file $BERT_LARGE_DIR/vocab.txt --bert_config_file $BERT_LARGE_DIR/bert_config.json --init_checkpoint $BERT_LARGE_DIR/pytorch_model.bin --do_lower_case --do_train --do_predict --train_file $SQUAD_TRAIN --predict_file $SQUAD_EVAL --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir $OUTPUT_DIR/bert_large_bsz_24 --train_batch_size 24 --gradient_accumulation_steps 2 --optimize_on_cpu
+python ./run_squad.py \
+  --vocab_file $BERT_LARGE_DIR/vocab.txt \
+  --bert_config_file $BERT_LARGE_DIR/bert_config.json \
+  --init_checkpoint $BERT_LARGE_DIR/pytorch_model.bin \
+  --do_lower_case \
+  --do_train \
+  --do_predict \
+  --train_file $SQUAD_TRAIN \
+  --predict_file $SQUAD_EVAL \
+  --learning_rate 3e-5 \
+  --num_train_epochs 2 \
+  --max_seq_length 384 \
+  --doc_stride 128 \
+  --output_dir $OUTPUT_DIR \
+  --train_batch_size 24 \
+  --gradient_accumulation_steps 2 \
+  --optimize_on_cpu
 ```