|
142 | 142 | "tuner = kt.BayesianOptimization(\n",
|
143 | 143 | " build_model,\n",
|
144 | 144 | " objective=\"val_accuracy\",\n",
|
145 |
| - " max_trials=100,\n", |
| 145 | + " max_trials=20,\n", |
146 | 146 | " executions_per_trial=2,\n",
|
147 | 147 | " directory=\"mnist_kt_test\",\n",
|
148 | 148 | " overwrite=True,\n",
|
|
241 | 241 | "source": [
|
242 | 242 | "def get_best_trained_model(hp):\n",
|
243 | 243 | " best_epoch = get_best_epoch(hp)\n",
|
| 244 | + " model = build_model(hp)\n", |
244 | 245 | " model.fit(\n",
|
245 | 246 | " x_train_full, y_train_full, batch_size=128, epochs=int(best_epoch * 1.2)\n",
|
246 | 247 | " )\n",
|
|
327 | 328 | "##### Model parallelism: split your model across multiple GPUs"
|
328 | 329 | ]
|
329 | 330 | },
|
330 |
| - { |
331 |
| - "cell_type": "code", |
332 |
| - "execution_count": 0, |
333 |
| - "metadata": { |
334 |
| - "colab_type": "code" |
335 |
| - }, |
336 |
| - "outputs": [], |
337 |
| - "source": [ |
338 |
| - "model = keras.Sequential(\n", |
339 |
| - " [\n", |
340 |
| - " keras.layers.Input(shape=(16000,)),\n", |
341 |
| - " keras.layers.Dense(64000, activation=\"relu\"),\n", |
342 |
| - " keras.layers.Dense(8000, activation=\"sigmoid\"),\n", |
343 |
| - " ]\n", |
344 |
| - ")" |
345 |
| - ] |
346 |
| - }, |
347 |
| - { |
348 |
| - "cell_type": "code", |
349 |
| - "execution_count": 0, |
350 |
| - "metadata": { |
351 |
| - "colab_type": "code" |
352 |
| - }, |
353 |
| - "outputs": [], |
354 |
| - "source": [ |
355 |
| - "half_kernel_0 = kernel[:, :32000]\n", |
356 |
| - "half_bias_0 = bias[:32000]\n", |
357 |
| - "\n", |
358 |
| - "half_kernel_1 = kernel[:, 32000:]\n", |
359 |
| - "half_bias_1 = bias[32000:]\n", |
360 |
| - "\n", |
361 |
| - "with keras.device(\"gpu:0\"):\n", |
362 |
| - " half_output_0 = keras.ops.matmul(inputs, half_kernel_0) + half_bias_0\n", |
363 |
| - "\n", |
364 |
| - "with keras.device(\"gpu:1\"):\n", |
365 |
| - " half_output_1 = keras.ops.matmul(inputs, half_kernel_1) + half_bias_1" |
366 |
| - ] |
367 |
| - }, |
368 | 331 | {
|
369 | 332 | "cell_type": "markdown",
|
370 | 333 | "metadata": {
|
|
392 | 355 | "###### Using data parallelism with JAX"
|
393 | 356 | ]
|
394 | 357 | },
|
395 |
| - { |
396 |
| - "cell_type": "code", |
397 |
| - "execution_count": 0, |
398 |
| - "metadata": { |
399 |
| - "colab_type": "code" |
400 |
| - }, |
401 |
| - "outputs": [], |
402 |
| - "source": [ |
403 |
| - "keras.distribution.set_distribution(keras.distribution.DataParallel())" |
404 |
| - ] |
405 |
| - }, |
406 |
| - { |
407 |
| - "cell_type": "code", |
408 |
| - "execution_count": 0, |
409 |
| - "metadata": { |
410 |
| - "colab_type": "code" |
411 |
| - }, |
412 |
| - "outputs": [], |
413 |
| - "source": [ |
414 |
| - "keras.distribution.list_devices()" |
415 |
| - ] |
416 |
| - }, |
417 |
| - { |
418 |
| - "cell_type": "code", |
419 |
| - "execution_count": 0, |
420 |
| - "metadata": { |
421 |
| - "colab_type": "code" |
422 |
| - }, |
423 |
| - "outputs": [], |
424 |
| - "source": [ |
425 |
| - "keras.distribution.set_distribution(\n", |
426 |
| - " keras.distribution.DataParallel([\"gpu:0\", \"gpu:1\"])\n", |
427 |
| - ")" |
428 |
| - ] |
429 |
| - }, |
430 | 358 | {
|
431 | 359 | "cell_type": "markdown",
|
432 | 360 | "metadata": {
|
|
436 | 364 | "###### Using model parallelism with JAX"
|
437 | 365 | ]
|
438 | 366 | },
|
439 |
| - { |
440 |
| - "cell_type": "code", |
441 |
| - "execution_count": 0, |
442 |
| - "metadata": { |
443 |
| - "colab_type": "code" |
444 |
| - }, |
445 |
| - "outputs": [], |
446 |
| - "source": [ |
447 |
| - "mesh = keras.distribution.DeviceMesh(\n", |
448 |
| - " shape=(2, 4),\n", |
449 |
| - " axis_names=[\"data\", \"model\"],\n", |
450 |
| - ")" |
451 |
| - ] |
452 |
| - }, |
453 |
| - { |
454 |
| - "cell_type": "code", |
455 |
| - "execution_count": 0, |
456 |
| - "metadata": { |
457 |
| - "colab_type": "code" |
458 |
| - }, |
459 |
| - "outputs": [], |
460 |
| - "source": [ |
461 |
| - "devices = [f\"gpu:{i}\" for i in range(8)]\n", |
462 |
| - "mesh = keras.distribution.DeviceMesh(\n", |
463 |
| - " shape=(2, 4),\n", |
464 |
| - " axis_names=[\"data\", \"model\"],\n", |
465 |
| - " devices=devices,\n", |
466 |
| - ")" |
467 |
| - ] |
468 |
| - }, |
469 |
| - { |
470 |
| - "cell_type": "code", |
471 |
| - "execution_count": 0, |
472 |
| - "metadata": { |
473 |
| - "colab_type": "code" |
474 |
| - }, |
475 |
| - "outputs": [], |
476 |
| - "source": [ |
477 |
| - "for v in model.variables:\n", |
478 |
| - " print(v.path)" |
479 |
| - ] |
480 |
| - }, |
481 |
| - { |
482 |
| - "cell_type": "code", |
483 |
| - "execution_count": 0, |
484 |
| - "metadata": { |
485 |
| - "colab_type": "code" |
486 |
| - }, |
487 |
| - "outputs": [], |
488 |
| - "source": [ |
489 |
| - "sequential/dense/kernel\n", |
490 |
| - "sequential/dense/bias\n", |
491 |
| - "sequential/dense_1/kernel\n", |
492 |
| - "sequential/dense_1/bias" |
493 |
| - ] |
494 |
| - }, |
495 |
| - { |
496 |
| - "cell_type": "code", |
497 |
| - "execution_count": 0, |
498 |
| - "metadata": { |
499 |
| - "colab_type": "code" |
500 |
| - }, |
501 |
| - "outputs": [], |
502 |
| - "source": [ |
503 |
| - "layout_map = keras.distribution.LayoutMap(device_mesh)\n", |
504 |
| - "layout_map[\"sequential/dense/kernel\"] = (None, \"model\")\n", |
505 |
| - "layout_map[\"sequential/dense/bias\"] = (\"model\",)\n", |
506 |
| - "layout_map[\"sequential/dense_1/kernel\"] = (None, \"model\")\n", |
507 |
| - "layout_map[\"sequential/dense_1/bias\"] = (\"model\",)" |
508 |
| - ] |
509 |
| - }, |
510 |
| - { |
511 |
| - "cell_type": "code", |
512 |
| - "execution_count": 0, |
513 |
| - "metadata": { |
514 |
| - "colab_type": "code" |
515 |
| - }, |
516 |
| - "outputs": [], |
517 |
| - "source": [ |
518 |
| - "model_parallel = keras.distribution.ModelParallel(\n", |
519 |
| - " layout_map=layout_map,\n", |
520 |
| - " batch_dim_name=\"data\",\n", |
521 |
| - ")\n", |
522 |
| - "keras.distribution.set_distribution(model_parallel)" |
523 |
| - ] |
524 |
| - }, |
525 |
| - { |
526 |
| - "cell_type": "code", |
527 |
| - "execution_count": 0, |
528 |
| - "metadata": { |
529 |
| - "colab_type": "code" |
530 |
| - }, |
531 |
| - "outputs": [], |
532 |
| - "source": [ |
533 |
| - "model.layers[0].kernel.value.sharding" |
534 |
| - ] |
535 |
| - }, |
536 |
| - { |
537 |
| - "cell_type": "code", |
538 |
| - "execution_count": 0, |
539 |
| - "metadata": { |
540 |
| - "colab_type": "code" |
541 |
| - }, |
542 |
| - "outputs": [], |
543 |
| - "source": [ |
544 |
| - "import jax\n", |
545 |
| - "\n", |
546 |
| - "value = model.layers[0].kernel.value\n", |
547 |
| - "jax.debug.visualize_sharding(value.shape, value.sharding)" |
548 |
| - ] |
549 |
| - }, |
550 | 367 | {
|
551 | 368 | "cell_type": "markdown",
|
552 | 369 | "metadata": {
|
|
592 | 409 | "##### Float16 inference"
|
593 | 410 | ]
|
594 | 411 | },
|
595 |
| - { |
596 |
| - "cell_type": "code", |
597 |
| - "execution_count": 0, |
598 |
| - "metadata": { |
599 |
| - "colab_type": "code" |
600 |
| - }, |
601 |
| - "outputs": [], |
602 |
| - "source": [ |
603 |
| - "import keras\n", |
604 |
| - "\n", |
605 |
| - "keras.config.set_dtype_policy(\"float16\")" |
606 |
| - ] |
607 |
| - }, |
608 | 412 | {
|
609 | 413 | "cell_type": "markdown",
|
610 | 414 | "metadata": {
|
|
614 | 418 | "##### Mixed-precision training"
|
615 | 419 | ]
|
616 | 420 | },
|
617 |
| - { |
618 |
| - "cell_type": "code", |
619 |
| - "execution_count": 0, |
620 |
| - "metadata": { |
621 |
| - "colab_type": "code" |
622 |
| - }, |
623 |
| - "outputs": [], |
624 |
| - "source": [ |
625 |
| - "import keras\n", |
626 |
| - "\n", |
627 |
| - "keras.config.set_dtype_policy(\"mixed_float16\")" |
628 |
| - ] |
629 |
| - }, |
630 | 421 | {
|
631 | 422 | "cell_type": "markdown",
|
632 | 423 | "metadata": {
|
|
636 | 427 | "##### Using loss scaling with mixed precision"
|
637 | 428 | ]
|
638 | 429 | },
|
639 |
| - { |
640 |
| - "cell_type": "code", |
641 |
| - "execution_count": 0, |
642 |
| - "metadata": { |
643 |
| - "colab_type": "code" |
644 |
| - }, |
645 |
| - "outputs": [], |
646 |
| - "source": [ |
647 |
| - "optimizer = keras.optimizers.Adam(learning_rate=1e-3, loss_scale_factor=10)" |
648 |
| - ] |
649 |
| - }, |
650 |
| - { |
651 |
| - "cell_type": "code", |
652 |
| - "execution_count": 0, |
653 |
| - "metadata": { |
654 |
| - "colab_type": "code" |
655 |
| - }, |
656 |
| - "outputs": [], |
657 |
| - "source": [ |
658 |
| - "optimizer = keras.optimizers.LossScaleOptimizer(\n", |
659 |
| - " keras.optimizers.Adam(learning_rate=1e-3)\n", |
660 |
| - ")" |
661 |
| - ] |
662 |
| - }, |
663 | 430 | {
|
664 | 431 | "cell_type": "markdown",
|
665 | 432 | "metadata": {
|
|
746 | 513 | "ops.matmul(x, kernel)"
|
747 | 514 | ]
|
748 | 515 | },
|
749 |
| - { |
750 |
| - "cell_type": "code", |
751 |
| - "execution_count": 0, |
752 |
| - "metadata": { |
753 |
| - "colab_type": "code" |
754 |
| - }, |
755 |
| - "outputs": [], |
756 |
| - "source": [ |
757 |
| - "model = ...\n", |
758 |
| - "model.quantize(\"int8\")\n", |
759 |
| - "predictions = model.predict(...)" |
760 |
| - ] |
761 |
| - }, |
762 | 516 | {
|
763 | 517 | "cell_type": "markdown",
|
764 | 518 | "metadata": {
|
|
0 commit comments