Commit 7a0f6a2c by Laia Amoros Carafi

Upload New File

parent fdf1114d
 {
  "cells": [
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Ideal class number of real quadratic fields with Keras\n",
     "\n",
     "This notebook loads a table of number fields from `number_fields_all_100coeff.csv`, takes column 100 as the label and every other column as a feature, and compares three classifiers on a 70/30 train/test split: a decision tree, a random forest and a sequential neural network."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "import numpy as np\n",
     "import pandas as pd\n",
     "import tensorflow as tf\n",
     "from sklearn import tree\n",
     "from sklearn.ensemble import RandomForestClassifier\n",
     "from sklearn.metrics import classification_report, mean_absolute_error\n",
     "from sklearn.model_selection import train_test_split\n",
     "from sklearn.tree import DecisionTreeClassifier\n",
     "from tensorflow.keras import layers"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Everything a reader might want to tune lives here\n",
     "DATA_PATH = 'number_fields_all_100coeff.csv'\n",
     "SEED = 42  # shared by the shuffle, the train/test split and the scikit-learn models"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## 1. Prepare the data"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "number_fields = pd.read_csv(DATA_PATH, header=None, names=None)\n",
     "# Shuffle the number fields (seeded, so a re-run gives the same order)\n",
     "number_fields = number_fields.sample(frac=1, random_state=SEED)\n",
     "\n",
     "len(number_fields)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "number_fields.head()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Separate features from label (column 100 holds the label)\n",
     "X = number_fields.copy()\n",
     "y = X.pop(100)\n",
     "\n",
     "# We convert the labels to a numpy array\n",
     "y = np.array(y)\n",
     "y"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Separate training and test data\n",
     "X_train, X_test, y_train, y_test = train_test_split(\n",
     "    X, y, test_size=0.3, random_state=SEED, shuffle=True)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Scale the features so that the largest training value becomes 1.\n",
     "# The models consume the split frames, so those are the ones that must be\n",
     "# rescaled (re-assigning `X` after the split would leave them untouched).\n",
     "# The factor is taken from the training data only and reused for the test data.\n",
     "feature_scale = X_train.max().max()\n",
     "X_train_scaled = X_train / feature_scale\n",
     "X_test_scaled = X_test / feature_scale"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## 2. Define the model"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "### 2.1 Option 1: Decision tree with Scikit-learn"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# define the model\n",
     "tree_model = DecisionTreeClassifier(max_leaf_nodes=50, random_state=SEED)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# feed our data to the model\n",
     "tree_model.fit(X_train_scaled, y_train)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# determine how accurate the model's predictions are\n",
     "tree_pred = tree_model.predict(X_test_scaled)\n",
     "tree_mae = mean_absolute_error(y_test, tree_pred)\n",
     "tree_mae"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "print(classification_report(y_test, tree_pred))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "tree.plot_tree(tree_model);"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "### 2.2 Option 2: Random forest with Scikit-learn"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "forest_model = RandomForestClassifier(random_state=SEED)\n",
     "\n",
     "# feed our data to the model\n",
     "forest_model.fit(X_train_scaled, y_train)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# determine how accurate the model's predictions are\n",
     "forest_pred = forest_model.predict(X_test_scaled)\n",
     "forest_mae = mean_absolute_error(y_test, forest_pred)\n",
     "forest_mae"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "print(classification_report(y_test, forest_pred))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "### 2.3 Option 3: Sequential neural network with Keras"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Here we define the model\n",
     "sequential_model = tf.keras.Sequential([\n",
     "    layers.Dense(128, activation=\"relu\"),\n",
     "    layers.Dense(64, activation=\"relu\"),\n",
     "    layers.Dense(23, activation=\"relu\"),\n",
     "    layers.Dense(1, activation=\"sigmoid\")\n",
     "])"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# NOTE(review): a single sigmoid output trained with binary cross-entropy\n",
     "# expects the labels to be 0 or 1 -- confirm that column 100 is encoded that way.\n",
     "sequential_model.compile(\n",
     "    loss='binary_crossentropy',\n",
     "    optimizer='adam',\n",
     "    metrics=['binary_accuracy']\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "history = sequential_model.fit(\n",
     "    X_train_scaled,\n",
     "    y_train,\n",
     "    validation_data=(X_test_scaled, y_test),\n",
     "    epochs=10,\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# One row per epoch; plot the loss curves and the accuracy curves separately\n",
     "history_frame = pd.DataFrame(history.history)\n",
     "history_frame.loc[:, ['loss', 'val_loss']].plot()\n",
     "history_frame.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot();"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "test_loss, test_acc = sequential_model.evaluate(X_test_scaled, y_test, verbose=2)\n",
     "print('\\nTest accuracy:', test_acc)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "In the paper [Machine-Learning number fields](https://arxiv.org/abs/2011.08958), the authors claim that with 1000 coefficients of the Dedekind zeta function they achieve an accuracy of 96%."
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
     "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.8.3"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
 }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment