Commit 7a0f6a2c authored by Laia Amoros Carafi's avatar Laia Amoros Carafi
Browse files

Upload New File

parent fdf1114d
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Ideal class number of real quadratic fields with Keras"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Prepare the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"file_path = 'number_fields_all_100coeff.csv'\n",
"number_fields = pd.read_csv(file_path, header=None, names=None)\n",
"# Shuffle the rows with a fixed seed so the row order, and every result\n",
"# derived from it, is reproducible after a kernel restart.\n",
"number_fields = number_fields.sample(frac=1, random_state=42)\n",
"\n",
"len(number_fields)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"number_fields.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Separate features from label\n",
"X = number_fields.copy()\n",
"y = X.pop(100)\n",
"\n",
"# Convert the label column (column 100) to a numpy array\n",
"y = np.array(y)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Separate training and test data\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.3, random_state=42, shuffle=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scale the features to be between 0 and 1.\n",
"# train_test_split has already copied the rows into X_train / X_test, so\n",
"# rescaling X alone never reaches the models; apply the same factor to the\n",
"# split frames as well.\n",
"feature_max = X.max().max()\n",
"X = X / feature_max\n",
"X_train = X_train / feature_max\n",
"X_test = X_test / feature_max"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Define the model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.1 Option 1: Decision tree with Scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"# define the model\n",
"#tree_model = DecisionTreeClassifier()\n",
"tree_model = DecisionTreeClassifier(max_leaf_nodes=50, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# feed our data to the model\n",
"tree_model.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import mean_absolute_error\n",
"\n",
"# determine how accurate the model's predictions are\n",
"y_pred = tree_model.predict(X_test)\n",
"mae = mean_absolute_error(y_test, y_pred)\n",
"mae"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report\n",
"print(classification_report(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import tree\n",
"\n",
"tree.plot_tree(tree_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.2 Option 2: Random forest with Scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"# Fix the seed, as for the decision tree, so the forest is reproducible\n",
"forest_model = RandomForestClassifier(random_state=42)\n",
"\n",
"# feed our data to the model\n",
"forest_model.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import mean_absolute_error\n",
"\n",
"# determine how accurate the model's predictions are\n",
"y_pred = forest_model.predict(X_test)\n",
"mae = mean_absolute_error(y_test, y_pred)\n",
"mae"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report\n",
"print(classification_report(y_test, y_pred))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3 Option 3: Sequential neural network with Keras"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow.keras import layers\n",
"\n",
"# Here we define the model\n",
"sequential_model = tf.keras.Sequential([\n",
" layers.Dense(128, activation=\"relu\"),\n",
" layers.Dense(64, activation=\"relu\"),\n",
" layers.Dense(23, activation=\"relu\"),\n",
" layers.Dense(1, activation=\"sigmoid\")\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sequential_model.compile(\n",
" loss ='binary_crossentropy',\n",
" optimizer = 'adam', \n",
" metrics=['binary_accuracy']\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"history = sequential_model.fit(\n",
" X_train,\n",
" y_train,\n",
" validation_data=(X_test, y_test),\n",
" epochs=10,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"history_frame = pd.DataFrame(history.history)\n",
"history_frame.loc[:, ['loss', 'val_loss']].plot()\n",
"history_frame.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot();"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test_loss, test_acc = sequential_model.evaluate(X_test, y_test, verbose=2)\n",
"print('\\nTest accuracy:', test_acc)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the paper [Machine-Learning Number Fields](https://arxiv.org/abs/2011.08958), the authors report that with 1000 coefficients of the Dedekind zeta function they achieve an accuracy of 96%."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment