diff options
Diffstat (limited to 'gen/Untitled.ipynb')
-rw-r--r-- | gen/Untitled.ipynb | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/gen/Untitled.ipynb b/gen/Untitled.ipynb
new file mode 100644
index 0000000..5eb6fdc
--- /dev/null
+++ b/gen/Untitled.ipynb
@@ -0,0 +1,104 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "export-corpus-intro",
+   "metadata": {},
+   "source": [
+    "# Export image corpus and artist labels\n",
+    "\n",
+    "Reads up to `N_ROWS` rows from the dataset under `data/` and writes:\n",
+    "\n",
+    "- `corpus/{i}.jpg`: the image bytes of row `i`\n",
+    "- `artists.js`: a `var artists = {...}` assignment mapping row index `i` to its integer artist id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9fdccc99-04bd-4d5b-b632-101b669ffb7e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd\n",
+    "import pyarrow as pa\n",
+    "import pyarrow.parquet as pq\n",
+    "import pyarrow.dataset as ds\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a10bff4c-f5f6-4102-8151-2c4020b9d2d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "N_ROWS = 10000  # upper bound on the number of rows to export\n",
+    "CORPUS_DIR = Path('corpus')\n",
+    "\n",
+    "df = ds.dataset('data/').scanner().head(N_ROWS).to_pandas()\n",
+    "n = len(df)  # may be smaller than N_ROWS when the dataset is short"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b07c097e-6857-4fad-900a-a9643fb5dc9d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the output directory up front so a fresh checkout can run this cell.\n",
+    "CORPUS_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "for i, image in enumerate(df['image']):\n",
+    "    (CORPUS_DIR / f'{i}.jpg').write_bytes(image['bytes'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2080bb6f-44df-4b45-8bc2-308105b89e09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Positional row index -> artist id; int() yields plain Python ints for json.dumps.\n",
+    "artists = {i: int(artist) for i, artist in enumerate(df['artist'])}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e91afe69-1722-426f-862e-b3ce98207d35",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# json.dumps serialises the int keys as strings (JSON object keys are always strings).\n",
+    "with open('artists.js', 'w') as f:\n",
+    "    f.write('var artists = ' + json.dumps(artists))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}