Diffstat (limited to 'generators')
-rw-r--r--  generators/datageneration.ipynb  552
1 file changed, 194 insertions(+), 358 deletions(-)
diff --git a/generators/datageneration.ipynb b/generators/datageneration.ipynb
index 457cc1d..ee65ead 100644
--- a/generators/datageneration.ipynb
+++ b/generators/datageneration.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 59,
+ "execution_count": 1,
"id": "e1b17564-0abb-41c5-8cf4-7200b014550f",
"metadata": {},
"outputs": [
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 154,
+ "execution_count": 2,
"id": "8fe45bc7-a41a-49db-9067-700254f388c0",
"metadata": {},
"outputs": [],
@@ -41,53 +41,63 @@
},
{
"cell_type": "code",
- "execution_count": 229,
+ "execution_count": 3,
"id": "c75240e0-8392-4a7b-9999-dc528b3d17a1",
"metadata": {},
"outputs": [],
"source": [
"from collections import defaultdict\n",
- "dsynonyms = defaultdict(set)\n",
- "n=0\n",
"\n",
- "for word in wn.words():\n",
- " n+=1\n",
- " synsets = wn.synsets(word)\n",
- " synonymss = wn.synonyms(word)\n",
- " syns = set()\n",
- " for synset, synonyms in zip(synsets, synonymss):\n",
- " if synset.pos() in ['a', 's']:\n",
- " syns |= set(synonyms)\n",
- " if len(syns) >= 4:\n",
- " clues = [format(clue) for clue in syns]\n",
- " ok = True\n",
- " for clue in clues:\n",
- " if clue in dsynonyms:\n",
- " ok = False\n",
- " if ok:\n",
+ "def groups_for_pos(pos='as'):\n",
+ " dsynonyms = []\n",
+ " for word in wn.words():\n",
+ " synsets = wn.synsets(word)\n",
+ " synonymss = wn.synonyms(word)\n",
+ " syns = set()\n",
+ " for synset, synonyms in zip(synsets, synonymss):\n",
+ " if synset.pos() in pos: # 'as'\n",
+ " syns |= set(synonyms)\n",
+ " if len(syns) >= 4:\n",
+ " clues = [format(clue) for clue in syns]\n",
+ " \n",
" clues.append(format(word))\n",
- " dsynonyms[word] = dict(group=f'synonyms for {format(word)}', clues=clues)"
+ " dsynonyms.append(dict(answer=word, hint=f'synonyms for {format(word)}', clues=clues))\n",
+ " \"\"\" \n",
+ " ok = True\n",
+ " for clue in clues:\n",
+ " if clue in dsynonyms:\n",
+ " ok = False\n",
+ " if ok:\n",
+ " clues.append(format(word))\n",
+ " dsynonyms[word] = dict(group=f'synonyms for {format(word)}', clues=clues)\n",
+ " \"\"\"\n",
+ " return dsynonyms\n",
+ "\n",
+ "dadj = groups_for_pos('as')\n",
+ "dverb = groups_for_pos('v')\n",
+ "dnoun = groups_for_pos('n')\n",
+ "dadverb = groups_for_pos('r')"
]
},
{
"cell_type": "code",
- "execution_count": 230,
+ "execution_count": 4,
"id": "7e552fc8-03b2-4b8f-b6f6-072d580702bc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[Synset('spanish_lime.n.01'), Synset('genip.n.02')]"
+ "(3976, 7141, 19563, 490)"
]
},
- "execution_count": 230,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "wn.synsets('genip')"
+ "len(dadj), len(dverb), len(dnoun), len(dadverb)"
]
},
{
@@ -100,7 +110,7 @@
},
{
"cell_type": "code",
- "execution_count": 231,
+ "execution_count": 5,
"id": "48233554-2634-4a5e-9013-4e45c6f7d3d9",
"metadata": {},
"outputs": [],
@@ -126,7 +136,7 @@
},
{
"cell_type": "code",
- "execution_count": 232,
+ "execution_count": 6,
"id": "e588bdf3-d648-48b3-ab6b-027a07194292",
"metadata": {},
"outputs": [],
@@ -136,7 +146,7 @@
},
{
"cell_type": "code",
- "execution_count": 243,
+ "execution_count": 7,
"id": "b27aa837-73d2-4b10-826b-990e12a3f7e2",
"metadata": {},
"outputs": [],
@@ -146,78 +156,146 @@
},
{
"cell_type": "code",
- "execution_count": 244,
+ "execution_count": 8,
"id": "176e2790-560c-4daf-b436-a1771611c4bf",
"metadata": {},
"outputs": [],
"source": [
"df = df.drop(df[df.correctParse == 'no'].index)\n",
- "df = df.drop(df[df.isCommonstim == 'no'].index)"
+ "df = df.drop(df[df.isCommonstim == 0].index)"
]
},
{
"cell_type": "code",
- "execution_count": 235,
+ "execution_count": 9,
"id": "64ccaf3d-9743-49ed-b10b-d7b3e70e0235",
"metadata": {},
"outputs": [],
"source": [
"prefixes = df.groupby('c1').groups\n",
"suffixes = df.groupby('c2').groups\n",
- "pres = []\n",
+ "dprefix = []\n",
"for prefix, ids in prefixes.items():\n",
" if len(ids) >= 4:\n",
- " pres.append((prefix, list(df.loc[list(ids)].c2)))\n",
- "sufs = []\n",
+ " dprefix.append(dict(answer=prefix, hint=f'{prefix} _', clues=list(df.loc[list(ids)].c2)))\n",
+ "dsuffix = []\n",
"for suffix, ids in suffixes.items():\n",
" if len(ids) >= 4:\n",
- " sufs.append((suffix, list(df.loc[list(ids)].c1)))"
+ " dsuffix.append(dict(answer=suffix, hint=f'_ {suffix}', clues=list(df.loc[list(ids)].c1)))"
]
},
{
"cell_type": "code",
- "execution_count": 236,
+ "execution_count": null,
"id": "86c69c9f-bc6a-4dd1-9eb3-ab37ad766586",
"metadata": {},
"outputs": [],
- "source": [
- "dprefix = {}\n",
- "for prefix, ids in pres:\n",
- " res = set()\n",
- " for id in ids:\n",
- " if (id[-1] == 's' and id[:-1] in ids) or (ids[-2:] == 'es' and ids[:-2] in ids):\n",
- " continue\n",
- " res.add(id)\n",
- " if len(res) < 4:\n",
- " continue\n",
- " dprefix[prefix] = dict(group=f'{prefix} _', clues=list(res))\n",
- "\n",
- "dsuffix = {}\n",
- "for suffix, ids in sufs:\n",
- " if (suffix[-1] == 's' and suffix[:-1] in dsuffix) or (suffix[-2:] == 'es' and suffix[:-2] in ids):\n",
- " #dsuffix[suffix[:-1]] = set(ids)\n",
- " continue\n",
- " if len(ids) < 4:\n",
- " continue\n",
- " dsuffix[suffix] = dict(group=f'_ {suffix}', clues=ids)"
- ]
+ "source": []
},
{
"cell_type": "code",
- "execution_count": 237,
+ "execution_count": 11,
"id": "def43999-d789-4e5c-bb27-4fd29074c875",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "IndentationError",
+ "evalue": "unindent does not match any outer indentation level (<tokenize>, line 84)",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;36m File \u001b[0;32m<tokenize>:84\u001b[0;36m\u001b[0m\n\u001b[0;31m r = ratio(a, b)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m unindent does not match any outer indentation level\n"
+ ]
+ }
+ ],
"source": [
"from Levenshtein import ratio\n",
"def similar(a, b):\n",
- " return ratio(a, b) >= .8\n",
- "import inflect\n",
+ " a, b = a.lower(), b.lower()\n",
+ " if len(a) > len(b):\n",
+ " a, b = b, a\n",
+ " if a + 's' == b or a + 'es' == b or a + 'ing' == b or a + 'ly' == b\n",
+ "Skip to Main\n",
+ "datageneration\n",
+ "Last Checkpoint: 11 days ago\n",
+ "[Python 3 (ipykernel)]\n",
+ "import json\n",
+ "import nltk\n",
+ "from nltk.corpus import wordnet as wn\n",
+ "from nltk.stem.wordnet import WordNetLemmatizer\n",
+ "nltk.download('wordnet')\n",
+ "from nltk.corpus import words\n",
+ "nltk.download('words')\n",
+ "ww = words.words()\n",
"\n",
- "p = inflect.engine()\n",
+ "[nltk_data] Downloading package wordnet to /home/sy/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package words to /home/sy/nltk_data...\n",
+ "[nltk_data] Package words is already up-to-date!\n",
"\n",
- "def normalize(w):\n",
- " pass\n",
+ "def format(s):\n",
+ " return ' '.join(s.split('_'))\n",
+ "from collections import defaultdict\n",
+ "\n",
+ "def groups_for_pos(pos='as'):\n",
+ " dsynonyms = []\n",
+ " for word in wn.words():\n",
+ " synsets = wn.synsets(word)\n",
+ " synonymss = wn.synonyms(word)\n",
+ " syns = set()\n",
+ " for synset, synonyms in zip(synsets, synonymss):\n",
+ " if synset.pos() in pos: # 'as'\n",
+ " syns |= set(synonyms)\n",
+ " if len(syns) >= 4:\n",
+ " clues = [format(clue) for clue in syns]\n",
+ " \n",
+ " clues.append(format(word))\n",
+ " dsynonyms.append(dict(answer=word, hint=f'synonyms for {format(word)}', clues=clues))\n",
+ " \"\"\" \n",
+ " ok = True\n",
+ " for clue in clues:\n",
+ " if clue in dsynonyms:\n",
+ " ok = False\n",
+ " if ok:\n",
+ " clues.append(format(word))\n",
+ " dsynonyms[word] = dict(group=f'synonyms for {format(word)}', clues=clues)\n",
+ " \"\"\"\n",
+ " return dsynonyms\n",
+ "\n",
+ "dadj = groups_for_pos('as')\n",
+ "dverb = groups_for_pos('v')\n",
+ "dnoun = groups_for_pos('n')\n",
+ "dadverb = groups_for_pos('r')\n",
+ "len(dadj), len(dverb), len(dnoun), len(dadverb)\n",
+ "\n",
+ "(3976, 7141, 19563, 490)\n",
+ "\n",
+ "\n",
+ "# flag button for reporting\n",
+ "\n",
+ "\n",
+ "import pandas as pd\n",
+ "df = pd.read_csv('LADECv1-2019.csv', index_col=0)\n",
+ "df = df.drop(df[df.correctParse == 'no'].index)\n",
+ "df = df.drop(df[df.isCommonstim == 0].index)\n",
+ "prefixes = df.groupby('c1').groups\n",
+ "suffixes = df.groupby('c2').groups\n",
+ "dprefix = []\n",
+ "for prefix, ids in prefixes.items():\n",
+ " if len(ids) >= 4:\n",
+ " dprefix.append(dict(answer=prefix, hint=f'{prefix} _', clues=list(df.loc[list(ids)].c2)))\n",
+ "dsuffix = []\n",
+ "for suffix, ids in suffixes.items():\n",
+ " if len(ids) >= 4:\n",
+ " dsuffix.append(dict(answer=suffix, hint=f'_ {suffix}', clues=list(df.loc[list(ids)].c1)))\n",
+ "\n",
+ ":\n",
+ " return True\n",
+ " # Then, print everything between .8 and .9 to see whats the best cutoff\n",
+ " r = ratio(a, b)\n",
+ " if .8 <= r <= .9:\n",
+ " pass\n",
+ " #print(a, b, r)\n",
+ " return r >= .85\n",
"\n",
"def filter_duplicates(group):\n",
" if not group:\n",
@@ -234,7 +312,7 @@
},
{
"cell_type": "code",
- "execution_count": 238,
+ "execution_count": null,
"id": "6a3c04eb-79a6-47f5-846e-93258db65921",
"metadata": {},
"outputs": [],
@@ -244,58 +322,66 @@
},
{
"cell_type": "code",
- "execution_count": 239,
+ "execution_count": null,
"id": "dfb38b21-3dc4-495a-8805-446b2e9e8483",
"metadata": {},
"outputs": [],
"source": [
- "\n",
- "def process_corpus(corpus):\n",
- " new = {}\n",
- " for word, group in corpus.items():\n",
+ "def process_groups(groups):\n",
+ " new = []\n",
+ " for group in groups:\n",
" clues = group['clues']\n",
" clues = [clue for clue in clues if clue not in blacklist]\n",
" clues = filter_duplicates(clues)\n",
" if len(clues) < 4:\n",
" continue\n",
- " new[word] = dict(group=group['group'], clues=clues)\n",
+ " new.append(dict(answer=group['answer'], hint=group['hint'], clues=clues))\n",
" return new"
]
},
{
"cell_type": "code",
- "execution_count": 240,
- "id": "a59a4514-2572-4d35-a73d-fef58d1bc804",
+ "execution_count": null,
+ "id": "1c8175f2-817e-45ab-af0b-5ea7ee7a5dc9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "corpus = dict(\n",
+ " adjectives=dadj,\n",
+ " nouns=dnoun,\n",
+ " adverbs=dadverb,\n",
+ " verbs=dverb,\n",
+ " prefixes=dprefix,\n",
+ " suffixes=dsuffix,\n",
+ ")\n",
+ "filtered_corpus = {}\n",
+ "for k, d in corpus.items():\n",
+ " filtered_corpus[k] = process_groups(d)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b35092c5-17db-4257-bf45-83e8c3973da4",
"metadata": {},
"outputs": [],
"source": [
- "corpus = {**dprefix}\n",
- "corpus.update(dsuffix)\n",
- "corpus.update(dsynonyms)\n",
- "filtered_corpus = process_corpus(corpus)"
+ "filtered_corpus['adverbs']"
]
},
{
"cell_type": "code",
- "execution_count": 259,
+ "execution_count": null,
"id": "8025664c-e116-481a-9609-d58200f773ec",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "437 330\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "print(len(dprefix), len(dsuffix))"
+ "# Can lemmatize/singularize and compare levenshtein based on that, but don't use lemma itself for the group as it may bed wrong"
]
},
{
"cell_type": "code",
- "execution_count": 241,
+ "execution_count": null,
"id": "fccac4d7-af42-4445-8dd5-6f4b0d3aa9ca",
"metadata": {},
"outputs": [],
@@ -309,317 +395,67 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "4a82df07-568a-41f9-98c9-be0182522577",
+ "id": "551ce71f-0d75-4e41-8387-808db1e5e20f",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "similar('slow', 'slowl')"
+ ]
},
{
"cell_type": "code",
- "execution_count": 242,
- "id": "46157b29-1084-4caa-be4f-7c56be562da8",
+ "execution_count": 38,
+ "id": "19589357-f1ca-4d10-8574-3639bd05173f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[['encroach', 'impinge', 'infringe'],\n",
- " ['encroach', 'entrench', 'impinge', 'trench'],\n",
- " ['invasive', 'trespassing']]"
+ "26388"
]
},
- "execution_count": 242,
+ "execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "wn.synonyms('encroaching')"
+ "len(filtered_corpus)"
]
},
{
"cell_type": "code",
- "execution_count": 252,
- "id": "98e6a79f-4e7b-498d-a824-a44b52ae3829",
+ "execution_count": 39,
+ "id": "dd927be9-a77c-4606-984a-b3cf555b2618",
"metadata": {},
"outputs": [
{
"data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>c1</th>\n",
- " <th>c2</th>\n",
- " <th>stim</th>\n",
- " <th>isCommonC1</th>\n",
- " <th>isCommonC2</th>\n",
- " <th>isCommonstim</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>id_master</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>3237</th>\n",
- " <td>gad</td>\n",
- " <td>about</td>\n",
- " <td>gadabout</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4592</th>\n",
- " <td>knock</td>\n",
- " <td>about</td>\n",
- " <td>knockabout</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8231</th>\n",
- " <td>turn</td>\n",
- " <td>about</td>\n",
- " <td>turnabout</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6139</th>\n",
- " <td>race</td>\n",
- " <td>about</td>\n",
- " <td>raceabout</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8331</th>\n",
- " <td>walk</td>\n",
- " <td>about</td>\n",
- " <td>walkabout</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4515</th>\n",
- " <td>junk</td>\n",
- " <td>yards</td>\n",
- " <td>junkyards</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6812</th>\n",
- " <td>ship</td>\n",
- " <td>yards</td>\n",
- " <td>shipyards</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2667</th>\n",
- " <td>farm</td>\n",
- " <td>yards</td>\n",
- " <td>farmyards</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1007</th>\n",
- " <td>brick</td>\n",
- " <td>yards</td>\n",
- " <td>brickyards</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8892</th>\n",
- " <td>zig</td>\n",
- " <td>zag</td>\n",
- " <td>zigzag</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>8372 rows × 6 columns</p>\n",
- "</div>"
- ],
"text/plain": [
- " c1 c2 stim isCommonC1 isCommonC2 isCommonstim\n",
- "id_master \n",
- "3237 gad about gadabout 1 1 1\n",
- "4592 knock about knockabout 1 1 1\n",
- "8231 turn about turnabout 1 1 1\n",
- "6139 race about raceabout 1 1 0\n",
- "8331 walk about walkabout 1 1 1\n",
- "... ... ... ... ... ... ...\n",
- "4515 junk yards junkyards 1 0 0\n",
- "6812 ship yards shipyards 1 0 0\n",
- "2667 farm yards farmyards 1 0 0\n",
- "1007 brick yards brickyards 1 0 0\n",
- "8892 zig zag zigzag 0 0 1\n",
- "\n",
- "[8372 rows x 6 columns]"
+ "195"
]
},
- "execution_count": 252,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df[['c1', 'c2', 'stim', 'isCommonC1', 'isCommonC2', 'isCommonstim']]"
+ "len(dsuffix)"
]
},
{
"cell_type": "code",
- "execution_count": 258,
- "id": "ebcdf335-02c3-480c-a241-f83f7569acb0",
+ "execution_count": null,
+ "id": "f598354d-3f52-4952-a8c0-69c480ebe8b1",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>c1</th>\n",
- " <th>c2</th>\n",
- " <th>stim</th>\n",
- " <th>isCommonC1</th>\n",
- " <th>isCommonC2</th>\n",
- " <th>isCommonstim</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>id_master</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>8361</th>\n",
- " <td>war</td>\n",
- " <td>fare</td>\n",
- " <td>warfare</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2715</th>\n",
- " <td>field</td>\n",
- " <td>fare</td>\n",
- " <td>fieldfare</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1298</th>\n",
- " <td>car</td>\n",
- " <td>fare</td>\n",
- " <td>carfare</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>51</th>\n",
- " <td>air</td>\n",
- " <td>fare</td>\n",
- " <td>airfare</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " c1 c2 stim isCommonC1 isCommonC2 isCommonstim\n",
- "id_master \n",
- "8361 war fare warfare 1 1 1\n",
- "2715 field fare fieldfare 1 1 0\n",
- "1298 car fare carfare 1 1 1\n",
- "51 air fare airfare 1 1 1"
- ]
- },
- "execution_count": 258,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df[df.c2=='fare'][['c1', 'c2', 'stim', 'isCommonC1', 'isCommonC2', 'isCommonstim']]"
- ]
+ "outputs": [],
+ "source": []
},
{
"cell_type": "code",
"execution_count": null,
- "id": "50989f8d-368e-4b4d-ab6c-355efce36c93",
+ "id": "77cb04f6-846e-454b-98e1-4feb575d2332",
"metadata": {},
"outputs": [],
"source": []