diff --git a/__init__.pyc b/__init__.pyc
index d1c94d3..57355ef 100644
Binary files a/__init__.pyc and b/__init__.pyc differ
diff --git a/q01_pipeline/__init__.pyc b/q01_pipeline/__init__.pyc
index b360a57..0e880cc 100644
Binary files a/q01_pipeline/__init__.pyc and b/q01_pipeline/__init__.pyc differ
diff --git a/q01_pipeline/build.py b/q01_pipeline/build.py
index 96beca7..52d3f7b 100644
--- a/q01_pipeline/build.py
+++ b/q01_pipeline/build.py
@@ -9,5 +9,29 @@ bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',')
 # Write your solution here :
+y = bank['y']
+X = bank.drop(['y'], axis=1)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)
+# Tune a class-balanced random forest with a grid search and report its test-set AUC
+model = RandomForestClassifier(random_state=9, class_weight='balanced')
+def pipeline(X_train, X_test, y_train, y_test, model):
+    param_grid = {"max_depth": [2, 3, 5, 6, 8, 10, 15, 20, 30],
+                  "max_leaf_nodes": [2, 5, 10, 15, 20],
+                  "max_features": [8, 10, 12, 14]}
+    grid = GridSearchCV(estimator=model, param_grid=param_grid)
+    # Label-encode the target and every categorical (object-dtype) feature column
+    y_train = LabelEncoder().fit_transform(y_train)
+    for column in X_train.columns:
+        if X_train[column].dtype == object:
+            le = LabelEncoder()
+            X_train[column] = le.fit_transform(X_train[column])
+    y_test = LabelEncoder().fit_transform(y_test)
+    for column in X_test.columns:
+        if X_test[column].dtype == object:
+            le = LabelEncoder()
+            X_test[column] = le.fit_transform(X_test[column])
+    grid.fit(X_train, y_train)
+    auc = roc_auc_score(y_test, grid.predict(X_test))
+    return grid, auc
diff --git a/q01_pipeline/build.pyc b/q01_pipeline/build.pyc
index 5a9b3ad..c2ee1d8 100644
Binary files a/q01_pipeline/build.pyc and b/q01_pipeline/build.pyc differ
diff --git a/q01_pipeline/tests/__init__.pyc b/q01_pipeline/tests/__init__.pyc
index b2f2c5b..821c412 100644
Binary files a/q01_pipeline/tests/__init__.pyc and b/q01_pipeline/tests/__init__.pyc differ
diff --git a/q01_pipeline/tests/test_q01_pipeline.pyc b/q01_pipeline/tests/test_q01_pipeline.pyc
index 109bce3..edb3bf6 100644
Binary files a/q01_pipeline/tests/test_q01_pipeline.pyc and b/q01_pipeline/tests/test_q01_pipeline.pyc differ
diff --git a/quickstarts/Prompting.ipynb b/quickstarts/Prompting.ipynb
new file mode 100644
index 0000000..c7824ae
--- /dev/null
+++ b/quickstarts/Prompting.ipynb
@@ -0,0 +1,1685 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "view-in-github",
+    "colab_type": "text"
+   },
+   "source": [
+    "Open In Colab"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Tce3stUlHN0L"
+   },
+   "source": [
+    "##### Copyright 2024 Google LLC."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "cellView": "form",
+    "id": "tuOe1ymfHZPu"
+   },
+   "outputs": [],
+   "source": [
+    "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+    "# you may not use this file except in compliance with the License.\n",
+    "# You may obtain a copy of the License at\n",
+    "#\n",
+    "# https://www.apache.org/licenses/LICENSE-2.0\n",
+    "#\n",
+    "# Unless required by applicable law or agreed to in writing, software\n",
+    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+    "# See the License for the specific language governing permissions and\n",
+    "# limitations under the License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "yeadDkMiISin"
+   },
+   "source": [
+    "# Gemini API: Prompting Quickstart\n",
+    "\n",
+    "Run in Google Colab
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dpOYALec6N8Z" + }, + "source": [ + "This notebook contains examples of how to write and run your first prompts with the Gemini API." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gvkDhXtHgol7" + }, + "source": [ + "## Learn more\n", + "\n", + "There's lots more to learn!\n", + "\n", + "* For more fun prompts, check out [Market a Jetpack](https://github.com/google-gemini/cookbook/blob/main/examples/Market_a_Jet_Backpack.ipynb).\n", + "* Check out the [safety quickstart](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Safety.ipynb) next to learn about the Gemini API's configurable safety settings, and what to do if your prompt is blocked.\n", + "* For lots more details on using the Python SDK, check out this [detailed quickstart](https://ai.google.dev/tutorials/python_quickstart)." + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import spacy\n", + "import numpy as np\n", + "import nltk\n", + "import openpyxl" + ], + "metadata": { + "id": "VJ4RWLUo9hRY" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "NdGtXpZzIdlm" + } + }, + { + "cell_type": "code", + "source": [ + "df = pd.read_csv(\"/content/complaints.csv\")" + ], + "metadata": { + "id": "RLU6JoF99-6m" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "nlp = spacy.load ('en_core_web_sm')\n", + "doc = nlp(df.Sub_issue.iloc[0])" + ], + "metadata": { + "id": "KQRhFkRy-Tf9" + }, + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "lqGMQEQ5Hsi5" + } + }, + { + "cell_type": "code", + "source": [ + "tokens = []\n", + "lemma = []\n", + "pos = []\n", + "\n", + "for doc in nlp.pipe (df['Sub_issue'].astype('unicode').values, batch_size = 50):\n", + " if doc.is_parsed:\n", + " tokens.append([n.text for n in doc])\n", + " lemma.append([n.lemma_ for n in doc])\n", + " pos.append([n.pos_ for n in doc])\n", + " else:\n", + " tokens.append(None)\n", + " lemma.append(None)\n", + " pos.append(None)\n", + "\n", + "df['issue_tokens'] = tokens\n", + "df['issue_lemma'] = lemma\n", + "df['issue_pos'] = pos" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hV58oTPWIsFz", + "outputId": "6c03c0e7-ba2a-4d76-a436-7ce9d3955e25" + }, + "execution_count": 56, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":6: DeprecationWarning: [W107] The property `Doc.is_parsed` is deprecated. 
Use `Doc.has_annotation(\"DEP\")` instead.\n", + " if doc.is_parsed:\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "def to_doc(words:tuple) -> spacy.tokens.Doc:\n", + " return nlp(' '.join(words))\n", + "\n", + "def remove_stops(doc) -> list:\n", + " return [token.text for token in doc if not token.is_stop]" + ], + "metadata": { + "id": "yRO0E0qxKzvR" + }, + "execution_count": 58, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "docs = list(map(to_doc, df.issue_lemma))\n", + "df['removed_stops'] = list(map(remove_stops, docs))" + ], + "metadata": { + "id": "TrWDRfJqLmeB" + }, + "execution_count": 60, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import re\n", + "#Remove punctuation\n", + "df['removed_stops_proces'] = df['removed_stops'].map(lambda x: re.sub(\"[,\\.!?]\",\"\",str(x)))\n", + "#convert to lower\n", + "df['removed_stops_proces'] = df['removed_stops_proces'].map(lambda x:x.lower())" + ], + "metadata": { + "id": "jPHGY_5lM00A" + }, + "execution_count": 64, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df['removed_stops_proces'] = df['removed_stops_proces'].str.replace(\"'\",'')" + ], + "metadata": { + "id": "slMm38XjN25u" + }, + "execution_count": 66, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from sklearn.decomposition import LatentDirichletAllocation" + ], + "metadata": { + "id": "jfFE5SbmONT6" + }, + "execution_count": 68, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "cv = CountVectorizer(max_df = 0.9, min_df = 2)\n", + "dtm = cv.fit_transform(df['removed_stops_proces'])" + ], + "metadata": { + "id": "60WyQ8QZR7d4" + }, + "execution_count": 69, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "LDA = LatentDirichletAllocation(n_components = 6, random_state = 42)\n", + "LDA.fit(dtm)" + ], + "metadata": { + "id": "5Ct_VuyiSSui", + "outputId": "6a09ea26-2057-488f-8d7d-0d767aa56bdb", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + } + }, + "execution_count": 70, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LatentDirichletAllocation(n_components=6, random_state=42)" + ], + "text/html": [ + "
" + ] + }, + "metadata": {}, + "execution_count": 70 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "iB3MZLpb9-Al" + } + }, + { + "cell_type": "code", + "source": [ + "LDA.components_[2]" + ], + "metadata": { + "id": "Hx6vlBKcSgaT", + "outputId": "d6baaa0c-10dc-451a-8120-e701c3ac4705", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 73, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1.66666949e-01, 1.66718690e-01, 1.66667273e-01, 1.66670633e-01,\n", + " 1.66667902e-01, 1.66667913e-01, 1.66666952e-01, 1.66666887e-01,\n", + " 1.66757910e-01, 1.41666533e+01, 1.66708421e-01, 1.66667378e-01,\n", + " 1.66666768e-01, 1.66881724e-01, 1.66666801e-01, 1.66666944e-01,\n", + " 1.66714690e-01, 1.66666928e-01, 1.66666829e-01, 1.66666831e-01,\n", + " 1.66666826e-01, 1.66667316e-01, 1.04705814e+01, 1.66667561e-01,\n", + " 1.66666910e-01, 1.66667089e-01, 2.41624258e+01, 4.16664844e+00,\n", + " 1.66667135e-01, 1.66908366e-01, 1.66667338e-01, 1.66666956e-01,\n", + " 1.66667769e-01, 1.66667338e-01, 1.66929954e-01, 1.66668434e-01,\n", + " 1.66910484e-01, 1.66666796e-01, 1.66817093e-01, 3.44616596e+03,\n", + " 1.66777266e-01, 1.66666980e-01, 1.66666789e-01, 1.89815693e+03,\n", + " 1.66668235e-01, 1.66905507e-01, 1.66666950e-01, 1.66881724e-01,\n", + " 1.66685720e-01, 1.67370158e-01, 1.66723281e-01, 1.66667102e-01,\n", + " 4.15270413e+00, 1.66667166e-01, 1.66667117e-01, 1.66666943e-01,\n", + " 1.66667085e-01, 1.41646664e+01, 1.66666760e-01, 1.66667914e-01,\n", + " 1.66727445e-01, 1.66714690e-01, 1.66666944e-01, 1.66666961e-01,\n", + " 1.66667244e-01, 1.66667127e-01, 1.66666768e-01, 1.66667299e-01,\n", + " 1.67126728e-01, 1.66673908e-01, 1.66723293e-01, 1.66704647e-01,\n", + " 4.16664844e+00, 1.41666533e+01, 1.66667228e-01, 1.66667244e-01,\n", + " 1.66666944e-01, 1.66667680e-01, 1.67126527e-01, 2.41624258e+01,\n", + " 1.66666785e-01, 1.66666944e-01, 3.33416665e+03, 1.66666958e-01,\n", + " 1.66667251e-01, 1.67155802e-01, 1.66676161e-01, 1.70316665e+03,\n", + " 1.66667085e-01, 1.66667273e-01, 1.66667387e-01, 1.66667228e-01,\n", + " 1.66800268e-01, 1.66666910e-01, 1.66666980e-01, 1.66718690e-01,\n", + " 1.66666944e-01, 1.66667764e-01, 1.66667964e-01, 1.66723281e-01,\n", + " 1.67370158e-01, 1.66683379e-01, 1.01637513e+01, 1.66889079e-01,\n", + " 1.66668115e-01, 1.66667083e-01, 1.66777266e-01, 1.67113168e-01,\n", + " 1.66723293e-01, 1.66767374e-01, 2.41624258e+01, 1.66666807e-01,\n", + " 1.60166584e+02, 1.66667618e-01, 1.66666768e-01, 1.66666899e-01,\n", + " 1.66666760e-01, 1.66666880e-01, 1.66718690e-01, 1.66666726e-01,\n", + " 1.66667224e-01, 1.66666944e-01, 1.66666828e-01, 1.66848252e-01,\n", + " 1.66667563e-01, 1.66666951e-01, 1.66666959e-01, 1.01637513e+01,\n", + " 1.66666843e-01, 1.66666918e-01, 1.66668051e-01, 1.66667458e-01,\n", + " 1.66666943e-01, 1.66666987e-01, 1.66666807e-01, 1.66666807e-01,\n", + " 8.56055412e+01, 1.66689830e-01, 1.66669276e-01, 1.66667215e-01,\n", + " 1.66718690e-01, 1.66666887e-01, 1.66667764e-01, 1.66667297e-01,\n", + " 2.41624258e+01, 1.66714690e-01, 1.66666950e-01, 1.66913762e-01,\n", + " 1.66667220e-01, 1.66666829e-01, 1.66667109e-01, 1.70316665e+03,\n", + " 1.66666950e-01, 1.66718119e-01, 1.66667127e-01, 1.66666752e-01,\n", + " 1.66667228e-01, 8.59988329e+03, 1.66666894e-01, 1.66975383e-01,\n", + " 1.66666991e-01, 1.67005609e-01, 1.66666875e-01, 1.66718194e-01,\n", + " 3.71666498e+01, 1.66667263e-01, 1.66707404e-01, 1.66667319e-01,\n", + " 1.94553093e+00, 
1.66723293e-01, 1.66848252e-01, 1.66990792e-01,\n", + " 1.66667456e-01, 1.66666834e-01, 1.66848252e-01, 1.66666943e-01,\n", + " 1.66666918e-01, 1.66666768e-01, 1.66905507e-01, 1.66666843e-01,\n", + " 1.66667244e-01, 1.66666789e-01, 1.66666849e-01, 1.66667175e-01,\n", + " 1.66836305e-01, 1.66905599e-01, 1.66667135e-01, 1.66666928e-01,\n", + " 1.66968209e-01, 1.66736159e-01, 1.66775876e-01, 1.66667225e-01,\n", + " 1.66667169e-01, 1.66666951e-01, 3.38816635e+03, 1.66666845e-01,\n", + " 1.66666830e-01, 1.66667128e-01, 1.67126527e-01, 1.66714690e-01,\n", + " 1.66666834e-01])" + ] + }, + "metadata": {}, + "execution_count": 73 + } + ] + }, + { + "cell_type": "code", + "source": [ + "for i,topic in enumerate (LDA.components_):\n", + " print(f'The top 10 words for topic #{i}')\n", + " print([cv.get_feature_names_out()[index] for index in topic.argsort()[-10:]])\n", + " print ('\\n')" + ], + "metadata": { + "id": "XxtOP2y0S1m8", + "outputId": "6ad70cd1-886d-4a7d-e145-0231367da9b1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 75, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The top 10 words for topic #0\n", + "['repeat', 'process', 'card', 'open', 'knowledge', 'consent', 'investigation', 'report', 'error', 'fix']\n", + "\n", + "\n", + "The top 10 words for topic #1\n", + "['theft', 'disclose', 'notification', 'reappear', 'away', 'old', 'wrong', 'attempt', 'collect', 'debt']\n", + "\n", + "\n", + "The top 10 words for topic #2\n", + "['score', 'problem', 'nan', 'inquiry', 'recognize', 'credit', 'improperly', 'use', 'company', 'report']\n", + "\n", + "\n", + "The top 10 words for topic #3\n", + "['phone', 'difficulty', 'submit', 'card', 'dispute', 'personal', 'status', 'information', 'account', 'incorrect']\n", + "\n", + "\n", + "The top 10 words for topic #4\n", + "['record', 'inaccurate', 'dispute', 'problem', 'result', 'status', 'notify', '30', 'day', 'investigation']\n", + "\n", + "\n", + "The top 10 words for topic #5\n", + "['disburse', 'instruct', 'handle', 'insurance', 'fund', 'communicate', 'issue', 'miss', 'belong', 'information']\n", + "\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "topic_results = LDA.transform(dtm)\n", + "df['Topic'] = topic_results.argmax(axis = 1)" + ], + "metadata": { + "id": "Q4qWBe9RVR5W" + }, + "execution_count": 76, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import nltk\n", + "nltk.download('vader_lexicon')" + ], + "metadata": { + "id": "zTCzIIpBV50a", + "outputId": "a0658213-7612-4b7a-94d4-944f7f74f5e1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 78, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package vader_lexicon to /root/nltk_data...\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 78 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "sid = SentimentIntensityAnalyzer()" + ], + "metadata": { + "id": "JZ5reZUkVjLu" + }, + "execution_count": 79, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df['removed_stops_proces'] = df['removed_stops_proces'].str.replace(\"[\",'')\n", + "df['removed_stops_proces'] = df['removed_stops_proces'].str.replace(\"[\",'')" + ], + "metadata": { + "id": "twVFV48Na2Fx" + }, + "execution_count": 80, + "outputs": [] + }, + { + "cell_type": 
"code", + "source": [ + "df['scores'] = df['removed_stops_proces'].apply(lambda removed_stops_proces: sid.polarity_scores(removed_stops_proces))\n", + "df['compound'] = df['scores'].apply(lambda d:d['compound'])\n", + "df['comp_score'] = df['compound'].apply (lambda score: 'positive' if score > 0 else ('negative' if score < 0 else 'neutral'))\n", + "df['neg_score'] = df['scores'].apply (lambda x:x.get('neg'))\n", + "df['sentiment'] = np.where (df['neg_score']>0,'negative',np.where (df['compound']<0,'negative', np.where (df['compound']>0,'positive','neutral')))" + ], + "metadata": { + "id": "08rGmxp2bQq6" + }, + "execution_count": 82, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df" + ], + "metadata": { + "id": "y4DPcppkdBac", + "outputId": "1dd4cf87-3d9e-435d-ea27-a994cb55e57d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + } + }, + "execution_count": 83, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Date received Product \\\n", + "0 10/26/2024 Credit reporting or other personal consumer re... \n", + "1 10/26/2024 Credit reporting or other personal consumer re... \n", + "2 10/18/2024 Credit reporting or other personal consumer re... \n", + "3 10/26/2024 Credit reporting or other personal consumer re... \n", + "4 10/26/2024 Credit reporting or other personal consumer re... \n", + "... ... ... \n", + "19779 10/23/2024 Credit reporting or other personal consumer re... \n", + "19780 10/23/2024 Credit reporting or other personal consumer re... \n", + "19781 10/18/2024 Credit reporting or other personal consumer re... \n", + "19782 10/21/2024 Credit reporting or other personal consumer re... \n", + "19783 10/21/2024 Credit reporting or other personal consumer re... \n", + "\n", + " Sub-product Issue \\\n", + "0 Credit reporting Incorrect information on your report \n", + "1 Credit reporting Improper use of your report \n", + "2 Credit reporting Problem with a company's investigation into an... \n", + "3 Credit reporting Incorrect information on your report \n", + "4 Credit reporting Incorrect information on your report \n", + "... ... ... \n", + "19779 Credit reporting Incorrect information on your report \n", + "19780 Credit reporting Incorrect information on your report \n", + "19781 Credit reporting Incorrect information on your report \n", + "19782 Credit reporting Incorrect information on your report \n", + "19783 Credit reporting Incorrect information on your report \n", + "\n", + " Sub_issue \\\n", + "0 Information belongs to someone else \n", + "1 Credit inquiries on your report that you don't... \n", + "2 Was not notified of investigation status or re... \n", + "3 Account information incorrect \n", + "4 Information belongs to someone else \n", + "... ... \n", + "19779 Account information incorrect \n", + "19780 Account information incorrect \n", + "19781 Information belongs to someone else \n", + "19782 Account status incorrect \n", + "19783 Information belongs to someone else \n", + "\n", + " Consumer_complaint_narrative Company public response \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... \n", + "19779 NaN NaN \n", + "19780 NaN NaN \n", + "19781 NaN NaN \n", + "19782 NaN NaN \n", + "19783 NaN NaN \n", + "\n", + " Company State ZIP code ... \\\n", + "0 EQUIFAX, INC. PA 19153 ... \n", + "1 EQUIFAX, INC. SC 29212 ... \n", + "2 EQUIFAX, INC. SC 29418 ... \n", + "3 EQUIFAX, INC. SC 29483 ... \n", + "4 EQUIFAX, INC. LA 70122 ... 
\n", + "... ... ... ... ... \n", + "19779 TRANSUNION INTERMEDIATE HOLDINGS, INC. FL 32811 ... \n", + "19780 TRANSUNION INTERMEDIATE HOLDINGS, INC. FL 32811 ... \n", + "19781 Experian Information Solutions Inc. CA 92602 ... \n", + "19782 EQUIFAX, INC. FL 34771 ... \n", + "19783 Experian Information Solutions Inc. NC 27834 ... \n", + "\n", + " issue_lemma \\\n", + "0 [information, belong, to, someone, else] \n", + "1 [credit, inquiry, on, your, report, that, you,... \n", + "2 [be, not, notify, of, investigation, status, o... \n", + "3 [account, information, incorrect] \n", + "4 [information, belong, to, someone, else] \n", + "... ... \n", + "19779 [account, information, incorrect] \n", + "19780 [account, information, incorrect] \n", + "19781 [information, belong, to, someone, else] \n", + "19782 [account, status, incorrect] \n", + "19783 [information, belong, to, someone, else] \n", + "\n", + " issue_pos \\\n", + "0 [NOUN, VERB, ADP, PRON, ADV] \n", + "1 [NOUN, NOUN, ADP, PRON, NOUN, SCONJ, PRON, AUX... \n", + "2 [AUX, PART, VERB, ADP, NOUN, NOUN, CCONJ, NOUN] \n", + "3 [NOUN, NOUN, ADJ] \n", + "4 [NOUN, VERB, ADP, PRON, ADV] \n", + "... ... \n", + "19779 [NOUN, NOUN, ADJ] \n", + "19780 [NOUN, NOUN, ADJ] \n", + "19781 [NOUN, VERB, ADP, PRON, ADV] \n", + "19782 [NOUN, NOUN, NOUN] \n", + "19783 [NOUN, VERB, ADP, PRON, ADV] \n", + "\n", + " removed_stops \\\n", + "0 [information, belong] \n", + "1 [credit, inquiry, report, recognize] \n", + "2 [notify, investigation, status, result] \n", + "3 [account, information, incorrect] \n", + "4 [information, belong] \n", + "... ... \n", + "19779 [account, information, incorrect] \n", + "19780 [account, information, incorrect] \n", + "19781 [information, belong] \n", + "19782 [account, status, incorrect] \n", + "19783 [information, belong] \n", + "\n", + " removed_stops_proces Topic \\\n", + "0 information belong] 5 \n", + "1 credit inquiry report recognize] 2 \n", + "2 notify investigation status result] 4 \n", + "3 account information incorrect] 3 \n", + "4 information belong] 5 \n", + "... ... ... \n", + "19779 account information incorrect] 3 \n", + "19780 account information incorrect] 3 \n", + "19781 information belong] 5 \n", + "19782 account status incorrect] 3 \n", + "19783 information belong] 5 \n", + "\n", + " scores compound comp_score \\\n", + "0 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "1 {'neg': 0.0, 'neu': 0.536, 'pos': 0.464, 'comp... 0.3818 positive \n", + "2 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "3 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "4 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "... ... ... ... \n", + "19779 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "19780 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "19781 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "19782 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "19783 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound... 0.0000 neutral \n", + "\n", + " neg_score sentiment \n", + "0 0.0 neutral \n", + "1 0.0 positive \n", + "2 0.0 neutral \n", + "3 0.0 neutral \n", + "4 0.0 neutral \n", + "... ... ... \n", + "19779 0.0 neutral \n", + "19780 0.0 neutral \n", + "19781 0.0 neutral \n", + "19782 0.0 neutral \n", + "19783 0.0 neutral \n", + "\n", + "[19784 rows x 29 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Date receivedProductSub-productIssueSub_issueConsumer_complaint_narrativeCompany public responseCompanyStateZIP code...issue_lemmaissue_posremoved_stopsremoved_stops_procesTopicscorescompoundcomp_scoreneg_scoresentiment
010/26/2024Credit reporting or other personal consumer re...Credit reportingIncorrect information on your reportInformation belongs to someone elseNaNNaNEQUIFAX, INC.PA19153...[information, belong, to, someone, else][NOUN, VERB, ADP, PRON, ADV][information, belong]information belong]5{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
110/26/2024Credit reporting or other personal consumer re...Credit reportingImproper use of your reportCredit inquiries on your report that you don't...NaNNaNEQUIFAX, INC.SC29212...[credit, inquiry, on, your, report, that, you,...[NOUN, NOUN, ADP, PRON, NOUN, SCONJ, PRON, AUX...[credit, inquiry, report, recognize]credit inquiry report recognize]2{'neg': 0.0, 'neu': 0.536, 'pos': 0.464, 'comp...0.3818positive0.0positive
210/18/2024Credit reporting or other personal consumer re...Credit reportingProblem with a company's investigation into an...Was not notified of investigation status or re...NaNNaNEQUIFAX, INC.SC29418...[be, not, notify, of, investigation, status, o...[AUX, PART, VERB, ADP, NOUN, NOUN, CCONJ, NOUN][notify, investigation, status, result]notify investigation status result]4{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
310/26/2024Credit reporting or other personal consumer re...Credit reportingIncorrect information on your reportAccount information incorrectNaNNaNEQUIFAX, INC.SC29483...[account, information, incorrect][NOUN, NOUN, ADJ][account, information, incorrect]account information incorrect]3{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
410/26/2024Credit reporting or other personal consumer re...Credit reportingIncorrect information on your reportInformation belongs to someone elseNaNNaNEQUIFAX, INC.LA70122...[information, belong, to, someone, else][NOUN, VERB, ADP, PRON, ADV][information, belong]information belong]5{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
..................................................................
1977910/23/2024Credit reporting or other personal consumer re...Credit reportingIncorrect information on your reportAccount information incorrectNaNNaNTRANSUNION INTERMEDIATE HOLDINGS, INC.FL32811...[account, information, incorrect][NOUN, NOUN, ADJ][account, information, incorrect]account information incorrect]3{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
1978010/23/2024Credit reporting or other personal consumer re...Credit reportingIncorrect information on your reportAccount information incorrectNaNNaNTRANSUNION INTERMEDIATE HOLDINGS, INC.FL32811...[account, information, incorrect][NOUN, NOUN, ADJ][account, information, incorrect]account information incorrect]3{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
1978110/18/2024Credit reporting or other personal consumer re...Credit reportingIncorrect information on your reportInformation belongs to someone elseNaNNaNExperian Information Solutions Inc.CA92602...[information, belong, to, someone, else][NOUN, VERB, ADP, PRON, ADV][information, belong]information belong]5{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
1978210/21/2024Credit reporting or other personal consumer re...Credit reportingIncorrect information on your reportAccount status incorrectNaNNaNEQUIFAX, INC.FL34771...[account, status, incorrect][NOUN, NOUN, NOUN][account, status, incorrect]account status incorrect]3{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
1978310/21/2024Credit reporting or other personal consumer re...Credit reportingIncorrect information on your reportInformation belongs to someone elseNaNNaNExperian Information Solutions Inc.NC27834...[information, belong, to, someone, else][NOUN, VERB, ADP, PRON, ADV][information, belong]information belong]5{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...0.0000neutral0.0neutral
\n", + "

19784 rows × 29 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df" + } + }, + "metadata": {}, + "execution_count": 83 + } + ] + } + ], + "metadata": { + "colab": { + "name": "Prompting.ipynb", + "provenance": [], + "include_colab_link": true + }, + "google": { + "image_path": "/static/site-assets/images/docs/logo-python.svg", + "keywords": [ + "examples", + "gemini", + "beginner", + "googleai", + "quickstart", + "python", + "text", + "chat", + "vision", + "embed" + ] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file