{"cells":[{"cell_type":"markdown","id":"96549bb7-9943-4c04-97ab-bdee8d493bd6","metadata":{"id":"96549bb7-9943-4c04-97ab-bdee8d493bd6"},"source":["\"Open\n","\n","# sqlite in python\n","\n","An sqlite3 library ships with python. In this tutorial, we'll discuss how to utilize this library and read sqlite tables into pandas. With this, you can generalize to other python APIs to other databases. First, let's continue on with our work from the previous notebook. A nice little tutorial can be found [here](https://datacarpentry.org/python-ecology-lesson/09-working-with-sql/index.html)."]},{"cell_type":"code","execution_count":null,"id":"6dc9eb71-e4eb-4a7e-8c78-11d8bc266353","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6dc9eb71-e4eb-4a7e-8c78-11d8bc266353","executionInfo":{"status":"ok","timestamp":1713207215487,"user_tz":240,"elapsed":4142,"user":{"displayName":"Brian Caffo","userId":"07979705296072332292"}},"outputId":"b0c05179-1048-415d-af0d-d24e373e8554"},"outputs":[{"output_type":"stream","name":"stdout","text":["--2024-04-15 18:53:36-- https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/sql/opioid.db\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 7774208 (7.4M) [application/octet-stream]\n","Saving to: ‘opioid.db’\n","\n","opioid.db 100%[===================>] 7.41M 21.5MB/s in 0.3s \n","\n","2024-04-15 18:53:36 (21.5 MB/s) - ‘opioid.db’ saved [7774208/7774208]\n","\n","('1', 'AUTAUGA', 'AL', '01001', '1', '1', 'Autauga', 'Autauga County, Alabama', 'B01003_001', '2006', '51328', 'Autauga, AL', '594.44')\n","('2', 'BALDWIN', 'AL', '01003', '1', '3', 'Baldwin', 'Baldwin County, Alabama', 'B01003_001', '2006', '168121', 'Baldwin, AL', '1589.78')\n","('3', 'BARBOUR', 'AL', '01005', '1', '5', 'Barbour', 'Barbour County, Alabama', 'B01003_001', '2006', '27861', 'Barbour, AL', '884.88')\n","('4', 'BIBB', 'AL', '01007', '1', '7', 'Bibb', 'Bibb County, Alabama', 'B01003_001', '2006', '22099', 'Bibb, AL', '622.58')\n","('5', 'BLOUNT', 'AL', '01009', '1', '9', 'Blount', 'Blount County, Alabama', 'B01003_001', '2006', '55485', 'Blount, AL', '644.78')\n"]},{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{},"execution_count":1}],"source":["import sqlite3 as sq3\n","import pandas as pd\n","\n","##Download the database using the unix command wget\n","!wget https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/sql/opioid.db\n","\n","## Create the connection\n","con = sq3.connect(\"opioid.db\")\n","\n","## cursor() creates an object that can execute functions in the sqlite cursor\n","sql = con.cursor()\n","\n","## execute a command, loop through the output and print out\n","for row in sql.execute(\"select * from county_info limit 5;\"):\n"," print(row)\n","\n","\n","# you have to close the connection\n","con.close"]},{"cell_type":"markdown","id":"ae5fc83a-3440-42f4-a938-ecff8f43988e","metadata":{"id":"ae5fc83a-3440-42f4-a938-ecff8f43988e"},"source":["Let's read this dataset into pandas."]},{"cell_type":"code","execution_count":null,"id":"b54a4a17-8dc6-421e-80e7-08f39893e28a","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"b54a4a17-8dc6-421e-80e7-08f39893e28a","executionInfo":{"status":"ok","timestamp":1713207217688,"user_tz":240,"elapsed":2208,"user":{"displayName":"Brian Caffo","userId":"07979705296072332292"}},"outputId":"29347935-8fb6-4c99-8338-af36b5f021ef"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" BUYER_COUNTY BUYER_STATE countyfips STATE COUNTY county_name \\\n","0 1 AUTAUGA AL 01001 1 1 Autauga \n","1 2 BALDWIN AL 01003 1 3 Baldwin \n","2 3 BARBOUR AL 01005 1 5 Barbour \n","3 4 BIBB AL 01007 1 7 Bibb \n","4 5 BLOUNT AL 01009 1 9 Blount \n","\n"," NAME variable year population Areaname \\\n","0 Autauga County, Alabama B01003_001 2006 51328 Autauga, AL \n","1 Baldwin County, Alabama B01003_001 2006 168121 Baldwin, AL \n","2 Barbour County, Alabama B01003_001 2006 27861 Barbour, AL \n","3 Bibb County, Alabama B01003_001 2006 22099 Bibb, AL \n","4 Blount County, Alabama B01003_001 2006 55485 Blount, AL \n","\n"," LND110210D \n","0 594.44 \n","1 1589.78 \n","2 884.88 \n","3 622.58 \n","4 644.78 "],"text/html":["\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
BUYER_COUNTYBUYER_STATEcountyfipsSTATECOUNTYcounty_nameNAMEvariableyearpopulationAreanameLND110210D
01AUTAUGAAL0100111AutaugaAutauga County, AlabamaB01003_001200651328Autauga, AL594.44
12BALDWINAL0100313BaldwinBaldwin County, AlabamaB01003_0012006168121Baldwin, AL1589.78
23BARBOURAL0100515BarbourBarbour County, AlabamaB01003_001200627861Barbour, AL884.88
34BIBBAL0100717BibbBibb County, AlabamaB01003_001200622099Bibb, AL622.58
45BLOUNTAL0100919BlountBlount County, AlabamaB01003_001200655485Blount, AL644.78
\n","
\n","
\n","\n","
\n"," \n","\n"," \n","\n"," \n","
\n","\n","\n","
\n"," \n","\n","\n","\n"," \n","
\n","\n","
\n","
\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"county_info","summary":"{\n \"name\": \"county_info\",\n \"rows\": 28265,\n \"fields\": [\n {\n \"column\": \"\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 28265,\n \"samples\": [\n \"24878\",\n \"17508\",\n \"15431\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BUYER_COUNTY\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1849,\n \"samples\": [\n \"ALLEN\",\n \"CUYAHOGA\",\n \"CLOUD\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BUYER_STATE\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 51,\n \"samples\": [\n \"TX\",\n \"SC\",\n \"VA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"countyfips\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3142,\n \"samples\": [\n \"49033\",\n \"53041\",\n \"21151\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"STATE\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 51,\n \"samples\": [\n \"48\",\n \"45\",\n \"51\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"COUNTY\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 325,\n \"samples\": [\n \"409\",\n \"163\",\n \"437\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"county_name\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1850,\n \"samples\": [\n \"Conejos\",\n \"Hays\",\n \"Brown\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"NAME\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3140,\n \"samples\": [\n \"Russell County, Virginia\",\n \"Lewis County, Washington\",\n \"Marion County, Kentucky\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"variable\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NA\",\n \"B01003_001\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"year\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"2013\",\n \"2007\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 24017,\n \"samples\": [\n \"8249\",\n \"3869\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Areaname\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3136,\n \"samples\": [\n \"Catron, NM\",\n \"Walker, GA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LND110210D\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3101,\n \"samples\": [\n \"306.02\",\n \"173.92\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":2}],"source":["con = sq3.connect(\"opioid.db\")\n","\n","county_info = pd.read_sql_query(\"SELECT * from county_info\", con)\n","\n","# you have to close the connection\n","con.close\n","\n","county_info.head()"]},{"cell_type":"markdown","source":["This should get you started on working with SQL in python. Personally, I find using python easier than directly using the SQL, so the basic approach in this section is how I would suggest that you operate."],"metadata":{"id":"sgzpOy22GiVN"},"id":"sgzpOy22GiVN"}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"},"colab":{"provenance":[]}},"nbformat":4,"nbformat_minor":5}