## pandas简介
Pandas中一共有三种数据结构,分别为:Series、DataFrame和MultiIndex。
Pandas(Python Data Analysis Library)的名称来自面板数据(panel data)和Python数据分析(data analysis)。它最初由AQR Capital Management于2008年4月开发,并于2009年底开源,目前由专注于Python数据包开发的PyData开发团队继续开发和维护,属于PyData项目的一部分。Pandas最初被作为金融数据分析工具而开发出来,因此,pandas为时间序列分析提供了很好的支持。
{ ?"cells": [ ? { ?? "cell_type": "code", ?? "execution_count": 1, ?? "id": "9f7e44d2", ?? "metadata": {}, ?? "outputs": [], ?? "source": [ ??? "import pandas as pd\n", ??? "import numpy as np" ?? ] ? }, ? { ?? "cell_type": "code", ?? "execution_count": 8, ?? "id": "23c5a15f", ?? "metadata": {}, ?? "outputs": [ ??? { ???? "name": "stdout", ???? "output_type": "stream", ???? "text": [ ????? "0??? 0.959924\n", ????? "1??? 0.057490\n", ????? "2??? 0.395029\n", ????? "3??? 0.861239\n", ????? "4??? 0.332671\n", ????? "dtype: float64\n", ????? "0??? 0.959924\n", ????? "1??? 0.057490\n", ????? "2??? 0.395029\n", ????? "3??? 0.861239\n", ????? "dtype: float64\n", ????? "0??? 0.959924\n", ????? "1??? 0.057490\n", ????? "2??? 0.395029\n", ????? "3??? 0.861239\n", ????? "dtype: float64\n", ????? "0??? 0.959924\n", ????? "2??? 0.395029\n", ????? "4??? 0.332671\n", ????? "dtype: float64\n", ????? "0??? 20.000000\n", ????? "1??? 20.000000\n", ????? "2??? 20.000000\n", ????? "3???? 0.861239\n", ????? "4???? 0.332671\n", ????? "dtype: float64\n" ???? ] ??? } ?? ], ?? "source": [ ??? "#切片索引\n", ??? "s=pd.Series(np.random.rand(5))\n", ??? "print(s)\n", ??? "print(s[0:4])\n", ??? "print(s[:-1])\n", ??? "print(s[::2])\n", ??? "#修改值\n", ??? "s[:-2]=20\n", ??? "print(s)\n" ?? ] ? }, ? { ?? "cell_type": "code", ?? "execution_count": 45, ?? "id": "949b7461", ?? "metadata": {}, ?? "outputs": [ ??? { ???? "name": "stdout", ???? "output_type": "stream", ???? "text": [ ????? "0??? 64.755105\n", ????? "1??? 50.714969\n", ????? "2??? 52.834138\n", ????? "3??? 89.628520\n", ????? "4??? 69.999119\n", ????? "dtype: float64\n", ????? "##############################\n", ????? "0??? 64.755105\n", ????? "1??? 50.714969\n", ????? "2??? 52.834138\n", ????? "3??? 89.628520\n", ????? "4????????? NaN\n", ????? "dtype: float64\n", ????? "##############################\n", ????? "0???? True\n", ????? "1??? False\n", ????? "2??? False\n", ????? "3???? True\n", ????? "4??? False\n", ????? 
"dtype: bool <class 'pandas.core.series.Series'>\n", ????? "##############################\n", ????? "1??? 50.714969\n", ????? "2??? 52.834138\n", ????? "dtype: float64\n", ????? "##############################\n", ????? "0??? False\n", ????? "1??? False\n", ????? "2??? False\n", ????? "3??? False\n", ????? "4???? True\n", ????? "dtype: bool\n", ????? "##############################\n", ????? "0???? True\n", ????? "1???? True\n", ????? "2???? True\n", ????? "3???? True\n", ????? "4??? False\n", ????? "dtype: bool\n", ????? "##############################\n", ????? "4?? NaN\n", ????? "dtype: float64\n" ???? ] ??? } ?? ], ?? "source": [ ??? "#布尔索引\n", ??? "np.random.seed(88)\n", ??? "s=pd.Series(np.random.rand(5)*100)\n", ??? "print(s)\n", ??? "print('#'*30)\n", ??? "s[4]=None\n", ??? "print(s)\n", ??? "print('#'*30)\n", ??? "bol=s>55\n", ??? "print(bol,type(bol))\n", ??? "print('#'*30)\n", ??? "#通过布尔series获取值\n", ??? "print(s[s<55])\n", ??? "print('#'*30)\n", ??? "#查看series的方法\n", ??? "bol2=s.isnull()\n", ??? "print(bol2)\n", ??? "'''返回false没有缺失值返回true有缺失值 s.nonull()函数则相反'''\n", ??? "print('#'*30)\n", ??? "bol3=s.notnull()\n", ??? "print(bol3)\n", ??? "print('#'*30)\n", ??? "#获取空值\n", ??? "print(s[bol2])" ?? ] ? }, ? { ?? "cell_type": "code", ?? "execution_count": 62, ?? "id": "6791fc89", ?? "metadata": { ??? "scrolled": true ?? }, ?? "outputs": [ ??? { ???? "name": "stdout", ???? "output_type": "stream", ???? "text": [ ????? "0??? 64.755105\n", ????? "1??? 50.714969\n", ????? "2??? 52.834138\n", ????? "3??? 89.628520\n", ????? "4??? 69.999119\n", ????? "5??? 71.429710\n", ????? "6??? 71.733838\n", ????? "7??? 22.281946\n", ????? "8??? 17.515452\n", ????? "9??? 45.684149\n", ????? "dtype: float64\n", ????? "##############################\n", ????? "0??? 64.755105\n", ????? "dtype: float64\n", ????? "9??? 45.684149\n", ????? "dtype: float64\n" ???? ] ??? } ?? ], ?? "source": [ ??? "#pandas数据结构series技巧----数据查看,重新索引,对齐,增,删,改\n", ??? "#数据查看\n", ??? 
"np.random.seed(88)\n", ??? "s=pd.Series(np.random.rand(10)*100)\n", ??? "print(s)\n", ??? "print('#'*30)\n", ??? "'''head,tail方法'''\n", ??? "print(s.head(1))\n", ??? "print(s.tail(1))\n", ??? "\n" ?? ] ? }, ? { ?? "cell_type": "code", ?? "execution_count": 88, ?? "id": "c97af1c5", ?? "metadata": {}, ?? "outputs": [ ??? { ???? "name": "stdout", ???? "output_type": "stream", ???? "text": [ ????? "0??? 64.755105\n", ????? "1??? 50.714969\n", ????? "2??? 52.834138\n", ????? "3??? 89.628520\n", ????? "4??? 69.999119\n", ????? "dtype: float64\n", ????? "##############################\n", ????? "c?? NaN\n", ????? "d?? NaN\n", ????? "a?? NaN\n", ????? "e?? NaN\n", ????? "f?? NaN\n", ????? "dtype: float64\n", ????? "c??? 11.000000\n", ????? "d??? 11.000000\n", ????? "a??? 11.000000\n", ????? "e??? 11.000000\n", ????? "f??? 11.000000\n", ????? "0??? 64.755105\n", ????? "1??? 50.714969\n", ????? "2??? 52.834138\n", ????? "3??? 89.628520\n", ????? "4??? 69.999119\n", ????? "5??? 11.000000\n", ????? "dtype: float64\n" ???? ] ??? } ?? ], ?? "source": [ ??? "#重新索引\n", ??? "np.random.seed(88)\n", ??? "s=pd.Series(np.random.rand(5)*100)\n", ??? "print(s)\n", ??? "print('#'*30)\n", ??? "'''重新索引后面value为null'''\n", ??? "s1=s.reindex(['c','d','a','e','f'])\n", ??? "print(s1)\n", ??? "s2=s.reindex(['c','d','a','e','f',0,1,2,3,4,5],fill_value=11)\n", ??? "print(s2)" ?? ] ? }, ? { ?? "cell_type": "code", ?? "execution_count": 94, ?? "id": "43356734", ?? "metadata": {}, ?? "outputs": [ ??? { ???? "name": "stdout", ???? "output_type": "stream", ???? "text": [ ????? "a??? 64.755105\n", ????? "b??? 50.714969\n", ????? "c??? 52.834138\n", ????? "dtype: float64\n", ????? "##############################\n", ????? "a??? 89.628520\n", ????? "e??? 69.999119\n", ????? "f??? 71.429710\n", ????? "dtype: float64\n", ????? "##############################\n", ????? "a??? 154.383625\n", ????? "b?????????? NaN\n", ????? "c?????????? NaN\n", ????? "e?????????? NaN\n", ????? "f?????????? NaN\n", ????? 
"dtype: float64\n" ???? ] ??? } ?? ], ?? "source": [ ??? "#对齐\n", ??? "np.random.seed(88)\n", ??? "s1=pd.Series(np.random.rand(3)*100,index=['a','b','c'])\n", ??? "s2=pd.Series(np.random.rand(3)*100,index=['a','e','f'])\n", ??? "print(s1)\n", ??? "print('#'*30)\n", ??? "print(s2)\n", ??? "print('#'*30)\n", ??? "print(s1+s2)" ?? ] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "faa8a408", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "e32f916a", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "1d54af1d", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "b5fcdd26", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "28345851", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "d1ff6b29", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "fb30e137", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "51ac6517", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "fa8f3c3a", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "e5e09818", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? }, ? { ?? "cell_type": "code", ?? "execution_count": null, ?? "id": "8fdfb24a", ?? "metadata": {}, ?? "outputs": [], ?? "source": [] ? } ?], ?"metadata": { ? "kernelspec": { ?? "display_name": "Python 3", ?? "language": "python", ?? "name": "python3" ? }, ? "language_info": { ?? "codemirror_mode": { ??? 
"name": "ipython", ??? "version": 3 ?? }, ?? "file_extension": ".py", ?? "mimetype": "text/x-python", ?? "name": "python", ?? "nbconvert_exporter": "python", ?? "pygments_lexer": "ipython3", ?? "version": "3.8.8" ? } ?}, ?"nbformat": 4, ?"nbformat_minor": 5 }
|