{"id":2576,"date":"2022-09-02T17:16:28","date_gmt":"2022-09-02T09:16:28","guid":{"rendered":"http:\/\/cnliutz.uicp.io\/?p=2576"},"modified":"2022-09-02T17:16:28","modified_gmt":"2022-09-02T09:16:28","slug":"python%e6%95%b0%e6%8d%ae%e6%94%b6%e9%9b%86%ef%bc%8c%e6%95%b4%e7%90%86%ef%bc%8c%e5%8f%98%e6%8d%a2%e3%80%81%e5%88%86%e6%9e%90-%e9%9a%8f%e6%9c%ba%e6%a3%ae%e6%9e%97","status":"publish","type":"post","link":"http:\/\/g1n29wqq.ipyingshe.net:5347\/?p=2576","title":{"rendered":"python\u6570\u636e\u6536\u96c6\uff0c\u6574\u7406\uff0c\u53d8\u6362\u3001\u5206\u6790&#8211;\u968f\u673a\u68ee\u6797"},"content":{"rendered":"\n<pre class=\"wp-block-code\"><code>#\u6570\u636e\u96c6\u4e0b\u8f7d\u5730\u5740wine quality dataset\uff1ahttps:\/\/www.kaggle.com\/datasets\/shelvigarg\/wine-quality-dataset\n#\u5206\u6790\u65f6\u8bf7\u5220\u9664type\u7b2c\u4e00\u5217type\n#\u4ee3\u7801win11\u4e13\u4e1a\u7248\uff0821H2\uff09+python3.8.5\u8fd0\u884c\u901a\u8fc7\nimport pandas as pd\nimport seaborn as sns\nimport matplotlib.pyplot as plt\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import GridSearchCV\nimport warnings\nwarnings.filterwarnings('ignore')\ntry:\n    wine = pd.read_csv('C:\\\\Users\\\\czliu\\\\Documents\\\\python\\\\winequalityN.csv',sep=',')\n    \nexcept:\n    print(\"Cannot find the file!\")\n    \nwine.info() #\u663e\u793a\u6570\u636e\u4fe1\u606f\n\n'''\n&lt;class 'pandas.core.frame.DataFrame'>\nRangeIndex: 6497 entries, 0 to 6496\nData columns (total 13 columns):\n #   Column                Non-Null Count  Dtype  \n---  ------                --------------  -----  \n 0   type                  6497 non-null   object \n 1   fixed acidity         6487 non-null   float64\n 2   volatile acidity      6489 non-null   float64\n 3   citric acid           6494 non-null   float64\n 4   residual sugar        6495 non-null   float64\n 5   chlorides             6495 non-null   float64\n 6   free sulfur dioxide   6497 non-null   float64\n 7   total 
sulfur dioxide  6497 non-null   float64\n 8   density               6497 non-null   float64\n 9   pH                    6488 non-null   float64\n 10  sulphates             6493 non-null   float64\n 11  alcohol               6497 non-null   float64\n 12  quality               6497 non-null   int64  \ndtypes: float64(11), int64(1), object(1)\nmemory usage: 660.0+ KB\n==================================\ndataframe \u7f3a\u5931\u503c\u7684\u5904\u7406\uff1a\ndf.dropna(how='all')#\u5220\u9664\u6240\u6709\u5185\u5bb9\u5747\u4e3a\u7f3a\u5931\u503c\u7684\u884c\ndf.dropna(axis=1) #\u4e22\u5f03\u6709\u7f3a\u5931\u503c\u7684\u5217\ndf.dropna(axis=1, how = 'all') #\u4e22\u5f03\u6240\u6709\u5217\u4e2d\u6240\u6709\u503c\u5747\u7f3a\u5931\u7684\u5217\ndf.dropna(axis=0, subset=&#91;'name', 'age'])#\u4e22\u5f03name\u548cage\u8fd9\u4e24\u5217\u4e2d\u6709\u7f3a\u5931\u503c\u7684\u884c\n'''\nwine.duplicated().sum()  #\u663e\u793a\u91cd\u590d\u8bb0\u5f55\u8def\u6570\nwine = wine.drop_duplicates()  #\u53bb\u9664\u91cd\u590d\u6570\u636e\n\nprint('-'*25+'\u63cf\u8ff0\u7edf\u8ba1\u8ba1\u7b97')\nprint(wine.describe())  #\u63cf\u8ff0\u7edf\u8ba1\u8ba1\u7b97\n'''\n       fixed acidity  volatile acidity  ...      alcohol      quality\ncount    6487.000000       6489.000000  ...  6497.000000  6497.000000\nmean        7.216579          0.339691  ...    10.491801     5.818378\nstd         1.296750          0.164649  ...     1.192712     0.873255\nmin         3.800000          0.080000  ...     8.000000     3.000000\n25%         6.400000          0.230000  ...     9.500000     5.000000\n50%         7.000000          0.290000  ...    10.300000     6.000000\n75%         7.700000          0.400000  ...    11.300000     6.000000\nmax        15.900000          1.580000  ...    
14.900000     9.000000\n\n&#91;8 rows x 12 columns]\n'''\nwine.quality.value_counts()  #\u5bf9quality\u5206\u7ec4\u8ba1\u6570\nprint('-'*25)\n'''\n6    2836\n5    2138\n7    1079\n4     216\n8     193\n3      30\n9       5\nName: quality, dtype: int64\n'''\nwine.quality.value_counts().plot(kind='pie',autopct ='%.2f')\n\nprint(wine.corr().quality)\n\n'''\n\u76f8\u5173\u7cfb\u6570\u8ba1\u7b97\nfixed acidity          -0.077031\nvolatile acidity       -0.265953\ncitric acid             0.085706\nresidual sugar         -0.036825\nchlorides              -0.200886\nfree sulfur dioxide     0.055463\ntotal sulfur dioxide   -0.041385\ndensity                -0.305858\npH                      0.019366\nsulphates               0.038729\nalcohol                 0.444319\nquality                 1.000000\nName:quality, dtype: float64\n'''\nsns.barplot(x ='quality',y='volatile acidity',data=wine) \nsns.barplot(x ='quality',y='alcohol',data=wine)\n\n#\u6570\u636e\u5206\u7ec4\u53d8\u6362\uff0c\u4ea7\u751f\u65b0\u53d8\u91cflabel\nfrom sklearn.preprocessing import LabelEncoder\nbins = (2,4,6,11) #\u5de6\u5f00\u53f3\u95ed\u533a\u95f4\ngroup_names = &#91;'low','medium','high']\nwine&#91;'quality_lb'] = pd.cut(wine&#91;'quality'],bins=bins,labels = group_names)\nlb_quality = LabelEncoder()\nwine&#91;'label']=lb_quality.fit_transform(wine&#91;'quality_lb'])\nprint(wine.label.value_counts())\nwine_copy = wine.copy()\nwine.drop(&#91;'quality','quality_lb'],axis=1,inplace = True)\nx = wine.iloc&#91;:,:-1]\ny = wine.label\nprint(x)\nprint('-'*25)\nprint(y)\nprint('='*25)     \nfrom sklearn.model_selection import train_test_split\nX_train,X_test,y_train,y_test = train_test_split(x,y,test_size = 0.2)\nfrom sklearn.preprocessing import scale\nX_train = scale(X_train)   #\u6570\u636e\u6807\u5316\nX_test = scale(X_test)\n\n#\u968f\u673a\u68ee\u6797\u6a21\u578b\u9884\u6d4b\u7ed3\u679c--\u6df7\u6dc6\u77e9\u9635\nfrom sklearn.metrics import confusion_matrix\nrfc = RandomForestClassifier(n_estimators = 200) 
#\u5efa\u7acb\u5b50\u6811\u7684\u6570\u91cf200\nrfc.fit(X_train,y_train)\ny_pred = rfc.predict(X_test)\nprint(confusion_matrix(y_test,y_pred))  #\u6df7\u6dc6\u77e9\u9635\uff0c\u884c\u4ee3\u8868\u5b9e\u9645\u7684\u5206\u7c7b\uff0c\u5217\u4ee3\u8868\u9884\u6d4b\u7684\u5206\u7c7b \n#\u53c2\u89c1\u5f20\u8389coursera.org&lt;\u7528python\u73a9\u8f6c\u6570\u636e>\u8bfe\u7a0b\uff0c\u7b2c\u56db\u5468\u7b2c\u4e8c\u90e8\u5206\u3002\n#\u6ce8\u610f\u53bb\u9664\u6587\u4ef6\u4e2d\u7684\u7f3a\u5931\u6570\u636e<\/code><\/pre>\n\n\n\n<div class=\"wp-block-file\"><a id=\"wp-block-file--media-99131ab8-1857-4639-8319-f908d5b67272\" href=\"http:\/\/cnliutz.uicp.io\/wp-content\/uploads\/2022\/09\/winequalityN.csv\">\u6240\u9700\u6570\u636e\u96c6  winequalityN<\/a><a href=\"http:\/\/cnliutz.uicp.io\/wp-content\/uploads\/2022\/09\/winequalityN.csv\" class=\"wp-block-file__button\" download aria-describedby=\"wp-block-file--media-99131ab8-1857-4639-8319-f908d5b67272\">\u4e0b\u8f7d<\/a><\/div>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[2,10],"tags":[],"class_list":["post-2576","post","type-post","status-publish","format-standard","hentry","category-2","category-python"],"_links":{"self":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts\/2576","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2576"}],"version
-history":[{"count":0,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts\/2576\/revisions"}],"wp:attachment":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2576"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2576"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2576"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}