22043104+范纬.zip

行业研究

3.52MB

66 需要积分: 1

立即下载

资源介绍:

22043104+范纬.zip

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D

import warnings

warnings.filterwarnings(action = 'ignore')

%matplotlib inline

plt.rcParams['font.sans-serif']=['SimHei']

plt.rcParams['axes.unicode_minus']=False

from sklearn.datasets import make_blobs

from sklearn.feature_selection import f_classif

from sklearn import decomposition

from sklearn.cluster import KMeans,AgglomerativeClustering

from sklearn.metrics import silhouette_score,calinski_harabasz_score

import scipy.cluster.hierarchy as sch

from itertools import cycle

from matplotlib.patches import Ellipse

from sklearn.mixture import GaussianMixture

# Number of synthetic observations generated for each data set.
N = 100

# Two-feature data set: N samples around 4 centers, fixed seed.
X1, y1 = make_blobs(n_samples=N, n_features=2, centers=4, random_state=0)

# Three-feature data set: N samples around 4 centers, different fixed seed.
X2, y2 = make_blobs(n_samples=N, n_features=3, centers=4, random_state=123)

# Show the generating-center index of every sample in each data set.
print('y1=', y1)
print('y2=', y2)

y1= [0 3 0 0 0 0 2 3 0 3 3 3 3 3 3 1 1 2 2 1 0 3 2 1 0 2 2 0 1 1 1 3 1 1 2 0 3
 1 3 2 0 2 3 2 2 3 1 2 0 0 0 1 2 2 2 3 3 1 1 3 3 1 1 0 1 3 2 2 1 0 3 1 0 3
 0 0 2 2 1 1 1 3 2 0 1 2 1 1 0 0 0 2 0 2 2 3 3 2 3 0]

y2= [2 0 1 3 2 2 1 0 2 1 0 1 1 0 1 3 0 0 3 1 0 3 1 0 3 1 1 0 2 2 0 3 3 3 3 2 0
 0 3 1 2 0 3 0 2 2 2 2 0 2 1 0 1 3 0 1 2 3 0 1 1 2 2 3 2 3 3 3 1 1 0 3 2 2
 0 1 2 3 2 3 1 1 0 2 0 2 3 3 0 1 1 1 3 3 2 0 1 2 3 0]

# Overview figure of the raw (unclustered) data: the two-feature set on the
# left as a flat scatter, the three-feature set on the right as a 3-D scatter.
fig = plt.figure(figsize=(18, 12))

# Left panel: X1, first feature against second feature.
ax2d = fig.add_subplot(1, 2, 1)
ax2d.scatter(X1[:, 0], X1[:, 1], s=50)
ax2d.set_xlabel("X1-1")
ax2d.set_ylabel("X1-2")
ax2d.set_title("%d 个样本观测点的分布"%N)

# Right panel: X2, one axis per feature.
ax = fig.add_subplot(1, 2, 2, projection='3d')
ax.scatter(X2[:, 0], X2[:, 1], X2[:, 2], c='blue')
ax.set_xlabel("X2-1")
ax.set_ylabel("X2-2")
ax.set_zlabel("X2-3")
ax.set_title("%d 个样本观测点的分布"%N)

Text(0.5, 0.92, '100 个样本观测点的分布')

# Fit a 4-cluster K-means model on the two-feature data set X1.
KM = KMeans(n_clusters=4, max_iter=500)
KM.fit(X1)

# KM.labels_ holds one cluster id per sample; np.unique reduces that to the
# sorted set of distinct ids (this is not a prediction step).
labels = np.unique(KM.labels_)
print('labels=',labels)

# Visualise the clustering: one scatter series per cluster, each drawn with
# its own marker so the clusters stay distinguishable in the legend.
markers = 'o*^+'
for i, label in enumerate(labels):
    # Boolean mask selecting the samples assigned to this cluster; computed
    # once and reused for both coordinates.
    members = KM.labels_ == label
    plt.scatter(X1[members, 0], X1[members, 1],
                label="cluster %d"%label, marker=markers[i], s=50)

# Overlay the fitted cluster centers (drawn once, after all clusters, so the
# legend carries a single entry for them).
plt.scatter(KM.cluster_centers_[:, 0], KM.cluster_centers_[:, 1], marker='X',
            s=60, c='r', label="小类中心")

plt.legend(loc="best", framealpha=0.5)
plt.xlabel("X1-1")
plt.ylabel("X1-2")
plt.title("%d 个样本观测点的聚类结果"%N)

labels= [0 1 2 3]

Out[9]:

Text(0.5, 1.0, '100 个样本观测点的聚类结果')

# Fit a 4-cluster K-means model on the three-feature data set X2.
KM = KMeans(n_clusters=4, max_iter=500)
KM.fit(X2)

# The clustering solution is stored in the fitted object's .labels_ attribute
# (one cluster id per sample); np.unique yields the distinct cluster ids.
labels = np.unique(KM.labels_)

# Visualise the clustering in 3-D: loop over the clusters and draw each one
# as its own scatter series with a distinct marker.
ax = plt.subplot(111, projection='3d')
markers = 'o*^+'
for i, label in enumerate(labels):
    # Boolean mask selecting the samples assigned to this cluster; computed
    # once and reused for all three coordinates.
    members = KM.labels_ == label
    ax.scatter(X2[members, 0], X2[members, 1], X2[members, 2],
               label="cluster %d"%label, marker=markers[i], s=50)

# The cluster centroids are stored in the fitted object's cluster_centers_
# attribute; overlay them once, after all clusters have been drawn.
ax.scatter(KM.cluster_centers_[:, 0], KM.cluster_centers_[:, 1],
           KM.cluster_centers_[:, 2], marker='X', s=60, c='r',
           label="小类中心")

ax.legend(loc="best", framealpha=0.5)
ax.set_xlabel("X2-1")
ax.set_ylabel("X2-2")
ax.set_zlabel("X2-3")
ax.set_title("%d 个样本观测点的聚类结果"%N)

资源文件列表:

22043104+范纬.zip 大约有11个文件

22043104+范纬/
22043104+范纬/第一次作业.docx 279.18KB
22043104+范纬/第七次作业.docx 141.03KB
22043104+范纬/第三次作业.docx 124.77KB
22043104+范纬/第九次作业.docx 1.01MB
22043104+范纬/第二次作业.docx 80.66KB
22043104+范纬/第五次作业.docx 552.54KB
22043104+范纬/第八次作业.docx 664.06KB
22043104+范纬/第六次作业.docx 149.53KB
22043104+范纬/第十次作业.docx 594.46KB
22043104+范纬/第四次作业.docx 133.33KB