python代码大全和用法(python代码命令大全)
2023-01-21 canyinms.com 【 字体:大 中 小 】
python代码大全和用法,python,代码命令大全。小编来告诉你更多相关信息。
一些常用的python代码合集,方便检索引用
模块1:读写excel文件
from datetime import datetime
import odps
import xlwt
import os
from odps import DataFrame
import pandas as pd
import xlrd
import numpy as np
from collections import defaultdict
from collections import Counter
def write_imf(fl_save_path, data):
    """Export a pandas DataFrame to an Excel workbook using xlwt.

    The column labels are written to row 0 of a sheet named ``data`` and the
    DataFrame rows follow from row 1 onwards.

    :param fl_save_path: path of the workbook file to create
    :param data: pandas DataFrame to export
    :return: None
    """
    # The original snippet used ``wb`` without ever creating the workbook.
    wb = xlwt.Workbook(encoding='utf-8')
    sh = wb.add_sheet(u'data', cell_overwrite_ok=True)

    # Header row.
    for col, name in enumerate(data.columns.values):
        sh.write(0, col, name)

    # Data rows, shifted down by one to leave room for the header.
    n_rows, n_cols = data.shape
    for row in range(n_rows):
        for col in range(n_cols):
            value = data.iloc[row, col]
            # Values carrying an int64/float64 dtype are converted to
            # built-in numbers before writing; everything else is written
            # unchanged (objects without a ``dtype`` attribute included).
            dtype = getattr(value, 'dtype', None)
            if dtype == 'int64':
                value = int(value)
            elif dtype == 'float64':
                value = float(value)
            sh.write(row + 1, col, value)

    wb.save(fl_save_path)
    print('congratulation save successful!')
def save_pd_to_csv(fl_save_path, data):
    """Write a pandas DataFrame to a CSV file, reporting success as a bool.

    NOTE(review): the original body was truncated (only ``return True`` /
    ``except`` / ``return False`` survived), so the exact ``to_csv`` arguments
    are unknown; pandas defaults are used here -- confirm encoding / index
    settings against the intended consumers.

    :param fl_save_path: destination path of the CSV file
    :param data: pandas DataFrame to save
    :return: True when the file was written, False when writing failed
    """
    try:
        data.to_csv(fl_save_path)
    except Exception:
        # Best-effort contract kept from the original: failures are reported
        # through the return value instead of propagating.
        return False
    return True
def get_excel_content(file_path, sheet_name=None):
    """Load one worksheet of an Excel workbook into a pandas DataFrame.

    The first row of the sheet supplies the column labels; the remaining rows
    become the data.

    NOTE(review): xlrd 2.x reads only legacy ``.xls`` files -- confirm the
    installed version if ``.xlsx`` workbooks are passed in.

    :param file_path: path of the workbook to read
    :param sheet_name: name of the worksheet to read; the first sheet is used
        when omitted
    :return: DataFrame with the sheet content (empty when the sheet has no rows)
    """
    wb = xlrd.open_workbook(file_path, encoding_override='utf-8')
    if sheet_name is None:
        sheet = wb.sheet_by_index(0)
    else:
        sheet = wb.sheet_by_name(sheet_name)

    # The original never filled this list, so indexing row 0 always failed.
    wb_cont_imf = [sheet.row_values(r) for r in range(sheet.nrows)]
    if not wb_cont_imf:
        return pd.DataFrame()

    df = pd.DataFrame(wb_cont_imf[1:], columns=wb_cont_imf[0])
    return df
模块2:获取各种时间
def getMonthFirstDayAndLastDay(year=None, month=None):
    """Return the last day of the given month.

    Despite the function name, only the last day is returned; the return
    shape is kept as in the original so existing callers are unaffected.

    :param year: year, defaults to the current year; int or str accepted
    :param month: month, defaults to the current month; int or str accepted
    :return: last day of that month as a ``datetime.date``
    """
    # Imported locally: the surrounding file binds the name ``datetime``
    # inconsistently (class in one snippet, module in another) and imports
    # ``calendar`` only further down.
    import calendar
    import datetime as _dt

    today = _dt.date.today()
    year = int(year) if year else today.year
    month = int(month) if month else today.month

    # monthrange() -> (weekday of the 1st, number of days in the month)
    _, monthRange = calendar.monthrange(year, month)
    return _dt.date(year=year, month=month, day=monthRange)
模块3:pd中的dataframe转png
def render_mpl_table(data, col_width=5.0, row_height=0.625, font_size=1,
                     bbox=None, header_columns=0,
                     ax=None, *, header_color='#40466e',
                     row_colors=('#f1f1f2', 'w'), edge_color='w',
                     **kwargs):
    """Draw a pandas DataFrame as a matplotlib table and return its axes.

    :param data: DataFrame to render (``data.values`` become the cells,
        ``data.columns`` the column labels)
    :param col_width: figure width per column, used only when ``ax`` is None
    :param row_height: figure height per row, used only when ``ax`` is None
    :param font_size: font size applied to the table
    :param bbox: table bounding box; defaults to ``[0, 0, 1, 1]``
    :param header_columns: number of leading columns styled like the header
    :param ax: axes to draw on; a new figure/axes is created when None
    :param header_color: face colour of header cells
    :param row_colors: face colours cycled over the body rows
    :param edge_color: edge colour of every cell
    :param kwargs: forwarded to ``ax.table``
    :return: the axes holding the table

    NOTE(review): the three colour parameters and the figure creation were
    missing from the snippet (the names were used but never defined); the
    defaults here are a reconstruction -- confirm against the intended look.
    """
    if bbox is None:
        bbox = [0, 0, 1, 1]

    if ax is None:
        plt.style.use('ggplot')
        # One extra row for the header; size is (n_cols, n_rows + 1) scaled
        # by the per-cell width/height.
        size = (np.array(data.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
        _, ax = plt.subplots(figsize=size)
        ax.axis('off')

    mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, **kwargs)
    mpl_table.auto_set_font_size(False)
    mpl_table.set_fontsize(font_size)

    # Keys of the cell mapping are (row, column) tuples.
    for (nrow, ncol), cell in mpl_table._cells.items():
        cell.set_edgecolor(edge_color)
        if nrow == 0 or ncol < header_columns:
            cell.set_text_props(weight='bold', color='w')
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[nrow % len(row_colors)])

    # Strip ticks and margins on the current axes so a saved image contains
    # only the table.
    plt.gca().xaxis.set_major_locator(plt.NullLocator())
    plt.gca().yaxis.set_major_locator(plt.NullLocator())
    plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0, wspace=0)
    plt.margins(0, 0)
    return ax
模块4:绘制词云
_author_ = \'xisuo\'
import datetime
import calendar
import xlwt
import os
import pandas as pd
import xlrd
import openpyxl
import numpy as np
from collections import defaultdict
import platform
from wordcloud import WordCloud,STOPWORDS
import matplotlib.pyplot as plt
from PIL import Image
def create_wordcloud(docs=None, imgs=None, filename=None):
    """Generate a word-cloud image from a text file and a mask image.

    Paths are resolved relative to the module-level ``current_file``
    directory, which the ``__main__`` section of this script sets up.

    :param docs: name of the txt file with the words (ideally without repeats)
    :param imgs: name of the image file used as the cloud's mask shape
    :param filename: output image name; defaults to "词云结果.png"
    :return: None
    """
    with open(os.path.join(current_file, docs)) as fh:
        text = fh.read()
    alice_mask = np.array(Image.open(os.path.join(current_file, imgs)))
    print(font_path)
    # NOTE(review): font_path is printed but never handed to WordCloud in
    # this snippet -- confirm whether ``font_path=font_path`` was intended.

    # The original passed ``STOPWORDS.add("said")``, which evaluates to None
    # and mutates the library-wide set; build a private copy instead.
    stopwords = set(STOPWORDS)
    stopwords.add("said")

    wc = WordCloud(background_color="white",
                   max_words=2000,
                   mask=alice_mask,
                   stopwords=stopwords,
                   )
    wc.generate(text)
    if filename is None:
        filename = "词云结果.png"
    wc.to_file(os.path.join(current_file, filename))
def main(docs=None, imgs=None, filename=None):
    """Build the word cloud and report success.

    The original body referenced ``docs``, ``imgs`` and ``filename`` without
    defining them anywhere in the snippet, so every call died with a
    NameError. They are now optional parameters and a missing input is
    reported explicitly.

    :param docs: name of the text file holding the words
    :param imgs: name of the mask image file
    :param filename: output image name (``create_wordcloud`` picks its
        default when None)
    :raises ValueError: when ``docs`` or ``imgs`` is not supplied
    """
    if docs is None or imgs is None:
        raise ValueError('main() requires both docs (text file) and imgs (mask image)')
    create_wordcloud(docs=docs, imgs=imgs, filename=filename)
    print('create wordcloud successful')
if __name__ == '__main__':
    # Script entry point: choose a font per OS, resolve the working
    # directory, run main() and print timing information.
    start_time = datetime.datetime.now()
    print('start running program at:%s' % start_time)

    systemp_type = platform.system()
    if systemp_type == 'Windows':
        font_path = 'simfang.ttf'
    elif systemp_type == 'Linux':
        font_path = 'Arial Unicode MS.ttf'
    else:
        # Unsupported platform: stop quietly, as the original quit() did.
        raise SystemExit

    # Resolved for every supported OS; the original did this only in the
    # Windows branch, leaving current_file undefined on Linux.
    try:
        current_path = os.getcwd()
    except OSError:
        current_path = os.path.dirname(__file__)
    # NOTE(review): the original first set current_file to the 'docs'
    # sub-folder and immediately overwrote it with current_path; the
    # effective value is kept here -- confirm which one was intended.
    current_file = current_path

    if not os.path.exists(current_file):
        os.mkdir(current_file)
        print('目录中不存在docs文件夹,完成新文件夹创建过程。')
    print('当前操作系统:%s,文件存储路径为:%s' % (systemp_type, current_file))

    # NOTE(review): the inputs main() relies on (docs / imgs / filename) are
    # never defined in this snippet -- supply them before running.
    main()

    end_time = datetime.datetime.now()
    tt = end_time - start_time
    print('ending time:%s' % end_time)
    print('this analysis total spend time:%s' % tt.seconds)
模块5:下载ppt素材
_author_ = \'xisuo\'
import urllib.request
import requests
from bs4 import BeautifulSoup
from lxml import etree
import os
def download_ppt_images(url, save_path=None):
    """Download every article image found on a page into a local folder.

    The original flat script used ``url`` and the counter ``i`` without
    defining them, created the folder only when it already existed, and had
    an ``except`` without a matching ``try``.

    :param url: address of the page to scan for images
    :param save_path: target folder; defaults to ``ppt_img`` next to this file
    :return: None
    """
    response = requests.get(url).text
    html = etree.HTML(response)
    src_list = html.xpath('//div/article/p/img/@src')

    if save_path is None:
        current_path = os.path.dirname(__file__)
        save_path = os.path.join(current_path, 'ppt_img')
    # Create the folder when it is missing (the original test was inverted).
    if not os.path.exists(save_path):
        os.mkdir(save_path)
        print('img folder create successful')

    # NOTE(review): the original counter's start value was lost; numbering
    # starts at 0 here.
    for i, src in enumerate(src_list):
        save_img_path = os.path.join(save_path, '%d.jpg' % i)
        try:
            with open(save_img_path, 'wb') as f:
                f.write(urllib.request.urlopen(src).read())
            print('save true')
        except Exception:
            # Best effort: one failed image must not stop the remaining ones.
            print('save img fail')


if __name__ == '__main__':
    import sys

    # The page address is taken from the command line because the snippet
    # never defined it.
    if len(sys.argv) > 1:
        download_ppt_images(sys.argv[1])
模块6:模型存储和读取
import pickle

# NOTE(review): recent scikit-learn releases may no longer expose joblib;
# a plain `import joblib` may be required instead -- confirm against the
# installed version.
from sklearn import joblib
from sklearn import svm
from sklearn2pmml import PMMLPipeline, sklearn2pmml
def save_model(train_X, train_y):
    """Train an SVM classifier and persist it in three formats.

    The fitted model is written to ``train_model.m`` (joblib),
    ``train_model.pmml`` (sklearn2pmml) and ``train_model.pickle`` (pickle)
    in the current working directory.

    :param train_X: training samples passed to ``SVC.fit``
    :param train_y: training labels passed to ``SVC.fit``
    :return: True once all three files have been written
    """
    clf = svm.SVC()
    # The original fitted on undefined names ``X`` / ``y``.
    clf.fit(train_X, train_y)

    joblib.dump(clf, "train_model.m")
    # NOTE(review): sklearn2pmml may expect a PMMLPipeline rather than a bare
    # estimator -- confirm against the installed sklearn2pmml version.
    sklearn2pmml(clf, "train_model.pmml")
    with open('train_model.pickle', 'wb') as f:
        pickle.dump(clf, f)
    return True
def load_model():
    """Load the persisted model from its joblib and pickle files.

    :return: tuple ``(clf_joblib, clf_pickle)`` with the model read from
        ``train_model.m`` and from ``train_model.pickle``
    """
    clf_joblib = joblib.load('train_model.m')
    # The original used ``==`` (a comparison against an undefined name) and
    # read 'linearregression.pickle', a file save_model() never writes.
    # SECURITY: unpickling executes arbitrary code -- only load trusted files.
    with open('train_model.pickle', 'rb') as f:
        clf_pickle = pickle.load(f)
    return clf_joblib, clf_pickle
模块7:TF-IDF
import time
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF scoring of keyword tags grouped by income level.
print('开始读取KeyTag标签...')
read_data_path = 'D:/untitled/incomelevel_kwtag_20190801.txt'
load_data = pd.read_csv(read_data_path, sep='\t', encoding='utf-8')
data = pd.DataFrame(load_data, columns=['income_level', 'kw_tag'])
print('...读取KeyTag标签完成')

# Concatenate the tags of each income level into one document per level.
print('开始分组处理KeyTag标签...')
incomelevel_top = data[data['income_level'] == '高']
kw_tag_top = ' '.join(incomelevel_top['kw_tag'])
print('kw_tag_top : \n', kw_tag_top)
incomelevel_mid = data[data['income_level'] == '中']
kw_tag_mid = ' '.join(incomelevel_mid['kw_tag'])
print('kw_tag_mid : \n', kw_tag_mid)
incomelevel_low = data[data['income_level'] == '低']
kw_tag_low = ' '.join(incomelevel_low['kw_tag'])
print('kw_tag_low : \n', kw_tag_low)
print('...分组处理KeyTag标签完成')

# Raw term counts (not used further in this excerpt) and TF-IDF weights
# over the three documents.
vectorizer = CountVectorizer()
result = vectorizer.fit_transform([kw_tag_top, kw_tag_mid, kw_tag_low])
transformer = TfidfVectorizer()
kw_tag_score = transformer.fit_transform([kw_tag_top, kw_tag_mid, kw_tag_low])
print('...KeyTag分词结束')

# get_feature_names() is absent from newer scikit-learn releases; prefer
# the replacement and fall back for old installations.
if hasattr(transformer, 'get_feature_names_out'):
    kw_tag_value = transformer.get_feature_names_out()
else:
    kw_tag_value = transformer.get_feature_names()
result_target = pd.DataFrame(kw_tag_value, columns=['kw_tag'])
print('result_target : \n', result_target)

# One row of scores per document, in the order top / mid / low.
tf_score = kw_tag_score.toarray()
print('tf_score : \n', tf_score)
# The 'top' frame was missing from the snippet although it is printed below.
kw_tag_score_top = pd.DataFrame(tf_score[0], columns=['kw_tag_score_top'])
kw_tag_score_mid = pd.DataFrame(tf_score[1], columns=['kw_tag_score_mid'])
kw_tag_score_low = pd.DataFrame(tf_score[2], columns=['kw_tag_score_low'])
print(len(kw_tag_score_top))
模块8:生成省市地图
import time
import pandas as pd
import xlrd
import re
import matplotlib.pyplot as plt
import six
import numpy as np
from pyecharts.render import make_snapshot
from snapshot_phantomjs import snapshot
from pyecharts import options as opts
from collections import defaultdict
from pyecharts.charts import Bar, Geo, Map, Line,Funnel,Page
import os
from example.commons import Faker
def create_zjs_map():
    """Render the Zhejiang province penetration-rate map and save a PNG.

    Reads the sheet "省份地图" of "白皮书数据地图.xlsx" in the current working
    directory, builds a pyecharts ``Map`` and snapshots it to ``zj_map.png``.

    :return: the configured pyecharts ``Map`` chart
    """
    folder_path = os.getcwd()
    file_name = "白皮书数据地图.xlsx"
    file_path = os.path.join(folder_path, file_name)
    dat = get_excel_content(file_path, sheet_name="省份地图")

    df = dat[['城市', '渗透率']]
    df.columns = ['city', 'penarate']
    print(df)
    citys = df['city'].values.tolist()
    values = df['penarate'].values.tolist()
    print(citys)
    print('{:.0f}%'.format(max(values)*100), '{:.0f}%'.format(min(values)*100))

    city_name = '浙江'
    # The chained expression lost its final closing parenthesis in the
    # original snippet.
    penetration_map = (
        Map(init_opts=opts.InitOpts(width='1200px', height='1000px', bg_color='white'))
        .add("{}透率分布".format(city_name), [list(z) for z in zip(citys, values)], city_name)
        .set_series_opts(
            label_opts=opts.LabelOpts(
                is_show=True,
                font_size=15
            )
        )
        .set_global_opts(
            visualmap_opts=opts.VisualMapOpts(
                is_show=True,
                max_=max(values),
                min_=min(values),
                is_calculable=False,
                orient='horizontal',
                split_number=3,
                range_text=['{:.0f}%'.format(max(values)*100), '{:.0f}%'.format(min(values)*100)],
                pos_left='10%',
                pos_bottom='15%'
            ),
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )
    make_snapshot(snapshot, penetration_map.render(), "zj_map.png")
    print('保存 zj_map.png')
    return penetration_map
def create_county_map(city_name):
    """Render the county-level penetration-rate map of one city as a PNG.

    Reads the sheet "城市地图" of "白皮书数据地图.xlsx" in the current working
    directory, keeps the rows of ``city_name`` and snapshots the chart to
    ``city_map_<city_name>.png``.

    :param city_name: value of the ``city`` column to plot, e.g. '杭州市'
    :return: the configured pyecharts ``Map`` chart
    """
    folder_path = os.getcwd()
    file_name = "白皮书数据地图.xlsx"
    file_path = os.path.join(folder_path, file_name)
    dat = get_excel_content(file_path, sheet_name="城市地图")

    df = dat[['city', 'county', 'penarate']][dat.city == city_name]
    citys = df['county'].values.tolist()
    values = df['penarate'].values.tolist()
    print(citys)

    # NOTE(review): the original called ``reg.sub('', city_name)`` with an
    # undefined ``reg``; presumably it stripped the trailing '市' to obtain
    # the map name -- confirm against the pyecharts map registry.
    map_name = re.sub('市$', '', city_name)

    # The chained expression lost its final closing parenthesis in the
    # original snippet.
    province_penetration_map = (
        Map(init_opts=opts.InitOpts(width='1200px', height='1000px', bg_color='white'))
        .add("{}透率分布".format(city_name), [list(z) for z in zip(citys, values)], map_name)
        .set_series_opts(
            label_opts=opts.LabelOpts(
                is_show=True,
                font_size=15
            )
        )
        .set_global_opts(
            visualmap_opts=opts.VisualMapOpts(
                is_show=True,
                max_=max(values),
                min_=min(values),
                is_calculable=False,
                orient='horizontal',
                split_number=3,
                range_text=['{:.0f}%'.format(max(values) * 100), '{:.0f}%'.format(min(values) * 100)],
                pos_left='10%',
                pos_bottom='5%'
            ),
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )
    make_snapshot(snapshot, province_penetration_map.render(), "city_map_{}.png".format(city_name))
    print('保存 city_map_{}.png'.format(city_name))
    return province_penetration_map
def create_funnel_label():
    """Render the funnel chart from the workbook and save it as a PNG.

    Reads the sheet "漏斗图" of "白皮书数据地图.xlsx" in the current working
    directory and snapshots the chart to ``funnel.png``.

    :return: the configured pyecharts ``Funnel`` chart
    """
    folder_path = os.getcwd()
    file_name = "白皮书数据地图.xlsx"
    file_path = os.path.join(folder_path, file_name)
    dat = get_excel_content(file_path, sheet_name="漏斗图")

    df = dat[['category', 'cnt']]
    print(df)
    category = df['category'].values.tolist()
    values = df['cnt'].values.tolist()

    # The chained expression lost its final closing parenthesis in the
    # original snippet.
    funnel_map = (
        Funnel(init_opts=opts.InitOpts(width='1200px', height='1000px', bg_color='white'))
        .add("漏斗图", [list(z) for z in zip(category, values)])
        .set_series_opts(
            label_opts=opts.LabelOpts(
                position='inside',
                font_size=16,
            )
        )
        .set_global_opts(
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )
    make_snapshot(snapshot, funnel_map.render(), "funnel.png")
    print('保存 funnel.png')
    return funnel_map
# Render one county-level map per city in the list (quoting and
# indentation restored; the scraped text had escaped quotes and no indent).
city_list = ['温州市', '杭州市', '绍兴市', '嘉兴市', '湖州市', '宁波市', '金华市', '台州市', '衢州市', '丽水市', '舟山市']
for city_name in city_list:
    create_county_map(city_name)
猜你喜欢
云南省玉溪市2023-03-24 15:26发布大风蓝色预警
福建省漳州市2023-03-24 16:17发布雷电黄色预警
福建省龙岩市2023-03-24 15:48发布雷电黄色预警
广西壮族自治区桂林市2023-03-24 17:35发布雷电黄色预警
甘肃省陇南市2023-03-24 18:00发布道路结冰黄色预警
吉林省松原市2023-03-24 15:55发布森林火险黄色预警
福建省福州市2023-03-24 15:50发布暴雨蓝色预警
广西壮族自治区贺州市2023-03-24 16:34发布雷电黄色预警
广西壮族自治区桂林市2023-03-24 15:48发布雷电黄色预警
江西省赣州市2023-03-24 18:38发布雷电黄色预警
如何自制蛋挞
肉火烧面怎么和面
奶茶几分糖好喝
过桥米线是哪里的?
面霜和乳液的区别
怎么摘隐形眼镜
粉饼和散粉的区别
冷烫和热烫的区别
高品质香水如何鉴别?用三步就可以解决
如何去除黑眼圈