'''
クラス名: rssi_data
- コンストラクタ
パラメータセットのみ(年月日, 地点名, エリア名, デバイス(mon, all, ... ))
- read_data() : データの読み込み
引数無し、ファイルがないかサイズ0のときFalseを返す
- get_segmented_rssi():一日のrssi強度出現頻度を返す
返り値 DataFrame('rssi', 頻度, 頻度の割合) 列名は、rssi, 年月日, rate年月日
- get_hourly_segmented_rssi(glbit) 指定日の時間ごとの頻度を返す
引数 glbit (デフォルト 指定なし)
返り値 DataFrame ('rssi', '0', '1', ..., '23')
- get_rssi_ave_med(): rssiの平均値と中央値を返す
- get_hourly_rssi_ave():時間ごとのrssi平均値を返す
'''
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter # 時間軸のフォーマットを自在に
import numpy as np
import os
import matplotlib.pyplot as plt
import sys
sys.path.append("/home/raspimngr/py_modules")
from wifi_sensors import * # ローカルモジュール
class rssi_data:
    """One day's Wi-Fi RSSI packet log for a single sensor point.

    Construct with (day, point, area, dev_ext), call read_data(), then use
    the get_* accessors.  Binning is fixed: 5 dB bins from -100 dB to
    -35 dB, each bin labelled by its lower edge.
    """

    # daily CSV path template: area, point, day, device extension
    file_str = "/home/raspimngr/csv/%s/points/%s/daily/%s_%s.csv"

    def __init__(self, day, point, area="fuefuki", dev_ext="mon"):
        self.day = day          # "YYYYMMDD" string
        self.point = point      # sensor point id, e.g. "ff01"
        self.area = area
        self.dev_ext = dev_ext  # file suffix ("mon", "all", ...)

    def read_data(self):
        """Load the daily CSV into self.point_df.

        Returns False when the file is missing or empty, True otherwise.
        Adds 'datetime' and 'hour' columns derived from unix_time.
        """
        self.cut_range = list(range(-100, -30, 5))  # 14 bin edges: -100 .. -35
        self.labels = list(range(-100, -35, 5))     # 13 bin labels (lower edges)
        filename = self.file_str % (self.area, self.point, self.day, self.dev_ext)
        if not os.path.isfile(filename) or os.path.getsize(filename) == 0:
            return False
        self.point_df = pd.read_csv(filename, sep=",",
                                    names=["unix_time", "mac", "freq",
                                           "rssi", "mac_resolv", "glbit"])
        # convert unix_time to datetime and attach an hour-of-day column
        self.point_df["datetime"] = self.point_df.unix_time.apply(
            lambda x: datetime.fromtimestamp(x))
        self.point_df["hour"] = self.point_df["datetime"].dt.hour
        return True

    def _binned_counts(self, rssi_series):
        """Histogram of an rssi series over the fixed bins.

        Returns a DataFrame with an int 'rssi' label column and a 'count'
        column, sorted by rssi.  Built explicitly instead of relying on the
        pandas-version-dependent column names produced by
        value_counts().reset_index() (the original expected a literal
        "index" column, which newer pandas no longer emits).
        """
        counts = pd.cut(rssi_series, self.cut_range,
                        labels=self.labels).value_counts()
        df = pd.DataFrame({"rssi": counts.index.astype(int),
                           "count": counts.to_numpy()})
        return df.sort_values("rssi").reset_index(drop=True)

    def get_segmented_rssi(self):
        """Daily rssi frequency table.

        Returns DataFrame with columns: 'rssi', <day> (counts),
        'rate'+<day> (counts normalised to sum to 1).
        """
        day = self.day
        freq_df = self._binned_counts(self.point_df.rssi)
        freq_df = freq_df.rename(columns={"count": day})
        freq_df["rate" + day] = freq_df[day] / freq_df[day].sum()
        return freq_df

    def get_hourly_segmented_rssi(self, glbit=-1):
        """Hourly rssi frequency table: columns 'rssi', '0' .. '23'.

        glbit=1 restricts to rows with the global/local bit set; any other
        value uses all rows.  Unlike the original, self.point_df is left
        untouched (it used to be destructively filtered).
        """
        df = self.point_df
        if glbit == 1:
            df = df[df["glbit"] == 1]
        hourly = pd.DataFrame({"rssi": [int(lab) for lab in self.labels]})
        for hour in range(24):
            col = self._binned_counts(df[df["hour"] == hour].rssi)
            col = col.rename(columns={"count": str(hour)})
            # every bin label appears in both frames, so an inner merge keeps
            # all 13 rows even for hours with no packets
            hourly = pd.merge(hourly, col, on="rssi")
        self.hourly_freq_df = hourly.astype({"rssi": int})
        return self.hourly_freq_df

    def get_rssi_ave_med(self):
        """Mean and median rssi over the whole day."""
        return {"average": self.point_df["rssi"].mean(),
                "median": self.point_df["rssi"].median()}

    def get_hourly_rssi_ave(self):
        """Per-hour mean rssi and sample count.

        Returns a DataFrame indexed by hour with columns 'rssi' (mean) and
        'size' (packet count).  Fixes the original's NameError (it used the
        bare name point_df) and its set-literal column selection.
        """
        grouped = self.point_df.groupby("hour")
        rssi_mean = grouped[["rssi"]].mean()  # mean of the numeric column only
        rssi_mean["size"] = grouped.size()
        return rssi_mean[["rssi", "size"]]
# Quick check: daily rssi distribution for one point.
day1 = "20191005"
rssi1 = rssi_data(day1, "ff01", "fuefuki")
# Guard on read_data(): the original ignored its False return and then
# crashed with AttributeError when the CSV was missing/empty.
if rssi1.read_data():
    query1 = rssi1.get_segmented_rssi()
    query1
# 複数地点のrssi強度分布を並べて作図
def plot_rssi_distribution(points, days, area="fuefuki",
                           fig_size=(20,17), col_num_ini=3, rate=False, svg=False):
    """Plot daily rssi distributions: one subplot per point, one line per day.

    Parameters: points/days are lists of point ids and "YYYYMMDD" strings;
    rate=True plots normalised rates instead of raw packet counts;
    svg=True also saves the figure as SVG and converts it to EMF.
    Returns True.
    """
    fontsize = 14
    plt.rcParams.update({'font.size': fontsize})
    fig = plt.figure(figsize=fig_size)
    plt.subplots_adjust(hspace=0.25)
    if len(points) < col_num_ini:
        col_num = len(points)
        row_num = 1
    else:
        col_num = col_num_ini
        # ceiling division: the original int(len/col)+1 allocated an extra
        # empty row whenever len(points) was an exact multiple of col_num_ini
        row_num = -(-len(points) // col_num_ini)
    # column-name prefix: rates or raw packet counts
    col_pre = "rate" if rate else ""
    for fig_num, point in enumerate(points):
        ax = fig.add_subplot(row_num, col_num, fig_num + 1)
        titled = False
        for day in days:
            rssi_src = rssi_data(day, point, area=area)
            if not rssi_src.read_data():
                # skip unreadable days; the original aborted the whole point
                # (all remaining days) when days[0] failed to load
                continue
            result = rssi_src.get_segmented_rssi()
            if not titled:
                result.plot(x="rssi", y=col_pre + day, ax=ax,
                            title="rssi強度分布(" + getPointById("ff", point)["短縮名"] + ")")
                titled = True
            else:
                result.plot(x="rssi", y=col_pre + day, ax=ax)
        ax.set_xlabel("rssi (db)", fontsize=fontsize)
    if svg:  # save as SVG and convert to EMF
        outfile_body = "rssi_distrib"
        if rate:
            outfile_body += "_rate"
        plt.savefig(outfile_body + ".svg", bbox_inches="tight")
        import subprocess
        # argument-list form with shell=False instead of a concatenated
        # shell string
        subprocess.run(["inkscape", "--file", outfile_body + ".svg",
                        "--export-emf", outfile_body + ".emf"])
    return True
# Batch figure: six Fuefuki points over four weekend days, saved as SVG/EMF.
points = ["ff01", "ff02", "ff03", "ff04", "ff05", "ff08"]
days = ["20191102", "20191103", "20191109", "20191110"]
plot_rssi_distribution(points, days, rate=False, svg=True)
# Output test: hourly rssi histogram for one point/day.
day1 = "20191005"
rssi1 = rssi_data(day1, "ff01", "fuefuki")
# Guard on read_data(): the original ignored its False return and then
# crashed with AttributeError when the CSV was missing/empty.
if rssi1.read_data():
    query1 = rssi1.get_hourly_segmented_rssi()
    query1
2019年分のデータをcsv形式で格納した (2020/07/04)
下のように読み込んでもよい
# Direct read of a pre-aggregated hourly rssi CSV (alternative to the class above).
pd.read_csv("/home/raspimngr/csv/fuefuki/points/ff01/rssi/rssi_20191005_all_ff01.csv", sep=",")
temp_df = pd.read_csv("/home/raspimngr/csv/fuefuki/points/ff01/rssi/rssi_20191005_all_ff01.csv", sep=",")
# Hand check: counts-weighted mean of |rssi| for one hour column.
(100*57+95*148+90*53+85*14)/(57+148+53+14)
# 作図
def plot_rssi_dist_hours(day, point, ax, hours):
    """Plot hourly rssi histograms for one point/day onto *ax*.

    Reads the pre-aggregated CSV (columns: rssi, '0' .. '23') and draws one
    line per entry of *hours* (a list of hour-name strings).
    NOTE: the rssi_data class above can compute the same table from the raw
    log; this reads the pre-aggregated file instead.
    """
    filename = ("/home/raspimngr/csv/fuefuki/points/%s/rssi/rssi_%s_mon_%s.csv" %
                (point, day, point))
    dt = pd.read_csv(filename, sep=",")
    # (a set_index("rssi") immediately followed by reset_index() — a no-op —
    # was removed here)
    dt.plot(x="rssi", y=hours, ax=ax)
    # panel title: short point name from the local sensor registry
    ax.set_title(getPointById("ff", point)["短縮名"] + ", " + day)
    ax.legend(title="時")
# Hourly strength distributions on one day, three points side by side.
fontsize = 14
plt.rcParams.update({'font.size': fontsize})
fig = plt.figure(figsize=(20, 5))
fig.suptitle("時間ごとのパケット数分布")
day = "20191110"
hours = ["10", "11", "12", "14", "15", "16", "17", "20", "22"]
for panel, point in enumerate(["ff01", "ff03", "ff08"], start=1):
    ax = fig.add_subplot(1, 3, panel)
    plot_rssi_dist_hours(day, point, ax, hours)
# Same three points on a different day.
fig = plt.figure(figsize=(20, 5))
fig.suptitle("時間ごとのパケット数分布")
day = "20191103"
hours = ["10", "12", "14", "15", "16", "17", "20", "22"]
points = ["ff01", "ff03", "ff08"]
for i, pt in enumerate(points):
    ax = fig.add_subplot(1, 3, i + 1)
    plot_rssi_dist_hours(day, pt, ax, hours)
わんぱくドームの比較的多い日のrssi分布
# One point (ff01) across three busy days.
fig = plt.figure(figsize=(20, 5))
fig.suptitle("時間ごとのパケット数分布")
days = ["20190915", "20190922", "20190929"]
point = "ff01"
hours = ["10", "11", "12", "14", "15", "16", "17"]
for i, d in enumerate(days):
    ax = fig.add_subplot(1, 3, i + 1)
    plot_rssi_dist_hours(d, point, ax, hours)
# Three other points on the same day.
fig = plt.figure(figsize=(20, 5))
fig.suptitle("時間ごとのパケット数分布")
day = "20191110"
hours = ["10", "12", "14", "15", "16", "17", "20", "22"]
points = ["ff02", "ff05", "ff06"]
for i, pt in enumerate(points):
    ax = fig.add_subplot(1, 3, i + 1)
    plot_rssi_dist_hours(day, pt, ax, hours)
# Visually-counted visitor data (indoor survey).
visual_data = pd.read_csv("/home/toyoki/work/fuefuki/ff_surveydata_inside.csv")
visual_data.rename(columns={"Unnamed: 0": "day_original"}, inplace=True)
# Normalise the date format to "YYYYMMDD" for joining with the Wi-Fi data.
visual_data["day_str"] = pd.to_datetime(visual_data['day_original']).dt.strftime("%Y%m%d")
visual_data["day"] = visual_data['day_str'].astype(int)
visual_data.query('20191101 <= day <= 20191130')
ff01のrssiデータ読み込みと時間ごとのrssi平均値の算出
import numpy as np
def rssi_mean(point, day, hours=[11, 14]):
    """Counts-weighted mean rssi per requested hour from the pre-aggregated CSV.

    Returns {"<point>_<hour>": mean, ...}; values are NaN when the file
    cannot be read.  Fixes two defects: the failure branch returned keys
    named after the bare hour ("11") while the success branch used
    "<point>_11", breaking downstream DataFrame columns; and a bare
    `except:` swallowed every exception.
    (hours default is never mutated, so the mutable default is benign.)
    """
    filename = ("/home/raspimngr/csv/fuefuki/points/%s/rssi/rssi_%s_mon_%s.csv"
                % (point, day, point))
    try:
        temp_df = pd.read_csv(filename, sep=",")
    except (OSError, pd.errors.EmptyDataError):
        # missing/unreadable/empty file -> NaN for each requested hour,
        # with the same key naming as the success path
        return {point + "_" + str(h): np.nan for h in hours}
    mean_val = {}
    for h in hours:
        col = temp_df[str(h)]
        # weighted mean of the bin labels, weights = per-bin packet counts
        mean_val[point + "_" + str(h)] = (temp_df['rssi'] * col).sum() / col.sum()
    return mean_val
# Compare ff08 Wi-Fi rssi means with the visual counts (join on day).
ff08_rssi = []
for d in visual_data["day_str"]:
    tmp = rssi_mean("ff08", d, hours=[11, 14])
    tmp["day_str"] = str(d)
    ff08_rssi.append(tmp)
tmp_data = pd.DataFrame(ff08_rssi)
# NOTE: the original bound this DataFrame to the name `rssi_data`,
# shadowing the class of the same name defined above; renamed.
rssi_merged = pd.merge(visual_data, tmp_data, on="day_str")
rssi_merged
rssi_merged.plot(kind="scatter", x=["くだもの広場", "くだもの広場.1"], y=["ff08_11", "ff08_14"])
rssi_merged.query('ff08_11 > -80 or ff08_14 > -80')
# Grid of hourly distributions: rows = points, columns = days.
fig = plt.figure(figsize=(20, 18))
fig.suptitle("時間ごとのパケット数分布")
days = ["20190915", "20191025", "20191029"]
points = ["ff01", "ff03", "ff06"]
hours = ["10", "11", "12", "14", "15", "16", "17"]
num_axes = 1
# iterate the sequences directly instead of range(len(...)) index juggling
for point in points:
    for d in days:
        ax = fig.add_subplot(len(points), len(days), num_axes)
        plot_rssi_dist_hours(d, point, ax, hours)
        num_axes += 1
わんぱくドームは、混み方によらずダブルピークになっている。
アクア(ff05)、噴水広場(ff06)など
# Visually-counted visitor data (outdoor survey).
visual_data = pd.read_csv("/home/toyoki/work/fuefuki/ff_surveydata_outside.csv")
visual_data.rename(columns={"Unnamed: 0": "day_original"}, inplace=True)
# Normalise the date format to "YYYYMMDD" for joining with the Wi-Fi data.
visual_data["day_str"] = pd.to_datetime(visual_data['day_original']).dt.strftime("%Y%m%d")
visual_data["day"] = visual_data['day_str'].astype(int)
visual_data.query('アクアアスレチック > 200')
ff05(アクア)のWi-Fiデータと比較
# Compare ff05 (aqua) Wi-Fi rssi means with the visual counts.
ff05_rssi = []
for d in visual_data["day_str"]:
    tmp = rssi_mean("ff05", d, hours=[11, 14])
    tmp["day_str"] = str(d)
    ff05_rssi.append(tmp)
tmp_data = pd.DataFrame(ff05_rssi)
# NOTE: renamed from `rssi_data` to stop shadowing the class of the same name.
rssi_merged = pd.merge(visual_data, tmp_data, on="day_str")
rssi_merged.query('アクアアスレチック > 100').plot(
    kind="scatter", x=["アクアアスレチック", "アクアアスレチック.1"], y=["ff05_11", "ff05_14"])
センサから離れているため?、rssi平均値はほとんど変化がない
センサ名
import pandas as pd
# Sensor-point master table (point id -> names).
pd.read_csv("/var/www/html/ff/sensor_points.csv")
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter # 時間軸のフォーマットを自在に
import numpy as np
import os
# Daily CSV path template: area, point, day, device extension.
file_str = "/home/raspimngr/csv/%s/points/%s/daily/%s_%s.csv"
def get_segmented_rssi(day, point, area="fuefuki", dev_ext="mon"):
    """Daily rssi histogram for one point (function version of the class method).

    Returns a DataFrame with columns 'rssi', <day> (counts) and
    'rate'+<day> (counts normalised to sum to 1).
    """
    cut_range = list(range(-100, -30, 5))  # 14 bin edges: -100 .. -35 (5 dB)
    labels = list(range(-100, -35, 5))     # 13 lower-edge labels
    filename = file_str % (area, point, day, dev_ext)
    point_df = pd.read_csv(filename, sep=",",
                           names=["unix_time", "mac", "freq", "rssi",
                                  "mac_resolv", "glbit"])
    counts = pd.cut(point_df.rssi, cut_range, labels=labels).value_counts()
    # build the frame explicitly: the original relied on reset_index()
    # producing a column literally named "index", which newer pandas
    # no longer does
    freq_df = pd.DataFrame({"rssi": counts.index.astype(int),
                            day: counts.to_numpy()}).sort_values("rssi")
    freq_df["rate" + day] = freq_df[day] / freq_df[day].sum()
    return freq_df
def get_hourly_segmented_rssi(day, point, area="fuefuki", dev_ext="mon"):
    """Hourly rssi histograms for one point/day.

    Returns a DataFrame with columns 'rssi', '0' .. '23'.
    """
    from datetime import datetime as dt
    cut_range = list(range(-100, -30, 5))  # 14 bin edges: -100 .. -35 (5 dB)
    labels = list(range(-100, -35, 5))     # 13 lower-edge labels
    filename = file_str % (area, point, day, dev_ext)
    point_df = pd.read_csv(filename, sep=",",
                           names=["unix_time", "mac", "freq", "rssi",
                                  "mac_resolv", "glbit"])
    # tag each packet with its hour of day
    point_df["datetime"] = point_df.unix_time.apply(lambda x: dt.fromtimestamp(x))
    point_df["hour"] = point_df['datetime'].dt.hour
    freq_df = pd.DataFrame({"rssi": labels})
    for hour in range(24):
        counts = pd.cut(point_df[point_df['hour'] == hour].rssi,
                        cut_range, labels=labels).value_counts()
        # explicit frame instead of the pandas-version-dependent
        # value_counts().reset_index() column naming
        col = pd.DataFrame({"rssi": counts.index.astype(int),
                            str(hour): counts.to_numpy()})
        # all bin labels appear on both sides, so the inner merge keeps all rows
        freq_df = pd.merge(freq_df, col, on="rssi")
    return freq_df.astype({"rssi": int})
def get_rssi_ave_med(day, point, area="fuefuki", dev_ext="mon"):
    """Whole-day mean and median rssi for one point.

    Returns {"average": ..., "median": ...}; both NaN when the daily
    file is absent.
    """
    path = file_str % (area, point, day, dev_ext)
    if not os.path.isfile(path):
        return {"average": np.nan, "median": np.nan}
    columns = ["unix_time", "mac", "freq", "rssi", "mac_resolv", "glbit"]
    daily = pd.read_csv(path, sep=",", names=columns)
    rssi = daily["rssi"]
    return {"average": rssi.mean(), "median": rssi.median()}
def get_hourly_rssi_ave_med(day, point, area="fuefuki", glbit=1, dev_ext="mon"):
    """Per-hour mean rssi and packet count for one day/point.

    glbit=1 keeps only rows with the global/local (random-address) bit set;
    any other value keeps everything.  Returns a DataFrame indexed by hour
    with columns 'rssi' (mean) and 'size', or {"average": nan,
    "median": nan} when the daily file is absent (kept for backward
    compatibility with existing callers).
    """
    from datetime import datetime as dt
    filename = file_str % (area, point, day, dev_ext)
    if not os.path.isfile(filename):
        return {"average": np.nan, "median": np.nan}
    point_df = pd.read_csv(filename, sep=",",
                           names=["unix_time", "mac", "freq", "rssi",
                                  "mac_resolv", "glbit"])
    if glbit == 1:
        # copy so the added columns below don't trigger SettingWithCopy
        point_df = point_df[point_df["glbit"] == 1].copy()
    # tag each packet with its hour of day
    point_df["datetime"] = point_df.unix_time.apply(lambda x: dt.fromtimestamp(x))
    point_df["hour"] = point_df['datetime'].dt.hour
    grouped = point_df.groupby("hour")
    # mean of the numeric rssi column only: a bare .mean() over the string
    # and datetime columns raises on modern pandas
    rssi_mean = grouped[["rssi"]].mean()
    rssi_mean["size"] = grouped.size()
    # fixed: the original selected columns with a set literal
    # {"rssi", "size"}, which is unordered (and newer pandas rejects)
    return rssi_mean[["rssi", "size"]]