日にち範囲指定をできるようにした改良版
地点、日時によって、両者の割合がどのように異なるか、相関するかをチェックする。
付録: getPointName(area) ... area = kofu, hakushu or fuefuki (デバイス名と地点名のリストを返す)
import matplotlib
# matplotlib.use('Agg') # バッチ処理用(ディスプレイに表示しない)
import matplotlib.pyplot as plt
from datetime import datetime as dt
import numpy as np
import csv
import glob
import re
import os
import sys
plt.rcParams['font.family'] = 'IPAPGothic' # 全体のフォントを設定
class point_daily:
def __init__(self, area, point, sdate, edate, width=10): #
"""
年月日の範囲を(sdate,edate)で指定
width: 時間間隔(分)
"""
self.area = area
self.point = point
self.sdate = sdate
self.edate = edate
self.width = width
self.permanent_term = [] # 常設端末のアドレスリスト (rec_aps()で作成)
def readData(self, ext=""):
"""
daily csvを読み込んで、パケット量をグローバル・ランダムごとに比較する
width: 時間刻み (分)
"""
self.num_dev_global = np.zeros(int(np.ceil(24 * 60 / self.width))) # width(分)単位のボックス
self.num_dev_random = np.zeros(int(np.ceil(24 * 60 / self.width)))
dir_name = ("/home/raspimngr/csv/" + self.area + "/points/" + self.point +
"/daily/")
# ファイル一覧を取得
if ext == "":
ext_str = "[0-9]"
else:
ext_str = "[0-9]_" + ext
files = glob.glob(dir_name + "*" + ext_str + ".csv")
files.sort()
for file in files:
dateStr = re.split('[_.]', file.split('/')[-1])[0]
if dateStr < self.sdate or dateStr > self.edate:
continue
# print(file)
if not os.path.exists(file):
print("cannot open file " + file)
continue
with open(file, 'r') as f:
reader = csv.reader(f)
header = next(reader)
for line in reader:
# 時刻 (width(分)単位)
rec_t = dt.fromtimestamp(float(line[0]))
n_box = int((int(rec_t.strftime("%H")) * 60 +
int(rec_t.strftime("%M"))) / self.width)
if int(line[5]) == 0:
self.num_dev_global[n_box] += 1
else:
self.num_dev_random[n_box] += 1
self.global_ratio = (self.num_dev_global /
(self.num_dev_global + self.num_dev_random))
def readHourlyDwellNum(self, ext="", yearMonthDay=""):
"""
hourly_count内のファイル名の命名がdailyのと異なっていることに対応する必要がある
1日のデータを分析、yearMonthDayに年月日を入れる
"""
if yearMonthDay != "":
self.yearMonthDay = yearMonthDay
if not "yearMonthDay" in locals():
print("引数にyearMonthDayを指定してください。")
sys.exit()
if ext != "":
tmp = self.yearMonthDay.split("_")
dayStr = tmp[0] + "_" + ext
else:
dayStr = self.yearMonthDay
filename = (
"/home/raspimngr/csv/" + self.area + "/points/" + self.point +
"/hourly_count/hourly_" + dayStr + "_" + self.point + ".csv")
if not os.path.exists(filename):
print("cannot open file " + filename)
return 0
self.hourlyCount = np.zeros((24, 2))
# print(filename)
with open(filename, 'r') as f:
reader = csv.reader(f)
for line in reader:
self.hourlyCount[int(line[0])] = np.array((int(line[1]),
int(line[2])))
def showAddrRatio(self):
idx = [
'%02d:%02d' % (int(d / 60), int(d % 60))
for d in range(0, 24 * 60, self.width)
]
for i in range(len(idx)):
print(idx[i], self.global_ratio[i])
def plot_gl_numbers(self, scale="fixed"):
# 描画
x = np.arange(len(self.num_dev_global))
fig = plt.figure(figsize=(12, 4))
ax1 = fig.add_subplot(121)
ax1.set_xlabel('*' + str(self.width) + " min.", fontsize=18)
ax1.set_title(getPointName(self.area)[self.point]
+ " (" + self.sdate + "-" + self.edate + ")")
ax1.plot(x, self.num_dev_global)
ax1.plot(x, self.num_dev_random)
ax1.legend(('global', 'random'))
ax2 = fig.add_subplot(122)
ax2.set_xlabel('*' + str(self.width) + " min.", fontsize=18)
ax2.plot(x, self.global_ratio)
ax2.legend(('global rate'))
if scale=='fixed':
ax2.set_ylim(0.0,1.0)
plt.show()
def rec_aps(self, dwell_time_max=12, ext=""):
'''
dwell_timeファイルから長時間観測アドレスを抽出
アクセスポイントなどの常設端末などの検出(長時間滞在者も含むと思われる)
dwellt_time_max: (hour)
'''
delta_t = int(dwell_time_max) * 3600 # 滞在時間(秒)
dir_name = ("/home/raspimngr/csv/" + self.area + "/points/" + self.point +
"/dwell_time/")
# ファイル一覧を取得
if ext == "":
ext_str = "[0-9]" + "_" + self.point
else:
ext_str = "[0-9]_" + ext + "_" + self.point
print(ext_str)
files = glob.glob(dir_name + "*" + ext_str + ".csv")
files.sort()
temp_list = []
self.total_number = 0
for file in files:
dateStr = re.split('[_.]', file.split('/')[-1])[1]
#print(dateStr)
if dateStr < self.sdate or dateStr > self.edate:
continue
if not os.path.exists(file):
print("cannot open file " + file)
continue
with open(file, 'r') as f:
#print(file)
reader = csv.reader(f)
for line in reader:
self.total_number += 1
if (line[5]=="0" and (float(line[3]) - float(line[2]) > delta_t
or int(line[1]) > 10)):
temp_list.append(line[0])
self.permanent_term = sorted(set(temp_list))
def getPointName(area):
filename = {"kofu": "/var/www/html/kofu/kofu_position.csv",
"fuefuki": "/var/www/html/ff/sensor_points.csv",
"hakushu": "/home/toyoki/public_html/hakushu/points_hakushu.csv"}
pointName = {}
with open(filename[area],"r") as f:
reader = csv.reader(f)
header = next(reader)
for line in reader:
if len(line) < 3:
continue
pointName[line[0]] = line[3]
return pointName
kofu_dir = "/home/raspimngr/csv/kofu/points/"
hakushu_dir = "/home/raspimngr/csv/hakushu/points/"
fuefuki_dir = "/home/raspimngr/csv/fuefuki/points/"
# 長時間観測された端末のアドレス抽出を行ってみる
d = point_daily("kofu", "kofu22", "20181001","20181007",width=60)
d.rec_aps(2, ext="all")
print(d.permanent_term)
print(str(len(d.permanent_term)) + " of " + str(d.total_number))
# dwell_dataを計算する前の生のデータ(daily)のパケット数比率
d = point_daily("fuefuki","ff08","20190810", "20190815", width=60)
d.readData("mon")
d.plot_gl_numbers()
#print(d.global_ratio)
d = point_daily("fuefuki","ff07","20190812", "20190820", width=60)
d.readData("mon")
d.plot_gl_numbers()
d = point_daily("kofu", "kofu22", "20190801","20190810",width=60)
d.readData()
d.plot_gl_numbers()
d = point_daily("kofu", "kofu22", "20190801","20190810",width=60)
d.readData()
d.plot_gl_numbers()
# 22 = 奥藤
d= point_daily("kofu", "kofu12", "20181101","20181110", width=60)
d.readData()
d.plot_gl_numbers()
d = point_daily("kofu", "kofu9", "20190801", "20190810",60)
d.readData()
d.plot_gl_numbers()
d = point_daily("kofu", "kofu13", "20190801","20190810",60)
d.readData()
d.plot_gl_numbers()
d = point_daily("kofu", "kofu27", "20190801", "20190810",60)
d.readData("mon")
d.plot_gl_numbers()
北口コンコースは20%~30%であり、笛吹の地点に比べて比率は高い。
夜間は少なくなるので常時稼働している家庭やオフィスのルータ等とは考えにくい。
車からのパケットの可能性がある。
南口は固定アドレスが24時間を通じてほぼ一定値である(固定局がある?)。⇒通行者はランダムのみの方がよく相関するだろう。
d = point_daily("kofu", "kofu25", "20190810","20190820",5)
d.readData("mon")
d.plot_gl_numbers()
d = point_daily("kofu", "kofu26", "20190810","20190820",5)
d.readData("mon")
d.plot_gl_numbers()
d = point_daily("kofu", "kofu16", "20181201","20181220",60)
d.readData()
d.plot_gl_numbers()
クラフトラボの振る舞いは特異だ。(???)
固定が多い時間帯は高齢者が優勢、ランダムは若者によるもの?
# dwell_timeから算出した時間ごとのアドレス数(hourly_count)のプロット
area = "fuefuki"
point = "ff08"
day = "20190714"
dev_str = "all"
a = point_daily(area, point, day, day, width=60)
# データの読み込み
a.readHourlyDwellNum(dev_str, day) # 2019年度以降に設置した箇所(kofu以外)はallが望ましい
# アドレス数の時間変化
fig = plt.figure(figsize=(14,4))
ax1 = fig.add_subplot(131)
ax1.plot(a.hourlyCount[:,0])
ax2 = fig.add_subplot(132)
ax2.plot(a.hourlyCount[:,1])
ax1.set_title("all addresses")
ax2.set_title("global addresses")
ax1.set_xlabel("hour")
ax1.set_ylabel("number of adresses")
ax2.set_xlabel("hour")
ax2.set_ylabel("number of adresses")
# global/localアドレス数の相関
ax3 = fig.add_subplot(133)
ax3.scatter(a.hourlyCount[:,0], a.hourlyCount[:,1])
ax3.set_title(area + " " + getPointName(area)[point] + " " + day)
ax3.set_xlabel("local")
ax3.set_ylabel("global")
plt.show()
pointList = ["ff01", "ff02", "ff03", "ff04", "ff05", "ff06", "ff08"]
dayList = ["20190713", "20190714","20190715"]
# 時間範囲指定 [0:24]
sHour = 8
eHour = 18
dataList = {}
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111)
for p in pointList:
dataList[p] = {}
for d in dayList:
dataList[p][d] = point_daily("fuefuki",p,d,d, width=60)
dataList[p][d].readHourlyDwellNum("all", yearMonthDay=d)
ax.scatter(dataList[p][d].hourlyCount[sHour:eHour, 0], dataList[p][d].hourlyCount[sHour:eHour, 1],
label=getPointName("fuefuki")[p] + " " + d)
ax.set_xlabel("local")
ax.set_ylabel("global")
ax.set_title("笛吹 " + str(sHour) + "時~" + str(eHour) + "時")
ax.legend( bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
pointList = ["kofu2", "kofu3", "kofu4", "kofu8", "kofu9", "kofu12", "kofu22", "kofu23"]
#pointList = ["kofu4", "kofu21", "kofu5"]
#pointList = ["kofu9", "kofu22", "kofu17","kofu12" ]
dayList = ["20190713","20190707"]
dataList = {}
fig = plt.figure(figsize=(14,6))
ax = fig.add_subplot(131)
ax1 = fig.add_subplot(133)
for p in pointList:
dataList[p] = {}
for d in dayList:
dataList[p][d] = point_daily("kofu",p, d, d, width=60)
dataList[p][d].readHourlyDwellNum("", d)
ax.scatter(dataList[p][d].hourlyCount[:,0], dataList[p][d].hourlyCount[:,1],
label=getPointName("kofu")[p] + " " + d)
ax1.scatter(dataList[p][d].hourlyCount[8:18,0], dataList[p][d].hourlyCount[8:18,1],
label=getPointName("kofu")[p] + " " + d) # 1列目の範囲指定により昼間のみ限定可能
ax.set_xlabel("number of local addresses")
ax.set_ylabel("number of global addresses")
ax1.set_xlabel("number of local addresses")
ax1.set_ylabel("number of global addresses")
ax1.set_title("8時から18時")
ax.legend( bbox_to_anchor=(1.05, 1), loc='upper left')
# ax.legend()
plt.show()
getPointName("kofu")
#getPointName("kofu")['kofu3']
# 連続処理用 ディレクトリ・ファイル一覧 (必要なくなった)
# area, 地点ID
import glob
import os
def getFileList(area, dev_str=""):
# ディレクトリを検索しファイル一覧を取得する
# dev_strはファイル名に含まれるデバイス名
root_dir = "/home/raspimngr/csv/" + area + "/points/"
pointDirList = []
fileList = []
for point in os.listdir(root_dir):
if os.path.isdir(root_dir + point):
pointDirList.append(root_dir + point)
for f in glob.glob(root_dir + point + "/hourly_count/*[0-9]"
+ dev_str + "_" + point +".csv" ):
fileList.append(f)
return pointDirList, fileList
# テスト
pointList, fileList = getFileList("kofu")
# fileListから日にちと地点名を切り出す例
# dummy, day, pt = fileList[0].split("/")[-1].split("_")
# pt = pt.split(".")[0]
# print(pt)
# print(day)
def get_daily_count(area, point, day, dev_str=""):
filename = ("/home/raspimngr/csv/" + area + "/points/"
+ point + "hourly_count/" + day + dev_str
+ "_" + point + ".csv")
with open(filename, 'r') as f:
reader = csv.reader(f)
header = next(reader)
# データの読み込みと回帰係数の計算
import csv
for file in fiLeList:
dummy, day, pt = fileList[0].split("/")[-1].split("_")
point = pt.split(".")[0]
with open(filename, 'r') as f:
reader = csv.reader(f)
header = next(reader)
for line in reader:
# 時刻 (width(分)単位)
rec_t = dt.fromtimestamp(float(line[0]))
n_box = int((int(rec_t.strftime("%H"))*60 +
int(rec_t.strftime("%M")))/width)
if int(line[5]) == 0:
self.num_dev_global[n_box] += 1
else:
self.num_dev_random[n_box] += 1
self.global_ratio = self.num_dev_global/(self.num_dev_global + self.num_dev_random)