介绍

iPhone 的 Health APP 存储着我们的私人健康数据. 这里有一篇用 Python 分析 Health APP 数据的帖子: "Apple Health Data How to Export Analyze Visualize Guide" (ryanpraski.com), 而我更喜欢 R 的版本.

让我们赶紧开始吧!!

首先获取数据并读取

  1. 从你的 health APP 应用中导出数据
  2. 在 R 中读取数据

加载包并读入数据

library(XML)
library(tidyverse)
library(lubridate)
library(scales)
library(here)
library(ggthemes)
# Build the path to the exported Apple Health XML with here(), parse it,
# and print a summary of the parsed document.
xml <- here("data/apple_health_export/export.xml") %>%
  xmlParse()
summary(xml)
## $nameCounts
## 
##     Record ExportDate HealthData         Me    Workout 
##      90037          1          1          1          1 
## 
## $numNodes
## [1] 90041

Record 是我的主要数据, 有 90,037 条

# Select every <Record> node with an XPath query and turn the node
# attributes into a data frame.
# NOTE(review): `:::` reaches into the XML namespace for a function that is
# not exported -- confirm it is still present in the installed XML version.
df_record <- xml["//Record"] %>%
  XML:::xmlAttrsToDataFrame()

Record 数据查看

library(kableExtra)
# Show the first rows of the raw record table as an HTML table with hover
# highlighting. `FALSE` is spelled out because the shorthand `F` is an
# ordinary variable that can be reassigned.
head(df_record) %>%
  kable("html") %>%
  kable_styling("hover", full_width = FALSE)
type sourceName sourceVersion unit creationDate startDate endDate value device
HKQuantityTypeIdentifierDietaryWater WaterMinder 3.1.17 mL 2017-02-02 18:27:52 +0800 2017-02-02 18:27:52 +0800 2017-02-02 18:27:52 +0800 250 NA
HKQuantityTypeIdentifierBodyMassIndex 小米运动 201712191618 count 2017-12-24 19:07:08 +0800 2017-12-24 19:06:47 +0800 2017-12-24 19:06:47 +0800 22 NA
HKQuantityTypeIdentifierBodyMassIndex 小米运动 201712191618 count 2017-12-24 19:08:59 +0800 2017-12-24 19:08:54 +0800 2017-12-24 19:08:54 +0800 22 NA
HKQuantityTypeIdentifierBodyMassIndex 小米运动 201712191618 count 2017-12-28 21:44:31 +0800 2017-12-27 18:44:12 +0800 2017-12-27 18:44:12 +0800 22.6049 NA
HKQuantityTypeIdentifierBodyMassIndex 小米运动 201712191618 count 2017-12-30 10:13:09 +0800 2017-12-30 10:12:46 +0800 2017-12-30 10:12:46 +0800 22 NA
HKQuantityTypeIdentifierBodyMassIndex 小米运动 201712191618 count 2017-12-30 10:13:19 +0800 2017-12-30 10:13:14 +0800 2017-12-30 10:13:14 +0800 22 NA

数据清洗

注: 可以用 `base::Sys.timezone()` 查看自己所在的时区.

 # Clean the raw records in two steps.
 # Step 1 rewrites existing columns in place:
 #   device  - strip everything up to "name:" and everything from a comma on
 #   value   - convert to numeric (through character, in case it is a factor)
 #   endDate - parse the timestamp as a date-time in the Asia/Shanghai zone
 #   type    - drop the "HKQuantityTypeIdentifier" prefix
 # Step 2 appends calendar fields derived from the parsed endDate.
 df <- df_record %>%
   mutate(
     device = gsub(".*(name:)|,.*", "", device),
     value = as.numeric(as.character(value)),
     endDate = ymd_hms(endDate, tz = "Asia/Shanghai"),
     type = str_remove(type, "HKQuantityTypeIdentifier")
   ) %>%
   mutate(
     date = date(endDate),
     year = year(endDate),
     month = month(endDate),
     day = day(endDate),
     yday = yday(endDate),
     wday = wday(endDate),
     hour = hour(endDate),
     minute = minute(endDate)
   )

都有些什么数据呢?

# List each record type that occurs in the cleaned data, once.
distinct(df, type)
##                     type
## 1           DietaryWater
## 2          BodyMassIndex
## 3                 Height
## 4               BodyMass
## 5      BodyFatPercentage
## 6           LeanBodyMass
## 7              StepCount
## 8 DistanceWalkingRunning
## 9     ActiveEnergyBurned

先看看体重变化吧

# Scatter plot of body-mass readings over time with a smoothed trend line,
# rendered as an interactive plotly widget.
p_weight <- df %>%
  filter(type == "BodyMass") %>%
  arrange(endDate) %>%
  # Optional source filter, left disabled: restricting sourceName avoids
  # double-counting by apps that read BodyMass and write it back to Health.
  # filter(sourceName %in% c("健康", "小米运动")) %>%
  ggplot(mapping = aes(x = date, y = value)) +
  geom_point(alpha = 0.3) +
  geom_smooth(span = 0.2, colour = "grey30", se = FALSE) +
  theme_minimal() +
  scale_color_tableau() +
  scale_fill_tableau() +
  labs(
    title = "Apple Health Weight Chart Sample",
    caption = "Zero Student"
  ) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 1),
    legend.position = "bottom",
    plot.caption = element_text(
      size = 12,
      family = "Arial",
      face = "bold",
      hjust = 0,
      margin = margin(t = 15)
    )
  )

plotly::ggplotly(p_weight)

额, 这个数据点好像有点少, 在 18 年 10 月之后就没用过小米体重计了

看看步数

# Heat map of total step count per hour of day (x axis) and day of week
# (y axis), rendered as an interactive plotly widget.
#
# Steps are first summed per date/weekday/hour and then summed again per
# hour/weekday, so each tile holds the total over all dates.
p_stepCount <- df %>%
  filter(type == "StepCount") %>%
  group_by(date, wday, hour) %>%
  summarize(steps = sum(value)) %>%
  group_by(hour, wday) %>%
  summarize(steps = sum(steps)) %>%
  # Drop the grouping left over from summarize() so later steps see a
  # plain table.
  ungroup() %>%
  arrange(desc(steps)) %>%
  ggplot(aes(x = hour, y = wday, fill = steps)) +
  geom_tile(col = "grey40") +
  scale_fill_continuous(
    labels = scales::comma,
    low = "grey95",
    high = "#008FD5"
  ) +
  scale_x_continuous(
    breaks = c(0, 6, 12, 18),
    labels = c("Midnight", "6 AM", "Midday", "6 PM")
  ) +
  # The axis is reversed so that weekday 1 (labelled Sunday) is at the top.
  scale_y_reverse(
    breaks = c(1, 2, 3, 4, 5, 6, 7),
    labels = c(
      "Sunday", "Monday", "Tuesday", "Wednesday",
      "Thursday", "Friday", "Saturday"
    )
  ) +
  # theme_minimal() is a complete theme and replaces any theme() settings
  # added before it, so it must come first; the tweaks below then apply.
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 1),
    legend.position = "bottom",
    plot.caption = element_text(
      size = 12, family = "Arial", face = "bold",
      hjust = 0, margin = margin(t = 15)
    )
  ) +
  labs(
    title = "Weekly Step Count Heatmap",
    caption = "Zero Student"
  ) +
  # "none" hides the fill legend; passing FALSE here is deprecated.
  guides(fill = "none") +
  coord_equal()

plotly::ggplotly(p_stepCount)

😱 😱 😱 惊呆了我的小伙伴, 这样下去可怎么办?