跳过正文

serde

··7077 字
Rust Rust-Crate
目录
rust crate - 这篇文章属于一个选集。
§ 2: 本文

serde crate 包含两层:

  1. data structures : 定义了 Serialize 和 Deserialize trait 类型;
  2. data format : data structures 保存格式;

data format 在各种单独的 serde_XX crate 中提供, 如 serde_json/serde_yaml 等, 它们实现了 data structures 的 Serialize/Deserialize trait

serde 为 Rust 内置 29 种类型都提供了 data structure 实现, 所以一般只需要为自定义类型使用 #[derive] 宏来自动生成 Serialize/Deserialize 的实现:

Cargo.toml(需要开启 derive feature):

[package]
name = "my-crate"
version = "0.1.0"
authors = ["Me <user@rust-lang.org>"]

[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

src/main.rs:

use serde::{Serialize, Deserialize};

/// A 2-D point used to demonstrate the derived `Serialize`/`Deserialize` impls.
/// `Debug` is derived as well so the value can be printed with `{:?}`.
#[derive(Serialize, Deserialize, Debug)]
struct Point {
    x: i32,
    y: i32,
}

fn main() {
    // Round-trip a `Point` through its JSON text form.
    let origin = Point { x: 1, y: 2 };

    // Prints serialized = {"x":1,"y":2}
    let json = serde_json::to_string(&origin).unwrap();
    println!("serialized = {}", json);

    // Prints deserialized = Point { x: 1, y: 2 }
    let restored: Point = serde_json::from_str(&json).unwrap();
    println!("deserialized = {:?}", restored);
}

serde 默认按照 field name 的原样来序列化和反序列化。通过给 struct、enum、field 或 variant 添加 serde 的 rename/rename_all/rename_all_fields 等属性,可以改变序列化和反序列化后的字段名称。

/// Enum with only unit variants and no serde renaming attributes.
#[derive(Serialize, Deserialize, Debug)]
enum Gentle {
    Male, // Serialized as the string "Male"
    Female,
}

/// Example struct combining a string field, an enum field and a tuple field.
#[derive(Serialize, Deserialize, Debug)]
struct Person {
    name: String,
    gentle: Gentle,
    // A tuple; it appears as a JSON array in the sample output of `main`.
    info: (i32, String),
}

fn main() {
    // Build a sample value that exercises every field of `Person`.
    let zhang = Person {
        name: String::from("zhang"),
        gentle: Gentle::Male,
        info: (1, String::from("test")),
    };

    // ps: {"name":"zhang","gentle":"Male","info":[1,"test"]}
    let encoded = serde_json::to_string(&zhang).unwrap();
    println!("ps: {}", encoded);
}

/// Same enum as before, now with every variant name lower-cased by `rename_all`.
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "lowercase")]
enum Gentle {
    Male, // Serialized as the string "male"
    Female,
}

// ps: {"name":"zhang","gentle":"male","info":[1,"test"]}

struct:序列化为 JSON Map;

enum:取决于 variant 类型:

  1. 不带值的 variant:序列化为字符串,字符串值为 variant name;
  2. 单个元素的 tuple variant(newtype 类型):序列化为 Map,key 为 variant name, value 为单个值;
  3. 多个元素 tuple variant: 序列化为 Map,key 为 variant name,value 为列表;
  4. struct variant:序列化为 Map,key 为 variant name,value 为 Map;
/// Enum showing how each variant shape is represented in JSON.
///
/// `rename_all = "lowercase"` lower-cases the variant names only; the field
/// names inside the struct variant are left untouched.
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "lowercase")]
enum Gentle {
    Male, // Unit variant: serialized as the string "male"
    Female,
    Other(i32, String), // Tuple variant: a map whose value is a JSON array: {"other":[1,"unknown"]}
    OneType(i32), // Newtype variant: a map whose value is the inner value: {"onetype":12}
    // Struct variant: a map whose value is itself a map: {"complex":{"Id":1,"name":"test"}}
    Complex { Id: i32, name: String },
}

/// Example struct whose `gentle` field can take any of the `Gentle` variant shapes.
#[derive(Serialize, Deserialize, Debug)]
struct Person {
    name: String,
    gentle: Gentle, // Serialized as a string or a map, depending on the `Gentle` variant used
    info: (i32, String),
}

fn test_serde() {
    // Use the tuple variant so `gentle` is emitted as a map rather than a string.
    let sample = Person {
        name: String::from("zhang"),
        gentle: Gentle::Other(1, String::from("unknown")),
        info: (1, String::from("test")),
    };

    // ps: {"name":"zhang","gentle":{"other":[1,"unknown"]},"info":[1,"test"]}
    let encoded = serde_json::to_string(&sample).unwrap();
    println!("ps: {}", encoded);
}

// JSON representation of the four struct shapes.

// Struct with named fields.
struct W {
    a: i32,
    b: i32,
}
let w = W { a: 0, b: 0 }; // Represented as `{"a":0,"b":0}`

// Tuple struct.
struct X(i32, i32);
let x = X(0, 0); // Represented as `[0,0]`

// Newtype struct (a tuple struct with a single field).
struct Y(i32);
let y = Y(0); // Represented as just the inner value `0`

// Unit struct.
struct Z;
let z = Z; // Represented as `null`

// JSON representation of the four enum variant shapes: every variant that
// carries data becomes a single-key map keyed by the variant name, while the
// unit variant becomes a plain string.
enum E {
    W { a: i32, b: i32 },
    X(i32, i32),
    Y(i32),
    Z,
}
let w = E::W { a: 0, b: 0 }; // Represented as `{"W":{"a":0,"b":0}}`
let x = E::X(0, 0);          // Represented as `{"X":[0,0]}`
let y = E::Y(0);             // Represented as `{"Y":0}`
let z = E::Z;                // Represented as `"Z"`

使用三类属性宏对 Serialize/Deserialize 进行更灵活的配置:

  • Container attributes — 对 struct/enum 类型整体有效;
  • Variant attributes — 只对 enum variant 有效;
  • Field attributes — 只对 struct field 有效;
/// Minimal struct showing where a container attribute and a field attribute go.
#[derive(Serialize, Deserialize)]
#[serde(deny_unknown_fields)]  // <-- this is a container attribute
struct S {
    #[serde(default)]  // <-- this is a field attribute
    f: i32,
}

/// Minimal enum showing where a container attribute and a variant attribute go.
#[derive(Serialize, Deserialize)]
#[serde(rename = "e")]  // <-- this is also a container attribute
enum E {
    #[serde(rename = "a")]  // <-- this is a variant attribute
    A(String),
}

1 Container attributes
#

对 struct/enum 类型整体进行重命名(而非各字段):

  • #[serde(rename = "name")]
  • #[serde(rename(serialize = "ser_name"))]
  • #[serde(rename(deserialize = "de_name"))]
  • #[serde(rename(serialize = "ser_name", deserialize = "de_name"))]

rename_all: 对 struct/enum 各字段成员按统一风格进行重命名:

  • #[serde(rename_all = "…")]
  • #[serde(rename_all(serialize = "…"))]
  • #[serde(rename_all(deserialize = "…"))]
  • #[serde(rename_all(serialize = "…", deserialize = "…"))]

rename_all_fields:只对 enum 的 struct variant 类型的各字段重命名。

  • #[serde(rename_all_fields = "…")]
  • #[serde(rename_all_fields(serialize = "…"))]
  • #[serde(rename_all_fields(deserialize = "…"))]
  • #[serde(rename_all_fields(serialize = "…", deserialize = "…"))]

统一风格类型: "lowercase", "UPPERCASE", "PascalCase", "camelCase", "snake_case", "SCREAMING_SNAKE_CASE", "kebab-case", "SCREAMING-KEBAB-CASE".

fn test_serde_rename() {
    use serde::{Deserialize, Serialize};
    use serde_json;

    // rename_all 将 enum 的各 variant 字段名称按照指定风格重写,但是 struct variant 的各 field 字段名称不变。
    #[derive(Serialize, Deserialize, Debug)]
    #[serde(rename_all = "UPPERCASE")]
    enum Message {
        Request { id: String, method: String },
        Response { id: String },
        Unknown,
        Other(u8),
    }
    println!(
        "enum Message rename_all: struct {}, simple: {}, tuple: {}",
        serde_json::to_string(&Message::Request {
            id: "abc".into(),
            method: "get".into()
        }).unwrap(),
        serde_json::to_string(&Message::Unknown).unwrap(),
        serde_json::to_string(&Message::Other(8)).unwrap()
    );
    // enum Message rename: struct {"REQUEST":{"id":"abc","method":"get"}}, simple: "UNKNOWN", tuple: {"OTHER":8}

    // rename_all_fields 只适用于 enum,而且只是对 struct variant 的各 field key 重命名。
    #[derive(Serialize, Deserialize, Debug)]
    #[serde(rename_all_fields = "UPPERCASE"]
    enum MessageV2 {
        Request { id: String, method: String },
        Response { id: String },
        Unknown,
        Other(u8),
    }
    println!(
        "enum MessageV2 rename_all_fields: struct {}, simple: {}, tuple: {}",
        serde_json::to_string(&MessageV2::Request {
            id: "abc".into(),
            method: "get".into()
        }).unwrap(),
        serde_json::to_string(&MessageV2::Unknown).unwrap(),
        serde_json::to_string(&MessageV2::Other(8)).unwrap()
    );
    // enum MessageV2 rename_all_fields: struct {"Request":{"ID":"abc","METHOD":"get"}}, simple: "Unknown", tuple: {"Other":8}

    // 同时使用 rename_all_fields 和 rename_all, 这样各字段都是重命名
    #[derive(Serialize, Deserialize, Debug)]
    #[serde(rename_all_fields = "UPPERCASE", rename_all = "UPPERCASE")]
    enum MessageV3 {
        Request { id: String, method: String },
        Response { id: String },
        Unknown,
        Other(u8),
    }
    println!(
        "enum MessageV3 rename_all_fields and rename_all: struct {}, simple: {}, tuple: {}",
        serde_json::to_string(&MessageV3::Request {
            id: "abc".into(),
            method: "get".into()
        })
        .unwrap(),
        serde_json::to_string(&MessageV3::Unknown).unwrap(),
        serde_json::to_string(&MessageV3::Other(8)).unwrap()
    );
    // enum MessageV3 rename_all_fields and rename_all: struct
    // {"REQUEST":{"ID":"abc","METHOD":"get"}}, simple: "UNKNOWN", tuple: {"OTHER":8}

    // rename_all 也适用于 struct,对它的各字段有效。
    #[derive(Serialize, Deserialize, Debug)]
    #[serde(rename_all = "UPPERCASE")]
    struct Student {
        name: String,
        age: u8,
    }
    let s = Student {
        name: "zhang".into(),
        age: 28,
    };
    println!(
        "serde rename_all: struct: {}",
        serde_json::to_string(&s).unwrap()
    );
    // serde rename_all: struct: {"NAME":"zhang","AGE":28}
}

#[serde(deny_unknown_fields)]:反序列化时,输入中的 unknown field 默认会被忽略;设置该属性后,遇到 unknown field 则报错。

注意:反序列化时,如果输入中缺失了某个 field(即没有提供 struct 的所有 field),则默认报错。

序列化 enum 时:对于 struct variant 或 tuple variant(newtype variant):

  • 默认(externally tagged)使用 map,key 为 variant 名称,value 为 map 或 newtype 值。

  • tag 属性(internally tagged):将 variant name 作为 tag 的值打平到结果 map 中;不支持 tuple variant,newtype variant 的内部值必须能序列化为 map(如 struct 或 map 类型)。

  • content 属性:只适用于 enum 类型,需要和 tag 一起使用(adjacently tagged),variant 的内容位于 content 指定的 key 之下,支持 tuple variant。

  • #[serde(tag = “type”)]

  • #[serde(tag = “t”, content = “c”)]

  • #[serde(untagged)] ,struct 默认是 untagged。

/// Demonstrates the enum representations selected by `tag` / `content`, and
/// `tag` on a struct, by printing the resulting JSON. The expected output is
/// recorded in the comment after each `println!`.
fn test_serde_tag() {
    use serde::{Deserialize, Serialize};
    use serde_json;

    // Externally tagged (the default, no `tag` attribute): struct and tuple
    // variants serialize as a JSON map keyed by the variant name, and unit
    // variants serialize as the variant name string.
    #[derive(Serialize, Deserialize, Debug)]
    enum Message {
        Request { id: String, method: String },
        Response { id: String },
        Unknown,
        Other(u8),
    }
    println!(
        "enum Message: struct {}, simple: {}, tuple: {}",
        serde_json::to_string(&Message::Request {
            id: "abc".into(),
            method: "get".into()
        })
        .unwrap(),
        serde_json::to_string(&Message::Unknown).unwrap(),
        serde_json::to_string(&Message::Other(8)).unwrap()
    );
    // enum Message: struct {"Request":{"id":"abc","method":"get"}}, simple:
    // "Unknown", tuple: {"Other":8}

    // Internally tagged (`tag` only): no outer key is added for the variant
    // name; instead the name becomes the value of the tag key, which is
    // flattened into the variant's own map. Tuple variants are not supported,
    // and a newtype variant holding a plain integer fails when serialized.
    #[derive(Serialize, Deserialize, Debug)]
    #[serde(tag = "t")]
    enum MessageV2 {
        Request { id: String, method: String },
        Response { id: String },
        Unknown,
        // called `Result::unwrap()` on an `Err` value: Error("cannot
        // serialize tagged newtype variant MessageV2::Other containing an
        // integer", line: 0, column: 0)
        //
        // Other(u8),
    }
    println!(
        "enum MessageV2: struct: {}, simple: {}, tuple: {}",
        serde_json::to_string(&MessageV2::Request {
            id: "abc".into(),
            method: "get".into()
        })
        .unwrap(),
        serde_json::to_string(&MessageV2::Unknown).unwrap(),
        "unsupported tuple variant with tag",
        // serde_json::to_string(&MessageV2::Other(8u8)).unwrap(),
    );
    // enum MessageV2: struct: {"t":"Request","id":"abc","method":"get"},
    // simple: {"t":"Unknown"}, tuple: unsupported tuple variant with tag

    // Adjacently tagged (`tag` + `content`): the variant's data is placed
    // under the content key. This representation also handles newtype and
    // tuple variants. `content` can only be used on enums, not on structs.
    #[derive(Serialize, Deserialize, Debug)]
    #[serde(tag = "t", content = "c")]
    enum MessageV3 {
        Request { id: String, method: String },
        Response { id: String },
        Unknown,
        Other(u8),
    }
    println!(
        "enum MessageV3: struct: {}, simple: {}, tuple: {}",
        serde_json::to_string(&MessageV3::Request {
            id: "abc".into(),
            method: "get".into()
        })
        .unwrap(),
        serde_json::to_string(&MessageV3::Unknown).unwrap(),
        serde_json::to_string(&MessageV3::Other(8u8)).unwrap(),
    );
    // enum MessageV3: struct: {"t":"Request","c":{"id":"abc","method":"get"}},
    // simple: {"t":"Unknown"}, tuple: {"t":"Other","c":8}

    #[derive(Serialize, Deserialize, Debug)]
    // A struct is untagged by default; with `tag`, an extra key is added
    // whose value is the struct's type name.
    //
    // error: #[serde(content = "...")] can only be used on enums
    // #[serde(tag = "t", content = "c")]
    #[serde(tag = "t")]
    struct Student {
        name: String,
        age: u8,
    }
    let s = Student {
        name: "zhang".into(),
        age: 28,
    };
    println!("serde tag: struct: {}", serde_json::to_string(&s).unwrap());
    // serde tag: struct: {"t":"Student","name":"zhang","age":28}
}

bound 用来手动指定实现 Serialize/Deserialize Trait 的类型的限界,而不是由 serde 来进行推导:

  • #[serde(bound = “T: MyTrait”)]
  • #[serde(bound(serialize = “T: MySerTrait”))]
  • #[serde(bound(deserialize = “T: MyDeTrait”))]
  • #[serde(bound(serialize = “T: MySerTrait”, deserialize = “T: MyDeTrait”))]
/// Generic struct whose `Serialize`/`Deserialize` bounds are written by hand.
///
/// The container-level `bound` covers `D`, while the bound for `E` is attached
/// to field `e` itself. `SerializeWith` and `DeserializeWith` are not defined
/// in this snippet — presumably user traits providing the two helper
/// functions named in `serialize_with` / `deserialize_with`.
#[derive(Serialize, Deserialize)]
#[serde(bound = "D: SerializeWith + DeserializeWith")]
struct WithTraits1<D, E> {
    #[serde(
        serialize_with = "SerializeWith::serialize_with",
        deserialize_with = "DeserializeWith::deserialize_with"
    )]
    d: D,
    #[serde(
        serialize_with = "SerializeWith::serialize_with",
        deserialize_with = "DeserializeWith::deserialize_with",
        bound = "E: SerializeWith + DeserializeWith"
    )]
    e: E,
}

在进行反序列化时,如果输入的字段有缺失,默认会失败。但如果使用缺省值或调用函数生成,可以避免字段缺失报错。(序列化时,因为 Rust 要求 struct 所有字段都要初始化,所以字段不可能缺失)。 #[serde(default)] 或 #[serde(default=“path”)]

只对 struct 有效:当 deserializing 时,对于缺失的 fields 使用 struct 实现的 Default trait 来补充。

  • #[serde(default)]

只对 struct 有效:当 deserializing 时,对于缺失的 fields 使用 path 指定的函数或方法来返回,函数签名必须是 fn() -> T ; 如:

  1. default = "my_default" 调用 my_default()
  2. default = "SomeTrait::some_default" 调用 SomeTrait::some_default()
  3. #[serde(default = “path”)]
/// Shows how deserialization reacts to mismatched names, missing fields and
/// unknown fields, and how container-level `#[serde(default)]` fills the gaps.
pub fn test_serde_deserialize() {
    #[derive(Serialize, Deserialize, Debug)]
    enum Message {
        Request { id: String, method: String },
        Response { id: String },
        Unknown,
        Other(u8),
    }

    // Names are matched case-sensitively when deserializing, so a key whose
    // case differs is rejected (here it is reported as an unknown variant).
    //
    // let m: Message =
    //     serde_json::from_str(r#"{"request": {"id": "abcd"}}"#).unwrap();
    // println!("serde deserialize: {:?}", m);
    //
    // called `Result::unwrap()` on an `Err` value: Error("unknown variant
    // `request`, expected one of `Request`, `Response`, `Unknown`, `Other`",
    // line: 1, column: 10)

    // A missing field is an error when deserializing.
    //
    // let m: Message =
    //     serde_json::from_str(r#"{"Request": {"id": "abcd"}}"#).unwrap();
    // println!("serde deserialize: {:?}", m);
    //
    // called `Result::unwrap()` on an `Err` value: Error("missing field
    // `method`", line: 1, column: 26)

    // Container-level `serde(default)` tolerates missing fields when
    // deserializing: they are filled in from the struct's `Default` value.
    // It only applies to structs, and the struct as a whole must implement
    // `Default` (derived here).
    #[derive(Serialize, Deserialize, Debug, Default)]
    #[serde(default)]
    struct MessageV2 {
        request: String,
        response: String,
        unknown: u8,
    }

    // The input carries an extra `Request` key. Unknown keys are ignored by
    // default; with `#[serde(deny_unknown_fields)]` they would be an error.
    let m: MessageV2 = serde_json::from_str(
        r#"{"Request": {"id": "abcd"}, "request": "abcd"}"#,
    )
        .unwrap();
    println!("serde default: {:?}", m);
}

// 另一个例子: https://riptutorial.com/rust/example/8980/default-value-for-field
/// Request whose fields each demonstrate a different way of supplying a
/// default when the key is missing from the input.
#[derive(Deserialize, Debug)]
struct Request {
    // Default comes from the free function `default_resource()`.
    #[serde(default="default_resource")]
    resource: String,

    // Default comes from the `Default` impl of the `Timeout` type.
    #[serde(default)]
    timeout: Timeout,

    // Default comes from the associated function `Priority::lowest()`.
    #[serde(default="Priority::lowest")]
    priority: Priority,
}

/// Returns the fallback value for `Request::resource`: the root path `"/"`.
fn default_resource() -> String {
    String::from("/")
}

/// Newtype around a `u32` timeout value; its `Default` is 30.
#[derive(Deserialize, Debug)]
struct Timeout(u32);

impl Default for Timeout {
    /// Fallback used when the `timeout` key is absent from the input.
    fn default() -> Self {
        Self(30)
    }
}

/// Request priority levels, listed from highest to lowest.
#[derive(Deserialize, Debug)]
enum Priority {
    ExtraHigh,
    High,
    Normal,
    Low,
    ExtraLow,
}

impl Priority {
    /// Returns the lowest level, `ExtraLow`; referenced by `#[serde(default = "Priority::lowest")]`.
    fn lowest() -> Self {
        Self::ExtraLow
    }
}

fn main() {
    // Two requests, each omitting different keys so that every default is used.
    let payload = r#"
        [
          {
            "resource": "/users"
          },
          {
            "timeout": 5,
            "priority": "High"
          }
        ]
    "#;

    let parsed: Vec<Request> = serde_json::from_str(payload).unwrap();

    // The first request has resource="/users", timeout=30, priority=ExtraLow
    println!("{:?}", parsed[0]);

    // The second request has resource="/", timeout=5, priority=High
    println!("{:?}", parsed[1]);
}

#[serde(remote = “…”)] 和 #[serde(with = “…”)] 主要解决为其它 crate 中定义的类型实现 Serialize 和 Deserialize 的问题(由于 Rust 的 Orphan rule 的限制)。

  • 在本 crate 中定义一个类型 AR,可以将其它 crate 中的类型 A 转换为 AR;
  • 使用 remote 来修饰本 crate 的自定义类型 AR,值为其它 crate 中的类型 A。
  • 在本 crate 中定义其它待 ser/de 的类型 S,内部字段使用 AR 类型,但是使用 with 修饰,值为 AR;

后续序列化和反序列化 S 中的 A 类型字段时,均转换为 AR 类型值来进行:

  1. 序列化时,将 A 类型值转换为 AR,然后按 AR 序列化;
  2. 反序列化时,先反序列化为 AR,再转换为 A 类型值;
// https://serde.rs/remote-derive.html

// Pretend that this is somebody else's crate, not a module.
mod other_crate {
    // Neither Serde nor the other crate provides Serialize and Deserialize
    // impls for this struct.
    /// Stand-in for a type owned by a foreign crate; both fields are public.
    pub struct Duration {
        pub secs: i64,
        pub nanos: i32,
    }
}

////////////////////////////////////////////////////////////////////////////////

use other_crate::Duration;
use serde::{Serialize, Deserialize};

// Serde calls this the definition of the remote type. It is just a copy of the
// remote data structure. The `remote` attribute gives the path to the actual
// type we intend to derive code for.
//
// The field names and types below mirror those of `other_crate::Duration`.
#[derive(Serialize, Deserialize)]
#[serde(remote = "Duration")]
struct DurationDef {
    secs: i64,
    nanos: i32,
}

// Provide a conversion to construct the remote type.
impl From<DurationDef> for Duration {
    fn from(def: DurationDef) -> Duration {
        Duration::new(def.secs, def.nanos)
    }
}

// Now the remote type can be used almost like it had its own Serialize and
// Deserialize impls all along. The `with` attribute gives the path to the
// definition for the remote type. Note that the real type of the field is the
// remote type, not the definition type.
/// Struct holding a field of the remote `Duration` type.
#[derive(Serialize, Deserialize)]
struct Process {
    command_line: String,

    // `with` names the definition type (`DurationDef`); the field itself keeps
    // the remote type `Duration`.
    #[serde(with = "DurationDef")]
    wall_time: Duration,
}

#[serde(transparent)]:只能用于只有一个 field 的 struct,在序列化时忽略该 field key,只输出值。

#[derive(Serialize, Deserialize, Debug)]
#[serde(transparent)]
struct MessageV3 {
    f: u8,
};
let m = serde_json::to_string(&MessageV3 { f: 8u8 }).unwrap();
println!("serde transparet: {}", m);
// 不加 #[serde(transparent)]: serde transparet: {"f":8}
//
// 加 #[serde(transparent)]:serde transparet: 8

#[serde(from = “FromType”)] 和 #[serde(try_from = “FromType”)] :反序列化使用,将字符串先反序列化为 FromType 类型值 value(FromType 需要实现 Deserialize),然后再转换为修饰的类型值。修饰的类型值需要实现 From<FromType>.

#[serde(into = “IntoType”)]:序列化时使用,修饰的类型需要实现 Clone 和 Into<IntoType>, 序列化时先将修饰的类型值转换为 IntoType 类型值,然后对它进行序列化(IntoType 需要实现Serialize)。

  • #[serde(crate = “…”)]
  • #[serde(expecting = “…”)]

2 Variant attributes
#

针对 enum 内各 variant 使用。

指定序列化和反序列化时,对该 variant 字段名称(不含 struct variant 的各内部 field 名称)进行重命名:

  • #[serde(rename = “name”)]
  • #[serde(rename(serialize = “ser_name”))]
  • #[serde(rename(deserialize = “de_name”))]
  • #[serde(rename(serialize = “ser_name”, deserialize = “de_name”))]

反序列化时使用,从这个 name 或字段名来解析字段内容。指定多次时表示多个别名。

  • #[serde(alias = “name”)]:

只对 struct variant 有效,用于将 struct 中各 field 按照指定的风格进行重命名:“lowercase”,“UPPERCASE”, “PascalCase”, “camelCase”, “snake_case”,“SCREAMING_SNAKE_CASE”, “kebab-case”,“SCREAMING-KEBAB-CASE”:

  • #[serde(rename_all = “…”)]
  • #[serde(rename_all(serialize = “…”))]
  • #[serde(rename_all(deserialize = “…”))]
  • #[serde(rename_all(serialize = “…”, deserialize = “…”))]

序列化或反序列化时忽略该 variant。

  • #[serde(skip)]
  • #[serde(skip_serializing)]
  • #[serde(skip_deserializing)]

使用指定的函数来对 variant 字段进行序列化和反序列化,而不是字段类型默认实现的 Serialize 或 Deserialize trait:

  • #[serde(serialize_with = “path”)]: path 对应的函数签名(FIELDXX 为 variant 字段类型)

    fn<S>(&FIELD0, &FIELD1, ..., S) -> Result<S::Ok, S::Error> where S: Serializer
    
  • #[serde(deserialize_with = "path")]: path 对应的函数签名(D 为 Deserializer 类型, FIELDS 为 variant 的字段类型)

    fn<'de, D>(D) -> Result<FIELDS, D::Error> where D: Deserializer<'de>
    
  • #[serde(with = “module”)]: module 需要定义 $module::serialize 和 $module::deserialize 函数, 分别用于序列化和反序列化;

另外 serde-aux 等 crate 也提供了很多 with 函数或 module,如从字符串值反序列化为数值类型等场景。

extern crate serde;
#[macro_use]
extern crate serde_derive;
extern crate serde_json;

use serde::{Deserialize, Deserializer};

/// Newtype whose inner `i32` is deserialized through `callback`, so that the
/// callback can be reused inside other containers such as `Option`.
#[derive(Debug, Deserialize)]
struct WrappedI32(#[serde(deserialize_with = "callback")] i32);

// callback, as provided by the external library
/// Deserializes an `i32` and returns twice its value.
fn callback<'de, D>(deserializer: D) -> Result<i32, D::Error>
where
    D: Deserializer<'de>,
{
    let raw = i32::deserialize(deserializer)?;
    Ok(raw * 2)
}

// Our final output struct, that we want to get after parsing the JSON
/// Target struct: both optional integers are deserialized through `callback_opt`.
#[derive(Debug, Deserialize)]
struct S {
    #[serde(deserialize_with = "callback_opt")]
    s: Option<i32>,
    #[serde(deserialize_with = "callback_opt")]
    s2: Option<i32>
}

pub fn callback_opt<'de, D>(deserializer: D) -> Result<Option<i32>, D::Error> where D: Deserializer<'de>,
{
    Option::<WrappedI32>::deserialize(deserializer)
        .map(|opt_wrapped: Option<WrappedI32>| {
            opt_wrapped.map(|wrapped: WrappedI32| wrapped.0)
        })
}

fn main() {
    // `s` goes through `callback` (which doubles the number) and `s2` is null,
    // so this presumably prints `Ok(S { s: Some(84), s2: None })` — not
    // verified here.
    println!("{:?}", serde_json::from_str::<S>(r#"{"s": 42, "s2": null}"#));
}

// 另一个例子
#[macro_use]
extern crate serde_derive;
extern crate serde;
extern crate serde_json;
extern crate chrono;

use chrono::NaiveDate;

// 该 module 下需要定义 serialize() 和 deserialize() 两个函数
mod date_serde {
    use chrono::NaiveDate;
    use serde::{self, Deserialize, Serializer, Deserializer};

    /// Serializes `Some(date)` as a `%Y-%m-%d` string and `None` via
    /// `serialize_none`. `date` is the value of the annotated field.
    pub fn serialize<S>(date: &Option<NaiveDate>, s: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match *date {
            Some(ref day) => s.serialize_str(&day.format("%Y-%m-%d").to_string()),
            None => s.serialize_none(),
        }
    }

    /// Deserializes an optional string and parses it as a `%Y-%m-%d` date.
    /// The `Option<NaiveDate>` in the result is the type of the annotated field.
    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<NaiveDate>, D::Error>
    where
        D: Deserializer<'de>,
    {
        let text: Option<String> = Option::deserialize(deserializer)?;
        match text {
            Some(raw) => NaiveDate::parse_from_str(&raw, "%Y-%m-%d")
                .map(Some)
                .map_err(serde::de::Error::custom),
            None => Ok(None),
        }
    }
}

/// Record whose optional date is (de)serialized by the `date_serde` module.
#[derive(Debug, Serialize, Deserialize)]
struct Test {
    pub i: u64,
    // `with` replaces serde's built-in handling of `Option`, including the
    // rule that a missing key becomes `None`. `default` restores that, so an
    // input such as `{"i": 5}` yields `date == None` instead of failing with
    // "missing field `date`".
    #[serde(default, with = "date_serde")]
    pub date: Option<NaiveDate>,
}

fn main() {
    // Input with a date string: the assertions expect it parsed as 2015-02-03.
    let mut test: Test = serde_json::from_str(r#"{"i": 3, "date": "2015-02-03"}"#).unwrap();
    assert_eq!(test.i, 3);
    assert_eq!(test.date, Some(NaiveDate::from_ymd(2015, 02, 03)));
    // Input without a `date` key: the assertions expect `None`.
    test = serde_json::from_str(r#"{"i": 5}"#).unwrap();
    assert_eq!(test.i, 5);
    assert_eq!(test.date, None);
}

各字段类型默认需要实现 Serialize 和 Deserialize,该限界是 serde 自动推导的。但是如果使用自定义的序列化或反序列化函数,则该函数是泛型函数,泛型类型参数为字段的类型,使用 bound 可以为该类型定义新的限界约束。

  • #[serde(bound = “T: MyTrait”)]
  • #[serde(bound(serialize = “T: MySerTrait”))]
  • #[serde(bound(deserialize = “T: MyDeTrait”))]
  • #[serde(bound(serialize = “T: MySerTrait”, deserialize = “T: MyDeTrait”))]
// https://riptutorial.com/rust/example/18224/handwritten-generic-type-bounds
extern crate serde;
extern crate serde_json;
#[macro_use] extern crate serde_derive;

use serde::de::{self, Deserialize, Deserializer};

use std::fmt::Display;
use std::str::FromStr;

#[derive(Deserialize, Debug)]
struct Outer<'a, S, T: 'a + ?Sized> {
    // When deriving the Deserialize impl, Serde would want to generate a bound `S: Deserialize`
    // on the type of this field. But we are going to use the type's `FromStr` impl instead of its
    // `Deserialize` impl by going through `deserialize_from_str`, so we override the
    // automatically generated bound by the one required for `deserialize_from_str`.

    // deserialize_from_str() 是泛型反序列化函数,其中一个泛型类型是 S,
    // 这里指定 S 的限界。
    #[serde(deserialize_with = "deserialize_from_str")]
    #[serde(bound(deserialize = "S: FromStr, S::Err: Display"))]
    // 对 S 类型按照 bound 的参数进行限界。
    //(如果未指定 bound,S 默认限界为 Deserialize)
    s: S,

    // Here Serde would want to generate a bound `T: Deserialize`. That is a stricter condition
    // than is necessary. In fact, the `main` function below uses T=str which does not implement
    // Deserialize. We override the automatically generated bound by a looser one.
    #[serde(bound(deserialize = "Ptr<'a, T>: Deserialize"))]
    // 对 Ptr<'a, T> 类型进行限界,该类型需要实现 Deserialize
    ptr: Ptr<'a, T>,
}

/// Deserialize a type `S` by deserializing a string, then using the `FromStr` impl of `S` to
/// create the result. The generic type `S` is not required to implement `Deserialize`.
fn deserialize_from_str<S, D>(deserializer: D) -> Result<S, D::Error>
    where S: FromStr,
          S::Err: Display,
          D: Deserializer
{
    let s: String = try!(Deserialize::deserialize(deserializer));
    S::from_str(&s).map_err(|e| de::Error::custom(e.to_string()))
}

/// A pointer to `T` which may or may not own the data. When deserializing
/// we always want to produce owned data.
#[derive(Debug)]
enum Ptr<'a, T: 'a + ?Sized> {
    Ref(&'a T),
    Owned(Box<T>),
}

impl<'a, T: 'a + ?Sized> Deserialize for Ptr<'a, T>
    where Box<T>: Deserialize
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where D: Deserializer
    {
        let box_t = try!(Deserialize::deserialize(deserializer));
        Ok(Ptr::Owned(box_t))
    }
}

fn main() {
    // Both values arrive as JSON strings.
    let j = r#"
        {
          "s": "1234567890",
          "ptr": "owned"
        }
    "#;

    // `S` is `u64` and `T` is `str`; the lifetime parameter is left to inference.
    let result: Outer<u64, str> = serde_json::from_str(j).unwrap();

    // result = Outer { s: 1234567890, ptr: Owned("owned") }
    println!("result = {:?}", result);
}

对于有 lifetime 的类型,反序列化时,&str 和 &[u8] 类型的 field,serde 默认 borrow。对于其它类型的 field,需要明确使用 #[serde(borrow)] 来进行 borrow,同时可以指定 borrow 的 lifetime 约束:#[serde(borrow = "'a + 'b + ...")]

// https://serde.rs/lifetimes.html#borrowing-data-in-a-derived-impl

use serde::Deserialize;
use std::borrow::Cow;

/// Struct with one implicitly borrowed field and one explicitly borrowed field.
#[derive(Deserialize)]
struct Inner<'a, 'b> {
    // &str and &[u8] are implicitly borrowed.
    username: &'a str,

    // Other types must be borrowed explicitly.
    #[serde(borrow)]
    comment: Cow<'b, str>,
}

/// Struct mixing an owned field, a borrowed nested struct and a `Cow` that is
/// not marked with `#[serde(borrow)]`.
#[derive(Deserialize)]
struct Outer<'a, 'b, 'c> {
    owned: String,

    // Borrows through the nested struct's lifetimes 'a and 'b.
    #[serde(borrow)]
    inner: Inner<'a, 'b>,

    // This field is never borrowed.
    not_borrowed: Cow<'c, str>,
}

// The lifetimes 'a and 'b are borrowed while 'c is not.
// Only the impl header is shown (the body is elided): `'de` is required to
// outlive 'a and 'b, and no such bound is placed on 'c.
impl<'de: 'a + 'b, 'a, 'b, 'c> Deserialize<'de> for Outer<'a, 'b, 'c> {
    /* ... */
}

use std::marker::PhantomData;

// This struct borrows the first two lifetimes but not the third.
// 'c appears only inside `PhantomData`, so no field stores a reference with it.
#[derive(Deserialize)]
struct Three<'a, 'b, 'c> {
    a: &'a str,
    b: &'b str,
    c: PhantomData<&'c str>,
}

/// Shows `borrow = "..."` naming exactly which lifetimes of a field are borrowed.
#[derive(Deserialize)]
struct Example<'a, 'b, 'c> {
    // Borrow 'a and 'b only, not 'c.
    #[serde(borrow = "'a + 'b")]
    three: Three<'a, 'b, 'c>,
}

#[serde(other)]:需要和 serde(tag = "xx") 连用,如 serde(tag = "variant")。当 enum 包含 A、B 和 Unknown 三个 variant,且 Unknown 被标记为 serde(other) 时,反序列化时如果 tag 字段的值既不是 A 也不是 B,则当作 Unknown 来反序列化。

#[serde(untagged)] 强制将该 variant field 使用 untagged 类型值进行序列化和反序列化(如不管是否设置 serde(tag = “xx”) ):

3 Field attributes
#

针对 struct 成员使用。

对 struct field 进行重命名:

  • #[serde(rename = “name”)]
  • #[serde(rename(serialize = “ser_name”))]
  • #[serde(rename(deserialize = “de_name”))]
  • #[serde(rename(serialize = “ser_name”, deserialize = “de_name”))]
extern crate serde;
extern crate serde_json;
#[macro_use] extern crate serde_derive;

/// Struct whose snake_case fields are renamed to camelCase keys on output.
#[derive(Serialize)]
struct Person {
    #[serde(rename="firstName")]
    first_name: String,
    #[serde(rename="lastName")]
    last_name: String,
}

fn main() {
    let joel = Person {
        first_name: String::from("Joel"),
        last_name: String::from("Spolsky"),
    };

    // Prints:
    //
    //    {
    //      "firstName": "Joel",
    //      "lastName": "Spolsky"
    //    }
    let pretty = serde_json::to_string_pretty(&joel).unwrap();
    println!("{}", pretty);
}

反序列化时 field 别名,可以指定多个:

  • #[serde(alias = “name”)]

反序列化时,如果 field 不存在,则使用字段类型的 Default::default() 实现:

  • #[serde(default)]

反序列化时,如果 field 不存在,调用 path 函数来获得缺省值:

  • #[serde(default = “path”)]

序列化或反序列化时,将嵌入的 struct 类型 field 或 map 打平到所属 struct 中:

  • #[serde(flatten)]
// https://serde.rs/attr-flatten.html
/// Paging information, meant to be flattened into the struct that embeds it.
#[derive(Serialize, Deserialize)]
struct Pagination {
    limit: u64,
    offset: u64,
    total: u64,
}

/// A list of users plus paging information.
#[derive(Serialize, Deserialize)]
struct Users {
    users: Vec<User>,

    // The keys of `Pagination` appear directly next to `users` instead of
    // under a nested `pagination` key (see the sample JSON below).
    #[serde(flatten)]
    pagination: Pagination,
}

// {
//   "limit": 100,
//   "offset": 200,
//   "total": 1053,
//   "users": [
//     {"id": "49824073-979f-4814-be10-5ea416ee1c2f", "username": "john_doe"},
//     ...
//   ]
// }


use std::collections::HashMap;
use serde::{Serialize, Deserialize};
use serde_json::Value;

/// User with two named fields and a flattened map for additional keys.
#[derive(Serialize, Deserialize)]
struct User {
    id: String,
    username: String,

    // The map's entries appear as top-level keys, e.g. "mascot" in the
    // sample JSON below.
    #[serde(flatten)]
    extra: HashMap<String, Value>,
}

// {
//   "id": "49824073-979f-4814-be10-5ea416ee1c2f",
//   "username": "john_doe",
//   "mascot": "Ferris"
// }

序列化时忽略该 field,但是反序列化时使用缺省值(而不看实际传入的值):

  • #[serde(skip)] // 反序列化时使用 Default::default() 或 default = “…” 配置。
  • #[serde(skip_serializing)]
  • #[serde(skip_deserializing)]
  • #[serde(skip_serializing_if = “path”)] // path 为判断函数,如 Option::is_none
extern crate serde;
extern crate serde_json;
#[macro_use] extern crate serde_derive;

use std::collections::BTreeMap as Map;

/// Struct demonstrating unconditional and conditional skipping of fields
/// during serialization.
#[derive(Serialize)]
struct Resource {
    // Always serialized.
    name: String,

    // Never serialized.
    #[serde(skip_serializing)]
    hash: String,

    // Use a method to decide whether the field should be skipped.
    // Here the field is omitted when `Map::is_empty` returns true.
    #[serde(skip_serializing_if="Map::is_empty")]
    metadata: Map<String, String>,
}

fn main() {
    // Only the GitHub entry carries metadata, so only it gets a "metadata" key.
    let mut github_metadata = Map::new();
    github_metadata.insert("headquarters".to_string(), "San Francisco".to_string());

    let stack_overflow = Resource {
        name: "Stack Overflow".to_string(),
        hash: "b6469c3f31653d281bbbfa6f94d60fea130abe38".to_string(),
        metadata: Map::new(),
    };
    let github = Resource {
        name: "GitHub".to_string(),
        hash: "5cb7a0c47e53854cd00e1a968de5abce1c124601".to_string(),
        metadata: github_metadata,
    };
    let resources = vec![stack_overflow, github];

    // Prints:
    //
    //    [
    //      {
    //        "name": "Stack Overflow"
    //      },
    //      {
    //        "name": "GitHub",
    //        "metadata": {
    //          "headquarters": "San Francisco"
    //        }
    //      }
    //    ]
    let pretty = serde_json::to_string_pretty(&resources).unwrap();
    println!("{}", pretty);
}

使用指定函数来序列化或反序列化值:

  • #[serde(serialize_with = “path”)] path 函数签名:

    fn<S>(&T, S) -> Result<S::Ok, S::Error> where S: Serializer
    
  • #[serde(deserialize_with = “path”)] path 函数签名:

    fn<'de, D>(D) -> Result<T, D::Error> where D: Deserializer<'de>
    
  • #[serde(with = “module”)] module 必须同时定义 $module::serialize 和 $module::deserialize 函数。

  • #[serde(bound = “T: MyTrait”)]

  • #[serde(bound(serialize = “T: MySerTrait”))]

  • #[serde(bound(deserialize = “T: MyDeTrait”))]

  • #[serde(bound(serialize = “T: MySerTrait”, deserialize = “T: MyDeTrait”))]

  • #[serde(borrow)] and #[serde(borrow = "'a + 'b + ...")]

#[serde(getter = “…”)] 序列化时使用,remote type 且该 type 的 field 非 pub 时,指定获得该 field 值的 getter 函数。

// https://serde.rs/remote-derive.html

// Pretend that this is somebody else's crate, not a module.
mod other_crate {
    // Neither Serde nor the other crate provides Serialize and Deserialize
    // impls for this struct. Oh, and the fields are private.
    pub struct Duration {
        secs: i64,
        nanos: i32,
    }

    impl Duration {
        /// Creates a `Duration` from its two components.
        pub fn new(secs: i64, nanos: i32) -> Self {
            Duration { secs, nanos }
        }

        /// Returns the stored `secs` component.
        pub fn seconds(&self) -> i64 {
            self.secs
        }

        /// Returns the stored `nanos` component.
        pub fn subsec_nanos(&self) -> i32 {
            self.nanos
        }
    }
}

////////////////////////////////////////////////////////////////////////////////

use other_crate::Duration;
use serde::{Serialize, Deserialize};

// Provide getters for every private field of the remote struct. The getter must
// return either `T` or `&T` where `T` is the type of the field.
/// Local mirror of the remote `Duration` type, declared with the same field
/// names and types so the derive macros can be applied to it.
///
/// Each field names the `Duration` accessor that reads the corresponding
/// private field during serialization.
#[derive(Serialize, Deserialize)]
#[serde(remote = "Duration")]
struct DurationDef {
    // Read through `Duration::seconds` because `secs` is private in the remote type.
    #[serde(getter = "Duration::seconds")]
    secs: i64,
    // Read through `Duration::subsec_nanos` because `nanos` is private in the remote type.
    #[serde(getter = "Duration::subsec_nanos")]
    nanos: i32,
}

// Provide a conversion to construct the remote type.
impl From<DurationDef> for Duration {
    fn from(def: DurationDef) -> Duration {
        Duration::new(def.secs, def.nanos)
    }
}

/// Example of a local struct that embeds the remote `Duration` type.
#[derive(Serialize, Deserialize)]
struct Process {
    command_line: String,

    // `Duration` has no Serialize/Deserialize impls of its own, so this field
    // is routed through `DurationDef` instead.
    #[serde(with = "DurationDef")]
    wall_time: Duration,
}

4 序列化
#

自定义类型需要实现 Serialize trait 后才能被序列化,可以通过 #[derive(Serialize)] 宏来自动实现。

/// Trait a type must implement before it can be serialized.
pub trait Serialize {
    /// Serializes `self` by driving the given `serializer`, returning the
    /// serializer's own success or error type.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where S: Serializer;
}

// 示例
use serde::{Serialize, Deserialize};

/// Example type whose `Serialize`/`Deserialize` impls are generated by the
/// derive macros; `Debug` is derived so the value can be printed with `{:?}`.
#[derive(Serialize, Deserialize, Debug)]
struct Point {
    x: i32,
    y: i32,
}

fn main() {
    let origin = Point { x: 1, y: 2 };

    // `to_string` calls the `serialize()` method implemented for `Point`,
    // passing in the `Serializer` implementation that serde_json provides.
    let json = serde_json::to_string(&origin).unwrap();
    println!("serialized = {}", json);

    // Parse the JSON text back into a `Point`.
    let restored: Point = serde_json::from_str(&json).unwrap();
    println!("deserialized = {:?}", restored);
}

serde 为 Rust 内置 29 种类型都提供了 Serialize 实现,例如 i32 类型:

impl Serialize for i32 {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where S: Serializer,
    {
        // 根据 Self 类型,调用 serialize_XX 方法。
        serializer.serialize_i32(*self)
    }
}

手动实现时,serialize() 方法的 serializer 对象是其它 serde data format 的 crate 实现的,例如 serde_json/serde_yaml crate。

核心是调用 Serializer 的各种 serialize_XX() 方法来序列化。

  1. 序列化 seq/map 类型:

           use serde::ser::{Serialize, Serializer, SerializeSeq, SerializeMap};
    
           impl<T> Serialize for Vec<T> where T: Serialize,
           {
               fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
                   where S: Serializer,
               {
                   // 序列化 顺序 类型,如 Vec/HashSet
                   let mut seq = serializer.serialize_seq(Some(self.len()))?;
                   for e in self {
                       seq.serialize_element(e)?;
                   }
                   seq.end()
               }
           }
    
           impl<K, V> Serialize for MyMap<K, V> where K: Serialize, V: Serialize,
           {
               fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
                   where S: Serializer,
               {
                   // 序列化 map 类型
                   let mut map = serializer.serialize_map(Some(self.len()))?;
                   for (k, v) in self {
                       map.serialize_entry(k, v)?;
                   }
                   map.end()
               }
           }
    
  2. 序列化 struct 类型:

           use serde::ser::{Serialize, Serializer, SerializeStruct};
    
           /// Example struct with three `u8` fields, used to show a
           /// hand-written `Serialize` impl.
           struct Color {
               r: u8,
               g: u8,
               b: u8,
           }
    
           impl Serialize for Color {
               fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
                   where S: Serializer,
               {
                   // 3 is the number of fields in the struct.
                   let mut state = serializer.serialize_struct("Color", 3)?;
                   state.serialize_field("r", &self.r)?; // 不需要指定 field 类型
                   state.serialize_field("g", &self.g)?;
                   state.serialize_field("b", &self.b)?;
                   state.end()
               }
    
  3. 序列化 enum 类型:

           /// Example enum with one variant of each shape; the comment on each
           /// variant names the `Serializer` calls used to serialize it.
           enum E {
               // Use three-step process:
               //   1. serialize_struct_variant
               //   2. serialize_field
               //   3. end
               Color { r: u8, g: u8, b: u8 },
    
               // Use three-step process:
               //   1. serialize_tuple_variant
               //   2. serialize_field
               //   3. end
               Point2D(f64, f64),
    
               // Use serialize_newtype_variant.
               Inches(u64),
    
               // Use serialize_unit_variant.
               Instance,
           }
    

5 反序列化
#

自定义类型实现 Deserialize trait 来进行反序列化,通过 #[derive(Deserialize)] 宏来自动实现:

/// Trait a type implements so that it can be deserialized.
pub trait Deserialize<'de>: Sized {
    /// Builds a `Self` by driving the given `deserializer`, returning the
    /// deserializer's error type on failure.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where D: Deserializer<'de>;
}

手动实现时,deserialize() 方法的 deserializer 参数是其它 crate 实现的,例如 serde_json/serde_yaml crate。

核心是调用 deserializer 的 deserialize_XX() 方法来反序列化,这些方法的参数是一个实现 Visitor trait 的类型对象:

  • Visitor 的关联类型为反序列化生成的对象类型;
  • Visitor 对象需要实现 expecting() 和一系列 visit_XX() 方法,后续由 deserializer 自动调用;
  • deserialize_XX() 只是给 deserializer 的类型提示,它不一定调用 Visitor 中与之同名的 visit_XX() 方法,具体调用哪个取决于 deserializer 的实现(如 serde_json 提供了 Deserializer 的实现,由它决定);
// 先为 i32 定义一个实现 Visitor trait 的对象, 该对象的关联类型 Value
// 与最终要解码生成的对象类型一致(这里是 i32);
use std::fmt;
use serde::de::{self, Visitor};

/// Visitor that converts the integer handed over by a `Deserializer` into an `i32`.
struct I32Visitor;

// The associated type `Value` is the type being deserialized; `expecting()`
// is the method that must be implemented.
impl<'de> Visitor<'de> for I32Visitor {
    type Value = i32; // the type to deserialize into

    // Text shown when deserialization fails.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("an integer between -2^31 and 2^31")
    }

    // Every i8 fits in an i32, so the conversion cannot fail.
    fn visit_i8<E>(self, value: i8) -> Result<Self::Value, E> where E: de::Error,
    {
        Ok(i32::from(value))
    }

    fn visit_i32<E>(self, value: i32) -> Result<Self::Value, E> where E: de::Error,
    {
        Ok(value)
    }

    // An i64 may not fit in an i32, so narrow with a checked conversion and
    // report a custom error instead of silently truncating.
    fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> where E: de::Error,
    {
        // Imported locally so the snippet also builds on editions whose
        // prelude does not include `TryFrom`.
        use std::convert::TryFrom;
        i32::try_from(value).map_err(|_| E::custom(format!("i32 out of range: {}", value)))
    }

    // Similar for other methods:
    //   - visit_i16
    //   - visit_u8
    //   - visit_u16
    //   - visit_u32
    //   - visit_u64
}


// 为 i32 类型定义 Deserialize 实现, Deserializer 是 serde_json/yaml 等提供的类型:
impl<'de> Deserialize<'de> for i32 {
    fn deserialize<D>(deserializer: D) -> Result<i32, D::Error>
      where D: Deserializer<'de>,
    {
        deserializer.deserialize_i32(I32Visitor) // 传入为 i32 定义的 Visitor
    }
}
rust crate - 这篇文章属于一个选集。
§ 2: 本文

相关文章

anyhow
··1874 字
Rust Rust-Crate
anyhow crate 提供了自定义 Error 类型和 Result 类型,Error 类型自带 backtrace 和 context,支持用户友好的格式化信息输出。
bytes
··2922 字
Rust Rust-Crate
bytes 提供了高效的 zero-copy 连续内存区域的共享和读写能力。
chrono
··4023 字
Rust Rust-Crate
chrono 提供了丰富的 Date/Time 类型和相关操作。
hyper
··861 字
Rust Rust-Crate
hyper 是高性能的异步 HTTP 1/2 底层库。