device에 read, write system call 이외의 다른 명령을 전달하고 싶을 때
ioctl을 이용한다고 요약하면 좋을 것 같다.
예를 들어 설명을 하자면, 컴퓨터와 연결된 장치 모뎀이 있습니다. 이 모뎀에 읽고, 쓰기를 할 수 있습니다.
단순히 읽기, 쓰기뿐만 아니라 다양한 명령을 내리고 싶을 때 사용하는 프로토콜이 ioctl입니다.
또는 ioctl(short for input output control)함수라고도 하죠
ioctl 전반에 대해 더 잘 알고 싶으시면 tldp를 참조하시기 바랍니다.
Also, if you would like to know more about ioctl, please refer to this site.
From now on, I will write an ioctl function based on NVMe. I will first analyze similar source code and then write my own code.
First, I will summarize the similar source code.
The following is the kernel's ioctl function for NVMe.
1931 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
1932 unsigned long arg)
1933 {
1934 struct nvme_ns *ns = bdev->bd_disk->private_data;
1935
1936 switch (cmd) {
1937 case NVME_IOCTL_ID:
1938 force_successful_syscall_return();
1939 return ns->ns_id;
1940 case NVME_IOCTL_ADMIN_CMD:
1941 return nvme_user_cmd(ns->dev, NULL, (void __user *)arg);
1942 case NVME_IOCTL_IO_CMD:
1943 return nvme_user_cmd(ns->dev, ns, (void __user *)arg);
1944 case NVME_IOCTL_SUBMIT_IO:
1945 return nvme_submit_io(ns, (void __user *)arg);
1946 case SG_GET_VERSION_NUM:
1947 return nvme_sg_get_version_num((void __user *)arg);
1948 case SG_IO:
1949 return nvme_sg_io(ns, (void __user *)arg);
1950 default:
1951 return -ENOTTY;
1952 }
1953 }
The ioctl command codes handled in the above source are defined with macros such as the following:
60 #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
61 #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
62 #define NVME_IOCTL_RESET _IO('N', 0x44)
63 #define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45)
If you look at the nvme-cli tool, you can check the status of the NVMe driver.
For example, consider the “identify” command.
The identify command is useful when you want to understand the status of an SSD.
A new part has been added to the kernel source:
Linux/include/uapi/linux/nvme_ioctl.h
35 struct nvme_passthru_cmd {
36 __u8 opcode;
37 __u8 flags;
38 __u16 rsvd1;
39 __u32 nsid;
40 __u32 cdw2;
41 __u32 cdw3;
42 __u64 metadata;
43 __u64 addr;
44 __u32 metadata_len;
45 __u32 data_len;
46 __u32 cdw10;
47 __u32 cdw11;
48 __u32 cdw12;
49 __u32 cdw13;
50 __u32 cdw14;
51 __u32 cdw15;
52 __u32 timeout_ms;
53 __u32 result;
54 };
60 #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
61 #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
62 #define NVME_IOCTL_RESET _IO('N', 0x44)
63 #define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45)
From now on, I will use the above code.
The above investigation was wrong.
791 static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
792 {
793 void __user *argp = (void __user *)arg;
794
795 switch (cmd) {
796 case NVM_INFO:
797 return nvm_ioctl_info(file, argp);
798 case NVM_GET_DEVICES:
799 return nvm_ioctl_get_devices(file, argp);
800 case NVM_DEV_CREATE:
801 return nvm_ioctl_dev_create(file, argp);
802 case NVM_DEV_REMOVE:
803 return nvm_ioctl_dev_remove(file, argp);
804 }
805 return 0;
806 }
807
808 static const struct file_operations _ctl_fops = {
809 .open = nonseekable_open,
810 .unlocked_ioctl = nvm_ctl_ioctl,
811 .owner = THIS_MODULE,
812 .llseek = noop_llseek,
813 };
814
815 static struct miscdevice _nvm_misc = {
816 .minor = MISC_DYNAMIC_MINOR,
817 .name = "lightnvm",
818 .nodename = "lightnvm/control",
819 .fops = &_ctl_fops,
820 };
821
822 MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR);
823
824 static int __init nvm_mod_init(void)
825 {
826 int ret;
827
828 ret = misc_register(&_nvm_misc);
829 if (ret)
830 pr_err("nvm: misc_register failed for control device");
831
832 return ret;
833 }
834
To use this ioctl interface, use the header file below:
Linux/include/uapi/linux/lightnvm.h
1 /*
2 * Copyright (C) 2015 CNEX Labs. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License version
6 * 2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
16 * USA.
17 */
18
19 #ifndef _UAPI_LINUX_LIGHTNVM_H
20 #define _UAPI_LINUX_LIGHTNVM_H
21
22 #ifdef __KERNEL__
23 #include <linux/kernel.h>
24 #include <linux/ioctl.h>
25 #else /* __KERNEL__ */
26 #include <stdio.h>
27 #include <sys/ioctl.h>
28 #define DISK_NAME_LEN 32
29 #endif /* __KERNEL__ */
30
31 #include <linux/types.h>
32 #include <linux/ioctl.h>
33
34 #define NVM_TTYPE_NAME_MAX 48
35 #define NVM_TTYPE_MAX 63
36
37 #define NVM_CTRL_FILE "/dev/lightnvm/control"
38
39 struct nvm_ioctl_info_tgt {
40 __u32 version[3];
41 __u32 reserved;
42 char tgtname[NVM_TTYPE_NAME_MAX];
43 };
44
45 struct nvm_ioctl_info {
46 __u32 version[3]; /* in/out - major, minor, patch */
47 __u16 tgtsize; /* number of targets */
48 __u16 reserved16; /* pad to 4K page */
49 __u32 reserved[12];
50 struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX];
51 };
52
53 enum {
54 NVM_DEVICE_ACTIVE = 1 << 0,
55 };
56
57 struct nvm_ioctl_device_info {
58 char devname[DISK_NAME_LEN];
59 char bmname[NVM_TTYPE_NAME_MAX];
60 __u32 bmversion[3];
61 __u32 flags;
62 __u32 reserved[8];
63 };
64
65 struct nvm_ioctl_get_devices {
66 __u32 nr_devices;
67 __u32 reserved[31];
68 struct nvm_ioctl_device_info info[31];
69 };
70
71 struct nvm_ioctl_create_simple {
72 __u32 lun_begin;
73 __u32 lun_end;
74 };
75
76 enum {
77 NVM_CONFIG_TYPE_SIMPLE = 0,
78 };
79
80 struct nvm_ioctl_create_conf {
81 __u32 type;
82 union {
83 struct nvm_ioctl_create_simple s;
84 };
85 };
86
87 struct nvm_ioctl_create {
88 char dev[DISK_NAME_LEN]; /* open-channel SSD device */
89 char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */
90 char tgtname[DISK_NAME_LEN]; /* dev to expose target as */
91
92 __u32 flags;
93
94 struct nvm_ioctl_create_conf conf;
95 };
96
97 struct nvm_ioctl_remove {
98 char tgtname[DISK_NAME_LEN];
99
100 __u32 flags;
101 };
102
103
104 /* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
105 enum {
106 /* top level cmds */
107 NVM_INFO_CMD = 0x20,
108 NVM_GET_DEVICES_CMD,
109
110 /* device level cmds */
111 NVM_DEV_CREATE_CMD,
112 NVM_DEV_REMOVE_CMD,
113 };
114
115 #define NVM_IOCTL 'L' /* 0x4c */
116
117 #define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \
118 struct nvm_ioctl_info)
119 #define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \
120 struct nvm_ioctl_get_devices)
121 #define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \
122 struct nvm_ioctl_create)
123 #define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
124 struct nvm_ioctl_remove)
125
126 #define NVM_VERSION_MAJOR 1
127 #define NVM_VERSION_MINOR 0
128 #define NVM_VERSION_PATCHLEVEL 0
129
130 #endif
131
I looked for a way to solve the error above. If you are familiar with the NVM Express specification, look into it.
The text below is taken from the NVM Express 1.2 specification (NVM_Express_1_2_Specification),
and I quote the relevant page.
Status Field Definition
The Status Field defines the status for the command indicated in the completion queue entry, defined in Figure 28.
A value of 0h for the Status field indicates a successful command completion, with no fatal or non-fatal error condition.
Unless otherwise noted, if a command fails to complete successfully for multiple reasons, then the particular status code returned is chosen by the vendor.
Figure 28: Completion Queue Entry: Status Field
Bit | Description |
---|---|
31 | Do Not Retry (DNR) : If set to '1', indicates that if the same command is re-submitted it is expected to fail. If cleared to '0', indicates that the same command may succeed if retried. If a command is aborted due to time limited error recovery (refer to section 5.14.1.5), this field should be cleared to '0'. If the SCT and SC fields are cleared to 0h then this field should be cleared to '0'. |
30 | More (M) : If set to '1', there is more status information for this command as part of the Error Information log that may be retrieved with the Get Log Page command. If cleared to '0', there is no additional status information for this command. Refer to section 5.10.1.1. |
29:28 | Reserved |
27:25 | Status Code Type (SCT) : Indicates the status code type of the completion queue entry. This indicates the type of status the controller is returning. |
24:17 | Status Code (SC) : Indicates a status code identifying any error or status information for the command indicated. |
Status Code Type (SCT)
Completion queue entries indicate a status code type for the type of completion being reported.
Figure 29 specifies the status code type values and descriptions.
Figure 29 : Status Code - Status Code Type Values
Value | Description |
---|---|
0h (h denotes hexadecimal) | Generic Command Status : Indicates that the command specified by the Command and Submission Queue identifiers in the completion queue entry has completed. |
1h | Command Specific Status : Indicates a status value that is specific to a particular command opcode. These values may indicate additional processing is required. Status values such as invalid firmware image or exceeded maximum number of queues are reported with this type. |
2h | Media and Data Integrity Errors : Any media specific errors that occur in the NVM or data integrity type errors shall be of this type |
3h - 6h | Reserved |
7h | Vendor Specific |
Status Code(SC)
The Status Code (SC) field in the completion queue entry indicates more detailed status information about the completion being reported.
Each Status Code set of values is split into three ranges:
- 00h - 7Fh : Applicable to Admin Command Set, or across multiple command sets.
- 80h - BFh : I/O Command Set Specific status codes.
- C0h - FFh : Vendor Specific status codes.
If there are multiple status codes that apply to a particular command and failure, the controller shall report the status code with the lowest numerical value.
이제 ioctl 작업은 거의 마무리가 되어 간다. 현재 환경은 kernel 4.4.6, CentOS 7.1이며, open channel SSD는 오픈소스 하드웨어이다.
이는 PCI 기반으로 SSD를 이용할 수 있도록 하는 오픈 하드웨어인데, 이를 위해 최신 커널에 lightnvm이라는 모듈이 올라갔다.
이는 소프트웨어 단에서 host가 직접 SSD를 제어할 수 있도록 하기 위한 오픈소스 프로젝트로, 이 lightnvm은 한 인도 대학교에서 개발한 것이다.
그런데 표준 PCI 기준인 NVM Express를 그대로 사용하지 않고 vendor만의 독자적인 방식을 사용하는 형태로 커널 소스에 들어가 있다.
따라서 내가 PCI 기반 하드웨어 보드를 직접 만들어 SSD를 구현하기보다는 다른 방식으로 사용해야 하겠다.
When I debugged kernel 4.4.6 to look into lightnvm, I obtained the following call stack in the kernel source.
The functions below are kernel functions:
nvme_nvm_identity (lightnvm.c)
nvme_submit_sync_cmd
__nvme_submit_sync_cmd
nvme_queue_rq
nvme_submit_priv
PCI 기반으로 SSD를 테스트할 때 nvme0이라는 캐릭터 디바이스 파일과 nvme0n1이라는 블록 디바이스 파일이 /dev 아래에 생성되는데,
디바이스 드라이버를 어떻게 만드느냐에 따라 다르지만, 현재 리눅스 코드에서 PCI 기반 SSD 통신을 위한 nvme 드라이버는 캐릭터 디바이스 파일을 생성한다. 이 파일은 거의 사용되지 않고, 단지 디버깅이나 SSD의 간단한 정보(온도 등, NVMe admin 명령)를 가져오는 용도로 사용된다.
하지만 저 캐릭터 디바이스를 이용해서 하는 일은 블록 디바이스로도 다 할 수 있다. 실질적인 read/write 작업은 저 블록 디바이스
파일을 통해 SSD에 수행된다. nvme는 먼저 캐릭터 디바이스를 생성하므로, 이를 통해 PCI 기반의 SSD가 제대로 인식되었는지 확인할 수 있다.
NVMe는 먼저 캐릭터 디바이스를 생성하고 그다음 단계로 블록 디바이스(nvme0n1)를 생성하기 때문에, 저 캐릭터 디바이스 파일이 /dev 아래에 생성되었다면 PCI 기반의 SSD가 제대로 인식되었다고 확인할 수 있다. 하지만 블록 디바이스로도 제대로 인식되었는지 확인하려면 nvme0n1도 뜨는지 확인해야 한다.