repo_diff.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. // Copyright 2017 The Gogs Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package git
  5. import (
  6. "bufio"
  7. "bytes"
  8. "fmt"
  9. "io"
  10. "io/ioutil"
  11. "strconv"
  12. "strings"
  13. "time"
  14. )
  15. // DiffLineType represents the type of a line in diff.
  16. type DiffLineType uint8
  17. const (
  18. DIFF_LINE_PLAIN DiffLineType = iota + 1
  19. DIFF_LINE_ADD
  20. DIFF_LINE_DEL
  21. DIFF_LINE_SECTION
  22. )
  23. // DiffFileType represents the file status in diff.
  24. type DiffFileType uint8
  25. const (
  26. DIFF_FILE_ADD DiffFileType = iota + 1
  27. DIFF_FILE_CHANGE
  28. DIFF_FILE_DEL
  29. DIFF_FILE_RENAME
  30. )
  31. // DiffLine represents a line in diff.
  32. type DiffLine struct {
  33. LeftIdx int
  34. RightIdx int
  35. Type DiffLineType
  36. Content string
  37. }
  38. func (d *DiffLine) GetType() int {
  39. return int(d.Type)
  40. }
  41. // DiffSection represents a section in diff.
  42. type DiffSection struct {
  43. Name string
  44. Lines []*DiffLine
  45. }
  46. // Line returns a specific line by type (add or del) and file line number from a section.
  47. func (diffSection *DiffSection) Line(lineType DiffLineType, idx int) *DiffLine {
  48. var (
  49. difference = 0
  50. addCount = 0
  51. delCount = 0
  52. matchDiffLine *DiffLine
  53. )
  54. LOOP:
  55. for _, diffLine := range diffSection.Lines {
  56. switch diffLine.Type {
  57. case DIFF_LINE_ADD:
  58. addCount++
  59. case DIFF_LINE_DEL:
  60. delCount++
  61. default:
  62. if matchDiffLine != nil {
  63. break LOOP
  64. }
  65. difference = diffLine.RightIdx - diffLine.LeftIdx
  66. addCount = 0
  67. delCount = 0
  68. }
  69. switch lineType {
  70. case DIFF_LINE_DEL:
  71. if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
  72. matchDiffLine = diffLine
  73. }
  74. case DIFF_LINE_ADD:
  75. if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
  76. matchDiffLine = diffLine
  77. }
  78. }
  79. }
  80. if addCount == delCount {
  81. return matchDiffLine
  82. }
  83. return nil
  84. }
  85. // DiffFile represents a file in diff.
  86. type DiffFile struct {
  87. Name string
  88. OldName string
  89. Index string // 40-byte SHA, Changed/New: new SHA; Deleted: old SHA
  90. Addition, Deletion int
  91. Type DiffFileType
  92. IsCreated bool
  93. IsDeleted bool
  94. IsBin bool
  95. IsRenamed bool
  96. IsSubmodule bool
  97. Sections []*DiffSection
  98. IsIncomplete bool
  99. }
  100. func (diffFile *DiffFile) GetType() int {
  101. return int(diffFile.Type)
  102. }
  103. func (diffFile *DiffFile) NumSections() int {
  104. return len(diffFile.Sections)
  105. }
  106. // Diff contains all information of a specific diff output.
  107. type Diff struct {
  108. TotalAddition, TotalDeletion int
  109. Files []*DiffFile
  110. IsIncomplete bool
  111. }
  112. func (diff *Diff) NumFiles() int {
  113. return len(diff.Files)
  114. }
  115. const _DIFF_HEAD = "diff --git "
  116. // ParsePatch takes a reader and parses everything it receives in diff format.
  117. func ParsePatch(done chan<- error, maxLines, maxLineCharacteres, maxFiles int, reader io.Reader) *Diff {
  118. var (
  119. diff = &Diff{Files: make([]*DiffFile, 0)}
  120. curFile *DiffFile
  121. curSection = &DiffSection{
  122. Lines: make([]*DiffLine, 0, 10),
  123. }
  124. leftLine, rightLine int
  125. lineCount int
  126. curFileLinesCount int
  127. )
  128. input := bufio.NewReader(reader)
  129. isEOF := false
  130. for !isEOF {
  131. // TODO: would input.ReadBytes be more memory-efficient?
  132. line, err := input.ReadString('\n')
  133. if err != nil {
  134. if err == io.EOF {
  135. isEOF = true
  136. } else {
  137. done <- fmt.Errorf("ReadString: %v", err)
  138. return nil
  139. }
  140. }
  141. if len(line) > 0 && line[len(line)-1] == '\n' {
  142. // Remove line break.
  143. line = line[:len(line)-1]
  144. }
  145. if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") || len(line) == 0 {
  146. continue
  147. }
  148. curFileLinesCount++
  149. lineCount++
  150. // Diff data too large, we only show the first about maxlines lines
  151. if curFileLinesCount >= maxLines || len(line) >= maxLineCharacteres {
  152. curFile.IsIncomplete = true
  153. }
  154. switch {
  155. case line[0] == ' ':
  156. diffLine := &DiffLine{Type: DIFF_LINE_PLAIN, Content: line, LeftIdx: leftLine, RightIdx: rightLine}
  157. leftLine++
  158. rightLine++
  159. curSection.Lines = append(curSection.Lines, diffLine)
  160. continue
  161. case line[0] == '@':
  162. curSection = &DiffSection{}
  163. curFile.Sections = append(curFile.Sections, curSection)
  164. ss := strings.Split(line, "@@")
  165. diffLine := &DiffLine{Type: DIFF_LINE_SECTION, Content: line}
  166. curSection.Lines = append(curSection.Lines, diffLine)
  167. // Parse line number.
  168. ranges := strings.Split(ss[1][1:], " ")
  169. leftLine, _ = strconv.Atoi(strings.Split(ranges[0], ",")[0][1:])
  170. if len(ranges) > 1 {
  171. rightLine, _ = strconv.Atoi(strings.Split(ranges[1], ",")[0])
  172. } else {
  173. rightLine = leftLine
  174. }
  175. continue
  176. case line[0] == '+':
  177. curFile.Addition++
  178. diff.TotalAddition++
  179. diffLine := &DiffLine{Type: DIFF_LINE_ADD, Content: line, RightIdx: rightLine}
  180. rightLine++
  181. curSection.Lines = append(curSection.Lines, diffLine)
  182. continue
  183. case line[0] == '-':
  184. curFile.Deletion++
  185. diff.TotalDeletion++
  186. diffLine := &DiffLine{Type: DIFF_LINE_DEL, Content: line, LeftIdx: leftLine}
  187. if leftLine > 0 {
  188. leftLine++
  189. }
  190. curSection.Lines = append(curSection.Lines, diffLine)
  191. case strings.HasPrefix(line, "Binary"):
  192. curFile.IsBin = true
  193. continue
  194. }
  195. // Get new file.
  196. if strings.HasPrefix(line, _DIFF_HEAD) {
  197. middle := -1
  198. // Note: In case file name is surrounded by double quotes (it happens only in git-shell).
  199. // e.g. diff --git "a/xxx" "b/xxx"
  200. hasQuote := line[len(_DIFF_HEAD)] == '"'
  201. if hasQuote {
  202. middle = strings.Index(line, ` "b/`)
  203. } else {
  204. middle = strings.Index(line, " b/")
  205. }
  206. beg := len(_DIFF_HEAD)
  207. a := line[beg+2 : middle]
  208. b := line[middle+3:]
  209. if hasQuote {
  210. a = string(UnescapeChars([]byte(a[1 : len(a)-1])))
  211. b = string(UnescapeChars([]byte(b[1 : len(b)-1])))
  212. }
  213. curFile = &DiffFile{
  214. Name: a,
  215. Type: DIFF_FILE_CHANGE,
  216. Sections: make([]*DiffSection, 0, 10),
  217. }
  218. diff.Files = append(diff.Files, curFile)
  219. if len(diff.Files) >= maxFiles {
  220. diff.IsIncomplete = true
  221. io.Copy(ioutil.Discard, reader)
  222. break
  223. }
  224. curFileLinesCount = 0
  225. // Check file diff type and submodule.
  226. CHECK_TYPE:
  227. for {
  228. line, err := input.ReadString('\n')
  229. if err != nil {
  230. if err == io.EOF {
  231. isEOF = true
  232. } else {
  233. done <- fmt.Errorf("ReadString: %v", err)
  234. return nil
  235. }
  236. }
  237. switch {
  238. case strings.HasPrefix(line, "new file"):
  239. curFile.Type = DIFF_FILE_ADD
  240. curFile.IsCreated = true
  241. curFile.IsSubmodule = strings.HasSuffix(line, " 160000\n")
  242. case strings.HasPrefix(line, "deleted"):
  243. curFile.Type = DIFF_FILE_DEL
  244. curFile.IsDeleted = true
  245. curFile.IsSubmodule = strings.HasSuffix(line, " 160000\n")
  246. case strings.HasPrefix(line, "index"):
  247. if curFile.IsDeleted {
  248. curFile.Index = line[6:46]
  249. } else if len(line) >= 88 {
  250. curFile.Index = line[49:88]
  251. } else {
  252. curFile.Index = curFile.Name
  253. }
  254. break CHECK_TYPE
  255. case strings.HasPrefix(line, "similarity index 100%"):
  256. curFile.Type = DIFF_FILE_RENAME
  257. curFile.IsRenamed = true
  258. curFile.OldName = curFile.Name
  259. curFile.Name = b
  260. curFile.Index = b
  261. break CHECK_TYPE
  262. case strings.HasPrefix(line, "old mode"):
  263. break CHECK_TYPE
  264. }
  265. }
  266. }
  267. }
  268. done <- nil
  269. return diff
  270. }
  271. // GetDiffRange returns a parsed diff object between given commits.
  272. func GetDiffRange(repoPath, beforeCommitID, afterCommitID string, maxLines, maxLineCharacteres, maxFiles int) (*Diff, error) {
  273. repo, err := OpenRepository(repoPath)
  274. if err != nil {
  275. return nil, err
  276. }
  277. commit, err := repo.GetCommit(afterCommitID)
  278. if err != nil {
  279. return nil, err
  280. }
  281. cmd := NewCommand()
  282. if len(beforeCommitID) == 0 {
  283. // First commit of repository
  284. if commit.ParentCount() == 0 {
  285. cmd.AddArguments("show", "--full-index", afterCommitID)
  286. } else {
  287. c, _ := commit.Parent(0)
  288. cmd.AddArguments("diff", "--full-index", "-M", c.ID.String(), afterCommitID)
  289. }
  290. } else {
  291. cmd.AddArguments("diff", "--full-index", "-M", beforeCommitID, afterCommitID)
  292. }
  293. stdout, w := io.Pipe()
  294. done := make(chan error)
  295. var diff *Diff
  296. go func() {
  297. diff = ParsePatch(done, maxLines, maxLineCharacteres, maxFiles, stdout)
  298. }()
  299. stderr := new(bytes.Buffer)
  300. err = cmd.RunInDirTimeoutPipeline(2*time.Minute, repoPath, w, stderr)
  301. w.Close() // Close writer to exit parsing goroutine
  302. if err != nil {
  303. return nil, concatenateError(err, stderr.String())
  304. }
  305. return diff, <-done
  306. }
  307. // RawDiffType represents the type of raw diff format.
  308. type RawDiffType string
  309. const (
  310. RAW_DIFF_NORMAL RawDiffType = "diff"
  311. RAW_DIFF_PATCH RawDiffType = "patch"
  312. )
  313. // GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
  314. func GetRawDiff(repoPath, commitID string, diffType RawDiffType, writer io.Writer) error {
  315. repo, err := OpenRepository(repoPath)
  316. if err != nil {
  317. return fmt.Errorf("OpenRepository: %v", err)
  318. }
  319. commit, err := repo.GetCommit(commitID)
  320. if err != nil {
  321. return err
  322. }
  323. cmd := NewCommand()
  324. switch diffType {
  325. case RAW_DIFF_NORMAL:
  326. if commit.ParentCount() == 0 {
  327. cmd.AddArguments("show", commitID)
  328. } else {
  329. c, _ := commit.Parent(0)
  330. cmd.AddArguments("diff", "-M", c.ID.String(), commitID)
  331. }
  332. case RAW_DIFF_PATCH:
  333. if commit.ParentCount() == 0 {
  334. cmd.AddArguments("format-patch", "--no-signature", "--stdout", "--root", commitID)
  335. } else {
  336. c, _ := commit.Parent(0)
  337. query := fmt.Sprintf("%s...%s", commitID, c.ID.String())
  338. cmd.AddArguments("format-patch", "--no-signature", "--stdout", query)
  339. }
  340. default:
  341. return fmt.Errorf("invalid diffType: %s", diffType)
  342. }
  343. stderr := new(bytes.Buffer)
  344. if err = cmd.RunInDirPipeline(repoPath, writer, stderr); err != nil {
  345. return concatenateError(err, stderr.String())
  346. }
  347. return nil
  348. }
  349. // GetDiffCommit returns a parsed diff object of given commit.
  350. func GetDiffCommit(repoPath, commitID string, maxLines, maxLineCharacteres, maxFiles int) (*Diff, error) {
  351. return GetDiffRange(repoPath, "", commitID, maxLines, maxLineCharacteres, maxFiles)
  352. }